From afd38f0c617d6f89b2b4532c6c44f116617e2b6f Mon Sep 17 00:00:00 2001
From: Felix Thomasmathibalan
Date: Wed, 27 Sep 2023 17:46:17 +0100
Subject: Apply clang-format on repository

Code is formatted as per a revised clang-format configuration file
(not part of this delivery). Version 14.0.6 is used.

Exclusion list:
- files with the .cl extension
- files that are not strictly C/C++ (e.g. Android.bp, Sconscript ...)

and the following directories:
- compute_kernel_writer/validation/
- tests/
- include/
- src/core/NEON/kernels/convolution/
- src/core/NEON/kernels/arm_gemm/
- src/core/NEON/kernels/arm_conv/
- data/

There will be a follow-up for the formatting of .cl files and the files
under tests/ and compute_kernel_writer/validation/.

Signed-off-by: Felix Thomasmathibalan
Change-Id: Ib7eb1fcf4e7537b9feaefcfc15098a804a3fde0a
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10391
Benchmark: Arm Jenkins
Tested-by: Arm Jenkins
Reviewed-by: Gunes Bayir
---
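For illustration, a formatting pass like the one described above can be driven
by a small script. The following is a minimal, hypothetical sketch, not the
tool actually used for this change: it assumes clang-format 14.0.6 on PATH, a
.clang-format file at the repository root (the revised configuration is not
part of this delivery), and that .h, .hpp, .c, .cpp and .inl cover the
"strictly C/C++" extensions; the exclusion list is copied from the commit
message above.

#!/usr/bin/env python3
"""Hypothetical driver for the repository-wide clang-format pass."""
import subprocess
from pathlib import Path

# Extensions treated as "strictly C/C++" (an assumption; .cl is excluded).
CXX_EXTENSIONS = {".h", ".hpp", ".c", ".cpp", ".inl"}

# Directories excluded from formatting, per the commit message.
EXCLUDED_DIRS = (
    "compute_kernel_writer/validation/",
    "tests/",
    "include/",
    "src/core/NEON/kernels/convolution/",
    "src/core/NEON/kernels/arm_gemm/",
    "src/core/NEON/kernels/arm_conv/",
    "data/",
)

def format_repository(repo_root: Path) -> None:
    for path in sorted(repo_root.rglob("*")):
        if not path.is_file() or path.suffix not in CXX_EXTENSIONS:
            continue  # skip directories, .cl files and non-C/C++ files
        rel = path.relative_to(repo_root).as_posix()
        if any(rel.startswith(d) for d in EXCLUDED_DIRS):
            continue  # skip the excluded directories
        # -i rewrites the file in place; --style=file picks up .clang-format.
        subprocess.run(["clang-format", "-i", "--style=file", str(path)], check=True)

if __name__ == "__main__":
    format_repository(Path("."))

Running such a script from the repository root and committing the result
produces a formatting-only change of the shape shown below.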
 arm_compute/core/CL/CLCompileContext.h             | 17 +-
 arm_compute/core/CL/CLDevice.h                     | 16 +-
 arm_compute/core/CL/CLHelpers.h                    | 8 +-
 arm_compute/core/CL/CLTypes.h                      | 6 +-
 arm_compute/core/CL/ICLArray.h                     | 5 +-
 arm_compute/core/CL/ICLTensor.h                    | 7 +-
 arm_compute/core/CL/OpenCL.h                       | 9 +-
 arm_compute/core/CPP/CPPTypes.h                    | 12 +-
 arm_compute/core/CPP/ICPPKernel.h                  | 4 +-
 .../CPPBoxWithNonMaximaSuppressionLimitKernel.h    | 18 +-
 .../CPP/kernels/CPPNonMaximumSuppressionKernel.h   | 18 +-
 arm_compute/core/CPP/kernels/CPPTopKVKernel.h      | 3 +-
 arm_compute/core/Coordinates.h                     | 5 +-
 arm_compute/core/CoreTypes.h                       | 26 +-
 arm_compute/core/Dimensions.h                      | 15 +-
 arm_compute/core/Error.h                           | 134 +++---
 arm_compute/core/Helpers.h                         | 35 +-
 arm_compute/core/Helpers.inl                       | 57 +--
 arm_compute/core/IAccessWindow.h                   | 15 +-
 arm_compute/core/IArray.h                          | 11 +-
 arm_compute/core/IKernel.h                         | 2 +-
 arm_compute/core/ITensor.h                         | 4 +-
 arm_compute/core/ITensorInfo.h                     | 13 +-
 arm_compute/core/ITensorPack.h                     | 12 +-
 arm_compute/core/KernelDescriptors.h               | 178 ++++----
 arm_compute/core/Log.h                             | 16 +-
 arm_compute/core/PixelValue.h                      | 83 ++--
 arm_compute/core/QuantizationInfo.h                | 52 +--
 arm_compute/core/Rounding.h                        | 2 +-
 arm_compute/core/Size2D.h                          | 6 +-
 arm_compute/core/Size3D.h                          | 3 +-
 arm_compute/core/Steps.h                           | 5 +-
 arm_compute/core/Strides.h                         | 3 +-
 arm_compute/core/SubTensorInfo.h                   | 9 +-
 arm_compute/core/TensorInfo.h                      | 69 +--
 arm_compute/core/TensorShape.h                     | 31 +-
 arm_compute/core/Types.h                           | 294 ++++++++-----
 arm_compute/core/Utils.h                           | 63 ++-
 arm_compute/core/Validate.h                        | 480 ++++++++++++---------
 arm_compute/core/Version.h                         | 2 +-
 arm_compute/core/Window.h                          | 16 +-
 arm_compute/core/Window.inl                        | 52 +--
 arm_compute/core/WindowIterator.h                  | 22 +-
 arm_compute/core/experimental/Types.h              | 20 +-
 arm_compute/core/utils/ActivationFunctionUtils.h   | 2 +-
 arm_compute/core/utils/DataLayoutUtils.h           | 2 +-
 arm_compute/core/utils/DataTypeUtils.h             | 46 +-
 arm_compute/core/utils/FormatUtils.h               | 30 +-
 arm_compute/core/utils/InterpolationPolicyUtils.h  | 2 +-
 arm_compute/core/utils/StringUtils.h               | 2 +-
 arm_compute/core/utils/helpers/AdjustVecSize.h     | 6 +-
 arm_compute/core/utils/helpers/tensor_transform.h  | 33 +-
 arm_compute/core/utils/logging/FilePrinter.h       | 3 +-
 arm_compute/core/utils/logging/Helpers.h           | 3 +-
 arm_compute/core/utils/logging/IPrinter.h          | 3 +-
 arm_compute/core/utils/logging/LogMsgDecorators.h  | 5 +-
 arm_compute/core/utils/logging/Logger.h            | 6 +-
 arm_compute/core/utils/logging/LoggerRegistry.h    | 13 +-
 arm_compute/core/utils/logging/Macros.h            | 16 +-
 arm_compute/core/utils/logging/Types.h             | 6 +-
 arm_compute/core/utils/math/Math.h                 | 2 +-
 arm_compute/core/utils/math/SafeOps.h              | 23 +-
 arm_compute/core/utils/misc/InfoHelpers.h          | 54 +--
 arm_compute/core/utils/misc/Macros.h               | 9 +-
 arm_compute/core/utils/misc/ShapeCalculator.h      | 373 +++++++++-------
 arm_compute/core/utils/misc/Traits.h               | 1 +
 arm_compute/core/utils/misc/Utility.h              | 35 +-
 arm_compute/core/utils/quantization/AsymmHelpers.h | 24 +-
 68 files changed, 1460 insertions(+), 1097 deletions(-)

(limited to 'arm_compute/core')

diff --git a/arm_compute/core/CL/CLCompileContext.h b/arm_compute/core/CL/CLCompileContext.h
index 60e0f95f83..dcd3b45670 100644
--- a/arm_compute/core/CL/CLCompileContext.h
+++ b/arm_compute/core/CL/CLCompileContext.h
@@ -250,8 +250,12 @@ public:
      *
      * @return The created kernel.
      */
-    Kernel create_kernel(const std::string &kernel_name, const std::string &program_name, const std::string &program_source,
-                         const std::string &kernel_path, const StringSet &build_options_set, bool is_binary) const;
+    Kernel create_kernel(const std::string &kernel_name,
+                         const std::string &program_name,
+                         const std::string &program_source,
+                         const std::string &kernel_path,
+                         const StringSet   &build_options_set,
+                         bool               is_binary) const;
 
     /** Clear the library's cache of binary programs
      */
@@ -323,7 +327,8 @@ private:
      * @param[in] program_source Source of the program.
      * @param[in] is_binary      Flag to indicate if the program source is binary.
      */
-    const Program &load_program(const std::string &program_name, const std::string &program_source, bool is_binary) const;
+    const Program &
+    load_program(const std::string &program_name, const std::string &program_source, bool is_binary) const;
 
     /** Generates the build options given a string of user defined ones
      *
@@ -343,11 +348,11 @@ private:
      */
    std::string stringify_set(const StringSet &s, const std::string &kernel_path) const;
 
-    cl::Context _context; /**< Underlying CL context. */
-    CLDevice    _device;  /**< Underlying CL device. */
+    cl::Context _context; /**< Underlying CL context. */
+    CLDevice    _device;  /**< Underlying CL device. */
     mutable std::map<std::string, const Program> _programs_map;       /**< Map with all already loaded program data. */
     mutable std::map<std::string, cl::Program>   _built_programs_map; /**< Map with all already built program data. */
-    bool _is_wbsm_supported; /**< Support of worksize batch size modifier support boolean*/
+    bool        _is_wbsm_supported; /**< Support of worksize batch size modifier support boolean*/
 };
 } // namespace arm_compute
 #endif /* ARM_COMPUTE_CLCOMPILECONTEXT_H */
diff --git a/arm_compute/core/CL/CLDevice.h b/arm_compute/core/CL/CLDevice.h
index 5e0f86e6d9..ded6bb8493 100644
--- a/arm_compute/core/CL/CLDevice.h
+++ b/arm_compute/core/CL/CLDevice.h
@@ -44,8 +44,7 @@ class CLDevice : public IDevice
 {
 public:
     /** Default Constructor */
-    CLDevice()
-        : _device(cl::Device()), _options()
+    CLDevice() : _device(cl::Device()), _options()
     {
     }
 
@@ -53,8 +52,7 @@ public:
      *
      * @param[in] cl_device OpenCL device
      */
-    CLDevice(const cl::Device &cl_device)
-        : _device(), _options()
+    CLDevice(const cl::Device &cl_device) : _device(), _options()
     {
         _device = cl_device;
 
@@ -66,13 +64,13 @@ public:
         std::string extensions = _device.getInfo<CL_DEVICE_EXTENSIONS>();
 
         std::istringstream iss(extensions);
-        for(std::string s; iss >> s;)
+        for (std::string s; iss >> s;)
         {
             _options.extensions.insert(s);
         }
 
         // SW workaround for G76
-        if(_options.gpu_target == GPUTarget::G76)
+        if (_options.gpu_target == GPUTarget::G76)
         {
             _options.extensions.insert("cl_arm_integer_dot_product_int8");
         }
@@ -153,15 +151,15 @@ public:
      */
     std::tuple<bool, std::string> is_non_uniform_workgroup_supported() const
     {
-        if(version() == CLVersion::CL30 && get_cl_non_uniform_work_group_supported(_device))
+        if (version() == CLVersion::CL30 && get_cl_non_uniform_work_group_supported(_device))
         {
             return {true, " -cl-std=CL3.0 "};
         }
-        else if(version() == CLVersion::CL20)
+        else if (version() == CLVersion::CL20)
         {
             return {true, " -cl-std=CL2.0 "};
         }
-        else if(supported("cl_arm_non_uniform_work_group_size"))
+        else if (supported("cl_arm_non_uniform_work_group_size"))
         {
             return {true, " -cl-arm-non-uniform-work-group-size "};
         }
diff --git a/arm_compute/core/CL/CLHelpers.h b/arm_compute/core/CL/CLHelpers.h
index 20d93df5a1..1a639e47f9 100644
--- a/arm_compute/core/CL/CLHelpers.h
+++ b/arm_compute/core/CL/CLHelpers.h
@@ -179,7 +179,9 @@ bool dot8_acc_supported(const cl::Device &device);
  *
  * @return True if the configuration is supported
  */
-bool cl_winograd_convolution_layer_supported(const Size2D &output_tile, const Size2D &kernel_size, DataLayout data_layout);
+bool cl_winograd_convolution_layer_supported(const Size2D &output_tile,
+                                             const Size2D &kernel_size,
+                                             DataLayout    data_layout);
 
 /** Helper function to get the preferred native vector width size for built-in scalar types that can be put into vectors
  *
@@ -215,7 +217,9 @@ bool image2d_from_buffer_supported(const cl::Device &device);
  *
  * @return An opencl kernel
  */
-cl::Kernel create_kernel(const CLCompileContext &ctx, const std::string &kernel_name, const std::set<std::string> &build_opts = std::set<std::string>());
+cl::Kernel create_kernel(const CLCompileContext      &ctx,
+                         const std::string           &kernel_name,
+                         const std::set<std::string> &build_opts = std::set<std::string>());
 
 /** Creates a suitable LWS hint object for parallel implementations. Sets the number of WG based on the input size.
  * If input width is smaller than 128 we can use fewer threads than 8.
diff --git a/arm_compute/core/CL/CLTypes.h b/arm_compute/core/CL/CLTypes.h
index 00b7cda2e1..0f088e2b10 100644
--- a/arm_compute/core/CL/CLTypes.h
+++ b/arm_compute/core/CL/CLTypes.h
@@ -63,15 +63,13 @@ struct CLDeviceOptions
 struct CLQuantization
 {
     /** Default Constructor */
-    CLQuantization()
-        : scale(nullptr), offset(nullptr) {};
+    CLQuantization() : scale(nullptr), offset(nullptr){};
 
     /** Constructor
      *
      * @param[in] scale  OpenCL scale array
      * @param[in] offset OpenCL offset array
      */
-    CLQuantization(const ICLFloatArray *scale, const ICLInt32Array *offset)
-        : scale(scale), offset(offset) {};
+    CLQuantization(const ICLFloatArray *scale, const ICLInt32Array *offset) : scale(scale), offset(offset){};
 
     const ICLFloatArray *scale;  /**< Quantization scale array */
     const ICLInt32Array *offset; /**< Quantization offset array */
diff --git a/arm_compute/core/CL/ICLArray.h b/arm_compute/core/CL/ICLArray.h
index 57f842b6f9..a2b2baa5b3 100644
--- a/arm_compute/core/CL/ICLArray.h
+++ b/arm_compute/core/CL/ICLArray.h
@@ -40,8 +40,7 @@ public:
      * @param[in] max_num_values Maximum size of the array.
      *
      */
-    explicit ICLArray(size_t max_num_values)
-        : IArray<T>(max_num_values), _mapping(nullptr)
+    explicit ICLArray(size_t max_num_values) : IArray<T>(max_num_values), _mapping(nullptr)
     {
     }
 
@@ -125,5 +124,5 @@ using ICLInt16Array = ICLArray<cl_short>;
 using ICLInt32Array = ICLArray<cl_int>;
 /** Interface for OpenCL Array of floats. */
 using ICLFloatArray = ICLArray<cl_float>;
-}
+} // namespace arm_compute
 #endif /*ARM_COMPUTE_ICLARRAY_H*/
diff --git a/arm_compute/core/CL/ICLTensor.h b/arm_compute/core/CL/ICLTensor.h
index 78d3757e59..8de5423762 100644
--- a/arm_compute/core/CL/ICLTensor.h
+++ b/arm_compute/core/CL/ICLTensor.h
@@ -24,9 +24,8 @@
 #ifndef ARM_COMPUTE_ICLTENSOR_H
 #define ARM_COMPUTE_ICLTENSOR_H
 
-#include "arm_compute/core/ITensor.h"
-
 #include "arm_compute/core/CL/CLTypes.h"
+#include "arm_compute/core/ITensor.h"
 
 #include <cstdint>
 
@@ -34,7 +33,7 @@ namespace cl
 {
 class Buffer;
 class CommandQueue;
-}
+} // namespace cl
 
 namespace arm_compute
 {
@@ -113,5 +112,5 @@ private:
 };
 
 using ICLImage = ICLTensor;
-}
+} // namespace arm_compute
 #endif /*ARM_COMPUTE_ICLTENSOR_H */
diff --git a/arm_compute/core/CL/OpenCL.h b/arm_compute/core/CL/OpenCL.h
index f86d55a9ea..a5c4e39df2 100644
--- a/arm_compute/core/CL/OpenCL.h
+++ b/arm_compute/core/CL/OpenCL.h
@@ -31,8 +31,8 @@
 #ifndef ARM_COMPUTE_NO_EXCEPTIONS
 #define CL_HPP_ENABLE_EXCEPTIONS
 #endif // ARM_COMPUTE_NO_EXCEPTIONS
-#define CL_TARGET_OPENCL_VERSION 300
-#define CL_HPP_TARGET_OPENCL_VERSION 110
+#define CL_TARGET_OPENCL_VERSION      300
+#define CL_HPP_TARGET_OPENCL_VERSION  110
 #define CL_HPP_MINIMUM_OPENCL_VERSION 110
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Weffc++"
 #pragma GCC diagnostic ignored "-Wunused-parameter"
 #if defined(__GNUG__) && __GNUG__ >= 8
 #pragma GCC diagnostic ignored "-Wcatch-value"
-#endif // defined(__GNUG__) && __GNUG__ >= 8
+#endif                      // defined(__GNUG__) && __GNUG__ >= 8
 #include <CL/opencl.hpp> // include new hpp header instead of cl2.hpp
 #pragma GCC diagnostic pop
 
@@ -88,8 +88,7 @@ public:
      */
     bool load_default();
 
-#define DECLARE_FUNCTION_PTR(func_name) \
-    std::function<decltype(func_name)> func_name##_ptr = nullptr
+#define DECLARE_FUNCTION_PTR(func_name) std::function<decltype(func_name)> func_name##_ptr = nullptr
 
     DECLARE_FUNCTION_PTR(clCreateContext);
     DECLARE_FUNCTION_PTR(clCreateContextFromType);
diff --git a/arm_compute/core/CPP/CPPTypes.h b/arm_compute/core/CPP/CPPTypes.h
index e4cbd9ff9b..b080a86938 100644
--- a/arm_compute/core/CPP/CPPTypes.h
+++ b/arm_compute/core/CPP/CPPTypes.h
@@ -78,10 +78,10 @@ public:
     /* Delete move and copy constructors and assignment operators */
-    CPUInfo(CPUInfo const &) = delete; // Copy construct
-    CPUInfo(CPUInfo &&) = delete; // Move construct
+    CPUInfo(CPUInfo const &)            = delete; // Copy construct
+    CPUInfo(CPUInfo &&)                 = delete; // Move construct
     CPUInfo &operator=(CPUInfo const &) = delete; // Copy assign
-    CPUInfo &operator=(CPUInfo &&) = delete; // Move assign
+    CPUInfo &operator=(CPUInfo &&)      = delete; // Move assign
 
     /** Checks if the cpu model supports fp16.
      *
@@ -179,9 +179,9 @@ private:
 /** Information about executing thread and CPU. */
 struct ThreadInfo
 {
-    int thread_id{ 0 };
-    int num_threads{ 1 };
-    const CPUInfo *cpu_info{ nullptr };
+    int            thread_id{0};
+    int            num_threads{1};
+    const CPUInfo *cpu_info{nullptr};
 };
 } // namespace arm_compute
 #endif /* ARM_COMPUTE_CPP_TYPES_H */
diff --git a/arm_compute/core/CPP/ICPPKernel.h b/arm_compute/core/CPP/ICPPKernel.h
index 00a10555e3..03967a536d 100644
--- a/arm_compute/core/CPP/ICPPKernel.h
+++ b/arm_compute/core/CPP/ICPPKernel.h
@@ -25,9 +25,9 @@
 #define ARM_COMPUTE_ICPPKERNEL_H
 
 #include "arm_compute/core/CPP/CPPTypes.h"
+#include "arm_compute/core/experimental/Types.h"
 #include "arm_compute/core/IKernel.h"
 #include "arm_compute/core/Types.h"
-#include "arm_compute/core/experimental/Types.h"
 
 namespace arm_compute
 {
@@ -38,7 +38,7 @@ class ITensor;
 class ICPPKernel : public IKernel
 {
 public:
-    static constexpr size_t default_mws = 1; /* Default minimum workload size value - no impact */
+    static constexpr size_t default_mws = 1; /* Default minimum workload size value - no impact */
 
     /** Default destructor */
     virtual ~ICPPKernel() = default;
diff --git a/arm_compute/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.h b/arm_compute/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.h
index 068b37d80c..dd91595ea6 100644
--- a/arm_compute/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.h
+++ b/arm_compute/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.h
@@ -63,8 +63,16 @@ public:
      * @param[out] keeps_size       (Optional) Number of filtered indices per class tensor of size [num_classes]. Data types supported: U32
      * @param[in]  info             (Optional) BoxNMSLimitInfo information.
      */
-    void configure(const ITensor *scores_in, const ITensor *boxes_in, const ITensor *batch_splits_in, ITensor *scores_out, ITensor *boxes_out, ITensor *classes,
-                   ITensor *batch_splits_out = nullptr, ITensor *keeps = nullptr, ITensor *keeps_size = nullptr, const BoxNMSLimitInfo info = BoxNMSLimitInfo());
+    void configure(const ITensor        *scores_in,
+                   const ITensor        *boxes_in,
+                   const ITensor        *batch_splits_in,
+                   ITensor              *scores_out,
+                   ITensor              *boxes_out,
+                   ITensor              *classes,
+                   ITensor              *batch_splits_out = nullptr,
+                   ITensor              *keeps            = nullptr,
+                   ITensor              *keeps_size       = nullptr,
+                   const BoxNMSLimitInfo info             = BoxNMSLimitInfo());
 
     // Inherited methods overridden:
     void run(const Window &window, const ThreadInfo &info) override;
@@ -74,9 +82,9 @@ public:
     void run_nmslimit();
 
 private:
-    const ITensor *_scores_in;
-    const ITensor *_boxes_in;
-    const ITensor *_batch_splits_in;
+    const ITensor *_scores_in;
+    const ITensor *_boxes_in;
+    const ITensor *_batch_splits_in;
     ITensor       *_scores_out;
     ITensor       *_boxes_out;
     ITensor       *_classes;
diff --git a/arm_compute/core/CPP/kernels/CPPNonMaximumSuppressionKernel.h b/arm_compute/core/CPP/kernels/CPPNonMaximumSuppressionKernel.h
index e32b5d8f7b..d1f7f8670f 100644
--- a/arm_compute/core/CPP/kernels/CPPNonMaximumSuppressionKernel.h
+++ b/arm_compute/core/CPP/kernels/CPPNonMaximumSuppressionKernel.h
@@ -24,9 +24,8 @@
 #ifndef ARM_COMPUTE_CPP_NONMAXIMUMSUPPRESSIONKERNEL_LAYER_H
 #define ARM_COMPUTE_CPP_NONMAXIMUMSUPPRESSIONKERNEL_LAYER_H
 
-#include "arm_compute/runtime/CPP/ICPPSimpleFunction.h"
-
 #include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CPP/ICPPSimpleFunction.h"
 
 namespace arm_compute
 {
@@ -65,7 +64,12 @@ public:
      * @param[in] iou_threshold   The threshold used in non maximum suppression.
      *
      */
-    void configure(const ITensor *input_bboxes, const ITensor *input_scores, ITensor *output_indices, unsigned int max_output_size, const float score_threshold, const float iou_threshold);
+    void configure(const ITensor *input_bboxes,
+                   const ITensor *input_scores,
+                   ITensor       *output_indices,
+                   unsigned int   max_output_size,
+                   const float    score_threshold,
+                   const float    iou_threshold);
 
     /** Static function to check if given arguments will lead to a valid configuration of @ref CPPNonMaximumSuppressionKernel
      *
@@ -77,8 +81,12 @@ public:
      * @param[in] iou_threshold   The threshold used in non maximum suppression.
      *
      */
-    static Status validate(const ITensorInfo *input_bboxes, const ITensorInfo *input_scores, const ITensorInfo *output_indices, unsigned int max_output_size,
-                           const float score_threshold, const float iou_threshold);
+    static Status validate(const ITensorInfo *input_bboxes,
+                           const ITensorInfo *input_scores,
+                           const ITensorInfo *output_indices,
+                           unsigned int       max_output_size,
+                           const float        score_threshold,
+                           const float        iou_threshold);
 
     // Inherited methods overridden:
     void run(const Window &window, const ThreadInfo &info) override;
diff --git a/arm_compute/core/CPP/kernels/CPPTopKVKernel.h b/arm_compute/core/CPP/kernels/CPPTopKVKernel.h
index 1245dbc14c..7326a10e2f 100644
--- a/arm_compute/core/CPP/kernels/CPPTopKVKernel.h
+++ b/arm_compute/core/CPP/kernels/CPPTopKVKernel.h
@@ -69,7 +69,8 @@ public:
      *
      * @return a status
      */
-    static Status validate(const ITensorInfo *predictions, const ITensorInfo *targets, ITensorInfo *output, const unsigned int k);
+    static Status
+    validate(const ITensorInfo *predictions, const ITensorInfo *targets, ITensorInfo *output, const unsigned int k);
 
     // Inherited methods overridden:
     void run(const Window &window, const ThreadInfo &info) override;
diff --git a/arm_compute/core/Coordinates.h b/arm_compute/core/Coordinates.h
index f6e1f4d282..d1240bb10a 100644
--- a/arm_compute/core/Coordinates.h
+++ b/arm_compute/core/Coordinates.h
@@ -42,8 +42,7 @@ public:
      * @param[in] coords Values to initialize the dimensions.
      */
     template <typename... Ts>
-    constexpr Coordinates(Ts... coords)
-        : Dimensions{ coords... }
+    constexpr Coordinates(Ts... coords) : Dimensions{coords...}
     {
     }
     /** Allow instances of this class to be copy constructed */
@@ -57,5 +56,5 @@ public:
     /** Default destructor */
     ~Coordinates() = default;
 };
-}
+} // namespace arm_compute
 #endif /*ARM_COMPUTE_COORDINATES_H*/
diff --git a/arm_compute/core/CoreTypes.h b/arm_compute/core/CoreTypes.h
index 4a48a36651..1a9db1937c 100644
--- a/arm_compute/core/CoreTypes.h
+++ b/arm_compute/core/CoreTypes.h
@@ -25,6 +25,7 @@
 #define ACL_ARM_COMPUTE_CORE_CORETYPES
 
 #include "arm_compute/core/Strides.h"
+
 #include "support/Half.h"
 
 /** CoreTypes.h groups together essential small types that are used across functions */
@@ -146,9 +147,11 @@ public:
      * @param[in] pad_y    (Optional) Padding, in elements, across y. Defaults to 0.
      * @param[in] round    (Optional) Dimensions rounding. Defaults to @ref DimensionRoundingType::FLOOR.
      */
-    PadStrideInfo(unsigned int stride_x = 1, unsigned int stride_y = 1,
-                  unsigned int pad_x = 0, unsigned int pad_y = 0,
-                  DimensionRoundingType round = DimensionRoundingType::FLOOR)
+    PadStrideInfo(unsigned int          stride_x = 1,
+                  unsigned int          stride_y = 1,
+                  unsigned int          pad_x    = 0,
+                  unsigned int          pad_y    = 0,
+                  DimensionRoundingType round    = DimensionRoundingType::FLOOR)
         : _stride(std::make_pair(stride_x, stride_y)),
           _pad_left(pad_x),
           _pad_top(pad_y),
@@ -167,9 +170,12 @@ public:
      * @param[in] pad_bottom Padding across y on the bottom, in elements.
      * @param[in] round      Dimensions rounding.
      */
-    PadStrideInfo(unsigned int stride_x, unsigned int stride_y,
-                  unsigned int pad_left, unsigned int pad_right,
-                  unsigned int pad_top, unsigned int pad_bottom,
+    PadStrideInfo(unsigned int          stride_x,
+                  unsigned int          stride_y,
+                  unsigned int          pad_left,
+                  unsigned int          pad_right,
+                  unsigned int          pad_top,
+                  unsigned int          pad_bottom,
                   DimensionRoundingType round)
         : _stride(std::make_pair(stride_x, stride_y)),
           _pad_left(pad_left),
@@ -243,10 +249,10 @@ public:
 private:
     std::pair<unsigned int, unsigned int> _stride;
-    unsigned int _pad_left;
-    unsigned int _pad_top;
-    unsigned int _pad_right;
-    unsigned int _pad_bottom;
+    unsigned int                          _pad_left;
+    unsigned int                          _pad_top;
+    unsigned int                          _pad_right;
+    unsigned int                          _pad_bottom;
 
     DimensionRoundingType _round_type;
 };
diff --git a/arm_compute/core/Dimensions.h b/arm_compute/core/Dimensions.h
index 2ebfcd7f83..bb8692d70a 100644
--- a/arm_compute/core/Dimensions.h
+++ b/arm_compute/core/Dimensions.h
@@ -50,8 +50,7 @@ public:
      * @param[in] dims Values to initialize the dimensions.
      */
     template <typename... Ts>
-    explicit Dimensions(Ts... dims)
-        : _id{ { static_cast<T>(dims)... } }, _num_dimensions{ sizeof...(dims) }
+    explicit Dimensions(Ts... dims) : _id{{static_cast<T>(dims)...}}, _num_dimensions{sizeof...(dims)}
     {
     }
 
@@ -78,7 +77,7 @@ public:
         ARM_COMPUTE_ERROR_ON(dimension >= num_max_dimensions);
         _id[dimension] = value;
         // Don't increase the number of dimensions if the new dimension is 1
-        if(increase_dim_unit || value != 1)
+        if (increase_dim_unit || value != 1)
         {
             _num_dimensions = std::max(_num_dimensions, dimension + 1);
         }
@@ -108,7 +107,7 @@ public:
     void increment(size_t dim, T step = 1)
     {
         ARM_COMPUTE_ERROR_ON(dim >= _num_dimensions);
-        if((std::numeric_limits<T>::max() - _id[dim]) >= step)
+        if ((std::numeric_limits<T>::max() - _id[dim]) >= step)
        {
             _id[dim] += step;
         }
@@ -162,7 +161,7 @@ public:
         const size_t last = std::min(_num_dimensions, first + n);
 
-        if(last > (first + 1))
+        if (last > (first + 1))
         {
             // Collapse dimensions into the first
             _id[first] = std::accumulate(&_id[first], &_id[last], 1, std::multiplies<T>());
@@ -196,7 +195,7 @@ public:
     void remove(size_t idx)
     {
         ARM_COMPUTE_ERROR_ON(_num_dimensions < 1);
-        if(idx >= _num_dimensions)
+        if (idx >= _num_dimensions)
         {
             return;
         }
@@ -262,7 +261,7 @@ protected:
     ~Dimensions() = default;
 
     std::array<T, num_max_dimensions> _id;
-    size_t _num_dimensions{ 0 };
+    size_t                            _num_dimensions{0};
 };
 
 /** Check that given dimensions are equal.
@@ -289,5 +288,5 @@ inline bool operator!=(const Dimensions<T> &lhs, const Dimensions<T> &rhs)
 {
     return !(lhs == rhs);
 }
-}
+} // namespace arm_compute
 #endif /*ARM_COMPUTE_DIMENSIONS_H*/
diff --git a/arm_compute/core/Error.h b/arm_compute/core/Error.h
index 0854f2c527..7a7033805a 100644
--- a/arm_compute/core/Error.h
+++ b/arm_compute/core/Error.h
@@ -53,8 +53,7 @@ class Status
 {
 public:
     /** Default Constructor **/
-    Status()
-        : _code(ErrorCode::OK), _error_description(" ")
+    Status() : _code(ErrorCode::OK), _error_description(" ")
     {
     }
     /** Default Constructor
@@ -101,7 +100,7 @@ public:
     /** Throws a runtime exception in case it contains a valid error status */
     void throw_if_error() const
     {
-        if(!bool(*this))
+        if (!bool(*this))
         {
             internal_throw_on_error();
         }
@@ -141,7 +140,7 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file
  * @param[in] err Error status
  */
 [[noreturn]] void throw_error(Status err);
-}
+} // namespace arm_compute
 /** To avoid unused variables warnings
  *
  * This is useful if for example a variable is only used
@@ -156,7 +155,8 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file
  * @param[in] error_code Error code.
  * @param[in] msg        Message to encapsulate.
  */
-#define ARM_COMPUTE_CREATE_ERROR(error_code, msg) arm_compute::create_error_msg(error_code, __func__, __FILE__, __LINE__, msg)
+#define ARM_COMPUTE_CREATE_ERROR(error_code, msg) \
+    arm_compute::create_error_msg(error_code, __func__, __FILE__, __LINE__, msg)
 
 /** Creates an error on location with a given message
  *
@@ -166,7 +166,8 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file
  * @param[in] line       Line in which the error occurred.
  * @param[in] msg        Message to display before abandoning.
  */
-#define ARM_COMPUTE_CREATE_ERROR_LOC(error_code, func, file, line, msg) arm_compute::create_error_msg(error_code, func, file, line, msg)
+#define ARM_COMPUTE_CREATE_ERROR_LOC(error_code, func, file, line, msg) \
+    arm_compute::create_error_msg(error_code, func, file, line, msg)
 
 /** Creates an error on location with a given message. Accepts a message format
  * and a variable list of arguments matching the format description.
@@ -178,14 +179,14 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file
  * @param[in] msg        Error description message format.
  * @param[in] ...        List of arguments matching the format description.
  */
-#define ARM_COMPUTE_CREATE_ERROR_LOC_VAR(error_code, func, file, line, msg, ...) \
-    do \
-    { \
-        std::array<char, 512> out{ 0 }; \
-        int offset = snprintf(out.data(), out.size(), "in %s %s:%d: ", func, file, line); \
-        snprintf(out.data() + offset, out.size() - offset, msg, __VA_ARGS__); \
-        arm_compute::create_error(error_code, std::string(out.data())); \
-    } while(false)
+#define ARM_COMPUTE_CREATE_ERROR_LOC_VAR(error_code, func, file, line, msg, ...) \
+    do \
+    { \
+        std::array<char, 512> out{0}; \
+        int offset = snprintf(out.data(), out.size(), "in %s %s:%d: ", func, file, line); \
+        snprintf(out.data() + offset, out.size() - offset, msg, __VA_ARGS__); \
+        arm_compute::create_error(error_code, std::string(out.data())); \
+    } while (false)
 
 /** An error is returned with the given description.
  *
@@ -195,7 +196,7 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file
     do \
     { \
         return ARM_COMPUTE_CREATE_ERROR(arm_compute::ErrorCode::RUNTIME_ERROR, __VA_ARGS__); \
-    } while(false)
+    } while (false)
 
 /** Checks if a status contains an error and returns it
  *
@@ -205,18 +206,17 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file
     do \
     { \
         const auto s = status; \
-        if(!bool(s)) \
+        if (!bool(s)) \
         { \
             return s; \
         } \
-    } while(false)
+    } while (false)
 
 /** Checks if an error value is valid if not throws an exception with the error
  *
  * @param[in] error Error value to check.
  */
-#define ARM_COMPUTE_THROW_ON_ERROR(error) \
-    error.throw_if_error();
+#define ARM_COMPUTE_THROW_ON_ERROR(error) error.throw_if_error();
 
 /** If the condition is true, an error is returned. Accepts a message format
  * and a variable list of arguments matching the format description.
@@ -228,28 +228,29 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file
 #define ARM_COMPUTE_RETURN_ERROR_ON_MSG_VAR(cond, msg, ...) \
     do \
     { \
-        if(cond) \
+        if (cond) \
         { \
-            std::array<char, 512> out{ 0 }; \
+            std::array<char, 512> out{0}; \
             int offset = snprintf(out.data(), out.size(), "in %s %s:%d: ", __func__, __FILE__, __LINE__); \
             snprintf(out.data() + offset, out.size() - offset, msg, __VA_ARGS__); \
             return arm_compute::create_error(arm_compute::ErrorCode::RUNTIME_ERROR, std::string(out.data())); \
         } \
-    } while(false)
+    } while (false)
 
 /** If the condition is true, an error is returned
  *
 * @param[in] cond Condition to evaluate.
 * @param[in] msg  Error description message
 */
-#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg) \
-    do \
-    { \
-        if(cond) \
-        { \
-            return arm_compute::create_error_msg(arm_compute::ErrorCode::RUNTIME_ERROR, __func__, __FILE__, __LINE__, msg); \
-        } \
-    } while(false)
+#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg) \
+    do \
+    { \
+        if (cond) \
+        { \
+            return arm_compute::create_error_msg(arm_compute::ErrorCode::RUNTIME_ERROR, __func__, __FILE__, __LINE__, \
+                                                 msg); \
+        } \
+    } while (false)
 
 /** If the condition is true, an error is thrown. Accepts a message format
  * and a variable list of arguments matching the format description.
@@ -261,17 +262,17 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file
  * @param[in] msg  Error description message format.
  * @param[in] ...  List of arguments matching the format description.
  */
-#define ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG_VAR(cond, func, file, line, msg, ...) \
-    do \
-    { \
-        if(cond) \
-        { \
-            std::array<char, 512> out{ 0 }; \
-            int offset = snprintf(out.data(), out.size(), "in %s %s:%d: ", func, file, line); \
-            snprintf(out.data() + offset, out.size() - offset, msg, __VA_ARGS__); \
-            return arm_compute::create_error(ErrorCode::RUNTIME_ERROR, std::string(out.data())); \
-        } \
-    } while(false)
+#define ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG_VAR(cond, func, file, line, msg, ...) \
+    do \
+    { \
+        if (cond) \
+        { \
+            std::array<char, 512> out{0}; \
+            int offset = snprintf(out.data(), out.size(), "in %s %s:%d: ", func, file, line); \
+            snprintf(out.data() + offset, out.size() - offset, msg, __VA_ARGS__); \
+            return arm_compute::create_error(ErrorCode::RUNTIME_ERROR, std::string(out.data())); \
+        } \
+    } while (false)
 
 /** If the condition is true, an error is thrown.
  *
@@ -284,18 +285,17 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file
 #define ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(cond, func, file, line, msg) \
     do \
     { \
-        if(cond) \
+        if (cond) \
         { \
             return arm_compute::create_error_msg(ErrorCode::RUNTIME_ERROR, func, file, line, msg); \
         } \
-    } while(false)
+    } while (false)
 
 /** If the condition is true, an error is returned
 *
 * @param[in] cond Condition to evaluate
 */
-#define ARM_COMPUTE_RETURN_ERROR_ON(cond) \
-    ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, #cond)
+#define ARM_COMPUTE_RETURN_ERROR_ON(cond) ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, #cond)
 
 /** If the condition is true, an error is returned
  *
@@ -314,11 +314,12 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file
  * @param[in] line Line in which the error occurred.
  * @param[in] msg  Message to display.
  */
-#define ARM_COMPUTE_THROW_ERROR(func, file, line, msg) \
-    do \
-    { \
-        arm_compute::throw_error(arm_compute::create_error_msg(arm_compute::ErrorCode::RUNTIME_ERROR, func, file, line, msg)); \
-    } while(false)
+#define ARM_COMPUTE_THROW_ERROR(func, file, line, msg) \
+    do \
+    { \
+        arm_compute::throw_error( \
+            arm_compute::create_error_msg(arm_compute::ErrorCode::RUNTIME_ERROR, func, file, line, msg)); \
+    } while (false)
 
 /** Print the given message then throw an std::runtime_error. Accepts a message format
  * and a variable list of arguments matching the format description.
@@ -332,11 +333,11 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file
 #define ARM_COMPUTE_THROW_ERROR_VAR(func, file, line, msg, ...) \
     do \
     { \
-        std::array<char, 512> out{ 0 }; \
-        int offset = snprintf(out.data(), out.size(), "in %s %s:%d: ", func, file, line); \
+        std::array<char, 512> out{0}; \
+        int                   offset = snprintf(out.data(), out.size(), "in %s %s:%d: ", func, file, line); \
         snprintf(out.data() + offset, out.size() - offset, msg, __VA_ARGS__); \
         arm_compute::throw_error(arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, std::string(out.data()))); \
-    } while(false)
+    } while (false)
 
 /** Print the given message then throw an std::runtime_error. Accepts a message format
  * and a variable list of arguments matching the format description.
@@ -361,7 +362,8 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file
  * @param[in] msg  Error description message format.
  * @param[in] ...  List of arguments matching the format description.
  */
-#define ARM_COMPUTE_ERROR_LOC_VAR(func, file, line, msg, ...) ARM_COMPUTE_THROW_ERROR_VAR(func, file, line, msg, __VA_ARGS__) // NOLINT
+#define ARM_COMPUTE_ERROR_LOC_VAR(func, file, line, msg, ...) \
+    ARM_COMPUTE_THROW_ERROR_VAR(func, file, line, msg, __VA_ARGS__) // NOLINT
 
 /** Print the given message then throw an std::runtime_error.
 *
@@ -380,11 +382,11 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file
 #define ARM_COMPUTE_EXIT_ON_MSG(cond, msg) \
     do \
     { \
-        if(cond) \
+        if (cond) \
         { \
             ARM_COMPUTE_ERROR(msg); \
         } \
-    } while(false)
+    } while (false)
 
 /** If the condition is true, the given message is printed and program exits. Accepts a message format
  * and a variable list of arguments matching the format description.
@@ -396,27 +398,25 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file
 #define ARM_COMPUTE_EXIT_ON_MSG_VAR(cond, msg, ...) \
     do \
     { \
-        if(cond) \
+        if (cond) \
         { \
             ARM_COMPUTE_ERROR_VAR(msg, __VA_ARGS__); \
         } \
-    } while(false)
+    } while (false)
 
 #ifdef ARM_COMPUTE_ASSERTS_ENABLED
 /** Checks if a status value is valid if not throws an exception with the error
  *
  * @param[in] status Status value to check.
  */
-#define ARM_COMPUTE_ERROR_THROW_ON(status) \
-    status.throw_if_error()
+#define ARM_COMPUTE_ERROR_THROW_ON(status) status.throw_if_error()
 
 /** If the condition is true, the given message is printed and an exception is thrown
 *
 * @param[in] cond Condition to evaluate.
 * @param[in] msg  Message to display.
 */
-#define ARM_COMPUTE_ERROR_ON_MSG(cond, msg) \
-    ARM_COMPUTE_EXIT_ON_MSG(cond, msg)
+#define ARM_COMPUTE_ERROR_ON_MSG(cond, msg) ARM_COMPUTE_EXIT_ON_MSG(cond, msg)
 
 /** If the condition is true, the given message is printed and an exception is thrown. Accepts a message format
  * and a variable list of arguments matching the format description.
@@ -425,8 +425,7 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file
  * @param[in] msg  Error description message format.
  * @param[in] ...  List of arguments matching the format description.
  */
-#define ARM_COMPUTE_ERROR_ON_MSG_VAR(cond, msg, ...) \
-    ARM_COMPUTE_EXIT_ON_MSG_VAR(cond, msg, __VA_ARGS__)
+#define ARM_COMPUTE_ERROR_ON_MSG_VAR(cond, msg, ...) ARM_COMPUTE_EXIT_ON_MSG_VAR(cond, msg, __VA_ARGS__)
 
 /** If the condition is true, the given message is printed and an exception is thrown.
  *
@@ -439,11 +438,11 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file
 #define ARM_COMPUTE_ERROR_ON_LOC_MSG(cond, func, file, line, ...) \
     do \
     { \
-        if(cond) \
+        if (cond) \
         { \
             ARM_COMPUTE_ERROR_LOC_VAR(func, file, line, __VA_ARGS__); \
         } \
-    } while(false)
+    } while (false)
 
 /** If the condition is true, the given message is printed and an exception is thrown, otherwise value is returned
  *
@@ -464,8 +463,7 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file
  *
  * @param[in] cond Condition to evaluate.
  */
-#define ARM_COMPUTE_ERROR_ON(cond) \
-    ARM_COMPUTE_ERROR_ON_MSG(cond, #cond)
+#define ARM_COMPUTE_ERROR_ON(cond) ARM_COMPUTE_ERROR_ON_MSG(cond, #cond)
 
 /** If the condition is true then an error message is printed and an exception thrown
 *
diff --git a/arm_compute/core/Helpers.h b/arm_compute/core/Helpers.h
index f19e1e12e0..960201510a 100644
--- a/arm_compute/core/Helpers.h
+++ b/arm_compute/core/Helpers.h
@@ -96,7 +96,6 @@ public:
     void reset(size_t dimension);
 
 private:
-
     /** Initialize a container iterator for the tensor with the specified number of dimensions, stride, buffer pointer and window.
      *
      * @param[in] num_dims The number of dimensions.
     class Dimension
     {
     public:
-        constexpr Dimension()
-            : _dim_start(0), _stride(0)
+        constexpr Dimension() : _dim_start(0), _stride(0)
         {
         }
 
@@ -133,7 +131,7 @@ private:
  * @param[in,out] iterators       Tensor iterators which will be updated by this function before calling lambda_function.
  */
 template <typename L, typename... Ts>
-inline void execute_window_loop(const Window &w, L &&lambda_function, Ts &&... iterators);
+inline void execute_window_loop(const Window &w, L &&lambda_function, Ts &&...iterators);
 
 /** Permutes given Dimensions according to a permutation vector
 *
@@ -146,7 +144,7 @@ template <typename T>
 inline void permute(Dimensions<T> &dimensions, const PermutationVector &perm)
 {
     auto dimensions_copy = utility::make_array<Dimensions<T>::num_max_dimensions>(dimensions.begin(), dimensions.end());
-    for(unsigned int i = 0; i < perm.num_dimensions(); ++i)
+    for (unsigned int i = 0; i < perm.num_dimensions(); ++i)
     {
         T dimension_val = (perm[i] < dimensions.num_dimensions()) ? dimensions_copy[perm[i]] : 0;
         dimensions.set(i, dimension_val);
@@ -163,7 +161,7 @@ inline void permute(Dimensions<T> &dimensions, const PermutationVector &perm)
 inline void permute(TensorShape &shape, const PermutationVector &perm)
 {
     TensorShape shape_copy = shape;
-    for(unsigned int i = 0; i < perm.num_dimensions(); ++i)
+    for (unsigned int i = 0; i < perm.num_dimensions(); ++i)
     {
         size_t dimension_val = (perm[i] < shape.num_dimensions()) ? shape_copy[perm[i]] : 1;
         shape.set(i, dimension_val, false, false); // Avoid changes in _num_dimension
@@ -180,8 +178,11 @@ inline void permute(TensorShape &shape, const PermutationVector &perm)
  *
  * @return The corresponding valid region
  */
-ValidRegion calculate_valid_region_scale(const ITensorInfo &src_info, const TensorShape &dst_shape,
-                                         InterpolationPolicy interpolate_policy, SamplingPolicy sampling_policy, bool border_undefined);
+ValidRegion calculate_valid_region_scale(const ITensorInfo  &src_info,
+                                         const TensorShape  &dst_shape,
+                                         InterpolationPolicy interpolate_policy,
+                                         SamplingPolicy      sampling_policy,
+                                         bool                border_undefined);
 
 /** Convert a linear index into n-dimensional coordinates.
  *
@@ -224,7 +225,8 @@ const std::map<DataLayout, std::vector<DataLayoutDimension>> &get_layout_map();
 *
 * @return The int conversion of the requested data layout index.
 */
-inline size_t get_data_layout_dimension_index(const DataLayout &data_layout, const DataLayoutDimension &data_layout_dimension);
+inline size_t get_data_layout_dimension_index(const DataLayout          &data_layout,
+                                              const DataLayoutDimension &data_layout_dimension);
 
 /** Get the DataLayoutDimension of a given index and layout.
 *
@@ -245,10 +247,17 @@ inline DataLayoutDimension get_index_data_layout_dimension(const DataLayout &dat
  *
  * @return the number of output tiles along the x and y directions of size "output_tile_size"
  */
-inline Size2D compute_winograd_convolution_tiles(const Size2D &in_dims, const Size2D &kernel_size, const Size2D &output_tile_size, const PadStrideInfo &conv_info)
+inline Size2D compute_winograd_convolution_tiles(const Size2D        &in_dims,
+                                                 const Size2D        &kernel_size,
+                                                 const Size2D        &output_tile_size,
+                                                 const PadStrideInfo &conv_info)
 {
-    int num_tiles_x = std::ceil((in_dims.width - (kernel_size.width - 1) + conv_info.pad_left() + conv_info.pad_right()) / static_cast<float>(output_tile_size.width));
-    int num_tiles_y = std::ceil((in_dims.height - (kernel_size.height - 1) + conv_info.pad_top() + conv_info.pad_bottom()) / static_cast<float>(output_tile_size.height));
+    int num_tiles_x =
+        std::ceil((in_dims.width - (kernel_size.width - 1) + conv_info.pad_left() + conv_info.pad_right()) /
+                  static_cast<float>(output_tile_size.width));
+    int num_tiles_y =
+        std::ceil((in_dims.height - (kernel_size.height - 1) + conv_info.pad_top() + conv_info.pad_bottom()) /
+                  static_cast<float>(output_tile_size.height));
 
     // Clamp in case we provide paddings but we have 1D convolution
     num_tiles_x = std::min(num_tiles_x, static_cast<int>(in_dims.width));
@@ -277,7 +286,7 @@ inline T wrap_around(T x, T m)
  */
 inline Coordinates &convert_negative_axis(Coordinates &coords, int max_value)
 {
-    for(unsigned int i = 0; i < coords.num_dimensions(); ++i)
+    for (unsigned int i = 0; i < coords.num_dimensions(); ++i)
     {
         coords[i] = wrap_around(coords[i], max_value);
     }
diff --git a/arm_compute/core/Helpers.inl b/arm_compute/core/Helpers.inl
index ff902bba20..60a21e9418 100644
--- a/arm_compute/core/Helpers.inl
+++ b/arm_compute/core/Helpers.inl
@@ -32,12 +32,9 @@ template <unsigned int dimension>
 struct IncrementIterators
 {
     template <typename T, typename... Ts>
-    static void unroll(T &&it, Ts &&... iterators)
+    static void unroll(T &&it, Ts &&...iterators)
     {
-        auto increment = [](T && it)
-        {
-            it.increment(dimension);
-        };
+        auto increment = [](T &&it) { it.increment(dimension); };
         utility::for_each(increment, std::forward<T>(it), std::forward<Ts>(iterators)...);
     }
     static void unroll()
@@ -50,14 +47,14 @@ template <unsigned int dim>
 struct ForEachDimension
 {
     template <typename L, typename... Ts>
-    static void unroll(const Window &w, Coordinates &id, L &&lambda_function, Ts &&... iterators)
+    static void unroll(const Window &w, Coordinates &id, L &&lambda_function, Ts &&...iterators)
     {
         const auto &d = w[dim - 1];
 
-        for(auto v = d.start(); v < d.end(); v += d.step(), IncrementIterators < dim - 1 >::unroll(iterators...))
+        for (auto v = d.start(); v < d.end(); v += d.step(), IncrementIterators<dim - 1>::unroll(iterators...))
         {
             id.set(dim - 1, v);
-            ForEachDimension < dim - 1 >::unroll(w, id, lambda_function, iterators...);
+            ForEachDimension<dim - 1>::unroll(w, id, lambda_function, iterators...);
         }
     }
 };
@@ -66,7 +63,7 @@ template <>
 struct ForEachDimension<0>
 {
     template <typename L, typename... Ts>
-    static void unroll(const Window &w, Coordinates &id, L &&lambda_function, Ts &&... iterators)
+    static void unroll(const Window &w, Coordinates &id, L &&lambda_function, Ts &&...iterators)
     {
         ARM_COMPUTE_UNUSED(w, iterators...);
         lambda_function(id);
@@ -74,31 +71,31 @@ struct ForEachDimension<0>
 };
 
 template <typename L, typename... Ts>
-inline void execute_window_loop(const Window &w, L &&lambda_function, Ts &&... iterators)
+inline void execute_window_loop(const Window &w, L &&lambda_function, Ts &&...iterators)
 {
     w.validate();
 
-    for(unsigned int i = 0; i < Coordinates::num_max_dimensions; ++i)
+    for (unsigned int i = 0; i < Coordinates::num_max_dimensions; ++i)
     {
         ARM_COMPUTE_ERROR_ON(w[i].step() == 0);
     }
 
     Coordinates id;
-    ForEachDimension<Coordinates::num_max_dimensions>::unroll(w, id, std::forward<L>(lambda_function), std::forward<Ts>(iterators)...);
+    ForEachDimension<Coordinates::num_max_dimensions>::unroll(w, id, std::forward<L>(lambda_function),
+                                                              std::forward<Ts>(iterators)...);
 }
 
-inline constexpr Iterator::Iterator()
-    : _ptr(nullptr), _dims()
+inline constexpr Iterator::Iterator() : _ptr(nullptr), _dims()
 {
 }
 
-inline Iterator::Iterator(const ITensor *tensor, const Window &win)
-    : Iterator()
+inline Iterator::Iterator(const ITensor *tensor, const Window &win) : Iterator()
 {
     ARM_COMPUTE_ERROR_ON(tensor == nullptr);
     ARM_COMPUTE_ERROR_ON(tensor->info() == nullptr);
 
-    initialize(tensor->info()->num_dimensions(), tensor->info()->strides_in_bytes(), tensor->buffer(), tensor->info()->offset_first_element_in_bytes(), win);
+    initialize(tensor->info()->num_dimensions(), tensor->info()->strides_in_bytes(), tensor->buffer(),
+               tensor->info()->offset_first_element_in_bytes(), win);
 }
 
 inline Iterator::Iterator(size_t num_dims, const Strides &strides, uint8_t *buffer, size_t offset, const Window &win)
@@ -107,21 +104,22 @@ inline Iterator::Iterator(size_t num_dims, const Strides &strides, uint8_t *buff
     initialize(num_dims, strides, buffer, offset, win);
 }
 
-inline void Iterator::initialize(size_t num_dims, const Strides &strides, uint8_t *buffer, size_t offset, const Window &win)
+inline void
+Iterator::initialize(size_t num_dims, const Strides &strides, uint8_t *buffer, size_t offset, const Window &win)
 {
     ARM_COMPUTE_ERROR_ON(buffer == nullptr);
 
     _ptr = buffer + offset;
 
     //Initialize the stride for each dimension and calculate the position of the first element of the iteration:
-    for(unsigned int n = 0; n < num_dims; ++n)
+    for (unsigned int n = 0; n < num_dims; ++n)
     {
         _dims[n]._stride = win[n].step() * strides[n];
         std::get<0>(_dims)._dim_start += static_cast<size_t>(strides[n]) * win[n].start();
     }
 
     //Copy the starting point to all the dimensions:
-    for(unsigned int n = 1; n < Coordinates::num_max_dimensions; ++n)
+    for (unsigned int n = 1; n < Coordinates::num_max_dimensions; ++n)
     {
         _dims[n]._dim_start = std::get<0>(_dims)._dim_start;
     }
@@ -135,7 +133,7 @@ inline void Iterator::increment(const size_t dimension)
 
     _dims[dimension]._dim_start += _dims[dimension]._stride;
 
-    for(unsigned int n = 0; n < dimension; ++n)
+    for (unsigned int n = 0; n < dimension; ++n)
     {
         _dims[n]._dim_start = _dims[dimension]._dim_start;
     }
@@ -157,7 +155,7 @@ inline void Iterator::reset(const size_t dimension)
 
     _dims[dimension]._dim_start = _dims[dimension + 1]._dim_start;
 
-    for(unsigned int n = 0; n < dimension; ++n)
+    for (unsigned int n = 0; n < dimension; ++n)
     {
         _dims[n]._dim_start = _dims[dimension]._dim_start;
     }
@@ -170,9 +168,9 @@ inline Coordinates index2coords(const TensorShape &shape, int index)
     ARM_COMPUTE_ERROR_ON_MSG(index < 0 || index >= num_elements, "Index has to be in [0, num_elements]!");
     ARM_COMPUTE_ERROR_ON_MSG(num_elements == 0, "Cannot create coordinate from empty shape!");
 
-    Coordinates coord{ 0 };
+    Coordinates coord{0};
 
-    for(int d = shape.num_dimensions() - 1; d >= 0; --d)
+    for (int d = shape.num_dimensions() - 1; d >= 0; --d)
     {
         num_elements /= shape[d];
         coord.set(d, index / num_elements);
@@ -191,7 +189,7 @@ inline int coords2index(const TensorShape &shape, const Coordinates &coord)
     int index  = 0;
     int stride = 1;
 
-    for(unsigned int d = 0; d < coord.num_dimensions(); ++d)
+    for (unsigned int d = 0; d < coord.num_dimensions(); ++d)
     {
         index += coord[d] * stride;
         stride *= shape[d];
@@ -200,9 +198,11 @@ inline int coords2index(const TensorShape &shape, const Coordinates &coord)
     return index;
 }
 
-inline size_t get_data_layout_dimension_index(const DataLayout &data_layout, const DataLayoutDimension &data_layout_dimension)
+inline size_t get_data_layout_dimension_index(const DataLayout          &data_layout,
+                                              const DataLayoutDimension &data_layout_dimension)
 {
-    ARM_COMPUTE_ERROR_ON_MSG(data_layout == DataLayout::UNKNOWN, "Cannot retrieve the dimension index for an unknown layout!");
+    ARM_COMPUTE_ERROR_ON_MSG(data_layout == DataLayout::UNKNOWN,
+                             "Cannot retrieve the dimension index for an unknown layout!");
     const auto &dims = get_layout_map().at(data_layout);
     const auto &it   = std::find(dims.cbegin(), dims.cend(), data_layout_dimension);
     ARM_COMPUTE_ERROR_ON_MSG(it == dims.cend(), "Invalid dimension for the given layout.");
@@ -211,7 +211,8 @@ inline size_t get_data_layout_dimension_index(const DataLayout &data_layout, con
 
 inline DataLayoutDimension get_index_data_layout_dimension(const DataLayout &data_layout, const size_t index)
 {
-    ARM_COMPUTE_ERROR_ON_MSG(data_layout == DataLayout::UNKNOWN, "Cannot retrieve the layout dimension for an unknown layout!");
+    ARM_COMPUTE_ERROR_ON_MSG(data_layout == DataLayout::UNKNOWN,
+                             "Cannot retrieve the layout dimension for an unknown layout!");
     const auto &dims = get_layout_map().at(data_layout);
     ARM_COMPUTE_ERROR_ON_MSG(index >= dims.size(), "Invalid index for the given layout.");
     return dims[index];
diff --git a/arm_compute/core/IAccessWindow.h b/arm_compute/core/IAccessWindow.h
index 880f6d6b27..9c9fb90915 100644
--- a/arm_compute/core/IAccessWindow.h
+++ b/arm_compute/core/IAccessWindow.h
@@ -100,7 +100,10 @@ public:
      * @return a valid region.
     *
     */
-    virtual ValidRegion compute_valid_region(const Window &window, ValidRegion input_valid_region, bool border_undefined, BorderSize border_size) const = 0;
+    virtual ValidRegion compute_valid_region(const Window &window,
+                                             ValidRegion   input_valid_region,
+                                             bool          border_undefined,
+                                             BorderSize    border_size) const = 0;
 };
 
 /** Implementation of a rectangular access pattern. */
@@ -161,7 +164,10 @@ public:
      * @param[in] border_undefined  (Optional) Undefined borders are excluded from the valid region.
      * @param[in] border_size       (Optional) Size of the border around the XY-plane of the tensor.
      */
-    void set_valid_region(const Window &window, const ValidRegion &input_valid_region, bool border_undefined = false, const BorderSize &border_size = BorderSize(0));
+    void set_valid_region(const Window      &window,
+                          const ValidRegion &input_valid_region,
+                          bool               border_undefined = false,
+                          const BorderSize  &border_size      = BorderSize(0));
 
     /** Compute the valid region based on access pattern, valid region of the inputs and border mode.
      *
@@ -189,7 +195,10 @@ public:
      * @return a valid region.
     *
     */
-    ValidRegion compute_valid_region(const Window &window, ValidRegion input_valid_region, bool border_undefined, BorderSize border_size) const override;
+    ValidRegion compute_valid_region(const Window &window,
+                                     ValidRegion   input_valid_region,
+                                     bool          border_undefined,
+                                     BorderSize    border_size) const override;
 
     bool update_window_if_needed(Window &window) const override;
     bool update_padding_if_needed(const Window &window) override;
diff --git a/arm_compute/core/IArray.h b/arm_compute/core/IArray.h
index 6edbc1d5d5..3471fc9a86 100644
--- a/arm_compute/core/IArray.h
+++ b/arm_compute/core/IArray.h
@@ -25,6 +25,7 @@
 #define ARM_COMPUTE_IARRAY_H
 
 #include "arm_compute/core/Error.h"
+
 #include
 #include
 
@@ -36,14 +37,12 @@ class IArray
 {
 public:
     /** Default constructor */
-    IArray()
-        : _num_values(0), _max_size(0) {};
+    IArray() : _num_values(0), _max_size(0){};
 
     /** Constructor: initializes an array which can contain up to max_num_points values
     *
     * @param[in] max_num_values Maximum number of values the array will be able to stored
     */
-    IArray(size_t max_num_values)
-        : _num_values(0), _max_size(max_num_values)
+    IArray(size_t max_num_values) : _num_values(0), _max_size(max_num_values)
     {
     }
     /** Maximum number of values which can be stored in this array
@@ -73,7 +72,7 @@ public:
     bool push_back(const T &val)
     {
         ARM_COMPUTE_ERROR_ON(0 == _max_size);
-        if(_num_values >= max_num_values())
+        if (_num_values >= max_num_values())
         {
             _num_values = max_num_values() + 1;
             return false;
@@ -142,5 +141,5 @@ using IInt16Array = IArray<int16_t>;
 using IInt32Array = IArray<int32_t>;
 /** Interface for Array of floats. */
 using IFloatArray = IArray<float>;
-}
+} // namespace arm_compute
 #endif /* ARM_COMPUTE_IARRAY_H */
diff --git a/arm_compute/core/IKernel.h b/arm_compute/core/IKernel.h
index 98fd18cc91..403a2c724e 100644
--- a/arm_compute/core/IKernel.h
+++ b/arm_compute/core/IKernel.h
@@ -73,5 +73,5 @@ protected:
 private:
     Window _window;
 };
-}
+} // namespace arm_compute
 #endif /*ARM_COMPUTE_IKERNEL_H */
diff --git a/arm_compute/core/ITensor.h b/arm_compute/core/ITensor.h
index 32b93576bd..aad8313261 100644
--- a/arm_compute/core/ITensor.h
+++ b/arm_compute/core/ITensor.h
@@ -94,9 +94,9 @@ public:
     void mark_as_used() const;
 
 private:
-    mutable bool _is_used = { true }; /**< Flag that marks if the tensor is used or not */
+    mutable bool _is_used = {true}; /**< Flag that marks if the tensor is used or not */
 };
 
 using IImage = ITensor;
-}
+} // namespace arm_compute
 #endif /*ARM_COMPUTE_ITENSOR_H */
diff --git a/arm_compute/core/ITensorInfo.h b/arm_compute/core/ITensorInfo.h
index e7c0b182c6..c42f4b57a1 100644
--- a/arm_compute/core/ITensorInfo.h
+++ b/arm_compute/core/ITensorInfo.h
@@ -29,6 +29,7 @@
 #include "arm_compute/core/TensorShape.h"
 #include "arm_compute/core/Types.h"
 #include "arm_compute/core/utils/misc/Utility.h"
+
 #include "support/ICloneable.h"
 
 #include
@@ -328,23 +329,23 @@ public:
     *          not broadcast compatible.
     */
     template <typename... Infos>
-    static std::pair<TensorShape, ValidRegion> broadcast_shape_and_valid_region(const Infos &... infos)
+    static std::pair<TensorShape, ValidRegion> broadcast_shape_and_valid_region(const Infos &...infos)
     {
         TensorShape bc_shape = TensorShape::broadcast_shape(infos.tensor_shape()...);
-        ValidRegion bc_valid_region{ Coordinates(), bc_shape };
+        ValidRegion bc_valid_region{Coordinates(), bc_shape};
 
-        auto broadcast_valid_region = [&bc_valid_region](const ITensorInfo & info)
+        auto broadcast_valid_region = [&bc_valid_region](const ITensorInfo &info)
         {
-            if(info.num_dimensions() != 0)
+            if (info.num_dimensions() != 0)
             {
-                for(size_t d = 0; d < bc_valid_region.shape.num_dimensions(); ++d)
+                for (size_t d = 0; d < bc_valid_region.shape.num_dimensions(); ++d)
                 {
                     const bool is_broadcast = (info.tensor_shape()[d] == 1);
 
                     const int    anchor_max = std::max(bc_valid_region.anchor[d], info.valid_region().anchor[d]);
                     const size_t valid_min  = std::min(bc_valid_region.shape[d], info.valid_region().shape[d]);
 
-                    if(!is_broadcast || (valid_min == 0))
+                    if (!is_broadcast || (valid_min == 0))
                     {
                         bc_valid_region.anchor.set(d, anchor_max);
                         bc_valid_region.shape.set(d, valid_min);
diff --git a/arm_compute/core/ITensorPack.h b/arm_compute/core/ITensorPack.h
index 17b7241862..f456c50769 100644
--- a/arm_compute/core/ITensorPack.h
+++ b/arm_compute/core/ITensorPack.h
@@ -42,18 +42,16 @@ public:
     struct PackElement
     {
         PackElement() = default;
-        PackElement(int id, ITensor *tensor)
-            : id(id), tensor(tensor), ctensor(nullptr)
+        PackElement(int id, ITensor *tensor) : id(id), tensor(tensor), ctensor(nullptr)
         {
         }
-        PackElement(int id, const ITensor *ctensor)
-            : id(id), tensor(nullptr), ctensor(ctensor)
+        PackElement(int id, const ITensor *ctensor) : id(id), tensor(nullptr), ctensor(ctensor)
         {
         }
 
-        int id{ -1 };
-        ITensor *tensor{ nullptr };
-        const ITensor *ctensor{ nullptr };
+        int            id{-1};
+        ITensor       *tensor{nullptr};
+        const ITensor *ctensor{nullptr};
     };
 
 public:
diff --git a/arm_compute/core/KernelDescriptors.h b/arm_compute/core/KernelDescriptors.h
index 2bf5dee18c..168a06a55c 100644
--- a/arm_compute/core/KernelDescriptors.h
+++ b/arm_compute/core/KernelDescriptors.h
@@ -33,24 +33,24 @@ namespace arm_compute
 /** Descriptor for FFT scale kernels */
 struct FFTScaleKernelInfo
 {
-    float scale{ 0.f };     /**< Axis to perform the kernel on. */
-    bool  conjugate{ true }; /**< Flag to conjugate the output/ */
+    float scale{0.f};     /**< Axis to perform the kernel on. */
+    bool  conjugate{true}; /**< Flag to conjugate the output/ */
 };
 
 /** Descriptor for FFT digit reverse kernels */
 struct FFTDigitReverseKernelInfo
 {
-    unsigned int axis{ 0 };      /**< Axis to perform the kernel on. */
-    bool         conjugate{ false }; /**< Flag to conjugate the output/ */
+    unsigned int axis{0};          /**< Axis to perform the kernel on. */
+    bool         conjugate{false}; /**< Flag to conjugate the output/ */
 };
 
 /** Descriptor used by the FFT core kernels */
 struct FFTRadixStageKernelInfo
 {
-    unsigned int axis{ 0 };               /**< Axis to run the kernel on. */
-    unsigned int radix{ 0 };              /**< Radix to use. */
-    unsigned int Nx{ 0 };                 /**< Nx coefficient. */
-    bool         is_first_stage{ false }; /**< Flags if the FFT kernels is the first stage of a decomposed FFT. */
+    unsigned int axis{0};               /**< Axis to run the kernel on. */
+    unsigned int radix{0};              /**< Radix to use. */
+    unsigned int Nx{0};                 /**< Nx coefficient. */
+    bool         is_first_stage{false}; /**< Flags if the FFT kernels is the first stage of a decomposed FFT. */
 };
 
 class ITensorInfo;
@@ -58,89 +58,102 @@ class ITensorInfo;
 struct GEMMKernelInfo
 {
     GEMMKernelInfo() = default;
-    GEMMKernelInfo(
-        unsigned int        im,
-        unsigned int        in,
-        unsigned int        ik,
-        unsigned int        idepth_output_gemm3d,
-        bool                ireinterpret_input_as_3d,
-        bool                ibroadcast_bias,
-        bool                ifp_mixed_precision,
-        bool                ihas_pad_y,
-        ActivationLayerInfo iactivation_info,
-        int                 inmult_transpose1xW_width,
-        int                 imult_interleave4x4_height,
-        GEMMLHSMatrixInfo   ilhs_info,
-        GEMMRHSMatrixInfo   irhs_info,
-        int32_t             ina_offset,
-        int32_t             inb_offset)
-        : m(im), n(in), k(ik), depth_output_gemm3d(idepth_output_gemm3d), reinterpret_input_as_3d(ireinterpret_input_as_3d), broadcast_bias(ibroadcast_bias), fp_mixed_precision(ifp_mixed_precision),
-          has_pad_y(ihas_pad_y), activation_info(iactivation_info), mult_transpose1xW_width(inmult_transpose1xW_width), mult_interleave4x4_height(imult_interleave4x4_height), lhs_info(ilhs_info),
-          rhs_info(irhs_info), a_offset(ina_offset), b_offset(inb_offset)
+    GEMMKernelInfo(unsigned int        im,
+                   unsigned int        in,
+                   unsigned int        ik,
+                   unsigned int        idepth_output_gemm3d,
+                   bool                ireinterpret_input_as_3d,
+                   bool                ibroadcast_bias,
+                   bool                ifp_mixed_precision,
+                   bool                ihas_pad_y,
+                   ActivationLayerInfo iactivation_info,
+                   int                 inmult_transpose1xW_width,
+                   int                 imult_interleave4x4_height,
+                   GEMMLHSMatrixInfo   ilhs_info,
+                   GEMMRHSMatrixInfo   irhs_info,
+                   int32_t             ina_offset,
+                   int32_t             inb_offset)
+        : m(im),
+          n(in),
+          k(ik),
+          depth_output_gemm3d(idepth_output_gemm3d),
+          reinterpret_input_as_3d(ireinterpret_input_as_3d),
+          broadcast_bias(ibroadcast_bias),
+          fp_mixed_precision(ifp_mixed_precision),
+          has_pad_y(ihas_pad_y),
+          activation_info(iactivation_info),
+          mult_transpose1xW_width(inmult_transpose1xW_width),
+          mult_interleave4x4_height(imult_interleave4x4_height),
+          lhs_info(ilhs_info),
+          rhs_info(irhs_info),
+          a_offset(ina_offset),
+          b_offset(inb_offset)
     {
     }
 
-    unsigned int m{ 0 };                           /**< Number of LHS rows*/
-    unsigned int n{ 0 };                           /**< Number of RHS columns*/
-    unsigned int k{ 0 };                           /**< Number of LHS columns or RHS rows */
-    unsigned int depth_output_gemm3d{ 0 };         /**< Depth of the output tensor in case is reinterpreted as 3D */
-    bool reinterpret_input_as_3d{ false };         /**< Flag used to reinterpret the input as 3D */
-    bool broadcast_bias{ false };                  /**< Flag used to broadcast the bias addition */
-    bool fp_mixed_precision{ false };              /**< Flag used to indicate wider accumulators (32 bit instead of 16 for FP16). */
-    bool has_pad_y{ false };                       /**< Flag used to indicate if the input/output tensors have internal pad on the y direction */
-    ActivationLayerInfo activation_info{};         /**< Activation function to perform after the matrix multiplication */
-    int mult_transpose1xW_width{ 1 };              /**< Multiplication factor for the width of the 1xW transposed block */
-    int mult_interleave4x4_height{ 1 };            /**< Multiplication factor for the height of the 4x4 interleaved block */
-    GEMMLHSMatrixInfo lhs_info{};                  /**< LHS matrix information used to retrieve the number of rows processed by each thread */
-    GEMMRHSMatrixInfo rhs_info{};                  /**< RHS matrix information used for reshaping the RHS matrix */
-    int32_t a_offset{ 0 };                         /**< Offset to be added to each element of the matrix A */
-    int32_t b_offset{ 0 };                         /**< Offset to be added to each element of the matrix B */
-    GEMMLowpOutputStageInfo output_stage{};        /**< GEMMLowp output stage information */
+    unsigned int m{0};                   /**< Number of LHS rows*/
+    unsigned int n{0};                   /**< Number of RHS columns*/
+    unsigned int k{0};                   /**< Number of LHS columns or RHS rows */
+    unsigned int depth_output_gemm3d{0}; /**< Depth of the output tensor in case is reinterpreted as 3D */
+    bool         reinterpret_input_as_3d{false}; /**< Flag used to reinterpret the input as 3D */
+    bool         broadcast_bias{false};          /**< Flag used to broadcast the bias addition */
+    bool fp_mixed_precision{false};  /**< Flag used to indicate wider accumulators (32 bit instead of 16 for FP16). */
+    bool has_pad_y{
+        false}; /**< Flag used to indicate if the input/output tensors have internal pad on the y direction */
+    ActivationLayerInfo activation_info{}; /**< Activation function to perform after the matrix multiplication */
+    int  mult_transpose1xW_width{1};       /**< Multiplication factor for the width of the 1xW transposed block */
+    int  mult_interleave4x4_height{1};     /**< Multiplication factor for the height of the 4x4 interleaved block */
+    GEMMLHSMatrixInfo
+        lhs_info{}; /**< LHS matrix information used to retrieve the number of rows processed by each thread */
+    GEMMRHSMatrixInfo rhs_info{}; /**< RHS matrix information used for reshaping the RHS matrix */
+    int32_t           a_offset{0};             /**< Offset to be added to each element of the matrix A */
+    int32_t           b_offset{0};             /**< Offset to be added to each element of the matrix B */
+    GEMMLowpOutputStageInfo output_stage{};    /**< GEMMLowp output stage information */
 };
 
 /** Compute descriptor used by the depthwise convolution native kernel */
 struct DWCComputeKernelInfo
 {
-    unsigned int n0{ 1 };                       /**< Number of columns processed by each thread */
-    unsigned int m0{ 1 };                       /**< Number of rows processed by each thread */
-    bool export_input_to_cl_image{ false };     /**< Export input to cl_image */
-    bool export_weights_to_cl_image{ false };   /**< Export the weights to cl_image */
+    unsigned int n0{1};                          /**< Number of columns processed by each thread */
+    unsigned int m0{1};                          /**< Number of rows processed by each thread */
+    bool         export_input_to_cl_image{false};   /**< Export input to cl_image */
+    bool         export_weights_to_cl_image{false}; /**< Export the weights to cl_image */
 };
 
 /** Compute descriptor used by the direct convolution kernel */
 struct DirectConvComputeKernelInfo
 {
-    int32_t m0{ 1 };                            /**< Number of rows to be processed by the kernel */
-    int32_t n0{ 1 };                            /**< Number of columns to be processed by the kernel */
-    int32_t k0{ 1 };                            /**< Number of partial accumulations to be processed in a single iteration by the kernel */
-    bool export_weights_to_cl_image{ false };   /**< Flag to export the weights to cl_image */
-    bool export_output_to_cl_image{ false };    /**< Flag to export the output to cl_image */
-    bool export_input_to_cl_image{ false };     /**< Flag to export the input to cl_image */
+    int32_t m0{1};    /**< Number of rows to be processed by the kernel */
+    int32_t n0{1};    /**< Number of columns to be processed by the kernel */
+    int32_t k0{1};    /**< Number of partial accumulations to be processed in a single iteration by the kernel */
+    bool    export_weights_to_cl_image{false}; /**< Flag to export the weights to cl_image */
+    bool    export_output_to_cl_image{false};  /**< Flag to export the output to cl_image */
+    bool    export_input_to_cl_image{false};   /**< Flag to export the input to cl_image */
 };
 
 /** Descriptor used by the softmax kernels */
 struct SoftmaxKernelInfo
 {
-    float beta{ 1.f };                             /**< A scaling factor for the exponent with default value 1.0 */
-    bool is_log{ false };                          /**< Flag used to perform Log Softmax operation */
-    DataType input_data_type{ DataType::UNKNOWN }; /**< Input tensor data type */
-    int32_t axis{ 0 };                             /**< The dimension in which to apply softmax. */
+    float    beta{1.f};                          /**< A scaling factor for the exponent with default value 1.0 */
+    bool     is_log{false};                      /**< Flag used to perform Log Softmax operation */
+    DataType input_data_type{DataType::UNKNOWN}; /**< Input tensor data type */
+    int32_t  axis{0};                            /**< The dimension in which to apply softmax. */
 };
 
 /** Descriptor used by the direct convolution layer output stage kernels */
 struct DirectConvolutionLayerOutputStageKernelInfo
 {
-    int32_t result_fixedpoint_multiplier{ 0 };      /**< Result output stage multiplier used for quantizing */
-    int32_t result_shift{ 0 };                      /**< Result output stage shift used for quantizing */
-    int32_t result_offset_after_shift{ 0 };         /**< Result offset used for quantizing */
-    DataType output_data_type{ DataType::UNKNOWN }; /**< Output tensor data type to use if the output is not initialized */
+    int32_t  result_fixedpoint_multiplier{0}; /**< Result output stage multiplier used for quantizing */
+    int32_t  result_shift{0};                 /**< Result output stage shift used for quantizing */
+    int32_t  result_offset_after_shift{0};    /**< Result offset used for quantizing */
+    DataType output_data_type{
+        DataType::UNKNOWN}; /**< Output tensor data type to use if the output is not initialized */
 };
 
 struct InstanceNormalizationLayerKernelInfo
 {
     /** Default constructor */
-    InstanceNormalizationLayerKernelInfo()
-        : InstanceNormalizationLayerKernelInfo(1.f, 0.f, 1e-12, true)
+    InstanceNormalizationLayerKernelInfo() : InstanceNormalizationLayerKernelInfo(1.f, 0.f, 1e-12, true)
     {
     }
     /** Constructor
@@ -177,10 +190,10 @@ struct GEMMLowpReductionKernelInfo
     {
     }
 
-    int32_t k{ 0 };              /**< Number of matrix columns/rows */
-    bool is_reshaped{ false };   /**< True if the input tensor has been reshaped */
-    int32_t scalar{ 0 };         /**< Scalar value to multiply each reduced column/row by */
-    bool mul_by_scalar{ false }; /**< True if each column/row reduction has to be multiplied by a scalar value */
+    int32_t k{0};               /**< Number of matrix columns/rows */
+    bool    is_reshaped{false}; /**< True if the input tensor has been reshaped */
+    int32_t scalar{0};          /**< Scalar value to multiply each reduced column/row by */
+    bool    mul_by_scalar{false}; /**< True if each column/row reduction has to be multiplied by a scalar value */
 };
 
 struct ScaleKernelInfo
{
     ScaleKernelInfo(InterpolationPolicy interpolation_policy,
                     BorderMode          border_mode,
                     PixelValue          constant_border_value = PixelValue(),
                     SamplingPolicy      sampling_policy       = SamplingPolicy::CENTER,
                     bool                use_padding           = true,
                     bool                align_corners         = false,
                     DataLayout          data_layout           = DataLayout::UNKNOWN) noexcept
-        : interpolation_policy{ interpolation_policy },
border_mode{ border_mode }, - constant_border_value{ constant_border_value }, - sampling_policy{ sampling_policy }, - use_padding{ use_padding }, - align_corners{ align_corners }, - data_layout{ data_layout } + : interpolation_policy{interpolation_policy}, + border_mode{border_mode}, + constant_border_value{constant_border_value}, + sampling_policy{sampling_policy}, + use_padding{use_padding}, + align_corners{align_corners}, + data_layout{data_layout} { } @@ -224,16 +237,17 @@ struct ScaleKernelInfo struct MatMulKernelInfo { MatMulKernelInfo() = default; - MatMulKernelInfo(bool adj_lhs, bool adj_rhs, int m0 = 1, int n0 = 1, int k0 = 1, bool export_rhs_to_cl_image = false) - : adj_lhs{ adj_lhs }, adj_rhs{ adj_rhs }, m0{ m0 }, n0{ n0 }, k0{ k0 }, export_rhs_to_cl_image{ export_rhs_to_cl_image } + MatMulKernelInfo( + bool adj_lhs, bool adj_rhs, int m0 = 1, int n0 = 1, int k0 = 1, bool export_rhs_to_cl_image = false) + : adj_lhs{adj_lhs}, adj_rhs{adj_rhs}, m0{m0}, n0{n0}, k0{k0}, export_rhs_to_cl_image{export_rhs_to_cl_image} { } - bool adj_lhs{ false }; /**< Get Adjoint LHS flag value */ - bool adj_rhs{ false }; /**< Get Adjoint RHS flag value */ - int m0{ 1 }; /**< Number of output rows processed by each work-item*/ - int n0{ 1 }; /**< Number of output columns processed by each work-item*/ - int k0{ 1 }; /**< Number of inner accumulations */ - bool export_rhs_to_cl_image{ false }; /**< Flag to know whether the RHS tensor should be exported to cl_image*/ + bool adj_lhs{false}; /**< Get Adjoint LHS flag value */ + bool adj_rhs{false}; /**< Get Adjoint RHS flag value */ + int m0{1}; /**< Number of output rows processed by each work-item*/ + int n0{1}; /**< Number of output columns processed by each work-item*/ + int k0{1}; /**< Number of inner accumulations */ + bool export_rhs_to_cl_image{false}; /**< Flag to know whether the RHS tensor should be exported to cl_image*/ }; } // namespace arm_compute #endif // ACL_ARM_COMPUTE_CORE_KERNELDESCRIPTORS_H diff --git a/arm_compute/core/Log.h b/arm_compute/core/Log.h index bc0ecb802e..03b861f765 100644 --- a/arm_compute/core/Log.h +++ b/arm_compute/core/Log.h @@ -34,11 +34,11 @@ #define ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER() \ do \ { \ - if(arm_compute::logging::LoggerRegistry::get().logger("CORE") == nullptr) \ + if (arm_compute::logging::LoggerRegistry::get().logger("CORE") == nullptr) \ { \ arm_compute::logging::LoggerRegistry::get().create_reserved_loggers(); \ } \ - } while(false) + } while (false) #else /* ARM_COMPUTE_LOGGING_ENABLED */ #define ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER() #endif /* ARM_COMPUTE_LOGGING_ENABLED */ @@ -53,7 +53,7 @@ { \ ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER(); \ ARM_COMPUTE_LOG_MSG("CORE", log_level, msg); \ - } while(false) + } while (false) /** Log a message with format to the core system logger * @@ -66,7 +66,7 @@ { \ ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER(); \ ARM_COMPUTE_LOG_MSG_WITH_FORMAT("CORE", log_level, fmt, __VA_ARGS__); \ - } while(false) + } while (false) /** Log a stream to the core system logger * @@ -78,7 +78,7 @@ { \ ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER(); \ ARM_COMPUTE_LOG_STREAM("CORE", log_level, ss); \ - } while(false) + } while (false) /** Log information level message to the core system logger * @@ -89,7 +89,7 @@ { \ ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER(); \ ARM_COMPUTE_LOG_MSG_CORE(arm_compute::logging::LogLevel::INFO, msg); \ - } while(false) + } while (false) /** Log information level formatted message to the core system logger * @@ -101,7 +101,7 @@ { \ 
ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER(); \ ARM_COMPUTE_LOG_MSG_WITH_FORMAT_CORE(arm_compute::logging::LogLevel::INFO, #fmt, __VA_ARGS__); \ - } while(false) + } while (false) /** Log information level stream to the core system logger * @@ -112,6 +112,6 @@ { \ ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER(); \ ARM_COMPUTE_LOG_STREAM_CORE(arm_compute::logging::LogLevel::INFO, ss); \ - } while(false) + } while (false) #endif /* ARM_COMPUTE_LOGGING_MACROS_H */ diff --git a/arm_compute/core/PixelValue.h b/arm_compute/core/PixelValue.h index 790f58a793..0b4df4f2e2 100644 --- a/arm_compute/core/PixelValue.h +++ b/arm_compute/core/PixelValue.h @@ -24,8 +24,8 @@ #ifndef ARM_COMPUTE_PIXELVALUE_H #define ARM_COMPUTE_PIXELVALUE_H -#include "arm_compute/core/Types.h" #include "arm_compute/core/QuantizationInfo.h" +#include "arm_compute/core/Types.h" #include @@ -36,11 +36,7 @@ class PixelValue { public: /** Default constructor: value initialized to 0 */ - PixelValue() noexcept - : value - { - int64_t(0) - } + PixelValue() noexcept : value{int64_t(0)} { } /** Initialize the union with a pixel value of chosen datatype @@ -49,10 +45,9 @@ public: * @param[in] datatype DataType that @p v have to be stored * @param[in] qinfo (Optional) QuantizationInfo to apply in case of quantized data types to @p v */ - PixelValue(double v, DataType datatype, QuantizationInfo qinfo = QuantizationInfo()) - : PixelValue() + PixelValue(double v, DataType datatype, QuantizationInfo qinfo = QuantizationInfo()) : PixelValue() { - switch(datatype) + switch (datatype) { case DataType::U8: value.u8 = static_cast(v); @@ -112,8 +107,7 @@ public: * * @param[in] v S8 value. */ - PixelValue(int8_t v) - : PixelValue() + PixelValue(int8_t v) : PixelValue() { value.s8 = v; } @@ -121,8 +115,7 @@ public: * * @param[in] v U8 value. */ - PixelValue(uint8_t v) - : PixelValue() + PixelValue(uint8_t v) : PixelValue() { value.u8 = v; } @@ -130,8 +123,7 @@ public: * * @param[in] v U16 value. */ - PixelValue(uint16_t v) - : PixelValue() + PixelValue(uint16_t v) : PixelValue() { value.u16 = v; } @@ -139,8 +131,7 @@ public: * * @param[in] v S16 value. */ - PixelValue(int16_t v) - : PixelValue() + PixelValue(int16_t v) : PixelValue() { value.s16 = v; } @@ -148,8 +139,7 @@ public: * * @param[in] v U32 value. */ - PixelValue(uint32_t v) - : PixelValue() + PixelValue(uint32_t v) : PixelValue() { value.u32 = v; } @@ -157,8 +147,7 @@ public: * * @param[in] v S32 value. */ - PixelValue(int32_t v) - : PixelValue() + PixelValue(int32_t v) : PixelValue() { value.s32 = v; } @@ -167,8 +156,7 @@ public: * * @param[in] v U64 value. */ - PixelValue(uint64_t v) - : PixelValue() + PixelValue(uint64_t v) : PixelValue() { value.u64 = v; } @@ -176,8 +164,7 @@ public: * * @param[in] v S64 value. */ - PixelValue(int64_t v) - : PixelValue() + PixelValue(int64_t v) : PixelValue() { value.s64 = v; } @@ -185,8 +172,7 @@ public: * * @param[in] v F16 value. */ - PixelValue(bfloat16 v) - : PixelValue() + PixelValue(bfloat16 v) : PixelValue() { value.bf16 = v; } @@ -194,8 +180,7 @@ public: * * @param[in] v F16 value. */ - PixelValue(half v) - : PixelValue() + PixelValue(half v) : PixelValue() { value.f16 = v; } @@ -203,8 +188,7 @@ public: * * @param[in] v F32 value. */ - PixelValue(float v) - : PixelValue() + PixelValue(float v) : PixelValue() { value.f32 = v; } @@ -212,8 +196,7 @@ public: * * @param[in] v F64 value. 
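For illustration, every typed constructor above delegates to PixelValue() first, so the union is fully zero-initialized before exactly one member is written. A minimal usage sketch, assuming the matching typed get() overload that this header declares further down (the 0.5f border value is invented):

    arm_compute::PixelValue border(0.5f); // stored into value.f32
    float restored = 0.f;
    border.get(restored);                 // reads value.f32 back; restored == 0.5f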
*/ - PixelValue(double v) - : PixelValue() + PixelValue(double v) : PixelValue() { value.f64 = v; } @@ -221,23 +204,23 @@ public: * Use the field corresponding to the image format */ union - { - uint64_t u64; /**< Single channel U64 */ - int64_t s64; /**< Single channel S64 */ - uint8_t rgb[3]; /**< 3 channels: RGB888 */ - uint8_t yuv[3]; /**< 3 channels: Any YUV format */ - uint8_t rgbx[4]; /**< 4 channels: RGBX8888 */ - double f64; /**< Single channel double */ - float f32; /**< Single channel float 32 */ - half f16; /**< Single channel F16 */ - bfloat16 bf16; /**< Single channel brain floating-point number */ - uint8_t u8; /**< Single channel U8 */ - int8_t s8; /**< Single channel S8 */ - uint16_t u16; /**< Single channel U16 */ - int16_t s16; /**< Single channel S16 */ - uint32_t u32; /**< Single channel U32 */ - int32_t s32; /**< Single channel S32 */ - } value; + { + uint64_t u64; /**< Single channel U64 */ + int64_t s64; /**< Single channel S64 */ + uint8_t rgb[3]; /**< 3 channels: RGB888 */ + uint8_t yuv[3]; /**< 3 channels: Any YUV format */ + uint8_t rgbx[4]; /**< 4 channels: RGBX8888 */ + double f64; /**< Single channel double */ + float f32; /**< Single channel float 32 */ + half f16; /**< Single channel F16 */ + bfloat16 bf16; /**< Single channel brain floating-point number */ + uint8_t u8; /**< Single channel U8 */ + int8_t s8; /**< Single channel S8 */ + uint16_t u16; /**< Single channel U16 */ + int16_t s16; /**< Single channel S16 */ + uint32_t u32; /**< Single channel U32 */ + int32_t s32; /**< Single channel S32 */ + } value; /** Interpret the pixel value as a U8 * * @param[out] v Returned value diff --git a/arm_compute/core/QuantizationInfo.h b/arm_compute/core/QuantizationInfo.h index 8fa513eee1..471b8c57ab 100644 --- a/arm_compute/core/QuantizationInfo.h +++ b/arm_compute/core/QuantizationInfo.h @@ -26,6 +26,7 @@ #include "arm_compute/core/Rounding.h" #include "arm_compute/core/utils/misc/Utility.h" + #include "support/ToolchainSupport.h" #include @@ -41,8 +42,7 @@ using qasymm16_t = uint16_t; /**< 16 bit quantized asymmetric scalar value struct UniformQuantizationInfo { /** Default constructor */ - UniformQuantizationInfo() - : scale(0.f), offset(0) + UniformQuantizationInfo() : scale(0.f), offset(0) { } /** Constructor @@ -50,8 +50,7 @@ struct UniformQuantizationInfo * @param[in] scale Quantization scale * @param[in] offset Quantization offset */ - UniformQuantizationInfo(float scale, int32_t offset) - : scale(scale), offset(offset) + UniformQuantizationInfo(float scale, int32_t offset) : scale(scale), offset(offset) { } /** Checks if the scale and offset are both zero */ @@ -69,9 +68,7 @@ class QuantizationInfo { public: /** Default constructor */ - QuantizationInfo() noexcept - : _scale(), - _offset() + QuantizationInfo() noexcept : _scale(), _offset() { } /** Construct quantization info. @@ -80,8 +77,7 @@ public: * * @param[in] scale Scale. */ - QuantizationInfo(float scale) - : _scale(1, scale), _offset() + QuantizationInfo(float scale) : _scale(1, scale), _offset() { } /** Construct quantization info. @@ -91,8 +87,7 @@ public: * @param[in] scale Scale. * @param[in] offset Offset. */ - QuantizationInfo(float scale, int offset) - : _scale(1, scale), _offset(1, offset) + QuantizationInfo(float scale, int offset) : _scale(1, scale), _offset(1, offset) { } /** Construct quantization info. @@ -101,8 +96,7 @@ public: * * @param[in] scale Scale. 
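All the asymmetric quantization helpers that follow implement the same affine mapping: q = round(v / scale) + offset on the way in, and v = scale * (q - offset) on the way out. A small worked sketch (scale and offset are invented values):

    using namespace arm_compute;

    const UniformQuantizationInfo uqinfo(0.5f, 10);   // scale 0.5, zero-point 10
    const uint8_t q = quantize_qasymm8(2.0f, uqinfo); // round(2.0 / 0.5) + 10 = 14
    const float   v = dequantize_qasymm8(q, QuantizationInfo(0.5f, 10)); // 0.5 * (14 - 10) = 2.0f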
*/ - QuantizationInfo(std::vector<float> scale) - : _scale(scale), _offset() + QuantizationInfo(std::vector<float> scale) : _scale(scale), _offset() { } /** Construct quantization info. @@ -112,8 +106,7 @@ * * @param[in] scale Scale. * @param[in] offset Offset. */ - QuantizationInfo(std::vector<float> scale, std::vector<int32_t> offset) - : _scale(scale), _offset(offset) + QuantizationInfo(std::vector<float> scale, std::vector<int32_t> offset) : _scale(scale), _offset(offset) { } /** Scale vector accessor @@ -208,8 +201,7 @@ inline bool operator!=(const UniformQuantizationInfo &lhs, const UniformQuantiza template <typename QUANTIZED_TYPE> struct Qasymm8QuantizationHelper { - static_assert(std::is_same<QUANTIZED_TYPE, uint8_t>::value - || std::is_same<QUANTIZED_TYPE, int8_t>::value, + static_assert(std::is_same<QUANTIZED_TYPE, uint8_t>::value || std::is_same<QUANTIZED_TYPE, int8_t>::value, "quantized type should be either uint8_t or int8_t."); /** Quantize a value given a 8-bit asymmetric quantization scheme @@ -234,9 +226,10 @@ struct Qasymm8QuantizationHelper * * @return Quantized value */ - static inline QUANTIZED_TYPE quantize(float value, const UniformQuantizationInfo &qinfo, RoundingPolicy rounding_policy) + static inline QUANTIZED_TYPE + quantize(float value, const UniformQuantizationInfo &qinfo, RoundingPolicy rounding_policy) { - if(rounding_policy == RoundingPolicy::TO_NEAREST_UP) + if (rounding_policy == RoundingPolicy::TO_NEAREST_UP) { return quantize(value, qinfo); } @@ -254,7 +247,8 @@ struct Qasymm8QuantizationHelper * * @return Quantized value */ - static inline QUANTIZED_TYPE quantize(float value, const QuantizationInfo &qinfo, RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP) + static inline QUANTIZED_TYPE + quantize(float value, const QuantizationInfo &qinfo, RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP) { const UniformQuantizationInfo uqinfo = qinfo.uniform(); ARM_COMPUTE_ERROR_ON(uqinfo.scale == 0); @@ -297,7 +291,8 @@ struct Qasymm8QuantizationHelper * @return Quantized value */ template <typename INFO_TYPE> -inline uint8_t quantize_qasymm8(float value, const INFO_TYPE &qinfo, RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP) +inline uint8_t +quantize_qasymm8(float value, const INFO_TYPE &qinfo, RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP) { return Qasymm8QuantizationHelper<uint8_t>::quantize(value, qinfo, rounding_policy); } @@ -311,7 +306,9 @@ inline uint8_t quantize_qasymm8(float value, const INFO_TYPE &qinfo, RoundingPol * @return Quantized value */ template <typename INFO_TYPE> -inline int8_t quantize_qasymm8_signed(float value, const INFO_TYPE &qinfo, RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP) +inline int8_t quantize_qasymm8_signed(float value, + const INFO_TYPE &qinfo, + RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP) { return Qasymm8QuantizationHelper<int8_t>::quantize(value, qinfo, rounding_policy); } @@ -441,7 +438,9 @@ inline float dequantize(uint16_t value, float scale, int32_t offset) * * @return Quantized value */ -inline int16_t quantize_qsymm16(float value, const UniformQuantizationInfo &qinfo, RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP) +inline int16_t quantize_qsymm16(float value, + const UniformQuantizationInfo &qinfo, + RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP) { int quantized = arm_compute::round(value / qinfo.scale, rounding_policy); quantized = arm_compute::utility::clamp<int, int16_t>(quantized); @@ -492,7 +491,9 @@ inline float dequantize_qsymm16(int16_t value, const QuantizationInfo &qinfo) * * @return Quantized value */ -inline uint16_t quantize_qasymm16(float value, const UniformQuantizationInfo &qinfo, RoundingPolicy
rounding_policy = RoundingPolicy::TO_NEAREST_UP) +inline uint16_t quantize_qasymm16(float value, + const UniformQuantizationInfo &qinfo, + RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP) { int quantized = arm_compute::round(value / qinfo.scale, rounding_policy) + qinfo.offset; quantized = arm_compute::utility::clamp(quantized); @@ -565,7 +566,8 @@ inline float dequantize_qasymm16(uint16_t value, const QuantizationInfo &qinfo) * z_n = - z_i * s_i / s_o + z_o * */ -inline UniformQuantizationInfo compute_requantization_scale_offset(const UniformQuantizationInfo &uqinfo_in, const UniformQuantizationInfo &uqinfo_out) +inline UniformQuantizationInfo compute_requantization_scale_offset(const UniformQuantizationInfo &uqinfo_in, + const UniformQuantizationInfo &uqinfo_out) { float scale_to_apply = uqinfo_out.scale; int32_t offset_to_apply = uqinfo_out.offset; diff --git a/arm_compute/core/Rounding.h b/arm_compute/core/Rounding.h index b6817b5107..30a5a0fe9d 100644 --- a/arm_compute/core/Rounding.h +++ b/arm_compute/core/Rounding.h @@ -42,5 +42,5 @@ enum class RoundingPolicy * @return Rounded value of the argument x. */ int round(float x, RoundingPolicy rounding_policy); -} +} // namespace arm_compute #endif /*ARM_COMPUTE_ROUNDING_H */ diff --git a/arm_compute/core/Size2D.h b/arm_compute/core/Size2D.h index f3e9bea4c7..672b392050 100644 --- a/arm_compute/core/Size2D.h +++ b/arm_compute/core/Size2D.h @@ -41,9 +41,7 @@ public: * @param[in] w Width of the image or rectangle * @param[in] h Height of the image or rectangle */ - Size2D(size_t w, size_t h) noexcept - : width(w), - height(h) + Size2D(size_t w, size_t h) noexcept : width(w), height(h) { } /** The area of the image or rectangle calculated as (width * height) @@ -90,5 +88,5 @@ public: size_t width = {}; /**< Width of the image region or rectangle */ size_t height = {}; /**< Height of the image region or rectangle */ }; -} +} // namespace arm_compute #endif /*ARM_COMPUTE_SIZE2D_H */ diff --git a/arm_compute/core/Size3D.h b/arm_compute/core/Size3D.h index 4241ed4f7e..e2dc6fe012 100644 --- a/arm_compute/core/Size3D.h +++ b/arm_compute/core/Size3D.h @@ -40,8 +40,7 @@ public: * @param[in] h Height of the 3D shape or object * @param[in] d Depth of the 3D shape or object */ - Size3D(size_t w, size_t h, size_t d) noexcept - : width(w), height(h), depth(d) + Size3D(size_t w, size_t h, size_t d) noexcept : width(w), height(h), depth(d) { } diff --git a/arm_compute/core/Steps.h b/arm_compute/core/Steps.h index 208fc4b294..6b261becc0 100644 --- a/arm_compute/core/Steps.h +++ b/arm_compute/core/Steps.h @@ -45,8 +45,7 @@ public: * @param[in] steps Values to initialize the steps. */ template - Steps(Ts... steps) - : Dimensions{ steps... } + Steps(Ts... steps) : Dimensions{steps...} { // Initialize empty dimensions to 1 std::fill(_id.begin() + _num_dimensions, _id.end(), 1); @@ -62,5 +61,5 @@ public: /** Default destructor */ ~Steps() = default; }; -} +} // namespace arm_compute #endif /*ARM_COMPUTE_STEPS_H*/ diff --git a/arm_compute/core/Strides.h b/arm_compute/core/Strides.h index b582d066f7..627b219987 100644 --- a/arm_compute/core/Strides.h +++ b/arm_compute/core/Strides.h @@ -43,8 +43,7 @@ public: * @param[in] strides Values to initialize the strides. */ template - constexpr Strides(Ts... strides) - : Dimensions{ strides... } + constexpr Strides(Ts... 
strides) : Dimensions{strides...} { } /** Allow instances of this class to be copy constructed */ diff --git a/arm_compute/core/SubTensorInfo.h b/arm_compute/core/SubTensorInfo.h index 21703b0d93..7a3ee2cfd0 100644 --- a/arm_compute/core/SubTensorInfo.h +++ b/arm_compute/core/SubTensorInfo.h @@ -24,10 +24,9 @@ #ifndef ARM_COMPUTE_SUBTENSORINFO_H #define ARM_COMPUTE_SUBTENSORINFO_H -#include "arm_compute/core/ITensorInfo.h" - #include "arm_compute/core/Coordinates.h" #include "arm_compute/core/Helpers.h" +#include "arm_compute/core/ITensorInfo.h" #include "arm_compute/core/Strides.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/TensorShape.h" @@ -73,7 +72,7 @@ public: // Inherited methods overridden: std::unique_ptr clone() const override; - ITensorInfo &set_data_type(DataType data_type) override + ITensorInfo &set_data_type(DataType data_type) override { ARM_COMPUTE_ERROR_ON(_parent == nullptr); _parent->set_data_type(data_type); @@ -143,7 +142,7 @@ public: return _parent->offset_element_in_bytes(_coords); } int32_t offset_element_in_bytes(const Coordinates &pos) const override; - size_t element_size() const override + size_t element_size() const override { ARM_COMPUTE_ERROR_ON(_parent == nullptr); return _parent->element_size(); @@ -227,7 +226,7 @@ public: { ARM_COMPUTE_ERROR_ON(_parent == nullptr); // Check if subtensor is valid if parent is configured - if(_parent->tensor_shape().total_size() != 0) + if (_parent->tensor_shape().total_size() != 0) { ARM_COMPUTE_ERROR_ON_INVALID_SUBTENSOR_VALID_REGION(_parent->valid_region(), valid_region); } diff --git a/arm_compute/core/TensorInfo.h b/arm_compute/core/TensorInfo.h index e738a797b2..b18f750427 100644 --- a/arm_compute/core/TensorInfo.h +++ b/arm_compute/core/TensorInfo.h @@ -24,15 +24,14 @@ #ifndef ARM_COMPUTE_TENSORINFO_H #define ARM_COMPUTE_TENSORINFO_H -#include "arm_compute/core/ITensorInfo.h" - -#include "ITensorInfo.h" #include "arm_compute/core/Coordinates.h" #include "arm_compute/core/Helpers.h" +#include "arm_compute/core/ITensorInfo.h" #include "arm_compute/core/Strides.h" #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" +#include "ITensorInfo.h" #include #include @@ -112,7 +111,10 @@ public: * @param[in] data_type Data type to use for each tensor element * @param[in] quantization_info The quantization settings for the tensor data. */ - TensorInfo(const TensorShape &tensor_shape, size_t num_channels, DataType data_type, QuantizationInfo quantization_info); + TensorInfo(const TensorShape &tensor_shape, + size_t num_channels, + DataType data_type, + QuantizationInfo quantization_info); /** Initialize the tensor info with just a format. * @@ -136,7 +138,11 @@ public: * @param[in] offset_first_element_in_bytes Offset in bytes from the beginning of memory allocation to access the first element. * @param[in] total_size_in_bytes Size in bytes of the memory allocation (including the offset to the first element). */ - void init(const TensorShape &tensor_shape, Format format, const Strides &strides_in_bytes, size_t offset_first_element_in_bytes, size_t total_size_in_bytes); + void init(const TensorShape &tensor_shape, + Format format, + const Strides &strides_in_bytes, + size_t offset_first_element_in_bytes, + size_t total_size_in_bytes); /** Initialize the tensor info with just a format. * @@ -164,8 +170,12 @@ public: * @param[in] offset_first_element_in_bytes Offset in bytes from the beginning of memory allocation to access the first element. 
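As a quick sketch of the overloads being reflowed here, most call sites build the metadata from just a shape, a channel count and a data type, letting strides and offsets be derived automatically (the concrete sizes below are invented):

    using namespace arm_compute;

    const TensorInfo info(TensorShape(32U, 64U), 1 /* num_channels */, DataType::F32);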
* @param[in] total_size_in_bytes Size in bytes of the memory allocation (including the offset to the first element). */ - void init(const TensorShape &tensor_shape, size_t num_channels, DataType data_type, const Strides &strides_in_bytes, size_t offset_first_element_in_bytes, - size_t total_size_in_bytes); + void init(const TensorShape &tensor_shape, + size_t num_channels, + DataType data_type, + const Strides &strides_in_bytes, + size_t offset_first_element_in_bytes, + size_t total_size_in_bytes); /** Initialize the metadata structure for the given tensor shape and single-plane format, (Padding is automatically calculated) * * @note The padding used by this method is really conservative so that the tensor can be used for most functions. @@ -191,19 +201,19 @@ public: // Inherited methods overridden: std::unique_ptr clone() const override; - ITensorInfo &set_data_type(DataType data_type) override; - ITensorInfo &set_num_channels(int num_channels) override; - ITensorInfo &set_format(Format format) override; - ITensorInfo &set_tensor_shape(const TensorShape &shape) override; - ITensorInfo &set_tensor_dims_state(const TensorDimsState &state) override; - ITensorInfo &set_quantization_info(const QuantizationInfo &quantization_info) override; - ITensorInfo &set_data_layout(const DataLayout &data_layout) override; - ITensorInfo &reset_padding() override; - bool auto_padding() override; - ITensorInfo &set_lock_paddings(bool flag) override; - bool lock_paddings() const override; - bool extend_padding(const PaddingSize &padding) override; - size_t dimension(size_t index) const override + ITensorInfo &set_data_type(DataType data_type) override; + ITensorInfo &set_num_channels(int num_channels) override; + ITensorInfo &set_format(Format format) override; + ITensorInfo &set_tensor_shape(const TensorShape &shape) override; + ITensorInfo &set_tensor_dims_state(const TensorDimsState &state) override; + ITensorInfo &set_quantization_info(const QuantizationInfo &quantization_info) override; + ITensorInfo &set_data_layout(const DataLayout &data_layout) override; + ITensorInfo &reset_padding() override; + bool auto_padding() override; + ITensorInfo &set_lock_paddings(bool flag) override; + bool lock_paddings() const override; + bool extend_padding(const PaddingSize &padding) override; + size_t dimension(size_t index) const override { return _tensor_shape[index]; } @@ -220,7 +230,7 @@ public: return _offset_first_element_in_bytes; } int32_t offset_element_in_bytes(const Coordinates &pos) const override; - size_t element_size() const override + size_t element_size() const override { return data_size_from_type(_data_type) * _num_channels; } @@ -266,7 +276,8 @@ public: } bool is_dynamic() const override { - return std::find(std::cbegin(_dims_state), std::cend(_dims_state), get_dynamic_state_value()) != std::cend(_dims_state); + return std::find(std::cbegin(_dims_state), std::cend(_dims_state), get_dynamic_state_value()) != + std::cend(_dims_state); } bool are_values_constant() const override { @@ -343,11 +354,15 @@ private: */ inline bool operator==(const TensorInfo &lhs, const TensorInfo &rhs) { - return (lhs._total_size == rhs._total_size) && (lhs._offset_first_element_in_bytes == rhs._offset_first_element_in_bytes) && (lhs._strides_in_bytes == rhs._strides_in_bytes) - && (lhs._num_channels == rhs._num_channels) && (lhs._tensor_shape == rhs._tensor_shape) && (lhs._dims_state == rhs._dims_state) && (lhs._data_type == rhs._data_type) && (lhs._format == rhs._format) - && (lhs._is_resizable == rhs._is_resizable) && 
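Since each set_* override above returns an ITensorInfo reference, metadata can be adjusted fluently; a hedged sketch with invented values:

    TensorInfo info;
    info.set_data_type(DataType::QASYMM8)
        .set_tensor_shape(TensorShape(16U, 16U))
        .set_quantization_info(QuantizationInfo(0.25f, 128));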
(lhs._valid_region == rhs._valid_region) && (lhs._padding == rhs._padding) && (lhs._quantization_info == rhs._quantization_info) - && (lhs._data_layout == rhs._data_layout) && (lhs._are_values_constant == rhs._are_values_constant) - && (lhs._id == rhs._id); + return (lhs._total_size == rhs._total_size) && + (lhs._offset_first_element_in_bytes == rhs._offset_first_element_in_bytes) && + (lhs._strides_in_bytes == rhs._strides_in_bytes) && (lhs._num_channels == rhs._num_channels) && + (lhs._tensor_shape == rhs._tensor_shape) && (lhs._dims_state == rhs._dims_state) && + (lhs._data_type == rhs._data_type) && (lhs._format == rhs._format) && + (lhs._is_resizable == rhs._is_resizable) && (lhs._valid_region == rhs._valid_region) && + (lhs._padding == rhs._padding) && (lhs._quantization_info == rhs._quantization_info) && + (lhs._data_layout == rhs._data_layout) && (lhs._are_values_constant == rhs._are_values_constant) && + (lhs._id == rhs._id); } } // namespace arm_compute #endif /*ARM_COMPUTE_TENSORINFO_H */ diff --git a/arm_compute/core/TensorShape.h b/arm_compute/core/TensorShape.h index 4c9186ac64..c1707e262f 100644 --- a/arm_compute/core/TensorShape.h +++ b/arm_compute/core/TensorShape.h @@ -44,11 +44,10 @@ public: * @param[in] dims Values to initialize the dimensions. */ template - TensorShape(Ts... dims) - : Dimensions{ dims... } + TensorShape(Ts... dims) : Dimensions{dims...} { // Initialize unspecified dimensions to 1 - if(_num_dimensions > 0) + if (_num_dimensions > 0) { std::fill(_id.begin() + _num_dimensions, _id.end(), 1); } @@ -79,7 +78,7 @@ public: TensorShape &set(size_t dimension, size_t value, bool apply_dim_correction = true, bool increase_dim_unit = true) { // Clear entire shape if one dimension is zero - if(value == 0) + if (value == 0) { _num_dimensions = 0; std::fill(_id.begin(), _id.end(), 0); @@ -94,7 +93,7 @@ public: Dimensions::set(dimension, value, increase_dim_unit); // Correct number dimensions to ignore trailing dimensions of size 1 - if(apply_dim_correction) + if (apply_dim_correction) { apply_dimension_correction(); } @@ -123,7 +122,7 @@ public: std::fill(_id.begin() + _num_dimensions, _id.end(), 1); // Correct number dimensions to ignore trailing dimensions of size 1 - if(apply_dim_correction) + if (apply_dim_correction) { apply_dimension_correction(); } @@ -212,26 +211,26 @@ public: * @return The broadcasted shape or an empty shape if the shapes are not broadcast compatible. */ template - static TensorShape broadcast_shape(const Shapes &... shapes) + static TensorShape broadcast_shape(const Shapes &...shapes) { TensorShape bc_shape; - auto broadcast = [&bc_shape](const TensorShape & other) + auto broadcast = [&bc_shape](const TensorShape &other) { - if(bc_shape.num_dimensions() == 0) + if (bc_shape.num_dimensions() == 0) { bc_shape = other; } - else if(other.num_dimensions() != 0) + else if (other.num_dimensions() != 0) { - for(size_t d = 0; d < TensorShape::num_max_dimensions; ++d) + for (size_t d = 0; d < TensorShape::num_max_dimensions; ++d) { const size_t dim_min = std::min(bc_shape[d], other[d]); const size_t dim_max = std::max(bc_shape[d], other[d]); - if((dim_min != 1) && (dim_min != dim_max)) + if ((dim_min != 1) && (dim_min != dim_max)) { - bc_shape = TensorShape{ 0U }; + bc_shape = TensorShape{0U}; break; } @@ -249,9 +248,9 @@ private: /** Remove trailing dimensions of size 1 from the reported number of dimensions. 
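The broadcast rule implemented by the lambda above is: for each dimension the two sizes must either match or one of them must be 1; any other mismatch yields an empty shape. For example (shapes invented):

    const TensorShape a(8U, 1U, 3U);
    const TensorShape b(8U, 5U, 3U);
    const TensorShape ok = TensorShape::broadcast_shape(a, b);  // (8, 5, 3)

    const TensorShape c(7U, 5U, 3U);
    const TensorShape bad = TensorShape::broadcast_shape(a, c); // empty shape: 8 vs 7, neither is 1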
*/ void apply_dimension_correction() { - for(int i = static_cast(_num_dimensions) - 1; i > 0; --i) + for (int i = static_cast(_num_dimensions) - 1; i > 0; --i) { - if(_id[i] == 1) + if (_id[i] == 1) { --_num_dimensions; } @@ -262,5 +261,5 @@ private: } } }; -} +} // namespace arm_compute #endif /*ARM_COMPUTE_TENSORSHAPE_H*/ diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h index 9264cefe3e..6b51af17d4 100644 --- a/arm_compute/core/Types.h +++ b/arm_compute/core/Types.h @@ -59,13 +59,13 @@ /** The following symbols have been moved to: * MatMulInfo */ -#include "arm_compute/function_info/MatMulInfo.h" - #include "arm_compute/core/Coordinates.h" #include "arm_compute/core/Size2D.h" #include "arm_compute/core/Size3D.h" #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/utils/misc/Macros.h" +#include "arm_compute/function_info/MatMulInfo.h" + #include "support/Bfloat16.h" #include @@ -143,8 +143,7 @@ enum class ComparisonOperation struct ValidRegion { /** Default constructor */ - ValidRegion() - : anchor{}, shape{} + ValidRegion() : anchor{}, shape{} { } @@ -165,8 +164,7 @@ struct ValidRegion * @param[in] a_shape Shape of the valid region. * */ - ValidRegion(const Coordinates &an_anchor, const TensorShape &a_shape) - : anchor{ an_anchor }, shape{ a_shape } + ValidRegion(const Coordinates &an_anchor, const TensorShape &a_shape) : anchor{an_anchor}, shape{a_shape} { anchor.set_num_dimensions(std::max(anchor.num_dimensions(), shape.num_dimensions())); } @@ -179,7 +177,7 @@ struct ValidRegion * */ ValidRegion(const Coordinates &an_anchor, const TensorShape &a_shape, size_t num_dimensions) - : anchor{ an_anchor }, shape{ a_shape } + : anchor{an_anchor}, shape{a_shape} { ARM_COMPUTE_ERROR_ON(num_dimensions < std::max(anchor.num_dimensions(), shape.num_dimensions())); anchor.set_num_dimensions(num_dimensions); @@ -241,32 +239,24 @@ enum class BorderMode struct BorderSize { /** Empty border, i.e. 
no border */ - constexpr BorderSize() noexcept - : top{ 0 }, - right{ 0 }, - bottom{ 0 }, - left{ 0 } + constexpr BorderSize() noexcept : top{0}, right{0}, bottom{0}, left{0} { } /** Border with equal size around the 2D plane */ - explicit constexpr BorderSize(unsigned int size) noexcept - : top{ size }, - right{ size }, - bottom{ size }, - left{ size } + explicit constexpr BorderSize(unsigned int size) noexcept : top{size}, right{size}, bottom{size}, left{size} { } /** Border with same size for top/bottom and left/right */ constexpr BorderSize(unsigned int top_bottom, unsigned int left_right) - : top{ top_bottom }, right{ left_right }, bottom{ top_bottom }, left{ left_right } + : top{top_bottom}, right{left_right}, bottom{top_bottom}, left{left_right} { } /** Border with different sizes */ constexpr BorderSize(unsigned int top, unsigned int right, unsigned int bottom, unsigned int left) - : top{ top }, right{ right }, bottom{ bottom }, left{ left } + : top{top}, right{right}, bottom{bottom}, left{left} { } @@ -371,7 +361,7 @@ enum class InterpolationPolicy { NEAREST_NEIGHBOR, /**< Output values are defined to match the source pixel whose center is nearest to the sample position */ BILINEAR, /**< Output values are defined by bilinear interpolation between the pixels */ - AREA, /**< Output values are determined by averaging the source pixels whose areas fall under the area of the destination pixel, projected onto the source image */ + AREA, /**< Output values are determined by averaging the source pixels whose areas fall under the area of the destination pixel, projected onto the source image */ }; /** Bilinear Interpolation method used by LKTracker */ @@ -478,12 +468,12 @@ enum class NormType */ struct DetectionWindow { - uint16_t x{ 0 }; /**< Top-left x coordinate */ - uint16_t y{ 0 }; /**< Top-left y coordinate */ - uint16_t width{ 0 }; /**< Width of the detection window */ - uint16_t height{ 0 }; /**< Height of the detection window */ - uint16_t idx_class{ 0 }; /**< Index of the class */ - float score{ 0.f }; /**< Confidence value for the detection window */ + uint16_t x{0}; /**< Top-left x coordinate */ + uint16_t y{0}; /**< Top-left y coordinate */ + uint16_t width{0}; /**< Width of the detection window */ + uint16_t height{0}; /**< Height of the detection window */ + uint16_t idx_class{0}; /**< Index of the class */ + float score{0.f}; /**< Confidence value for the detection window */ }; /** Available pooling types */ @@ -520,12 +510,28 @@ public: * @param[in] im_width (Optional) Boxes whose centers (on the x axis) is beyond im_width will be filtered. Defaults to 1 * @param[in] im_height (Optional) Boxes whose centers (on the y axis) is beyond im_height will be filtered. 
Defaults to 1 */ - BoxNMSLimitInfo(float score_thresh = 0.05f, float nms = 0.3f, - int detections = 100, bool soft_nms_enabled = false, - NMSType soft_nms_method = NMSType::LINEAR, - float soft_nms_sigma = 0.5f, float soft_nms_min_score_thres = 0.001f, bool suppress_size = false, float min_size = 1.0f, float im_width = 1.0f, float im_height = 1.0f) - : _score_thresh(score_thresh), _nms(nms), _detections_per_im(detections), _soft_nms_enabled(soft_nms_enabled), _soft_nms_method(soft_nms_method), _soft_nms_sigma(soft_nms_sigma), - _soft_nms_min_score_thres(soft_nms_min_score_thres), _suppress_size(suppress_size), _min_size(min_size), _im_width(im_width), _im_height(im_height) + BoxNMSLimitInfo(float score_thresh = 0.05f, + float nms = 0.3f, + int detections = 100, + bool soft_nms_enabled = false, + NMSType soft_nms_method = NMSType::LINEAR, + float soft_nms_sigma = 0.5f, + float soft_nms_min_score_thres = 0.001f, + bool suppress_size = false, + float min_size = 1.0f, + float im_width = 1.0f, + float im_height = 1.0f) + : _score_thresh(score_thresh), + _nms(nms), + _detections_per_im(detections), + _soft_nms_enabled(soft_nms_enabled), + _soft_nms_method(soft_nms_method), + _soft_nms_sigma(soft_nms_sigma), + _soft_nms_min_score_thres(soft_nms_min_score_thres), + _suppress_size(suppress_size), + _min_size(min_size), + _im_width(im_width), + _im_height(im_height) { } /** Get the score threshold */ @@ -603,14 +609,13 @@ private: struct Padding2D { Padding2D() = default; - Padding2D(size_t left, size_t right, size_t top, size_t bottom) - : left(left), right(right), top(top), bottom(bottom) + Padding2D(size_t left, size_t right, size_t top, size_t bottom) : left(left), right(right), top(top), bottom(bottom) { } - size_t left = { 0 }; /**< Padding across the width dimension on the left, in elements. */ - size_t right = { 0 }; /**< Padding across the width dimension on the right, in elements. */ - size_t top = { 0 }; /**< Padding across the height dimension on the top, in elements. */ - size_t bottom = { 0 }; /**< Padding across the height dimension on the bottom, in elements. */ + size_t left = {0}; /**< Padding across the width dimension on the left, in elements. */ + size_t right = {0}; /**< Padding across the width dimension on the right, in elements. */ + size_t top = {0}; /**< Padding across the height dimension on the top, in elements. */ + size_t bottom = {0}; /**< Padding across the height dimension on the bottom, in elements. */ }; /** Padding information for 3D operations like Conv3d */ @@ -630,12 +635,12 @@ struct Padding3D { } - size_t left = { 0 }; /**< Padding across the width dimenstion on the left, in elements. */ - size_t right = { 0 }; /**< Padding across the width dimenstion on the right, in elements. */ - size_t top = { 0 }; /**< Padding across the height dimenstion on the top, in elements. */ - size_t bottom = { 0 }; /**< Padding across the height dimenstion on the bottom, in elements. */ - size_t front = { 0 }; /**< Padding across the depth dimenstion on the front, in elements. */ - size_t back = { 0 }; /**< Padding across the depth dimenstion on the back, in elements. */ + size_t left = {0}; /**< Padding across the width dimenstion on the left, in elements. */ + size_t right = {0}; /**< Padding across the width dimenstion on the right, in elements. */ + size_t top = {0}; /**< Padding across the height dimenstion on the top, in elements. */ + size_t bottom = {0}; /**< Padding across the height dimenstion on the bottom, in elements. 
*/ + size_t front = {0}; /**< Padding across the depth dimenstion on the front, in elements. */ + size_t back = {0}; /**< Padding across the depth dimenstion on the back, in elements. */ }; /** PriorBox layer info */ @@ -667,9 +672,15 @@ public: * @param[in] img_size (Optional) Image size. * @param[in] steps (Optional) Step values. */ - PriorBoxLayerInfo(const std::vector<float> &min_sizes, const std::vector<float> &variances, float offset, bool flip = true, bool clip = false, - const std::vector<float> &max_sizes = {}, const std::vector<float> &aspect_ratios = {}, - const Coordinates2D &img_size = Coordinates2D{ 0, 0 }, const std::array<float, 2> &steps = { { 0.f, 0.f } }) + PriorBoxLayerInfo(const std::vector<float> &min_sizes, + const std::vector<float> &variances, + float offset, + bool flip = true, + bool clip = false, + const std::vector<float> &max_sizes = {}, + const std::vector<float> &aspect_ratios = {}, + const Coordinates2D &img_size = Coordinates2D{0, 0}, + const std::array<float, 2> &steps = {{0.f, 0.f}}) : _min_sizes(min_sizes), _variances(variances), _offset(offset), @@ -681,22 +692,22 @@ public: _steps(steps) { _aspect_ratios.push_back(1.); - for(unsigned int i = 0; i < aspect_ratios.size(); ++i) + for (unsigned int i = 0; i < aspect_ratios.size(); ++i) { float ar = aspect_ratios[i]; bool already_exist = false; - for(auto ar_new : _aspect_ratios) + for (auto ar_new : _aspect_ratios) { - if(fabs(ar - ar_new) < 1e-6) + if (fabs(ar - ar_new) < 1e-6) { already_exist = true; break; } } - if(!already_exist) + if (!already_exist) { _aspect_ratios.push_back(ar); - if(flip) + if (flip) { _aspect_ratios.push_back(1.f / ar); } @@ -808,8 +819,16 @@ public: * @param[in] variance_encoded_in_target (Optional) If true, variance is encoded in target. Otherwise we need to adjust the predicted offset accordingly.Default set to false. * @param[in] eta (Optional) Eta. */ - DetectionOutputLayerInfo(int num_classes, bool share_location, DetectionOutputLayerCodeType code_type, int keep_top_k, float nms_threshold, int top_k = -1, int background_label_id = -1, - float confidence_threshold = std::numeric_limits<float>::lowest(), bool variance_encoded_in_target = false, float eta = 1) + DetectionOutputLayerInfo(int num_classes, + bool share_location, + DetectionOutputLayerCodeType code_type, + int keep_top_k, + float nms_threshold, + int top_k = -1, + int background_label_id = -1, + float confidence_threshold = std::numeric_limits<float>::lowest(), + bool variance_encoded_in_target = false, + float eta = 1) : _num_classes(num_classes), _share_location(share_location), _code_type(code_type), @@ -923,8 +942,15 @@ public: * @param[in] detection_per_class (Optional) Number of detection per class. Used in the Regular Non-Max-Suppression. Defaults to 100. * @param[in] dequantize_scores (Optional) If the scores need to be dequantized. Defaults to true.
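Tracing the PriorBoxLayerInfo constructor above: the stored ratio list always starts at 1, near-duplicates (within 1e-6) are dropped, and with flip enabled each kept ratio also contributes its reciprocal. A hedged sketch with invented numbers:

    const std::vector<float> min_sizes{30.f};
    const std::vector<float> variances{0.1f};
    const PriorBoxLayerInfo pb(min_sizes, variances, 0.5f /* offset */, true /* flip */,
                               false /* clip */, {} /* max_sizes */, {2.f, 2.f} /* aspect_ratios */);
    // Stored ratios: {1, 2, 0.5} -- the duplicate 2 is filtered out, 1/2 is added by flip.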
*/ - DetectionPostProcessLayerInfo(unsigned int max_detections, unsigned int max_classes_per_detection, float nms_score_threshold, float iou_threshold, unsigned int num_classes, - std::array scales_values, bool use_regular_nms = false, unsigned int detection_per_class = 100, bool dequantize_scores = true) + DetectionPostProcessLayerInfo(unsigned int max_detections, + unsigned int max_classes_per_detection, + float nms_score_threshold, + float iou_threshold, + unsigned int num_classes, + std::array scales_values, + bool use_regular_nms = false, + unsigned int detection_per_class = 100, + bool dequantize_scores = true) : _max_detections(max_detections), _max_classes_per_detection(max_classes_per_detection), _nms_score_threshold(nms_score_threshold), @@ -1240,8 +1266,14 @@ public: * @param[in] spatial_scale Spatial scale to be applied to the ROI coordinates and dimensions. * @param[in] sampling_ratio Number of samples to include in each pooling region (if set to zero, a ceil(roi_dims/pooling_dims)) */ - ROIPoolingLayerInfo(unsigned int pooled_width, unsigned int pooled_height, float spatial_scale, unsigned int sampling_ratio = 0) - : _pooled_width(pooled_width), _pooled_height(pooled_height), _spatial_scale(spatial_scale), _sampling_ratio(sampling_ratio) + ROIPoolingLayerInfo(unsigned int pooled_width, + unsigned int pooled_height, + float spatial_scale, + unsigned int sampling_ratio = 0) + : _pooled_width(pooled_width), + _pooled_height(pooled_height), + _spatial_scale(spatial_scale), + _sampling_ratio(sampling_ratio) { } /** Get the pooled width of the layer */ @@ -1288,10 +1320,24 @@ public: * @param[in] min_size (Optional)Size used to validate the anchors produced. Defaults to 16. * @param[in] values_per_roi (Optional)Values used to represent a ROI(Region of interest). Defaults to 4. */ - GenerateProposalsInfo(float im_width, float im_height, float im_scale, float spatial_scale = 1.0, int pre_nms_topN = 6000, int post_nms_topN = 300, float nms_thres = 0.7, float min_size = 16.0, + GenerateProposalsInfo(float im_width, + float im_height, + float im_scale, + float spatial_scale = 1.0, + int pre_nms_topN = 6000, + int post_nms_topN = 300, + float nms_thres = 0.7, + float min_size = 16.0, size_t values_per_roi = 4) - : _im_height(im_height), _im_width(im_width), _im_scale(im_scale), _spatial_scale(spatial_scale), _pre_nms_topN(pre_nms_topN), _post_nms_topN(post_nms_topN), _nms_thres(nms_thres), - _min_size(min_size), _values_per_roi(values_per_roi) + : _im_height(im_height), + _im_width(im_width), + _im_scale(im_scale), + _spatial_scale(spatial_scale), + _pre_nms_topN(pre_nms_topN), + _post_nms_topN(post_nms_topN), + _nms_thres(nms_thres), + _min_size(min_size), + _values_per_roi(values_per_roi) { } @@ -1417,11 +1463,20 @@ public: * @param[in] correct_transform_coords (Optional)Correct bounding box transform coordinates. Defaults to false * @param[in] bbox_xform_clip (Optional)Minimum bounding box width and height after bounding box transformation in log-space. 
Defaults to log(1000/16) */ - BoundingBoxTransformInfo(float img_width, float img_height, float scale, bool apply_scale = false, const std::array weights = { { 1.f, 1.f, 1.f, 1.f } }, bool correct_transform_coords = - false, - float bbox_xform_clip = - 4.135166556742356f) - : _img_width(img_width), _img_height(img_height), _scale(scale), _apply_scale(apply_scale), _correct_transform_coords(correct_transform_coords), _weights(weights), _bbox_xform_clip(bbox_xform_clip) + BoundingBoxTransformInfo(float img_width, + float img_height, + float scale, + bool apply_scale = false, + const std::array weights = {{1.f, 1.f, 1.f, 1.f}}, + bool correct_transform_coords = false, + float bbox_xform_clip = 4.135166556742356f) + : _img_width(img_width), + _img_height(img_height), + _scale(scale), + _apply_scale(apply_scale), + _correct_transform_coords(correct_transform_coords), + _weights(weights), + _bbox_xform_clip(bbox_xform_clip) { } @@ -1484,7 +1539,12 @@ public: * @param[in] is_scaled (Optional) Boolean that specifies if alpha will be scaled by the normalization size or not. * Should be false to follow [Krichevksy 2012]. */ - NormalizationLayerInfo(NormType type, uint32_t norm_size = 5, float alpha = 0.0001f, float beta = 0.5f, float kappa = 1.f, bool is_scaled = true) + NormalizationLayerInfo(NormType type, + uint32_t norm_size = 5, + float alpha = 0.0001f, + float beta = 0.5f, + float kappa = 1.f, + bool is_scaled = true) : _type(type), _norm_size(norm_size), _alpha(alpha), _beta(beta), _kappa(kappa), _is_scaled(is_scaled) { } @@ -1612,7 +1672,12 @@ class WeightsInfo public: /** Default constructor */ WeightsInfo() - : _are_reshaped(false), _kernel_width(0), _kernel_height(0), _num_kernels(0), _retain_internal_weights(false), _weight_format(arm_compute::WeightFormat::UNSPECIFIED) + : _are_reshaped(false), + _kernel_width(0), + _kernel_height(0), + _num_kernels(0), + _retain_internal_weights(false), + _weight_format(arm_compute::WeightFormat::UNSPECIFIED) { } /** Constructor @@ -1624,9 +1689,18 @@ public: * @param[in] retain_internal_weights (Optional) True if internal reshaped weights must be retained. Used for reconfiguration purposes. Default is false. * @param[in] weight_format (Optional) arm_gemm:WeightFormat enumeration requested by the user. Default is arm_compute::WeightFormat::UNSPECIFIED. */ - WeightsInfo(bool are_reshaped, unsigned int kernel_width, unsigned int kernel_height, unsigned int num_kernels, bool retain_internal_weights = false, - arm_compute::WeightFormat weight_format = arm_compute::WeightFormat::UNSPECIFIED) - : _are_reshaped(are_reshaped), _kernel_width(kernel_width), _kernel_height(kernel_height), _num_kernels(num_kernels), _retain_internal_weights(retain_internal_weights), _weight_format(weight_format) + WeightsInfo(bool are_reshaped, + unsigned int kernel_width, + unsigned int kernel_height, + unsigned int num_kernels, + bool retain_internal_weights = false, + arm_compute::WeightFormat weight_format = arm_compute::WeightFormat::UNSPECIFIED) + : _are_reshaped(are_reshaped), + _kernel_width(kernel_width), + _kernel_height(kernel_height), + _num_kernels(num_kernels), + _retain_internal_weights(retain_internal_weights), + _weight_format(weight_format) { } /** Flag which specifies if the weights tensor has been reshaped. 
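A minimal sketch of the constructor above, describing as-yet-unreshaped 3x3 weights with 64 output kernels (all values invented; the weight format is left at its UNSPECIFIED default):

    const WeightsInfo winfo(false /* are_reshaped */, 3U /* kernel_width */,
                            3U /* kernel_height */, 64U /* num_kernels */);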
@@ -1698,7 +1772,14 @@ class GEMMReshapeInfo final public: /** Default constructor */ GEMMReshapeInfo() - : _m(1), _n(1), _k(1), _mult_transpose1xW_width(1), _mult_interleave4x4_height(1), _depth_output_gemm3d(0), _reinterpret_input_as_3d(false), _broadcast_bias(false) + : _m(1), + _n(1), + _k(1), + _mult_transpose1xW_width(1), + _mult_interleave4x4_height(1), + _depth_output_gemm3d(0), + _reinterpret_input_as_3d(false), + _broadcast_bias(false) { } /** Constructor @@ -1714,9 +1795,22 @@ public: * to perform 1x1 convolutions with the NHWC data layout) * @param[in] broadcast_bias (Optional) Broadcast the shape of the bias tensor from a vector to a matrix. */ - GEMMReshapeInfo(int m, int n, int k, int mult_transpose1xW_width = 1, int mult_interleave4x4_height = 1, int depth_output_gemm3d = 0, bool reinterpret_input_as_3d = false, bool broadcast_bias = false) - : _m(m), _n(n), _k(k), _mult_transpose1xW_width(mult_transpose1xW_width), _mult_interleave4x4_height(mult_interleave4x4_height), _depth_output_gemm3d(depth_output_gemm3d), - _reinterpret_input_as_3d(reinterpret_input_as_3d), _broadcast_bias(broadcast_bias) + GEMMReshapeInfo(int m, + int n, + int k, + int mult_transpose1xW_width = 1, + int mult_interleave4x4_height = 1, + int depth_output_gemm3d = 0, + bool reinterpret_input_as_3d = false, + bool broadcast_bias = false) + : _m(m), + _n(n), + _k(k), + _mult_transpose1xW_width(mult_transpose1xW_width), + _mult_interleave4x4_height(mult_interleave4x4_height), + _depth_output_gemm3d(depth_output_gemm3d), + _reinterpret_input_as_3d(reinterpret_input_as_3d), + _broadcast_bias(broadcast_bias) { } /** Number of matrix A rows @@ -1806,11 +1900,11 @@ struct GEMMLHSMatrixInfo : m0(m), k0(k), v0(v), transpose(trans), interleave(inter) { } - unsigned int m0{ 1 }; /**< Number of rows processed by the matrix multiplication */ - unsigned int k0{ 1 }; /**< Number of partial accumulations performed by the matrix multiplication */ - unsigned int v0{ 1 }; /**< Number of vertical blocks of size (m0xk0) stored on the same output row */ - bool transpose{ true }; /**< True if the (m0xk0) block has to be transposed before been stored */ - bool interleave{ true }; /**< True if the v0 (m0xk0) blocks have to be interleaved in the output row */ + unsigned int m0{1}; /**< Number of rows processed by the matrix multiplication */ + unsigned int k0{1}; /**< Number of partial accumulations performed by the matrix multiplication */ + unsigned int v0{1}; /**< Number of vertical blocks of size (m0xk0) stored on the same output row */ + bool transpose{true}; /**< True if the (m0xk0) block has to be transposed before been stored */ + bool interleave{true}; /**< True if the v0 (m0xk0) blocks have to be interleaved in the output row */ }; /** GEMM RHS (Right Hand Side) matrix information */ @@ -1821,12 +1915,13 @@ struct GEMMRHSMatrixInfo : n0(n), k0(k), h0(h), transpose(trans), interleave(inter), export_to_cl_image(export_to_cl_img) { } - unsigned int n0{ 1 }; /**< Number of columns processed by the matrix multiplication */ - unsigned int k0{ 1 }; /**< Number of partial accumulations performed by the matrix multiplication */ - unsigned int h0{ 1 }; /**< Number of horizontal blocks of size (k0xn0) stored on the same output row */ - bool transpose{ true }; /**< True if the (k0xn0) block has to be transposed before been stored */ - bool interleave{ true }; /**< True if the h0 (k0xn0) blocks have to be interleaved in the output row */ - bool export_to_cl_image{ false }; /**< True if the reshaped rhs has to be exported to 
cl_image. n0 must be equal to 4 */ + unsigned int n0{1}; /**< Number of columns processed by the matrix multiplication */ + unsigned int k0{1}; /**< Number of partial accumulations performed by the matrix multiplication */ + unsigned int h0{1}; /**< Number of horizontal blocks of size (k0xn0) stored on the same output row */ + bool transpose{true}; /**< True if the (k0xn0) block has to be transposed before been stored */ + bool interleave{true}; /**< True if the h0 (k0xn0) blocks have to be interleaved in the output row */ + bool export_to_cl_image{ + false}; /**< True if the reshaped rhs has to be exported to cl_image. n0 must be equal to 4 */ }; class ITensorInfo; @@ -1842,16 +1937,23 @@ struct WinogradInfo * @param[in] conv_info Convolution info (Pads, strides) * @param[in] data_layout Data layout to use for the output tensor once the convolution has been applied */ - WinogradInfo(Size2D output_tile_sz, Size2D kernel_sz, Size2D input_dims, PadStrideInfo conv_info, DataLayout data_layout) - : output_tile_size(output_tile_sz), kernel_size(kernel_sz), input_dimensions(input_dims), convolution_info(conv_info), output_data_layout(data_layout) - { - } - - Size2D output_tile_size{}; /**< Width and height of the output tile */ - Size2D kernel_size{}; /**< Width and height of the kernel*/ - Size2D input_dimensions{}; /**< Width and height of the input tensor before the convolution is applied */ - PadStrideInfo convolution_info{}; /**< Convolution info (Pads, strides,...) */ - DataLayout output_data_layout{ DataLayout::NCHW }; /**< Data layout to use for the output tensor once the convolution has been applied (NCHW or NHWC) */ + WinogradInfo( + Size2D output_tile_sz, Size2D kernel_sz, Size2D input_dims, PadStrideInfo conv_info, DataLayout data_layout) + : output_tile_size(output_tile_sz), + kernel_size(kernel_sz), + input_dimensions(input_dims), + convolution_info(conv_info), + output_data_layout(data_layout) + { + } + + Size2D output_tile_size{}; /**< Width and height of the output tile */ + Size2D kernel_size{}; /**< Width and height of the kernel*/ + Size2D input_dimensions{}; /**< Width and height of the input tensor before the convolution is applied */ + PadStrideInfo convolution_info{}; /**< Convolution info (Pads, strides,...) 
*/ + DataLayout output_data_layout{ + DataLayout:: + NCHW}; /**< Data layout to use for the output tensor once the convolution has been applied (NCHW or NHWC) */ }; /** IO formatting information class*/ diff --git a/arm_compute/core/Utils.h b/arm_compute/core/Utils.h index c5b50167bf..a2146522f7 100644 --- a/arm_compute/core/Utils.h +++ b/arm_compute/core/Utils.h @@ -69,7 +69,7 @@ template inline void permute_strides(Dimensions &dimensions, const PermutationVector &perm) { const auto old_dim = utility::make_array::num_max_dimensions>(dimensions.begin(), dimensions.end()); - for(unsigned int i = 0; i < perm.num_dimensions(); ++i) + for (unsigned int i = 0; i < perm.num_dimensions(); ++i) { T dimension_val = old_dim[i]; dimensions.set(perm[i], dimension_val); @@ -87,7 +87,11 @@ inline void permute_strides(Dimensions &dimensions, const PermutationVector & * * @return PadStrideInfo for SAME padding */ -PadStrideInfo calculate_same_pad(TensorShape input_shape, TensorShape weights_shape, PadStrideInfo conv_info, DataLayout data_layout = DataLayout::NCHW, const Size2D &dilation = Size2D(1u, 1u), +PadStrideInfo calculate_same_pad(TensorShape input_shape, + TensorShape weights_shape, + PadStrideInfo conv_info, + DataLayout data_layout = DataLayout::NCHW, + const Size2D &dilation = Size2D(1u, 1u), const DimensionRoundingType &rounding_type = DimensionRoundingType::FLOOR); /** Returns expected width and height of the deconvolution's output tensor. @@ -100,8 +104,10 @@ PadStrideInfo calculate_same_pad(TensorShape input_shape, TensorShape weights_sh * * @return A pair with the new width in the first position and the new height in the second. */ -std::pair deconvolution_output_dimensions(unsigned int in_width, unsigned int in_height, - unsigned int kernel_width, unsigned int kernel_height, +std::pair deconvolution_output_dimensions(unsigned int in_width, + unsigned int in_height, + unsigned int kernel_width, + unsigned int kernel_height, const PadStrideInfo &pad_stride_info); /** Returns expected width and height of output scaled tensor depending on dimensions rounding mode. @@ -115,8 +121,10 @@ std::pair deconvolution_output_dimensions(unsigned i * * @return A pair with the new width in the first position and the new height in the second. */ -std::pair scaled_dimensions(int width, int height, - int kernel_width, int kernel_height, +std::pair scaled_dimensions(int width, + int height, + int kernel_width, + int kernel_height, const PadStrideInfo &pad_stride_info, const Size2D &dilation = Size2D(1U, 1U)); @@ -130,9 +138,8 @@ std::pair scaled_dimensions(int width, int height, * * @return A pair with the new width in the first position and the new height in the second, returned values can be < 1 */ -std::pair scaled_dimensions_signed(int width, int height, - int kernel_width, int kernel_height, - const PadStrideInfo &pad_stride_info); +std::pair scaled_dimensions_signed( + int width, int height, int kernel_width, int kernel_height, const PadStrideInfo &pad_stride_info); /** Returns calculated width, height and depth of output scaled tensor depending on dimensions rounding mode. * @@ -147,8 +154,12 @@ std::pair scaled_dimensions_signed(int width, int height, * @return A tuple with the new width in the first position, the new height in the second, and the new depth in the third. 
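These helpers share the usual output-size arithmetic, out = (in + pad_left + pad_right - dilation * (kernel - 1) - 1) / stride + 1, rounded according to the DimensionRoundingType carried by PadStrideInfo. A worked example with invented numbers:

    // 224x224 input, 3x3 kernel, stride 2, symmetric padding 1, FLOOR rounding:
    // (224 + 1 + 1 - 3) / 2 + 1 = 112
    const PadStrideInfo conv_info(2, 2, 1, 1, DimensionRoundingType::FLOOR);
    const auto out_dims = scaled_dimensions(224, 224, 3, 3, conv_info); // {112, 112}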
* Returned values can be < 1 */ -std::tuple scaled_3d_dimensions_signed(int width, int height, int depth, - int kernel_width, int kernel_height, int kernel_depth, +std::tuple scaled_3d_dimensions_signed(int width, + int height, + int depth, + int kernel_width, + int kernel_height, + int kernel_depth, const Pooling3dLayerInfo &pool3d_info); /** Check if the given reduction operation should be handled in a serial way. @@ -178,7 +189,9 @@ QuantizationInfo get_softmax_output_quantization_info(DataType input_type, bool * * @return The pair with minimum and maximum values */ -std::pair get_quantized_activation_min_max(const ActivationLayerInfo &act_info, DataType data_type, UniformQuantizationInfo oq_info); +std::pair get_quantized_activation_min_max(const ActivationLayerInfo &act_info, + DataType data_type, + UniformQuantizationInfo oq_info); /** Convert a channel identity into a string. * @@ -295,26 +308,27 @@ inline size_t num_of_elements_in_range(const float start, const float end, const * @param[in] element_delim (Optional) Delimeter among the consecutive elements. Defaults to space delimeter */ template -void print_consecutive_elements_impl(std::ostream &s, const T *ptr, unsigned int n, int stream_width = 0, const std::string &element_delim = " ") +void print_consecutive_elements_impl( + std::ostream &s, const T *ptr, unsigned int n, int stream_width = 0, const std::string &element_delim = " ") { using print_type = typename std::conditional::value, T, int>::type; std::ios stream_status(nullptr); stream_status.copyfmt(s); - for(unsigned int i = 0; i < n; ++i) + for (unsigned int i = 0; i < n; ++i) { // Set stream width as it is not a "sticky" stream manipulator - if(stream_width != 0) + if (stream_width != 0) { s.width(stream_width); } - if(std::is_same::type, half>::value) + if (std::is_same::type, half>::value) { // We use T instead of print_type here is because the std::is_floating_point returns false and then the print_type becomes int. s << std::right << static_cast(ptr[i]) << element_delim; } - else if(std::is_same::type, bfloat16>::value) + else if (std::is_same::type, bfloat16>::value) { // We use T instead of print_type here is because the std::is_floating_point returns false and then the print_type becomes int. s << std::right << float(ptr[i]) << element_delim; @@ -343,17 +357,17 @@ int max_consecutive_elements_display_width_impl(std::ostream &s, const T *ptr, u using print_type = typename std::conditional::value, T, int>::type; int max_width = -1; - for(unsigned int i = 0; i < n; ++i) + for (unsigned int i = 0; i < n; ++i) { std::stringstream ss; ss.copyfmt(s); - if(std::is_same::type, half>::value) + if (std::is_same::type, half>::value) { // We use T instead of print_type here is because the std::is_floating_point returns false and then the print_type becomes int. ss << static_cast(ptr[i]); } - else if(std::is_same::type, bfloat16>::value) + else if (std::is_same::type, bfloat16>::value) { // We use T instead of print_type here is because the std::is_floating_point returns false and then the print_type becomes int. ss << float(ptr[i]); @@ -377,7 +391,12 @@ int max_consecutive_elements_display_width_impl(std::ostream &s, const T *ptr, u * @param[in] stream_width (Optional) Width of the stream. If set to 0 the element's width is used. Defaults to 0. * @param[in] element_delim (Optional) Delimeter among the consecutive elements. 
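For reference, a sketch of calling the printing helper reformatted above (not part of the patch; assumes an asserts-enabled build, since these helpers live behind ARM_COMPUTE_ASSERTS_ENABLED, and the data values are illustrative):

    #include <iostream>
    #include "arm_compute/core/Utils.h"

    void dump_row(const float *data, unsigned int n)
    {
        // Each value prints right-aligned in an 8-character field, space
        // separated; half and bfloat16 inputs are widened to float first.
        arm_compute::print_consecutive_elements_impl<float>(std::cout, data, n, 8);
    }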
Defaults to space delimeter */ -void print_consecutive_elements(std::ostream &s, DataType dt, const uint8_t *ptr, unsigned int n, int stream_width, const std::string &element_delim = " "); +void print_consecutive_elements(std::ostream &s, + DataType dt, + const uint8_t *ptr, + unsigned int n, + int stream_width, + const std::string &element_delim = " "); /** Identify the maximum width of n consecutive elements. * @@ -390,5 +409,5 @@ void print_consecutive_elements(std::ostream &s, DataType dt, const uint8_t *ptr */ int max_consecutive_elements_display_width(std::ostream &s, DataType dt, const uint8_t *ptr, unsigned int n); #endif /* ARM_COMPUTE_ASSERTS_ENABLED */ -} +} // namespace arm_compute #endif /*ARM_COMPUTE_UTILS_H */ diff --git a/arm_compute/core/Validate.h b/arm_compute/core/Validate.h index 5bffc16f3b..5550560aff 100644 --- a/arm_compute/core/Validate.h +++ b/arm_compute/core/Validate.h @@ -24,13 +24,13 @@ #ifndef ARM_COMPUTE_VALIDATE_H #define ARM_COMPUTE_VALIDATE_H -#include "arm_compute/core/utils/DataLayoutUtils.h" -#include "arm_compute/core/utils/DataTypeUtils.h" #include "arm_compute/core/Error.h" -#include "arm_compute/core/utils/FormatUtils.h" #include "arm_compute/core/IKernel.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/QuantizationInfo.h" +#include "arm_compute/core/utils/DataLayoutUtils.h" +#include "arm_compute/core/utils/DataTypeUtils.h" +#include "arm_compute/core/utils/FormatUtils.h" #include "arm_compute/core/Window.h" #include @@ -50,9 +50,9 @@ namespace detail template inline bool have_different_dimensions(const Dimensions &dim1, const Dimensions &dim2, unsigned int upper_dim) { - for(unsigned int i = upper_dim; i < arm_compute::Dimensions::num_max_dimensions; ++i) + for (unsigned int i = upper_dim; i < arm_compute::Dimensions::num_max_dimensions; ++i) { - if(dim1[i] != dim2[i]) + if (dim1[i] != dim2[i]) { return true; } @@ -80,7 +80,7 @@ public: * @param[in] line Source code line. Used for error reporting. */ compare_dimension(const Dimensions &dim, const char *function, const char *file, int line) - : _dim{ dim }, _function{ function }, _file{ file }, _line{ line } + : _dim{dim}, _function{function}, _file{file}, _line{line} { } @@ -111,7 +111,7 @@ inline arm_compute::Status for_each_error(F &&) } template -inline arm_compute::Status for_each_error(F &&func, T &&arg, Ts &&... args) +inline arm_compute::Status for_each_error(F &&func, T &&arg, Ts &&...args) { ARM_COMPUTE_RETURN_ON_ERROR(func(arg)); ARM_COMPUTE_RETURN_ON_ERROR(for_each_error(func, args...)); @@ -148,13 +148,11 @@ struct get_tensor_info_t * @return Status */ template -inline arm_compute::Status error_on_nullptr(const char *function, const char *file, const int line, Ts &&... pointers) +inline arm_compute::Status error_on_nullptr(const char *function, const char *file, const int line, Ts &&...pointers) { - const std::array pointers_array{ { std::forward(pointers)... 
} };
-    bool has_nullptr = std::any_of(pointers_array.begin(), pointers_array.end(), [&](const void *ptr)
-    {
-        return (ptr == nullptr);
-    });
+    const std::array<const void *, sizeof...(Ts)> pointers_array{{std::forward<Ts>(pointers)...}};
+    bool has_nullptr =
+        std::any_of(pointers_array.begin(), pointers_array.end(), [&](const void *ptr) { return (ptr == nullptr); });
     ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(has_nullptr, function, file, line, "Nullptr object!");
     return arm_compute::Status{};
 }
@@ -178,8 +176,8 @@ inline arm_compute::Status error_on_nullptr(const char *function, const char *fi
  *
  * @return Status
  */
-arm_compute::Status error_on_mismatching_windows(const char *function, const char *file, const int line,
-                                                 const Window &full, const Window &win);
+arm_compute::Status error_on_mismatching_windows(
+    const char *function, const char *file, const int line, const Window &full, const Window &win);
 #define ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(f, w) \
     ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_mismatching_windows(__func__, __FILE__, __LINE__, f, w))
 #define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_WINDOWS(f, w) \
@@ -200,8 +198,8 @@ arm_compute::Status error_on_mismatching_windows(const char *function, const cha
  *
  * @return Status
  */
-arm_compute::Status error_on_invalid_subwindow(const char *function, const char *file, const int line,
-                                               const Window &full, const Window &sub);
+arm_compute::Status error_on_invalid_subwindow(
+    const char *function, const char *file, const int line, const Window &full, const Window &sub);
 #define ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(f, s) \
     ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_invalid_subwindow(__func__, __FILE__, __LINE__, f, s))
 #define ARM_COMPUTE_RETURN_ERROR_ON_INVALID_SUBWINDOW(f, s) \
@@ -220,12 +218,14 @@ arm_compute::Status error_on_invalid_subwindow(const char *function, const char
  *
  * @return Status
  */
-arm_compute::Status error_on_window_not_collapsable_at_dimension(const char *function, const char *file, const int line,
-                                                                 const Window &full, const Window &window, const int dim);
+arm_compute::Status error_on_window_not_collapsable_at_dimension(
+    const char *function, const char *file, const int line, const Window &full, const Window &window, const int dim);
 #define ARM_COMPUTE_ERROR_ON_WINDOW_NOT_COLLAPSABLE_AT_DIMENSION(f, w, d) \
-    ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_window_not_collapsable_at_dimension(__func__, __FILE__, __LINE__, f, w, d))
+    ARM_COMPUTE_ERROR_THROW_ON( \
+        ::arm_compute::error_on_window_not_collapsable_at_dimension(__func__, __FILE__, __LINE__, f, w, d))
 #define ARM_COMPUTE_RETURN_ERROR_ON_WINDOW_NOT_COLLAPSABLE_AT_DIMENSION(f, w, d) \
-    ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_window_not_collapsable_at_dimension(__func__, __FILE__, __LINE__, f, w, d))
+    ARM_COMPUTE_RETURN_ON_ERROR( \
+        ::arm_compute::error_on_window_not_collapsable_at_dimension(__func__, __FILE__, __LINE__, f, w, d))

/** Return an error if the passed coordinates have too many dimensions.
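A sketch of the convention these re-wrapped declarations serve: each error_on_* helper is paired with a throwing ARM_COMPUTE_ERROR_ON_* macro for configure() paths and a Status-returning ARM_COMPUTE_RETURN_ERROR_ON_* macro for validate() paths. Function and tensor names below are hypothetical, not part of the patch:

    arm_compute::Status validate(const arm_compute::ITensorInfo *src, const arm_compute::ITensorInfo *dst)
    {
        // Propagate an error Status instead of asserting:
        ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(src, dst);
        return arm_compute::Status{};
    }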
* @@ -239,8 +239,8 @@ arm_compute::Status error_on_window_not_collapsable_at_dimension(const char *fun * * @return Status */ -arm_compute::Status error_on_coordinates_dimensions_gte(const char *function, const char *file, const int line, - const Coordinates &pos, unsigned int max_dim); +arm_compute::Status error_on_coordinates_dimensions_gte( + const char *function, const char *file, const int line, const Coordinates &pos, unsigned int max_dim); #define ARM_COMPUTE_ERROR_ON_COORDINATES_DIMENSIONS_GTE(p, md) \ ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_coordinates_dimensions_gte(__func__, __FILE__, __LINE__, p, md)) #define ARM_COMPUTE_RETURN_ERROR_ON_COORDINATES_DIMENSIONS_GTE(p, md) \ @@ -258,8 +258,8 @@ arm_compute::Status error_on_coordinates_dimensions_gte(const char *function, co * * @return Status */ -arm_compute::Status error_on_window_dimensions_gte(const char *function, const char *file, const int line, - const Window &win, unsigned int max_dim); +arm_compute::Status error_on_window_dimensions_gte( + const char *function, const char *file, const int line, const Window &win, unsigned int max_dim); #define ARM_COMPUTE_ERROR_ON_WINDOW_DIMENSIONS_GTE(w, md) \ ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_window_dimensions_gte(__func__, __FILE__, __LINE__, w, md)) #define ARM_COMPUTE_RETURN_ERROR_ON_WINDOW_DIMENSIONS_GTE(w, md) \ @@ -277,16 +277,23 @@ arm_compute::Status error_on_window_dimensions_gte(const char *function, const c * @return Status */ template -arm_compute::Status error_on_mismatching_dimensions(const char *function, const char *file, int line, - const Dimensions &dim1, const Dimensions &dim2, Ts &&... dims) +arm_compute::Status error_on_mismatching_dimensions(const char *function, + const char *file, + int line, + const Dimensions &dim1, + const Dimensions &dim2, + Ts &&...dims) { - ARM_COMPUTE_RETURN_ON_ERROR(detail::for_each_error(detail::compare_dimension(dim1, function, file, line), dim2, std::forward(dims)...)); + ARM_COMPUTE_RETURN_ON_ERROR(detail::for_each_error(detail::compare_dimension(dim1, function, file, line), dim2, + std::forward(dims)...)); return arm_compute::Status{}; } #define ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(...) \ - ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_mismatching_dimensions(__func__, __FILE__, __LINE__, __VA_ARGS__)) + ARM_COMPUTE_ERROR_THROW_ON( \ + ::arm_compute::error_on_mismatching_dimensions(__func__, __FILE__, __LINE__, __VA_ARGS__)) #define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(...) \ - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_mismatching_dimensions(__func__, __FILE__, __LINE__, __VA_ARGS__)) + ARM_COMPUTE_RETURN_ON_ERROR( \ + ::arm_compute::error_on_mismatching_dimensions(__func__, __FILE__, __LINE__, __VA_ARGS__)) /** Return true if the given format has horizontal subsampling. * @@ -296,7 +303,10 @@ arm_compute::Status error_on_mismatching_dimensions(const char *function, const */ inline bool has_format_horizontal_subsampling(Format format) { - return (format == Format::YUYV422 || format == Format::UYVY422 || format == Format::NV12 || format == Format::NV21 || format == Format::IYUV || format == Format::UV88) ? true : false; + return (format == Format::YUYV422 || format == Format::UYVY422 || format == Format::NV12 || + format == Format::NV21 || format == Format::IYUV || format == Format::UV88) + ? true + : false; } /** Return true if the given format has vertical subsampling. 
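A quick sketch (not part of the patch) of what the two subsampling predicates report for a packed 4:2:2 format versus a planar 4:2:0 one; the values follow directly from the bodies above:

    const bool h422 = arm_compute::has_format_horizontal_subsampling(arm_compute::Format::YUYV422); // true
    const bool v422 = arm_compute::has_format_vertical_subsampling(arm_compute::Format::YUYV422);   // false
    const bool h420 = arm_compute::has_format_horizontal_subsampling(arm_compute::Format::NV12);    // true
    const bool v420 = arm_compute::has_format_vertical_subsampling(arm_compute::Format::NV12);      // true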
@@ -307,7 +317,9 @@ inline bool has_format_horizontal_subsampling(Format format) */ inline bool has_format_vertical_subsampling(Format format) { - return (format == Format::NV12 || format == Format::NV21 || format == Format::IYUV || format == Format::UV88) ? true : false; + return (format == Format::NV12 || format == Format::NV21 || format == Format::IYUV || format == Format::UV88) + ? true + : false; } /** Adjust tensor shape size if width or height are odd for a given multi-planar format. No modification is done for other formats. @@ -325,16 +337,16 @@ inline bool has_format_vertical_subsampling(Format format) */ inline TensorShape adjust_odd_shape(const TensorShape &shape, Format format) { - TensorShape output{ shape }; + TensorShape output{shape}; // Force width to be even for formats which require subsampling of the U and V channels - if(has_format_horizontal_subsampling(format)) + if (has_format_horizontal_subsampling(format)) { output.set(0, (output.x() + 1) & ~1U); } // Force height to be even for formats which require subsampling of the U and V channels - if(has_format_vertical_subsampling(format)) + if (has_format_vertical_subsampling(format)) { output.set(1, (output.y() + 1) & ~1U); } @@ -354,18 +366,20 @@ inline TensorShape adjust_odd_shape(const TensorShape &shape, Format format) * @return Status */ template -arm_compute::Status error_on_tensors_not_even(const char *function, const char *file, int line, - const Format &format, const ITensor *tensor1, Ts... tensors) +arm_compute::Status error_on_tensors_not_even( + const char *function, const char *file, int line, const Format &format, const ITensor *tensor1, Ts... tensors) { ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor1 == nullptr, function, file, line); ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, std::forward(tensors)...)); - const std::array < const ITensor *, 1 + sizeof...(Ts) > tensors_info_array{ { tensor1, std::forward(tensors)... } }; - ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(std::any_of(tensors_info_array.cbegin(), tensors_info_array.cend(), [&](const ITensor * tensor) - { - const TensorShape correct_shape = adjust_odd_shape(tensor->info()->tensor_shape(), format); - return detail::have_different_dimensions(tensor->info()->tensor_shape(), correct_shape, 2); - }), - function, file, line, "Tensor shape has odd dimensions"); + const std::array tensors_info_array{{tensor1, std::forward(tensors)...}}; + ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG( + std::any_of(tensors_info_array.cbegin(), tensors_info_array.cend(), + [&](const ITensor *tensor) + { + const TensorShape correct_shape = adjust_odd_shape(tensor->info()->tensor_shape(), format); + return detail::have_different_dimensions(tensor->info()->tensor_shape(), correct_shape, 2); + }), + function, file, line, "Tensor shape has odd dimensions"); return arm_compute::Status{}; } @@ -382,21 +396,22 @@ arm_compute::Status error_on_tensors_not_even(const char *function, const char * * * @return The subsampled tensor shape. 
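For reference, a sketch of the shape adjustment above on an odd-sized image (not part of the patch; dimensions illustrative):

    // NV12 subsamples U/V in both directions, so both extents round up to
    // even values: a 639x479 shape becomes 640x480.
    const arm_compute::TensorShape odd_shape(639U, 479U);
    const arm_compute::TensorShape even_shape = arm_compute::adjust_odd_shape(odd_shape, arm_compute::Format::NV12);
    // even_shape.x() == 640 && even_shape.y() == 480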
*/ -inline TensorShape calculate_subsampled_shape(const TensorShape &shape, Format format, Channel channel = Channel::UNKNOWN) +inline TensorShape +calculate_subsampled_shape(const TensorShape &shape, Format format, Channel channel = Channel::UNKNOWN) { - TensorShape output{ shape }; + TensorShape output{shape}; // Subsample shape only for U or V channel - if(Channel::U == channel || Channel::V == channel || Channel::UNKNOWN == channel) + if (Channel::U == channel || Channel::V == channel || Channel::UNKNOWN == channel) { // Subsample width for the tensor shape when channel is U or V - if(has_format_horizontal_subsampling(format)) + if (has_format_horizontal_subsampling(format)) { output.set(0, output.x() / 2U); } // Subsample height for the tensor shape when channel is U or V - if(has_format_vertical_subsampling(format)) + if (has_format_vertical_subsampling(format)) { output.set(1, output.y() / 2U); } @@ -418,25 +433,32 @@ inline TensorShape calculate_subsampled_shape(const TensorShape &shape, Format f * @return Status */ template -arm_compute::Status error_on_tensors_not_subsampled(const char *function, const char *file, int line, - const Format &format, const TensorShape &shape, const ITensor *tensor1, Ts... tensors) +arm_compute::Status error_on_tensors_not_subsampled(const char *function, + const char *file, + int line, + const Format &format, + const TensorShape &shape, + const ITensor *tensor1, + Ts... tensors) { ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor1 == nullptr, function, file, line); ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, std::forward(tensors)...)); - const TensorShape sub2_shape = calculate_subsampled_shape(shape, format); - const std::array < const ITensor *, 1 + sizeof...(Ts) > tensors_info_array{ { tensor1, std::forward(tensors)... } }; - ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(std::any_of(tensors_info_array.cbegin(), tensors_info_array.cend(), [&](const ITensor * tensor) - { - return detail::have_different_dimensions(tensor->info()->tensor_shape(), sub2_shape, 2); - }), - function, file, line, "Tensor shape has mismatch dimensions for sub-sampling"); + const TensorShape sub2_shape = calculate_subsampled_shape(shape, format); + const std::array tensors_info_array{{tensor1, std::forward(tensors)...}}; + ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG( + std::any_of(tensors_info_array.cbegin(), tensors_info_array.cend(), + [&](const ITensor *tensor) + { return detail::have_different_dimensions(tensor->info()->tensor_shape(), sub2_shape, 2); }), + function, file, line, "Tensor shape has mismatch dimensions for sub-sampling"); return arm_compute::Status{}; } #define ARM_COMPUTE_ERROR_ON_TENSORS_NOT_SUBSAMPLED(...) \ - ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_tensors_not_subsampled(__func__, __FILE__, __LINE__, __VA_ARGS__)) + ARM_COMPUTE_ERROR_THROW_ON( \ + ::arm_compute::error_on_tensors_not_subsampled(__func__, __FILE__, __LINE__, __VA_ARGS__)) #define ARM_COMPUTE_RETURN_ERROR_ON_TENSORS_NOT_SUBSAMPLED(...) 
\ - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_tensors_not_subsampled(__func__, __FILE__, __LINE__, __VA_ARGS__)) + ARM_COMPUTE_RETURN_ON_ERROR( \ + ::arm_compute::error_on_tensors_not_subsampled(__func__, __FILE__, __LINE__, __VA_ARGS__)) /** Return an error if the passed two tensor infos have different shapes from the given dimension * @@ -450,10 +472,15 @@ arm_compute::Status error_on_tensors_not_subsampled(const char *function, const * @return Status */ template -inline arm_compute::Status error_on_mismatching_shapes(const char *function, const char *file, const int line, - const ITensorInfo *tensor_info_1, const ITensorInfo *tensor_info_2, Ts... tensor_infos) +inline arm_compute::Status error_on_mismatching_shapes(const char *function, + const char *file, + const int line, + const ITensorInfo *tensor_info_1, + const ITensorInfo *tensor_info_2, + Ts... tensor_infos) { - return error_on_mismatching_shapes(function, file, line, 0U, tensor_info_1, tensor_info_2, std::forward(tensor_infos)...); + return error_on_mismatching_shapes(function, file, line, 0U, tensor_info_1, tensor_info_2, + std::forward(tensor_infos)...); } /** Return an error if the passed two tensors have different shapes from the given dimension * @@ -467,8 +494,12 @@ inline arm_compute::Status error_on_mismatching_shapes(const char *function, con * @return Status */ template -inline arm_compute::Status error_on_mismatching_shapes(const char *function, const char *file, const int line, - const ITensor *tensor_1, const ITensor *tensor_2, Ts... tensors) +inline arm_compute::Status error_on_mismatching_shapes(const char *function, + const char *file, + const int line, + const ITensor *tensor_1, + const ITensor *tensor_2, + Ts... tensors) { return error_on_mismatching_shapes(function, file, line, 0U, tensor_1, tensor_2, std::forward(tensors)...); } @@ -485,19 +516,28 @@ inline arm_compute::Status error_on_mismatching_shapes(const char *function, con * @return Status */ template -inline arm_compute::Status error_on_mismatching_shapes(const char *function, const char *file, const int line, - unsigned int upper_dim, const ITensorInfo *tensor_info_1, const ITensorInfo *tensor_info_2, Ts... tensor_infos) +inline arm_compute::Status error_on_mismatching_shapes(const char *function, + const char *file, + const int line, + unsigned int upper_dim, + const ITensorInfo *tensor_info_1, + const ITensorInfo *tensor_info_2, + Ts... tensor_infos) { ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_info_1 == nullptr, function, file, line); ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_info_2 == nullptr, function, file, line); ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, tensor_infos...)); - const std::array < const ITensorInfo *, 2 + sizeof...(Ts) > tensors_info_array{ { tensor_info_1, tensor_info_2, tensor_infos... 
} }; - ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(std::any_of(std::next(tensors_info_array.cbegin()), tensors_info_array.cend(), [&](const ITensorInfo * tensor_info) - { - return detail::have_different_dimensions((*tensors_info_array.cbegin())->tensor_shape(), tensor_info->tensor_shape(), upper_dim); - }), - function, file, line, "Tensors have different shapes"); + const std::array tensors_info_array{ + {tensor_info_1, tensor_info_2, tensor_infos...}}; + ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(std::any_of(std::next(tensors_info_array.cbegin()), tensors_info_array.cend(), + [&](const ITensorInfo *tensor_info) + { + return detail::have_different_dimensions( + (*tensors_info_array.cbegin())->tensor_shape(), + tensor_info->tensor_shape(), upper_dim); + }), + function, file, line, "Tensors have different shapes"); return arm_compute::Status{}; } /** Return an error if the passed two tensors have different shapes from the given dimension @@ -513,14 +553,20 @@ inline arm_compute::Status error_on_mismatching_shapes(const char *function, con * @return Status */ template -inline arm_compute::Status error_on_mismatching_shapes(const char *function, const char *file, const int line, - unsigned int upper_dim, const ITensor *tensor_1, const ITensor *tensor_2, Ts... tensors) +inline arm_compute::Status error_on_mismatching_shapes(const char *function, + const char *file, + const int line, + unsigned int upper_dim, + const ITensor *tensor_1, + const ITensor *tensor_2, + Ts... tensors) { ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_1 == nullptr, function, file, line); ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_2 == nullptr, function, file, line); ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, tensors...)); - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_mismatching_shapes(function, file, line, upper_dim, tensor_1->info(), tensor_2->info(), - detail::get_tensor_info_t()(tensors)...)); + ARM_COMPUTE_RETURN_ON_ERROR( + ::arm_compute::error_on_mismatching_shapes(function, file, line, upper_dim, tensor_1->info(), tensor_2->info(), + detail::get_tensor_info_t()(tensors)...)); return arm_compute::Status{}; } #define ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(...) \ @@ -539,19 +585,18 @@ inline arm_compute::Status error_on_mismatching_shapes(const char *function, con * @return Status */ template -inline arm_compute::Status error_on_mismatching_data_layouts(const char *function, const char *file, const int line, - const ITensorInfo *tensor_info, Ts... tensor_infos) +inline arm_compute::Status error_on_mismatching_data_layouts( + const char *function, const char *file, const int line, const ITensorInfo *tensor_info, Ts... tensor_infos) { ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_info == nullptr, function, file, line); ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, tensor_infos...)); - DataLayout &&tensor_data_layout = tensor_info->data_layout(); - const std::array tensors_infos_array{ { tensor_infos... 
} }; - ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(std::any_of(tensors_infos_array.begin(), tensors_infos_array.end(), [&](const ITensorInfo * tensor_info_obj) - { - return tensor_info_obj->data_layout() != tensor_data_layout; - }), - function, file, line, "Tensors have different data layouts"); + DataLayout &&tensor_data_layout = tensor_info->data_layout(); + const std::array tensors_infos_array{{tensor_infos...}}; + ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(std::any_of(tensors_infos_array.begin(), tensors_infos_array.end(), + [&](const ITensorInfo *tensor_info_obj) + { return tensor_info_obj->data_layout() != tensor_data_layout; }), + function, file, line, "Tensors have different data layouts"); return arm_compute::Status{}; } /** Return an error if the passed tensors have different data layouts @@ -565,19 +610,21 @@ inline arm_compute::Status error_on_mismatching_data_layouts(const char *functio * @return Status */ template -inline arm_compute::Status error_on_mismatching_data_layouts(const char *function, const char *file, const int line, - const ITensor *tensor, Ts... tensors) +inline arm_compute::Status error_on_mismatching_data_layouts( + const char *function, const char *file, const int line, const ITensor *tensor, Ts... tensors) { ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor == nullptr, function, file, line); ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, std::forward(tensors)...)); - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_mismatching_data_layouts(function, file, line, tensor->info(), - detail::get_tensor_info_t()(tensors)...)); + ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_mismatching_data_layouts( + function, file, line, tensor->info(), detail::get_tensor_info_t()(tensors)...)); return arm_compute::Status{}; } #define ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_LAYOUT(...) \ - ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_mismatching_data_layouts(__func__, __FILE__, __LINE__, __VA_ARGS__)) + ARM_COMPUTE_ERROR_THROW_ON( \ + ::arm_compute::error_on_mismatching_data_layouts(__func__, __FILE__, __LINE__, __VA_ARGS__)) #define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(...) \ - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_mismatching_data_layouts(__func__, __FILE__, __LINE__, __VA_ARGS__)) + ARM_COMPUTE_RETURN_ON_ERROR( \ + ::arm_compute::error_on_mismatching_data_layouts(__func__, __FILE__, __LINE__, __VA_ARGS__)) /** Return an error if the passed two tensor infos have different data types * @@ -590,19 +637,18 @@ inline arm_compute::Status error_on_mismatching_data_layouts(const char *functio * @return Status */ template -inline arm_compute::Status error_on_mismatching_data_types(const char *function, const char *file, const int line, - const ITensorInfo *tensor_info, Ts... tensor_infos) +inline arm_compute::Status error_on_mismatching_data_types( + const char *function, const char *file, const int line, const ITensorInfo *tensor_info, Ts... tensor_infos) { ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_info == nullptr, function, file, line); ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, tensor_infos...)); - DataType &&tensor_data_type = tensor_info->data_type(); - const std::array tensors_infos_array{ { tensor_infos... 
} }; - ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(std::any_of(tensors_infos_array.begin(), tensors_infos_array.end(), [&](const ITensorInfo * tensor_info_obj) - { - return tensor_info_obj->data_type() != tensor_data_type; - }), - function, file, line, "Tensors have different data types"); + DataType &&tensor_data_type = tensor_info->data_type(); + const std::array tensors_infos_array{{tensor_infos...}}; + ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(std::any_of(tensors_infos_array.begin(), tensors_infos_array.end(), + [&](const ITensorInfo *tensor_info_obj) + { return tensor_info_obj->data_type() != tensor_data_type; }), + function, file, line, "Tensors have different data types"); return arm_compute::Status{}; } /** Return an error if the passed two tensors have different data types @@ -616,19 +662,21 @@ inline arm_compute::Status error_on_mismatching_data_types(const char *function, * @return Status */ template -inline arm_compute::Status error_on_mismatching_data_types(const char *function, const char *file, const int line, - const ITensor *tensor, Ts... tensors) +inline arm_compute::Status error_on_mismatching_data_types( + const char *function, const char *file, const int line, const ITensor *tensor, Ts... tensors) { ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor == nullptr, function, file, line); ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, tensors...)); - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_mismatching_data_types(function, file, line, tensor->info(), - detail::get_tensor_info_t()(tensors)...)); + ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_mismatching_data_types( + function, file, line, tensor->info(), detail::get_tensor_info_t()(tensors)...)); return arm_compute::Status{}; } #define ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(...) \ - ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_mismatching_data_types(__func__, __FILE__, __LINE__, __VA_ARGS__)) + ARM_COMPUTE_ERROR_THROW_ON( \ + ::arm_compute::error_on_mismatching_data_types(__func__, __FILE__, __LINE__, __VA_ARGS__)) #define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...) \ - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_mismatching_data_types(__func__, __FILE__, __LINE__, __VA_ARGS__)) + ARM_COMPUTE_RETURN_ON_ERROR( \ + ::arm_compute::error_on_mismatching_data_types(__func__, __FILE__, __LINE__, __VA_ARGS__)) /** Return an error if the passed tensor infos have different asymmetric quantized data types or different quantization info * @@ -644,28 +692,32 @@ inline arm_compute::Status error_on_mismatching_data_types(const char *function, * @return Status */ template -inline arm_compute::Status error_on_mismatching_quantization_info(const char *function, const char *file, const int line, - const ITensorInfo *tensor_info_1, const ITensorInfo *tensor_info_2, Ts... tensor_infos) +inline arm_compute::Status error_on_mismatching_quantization_info(const char *function, + const char *file, + const int line, + const ITensorInfo *tensor_info_1, + const ITensorInfo *tensor_info_2, + Ts... tensor_infos) { DataType &&first_data_type = tensor_info_1->data_type(); const QuantizationInfo first_quantization_info = tensor_info_1->quantization_info(); - if(!is_data_type_quantized(first_data_type)) + if (!is_data_type_quantized(first_data_type)) { return arm_compute::Status{}; } - const std::array < const ITensorInfo *, 1 + sizeof...(Ts) > tensor_infos_array{ { tensor_info_2, std::forward(tensor_infos)... 
} }; - ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(std::any_of(tensor_infos_array.begin(), tensor_infos_array.end(), [&](const ITensorInfo * tensor_info) - { - return tensor_info->data_type() != first_data_type; - }), - function, file, line, "Tensors have different asymmetric quantized data types"); - ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(std::any_of(tensor_infos_array.begin(), tensor_infos_array.end(), [&](const ITensorInfo * tensor_info) - { - return tensor_info->quantization_info() != first_quantization_info; - }), - function, file, line, "Tensors have different quantization information"); + const std::array tensor_infos_array{ + {tensor_info_2, std::forward(tensor_infos)...}}; + ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(std::any_of(tensor_infos_array.begin(), tensor_infos_array.end(), + [&](const ITensorInfo *tensor_info) + { return tensor_info->data_type() != first_data_type; }), + function, file, line, "Tensors have different asymmetric quantized data types"); + ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG( + std::any_of(tensor_infos_array.begin(), tensor_infos_array.end(), + [&](const ITensorInfo *tensor_info) + { return tensor_info->quantization_info() != first_quantization_info; }), + function, file, line, "Tensors have different quantization information"); return arm_compute::Status{}; } @@ -683,17 +735,24 @@ inline arm_compute::Status error_on_mismatching_quantization_info(const char *fu * @return Status */ template -inline arm_compute::Status error_on_mismatching_quantization_info(const char *function, const char *file, const int line, - const ITensor *tensor_1, const ITensor *tensor_2, Ts... tensors) +inline arm_compute::Status error_on_mismatching_quantization_info(const char *function, + const char *file, + const int line, + const ITensor *tensor_1, + const ITensor *tensor_2, + Ts... tensors) { - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_mismatching_quantization_info(function, file, line, tensor_1->info(), tensor_2->info(), - detail::get_tensor_info_t()(tensors)...)); + ARM_COMPUTE_RETURN_ON_ERROR( + ::arm_compute::error_on_mismatching_quantization_info(function, file, line, tensor_1->info(), tensor_2->info(), + detail::get_tensor_info_t()(tensors)...)); return arm_compute::Status{}; } #define ARM_COMPUTE_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(...) \ - ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_mismatching_quantization_info(__func__, __FILE__, __LINE__, __VA_ARGS__)) + ARM_COMPUTE_ERROR_THROW_ON( \ + ::arm_compute::error_on_mismatching_quantization_info(__func__, __FILE__, __LINE__, __VA_ARGS__)) #define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(...) \ - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_mismatching_quantization_info(__func__, __FILE__, __LINE__, __VA_ARGS__)) + ARM_COMPUTE_RETURN_ON_ERROR( \ + ::arm_compute::error_on_mismatching_quantization_info(__func__, __FILE__, __LINE__, __VA_ARGS__)) /** Throw an error if the format of the passed tensor/multi-image does not match any of the formats provided. * @@ -705,8 +764,8 @@ inline arm_compute::Status error_on_mismatching_quantization_info(const char *fu * @param[in] formats (Optional) Further allowed formats. */ template -void error_on_format_not_in(const char *function, const char *file, const int line, - const T *object, F &&format, Fs &&... 
formats) +void error_on_format_not_in( + const char *function, const char *file, const int line, const T *object, F &&format, Fs &&...formats) { ARM_COMPUTE_ERROR_ON_LOC(object == nullptr, function, file, line); @@ -715,17 +774,17 @@ void error_on_format_not_in(const char *function, const char *file, const int li ARM_COMPUTE_ERROR_ON_LOC(object_format == Format::UNKNOWN, function, file, line); - const std::array formats_array{ { std::forward(formats)... } }; + const std::array formats_array{{std::forward(formats)...}}; ARM_COMPUTE_UNUSED(formats_array); - ARM_COMPUTE_ERROR_ON_LOC_MSG(object_format != format && std::none_of(formats_array.begin(), formats_array.end(), [&](const F & f) - { - return f == object_format; - }), - function, file, line, "Format %s not supported by this kernel", string_from_format(object_format).c_str()); + ARM_COMPUTE_ERROR_ON_LOC_MSG( + object_format != format && + std::none_of(formats_array.begin(), formats_array.end(), [&](const F &f) { return f == object_format; }), + function, file, line, "Format %s not supported by this kernel", string_from_format(object_format).c_str()); ARM_COMPUTE_UNUSED(function, format, file, line); } -#define ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(t, ...) ::arm_compute::error_on_format_not_in(__func__, __FILE__, __LINE__, t, __VA_ARGS__) +#define ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(t, ...) \ + ::arm_compute::error_on_format_not_in(__func__, __FILE__, __LINE__, t, __VA_ARGS__) /** Return an error if the data type of the passed tensor info does not match any of the data types provided. * @@ -739,20 +798,19 @@ void error_on_format_not_in(const char *function, const char *file, const int li * @return Status */ template -inline arm_compute::Status error_on_data_type_not_in(const char *function, const char *file, const int line, - const ITensorInfo *tensor_info, T &&dt, Ts &&... dts) +inline arm_compute::Status error_on_data_type_not_in( + const char *function, const char *file, const int line, const ITensorInfo *tensor_info, T &&dt, Ts &&...dts) { ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_info == nullptr, function, file, line); const DataType &tensor_dt = tensor_info->data_type(); //NOLINT ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_dt == DataType::UNKNOWN, function, file, line); - const std::array dts_array{ { std::forward(dts)... } }; - ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG_VAR(tensor_dt != dt && std::none_of(dts_array.begin(), dts_array.end(), [&](const T & d) - { - return d == tensor_dt; - }), - function, file, line, "ITensor data type %s not supported by this kernel", string_from_data_type(tensor_dt).c_str()); + const std::array dts_array{{std::forward(dts)...}}; + ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG_VAR( + tensor_dt != dt && std::none_of(dts_array.begin(), dts_array.end(), [&](const T &d) { return d == tensor_dt; }), + function, file, line, "ITensor data type %s not supported by this kernel", + string_from_data_type(tensor_dt).c_str()); return arm_compute::Status{}; } /** Return an error if the data type of the passed tensor does not match any of the data types provided. @@ -767,11 +825,12 @@ inline arm_compute::Status error_on_data_type_not_in(const char *function, const * @return Status */ template -inline arm_compute::Status error_on_data_type_not_in(const char *function, const char *file, const int line, - const ITensor *tensor, T &&dt, Ts &&... 
dts) +inline arm_compute::Status error_on_data_type_not_in( + const char *function, const char *file, const int line, const ITensor *tensor, T &&dt, Ts &&...dts) { ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor == nullptr, function, file, line); - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_data_type_not_in(function, file, line, tensor->info(), std::forward(dt), std::forward(dts)...)); + ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_data_type_not_in( + function, file, line, tensor->info(), std::forward(dt), std::forward(dts)...)); return arm_compute::Status{}; } #define ARM_COMPUTE_ERROR_ON_DATA_TYPE_NOT_IN(t, ...) \ @@ -791,20 +850,19 @@ inline arm_compute::Status error_on_data_type_not_in(const char *function, const * @return Status */ template -inline arm_compute::Status error_on_data_layout_not_in(const char *function, const char *file, const int line, - const ITensorInfo *tensor_info, T &&dl, Ts &&... dls) +inline arm_compute::Status error_on_data_layout_not_in( + const char *function, const char *file, const int line, const ITensorInfo *tensor_info, T &&dl, Ts &&...dls) { ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_info == nullptr, function, file, line); const DataLayout &tensor_dl = tensor_info->data_layout(); //NOLINT ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_dl == DataLayout::UNKNOWN, function, file, line); - const std::array dls_array{ { std::forward(dls)... } }; - ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG_VAR(tensor_dl != dl && std::none_of(dls_array.begin(), dls_array.end(), [&](const T & l) - { - return l == tensor_dl; - }), - function, file, line, "ITensor data layout %s not supported by this kernel", string_from_data_layout(tensor_dl).c_str()); + const std::array dls_array{{std::forward(dls)...}}; + ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG_VAR( + tensor_dl != dl && std::none_of(dls_array.begin(), dls_array.end(), [&](const T &l) { return l == tensor_dl; }), + function, file, line, "ITensor data layout %s not supported by this kernel", + string_from_data_layout(tensor_dl).c_str()); return arm_compute::Status{}; } /** Return an error if the data layout of the passed tensor does not match any of the data layout provided. @@ -819,17 +877,19 @@ inline arm_compute::Status error_on_data_layout_not_in(const char *function, con * @return Status */ template -inline arm_compute::Status error_on_data_layout_not_in(const char *function, const char *file, const int line, - const ITensor *tensor, T &&dl, Ts &&... dls) +inline arm_compute::Status error_on_data_layout_not_in( + const char *function, const char *file, const int line, const ITensor *tensor, T &&dl, Ts &&...dls) { ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor == nullptr, function, file, line); - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_data_layout_not_in(function, file, line, tensor->info(), std::forward(dl), std::forward(dls)...)); + ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_data_layout_not_in( + function, file, line, tensor->info(), std::forward(dl), std::forward(dls)...)); return arm_compute::Status{}; } #define ARM_COMPUTE_ERROR_ON_DATA_LAYOUT_NOT_IN(t, ...) \ ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_data_layout_not_in(__func__, __FILE__, __LINE__, t, __VA_ARGS__)) #define ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(t, ...) 
\ - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_data_layout_not_in(__func__, __FILE__, __LINE__, t, __VA_ARGS__)) + ARM_COMPUTE_RETURN_ON_ERROR( \ + ::arm_compute::error_on_data_layout_not_in(__func__, __FILE__, __LINE__, t, __VA_ARGS__)) /** Return an error if the data type or the number of channels of the passed tensor info does not match any of the data types and number of channels provided. * @@ -844,12 +904,20 @@ inline arm_compute::Status error_on_data_layout_not_in(const char *function, con * @return Status */ template -inline arm_compute::Status error_on_data_type_channel_not_in(const char *function, const char *file, const int line, - const ITensorInfo *tensor_info, size_t num_channels, T &&dt, Ts &&... dts) +inline arm_compute::Status error_on_data_type_channel_not_in(const char *function, + const char *file, + const int line, + const ITensorInfo *tensor_info, + size_t num_channels, + T &&dt, + Ts &&...dts) { - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_data_type_not_in(function, file, line, tensor_info, std::forward(dt), std::forward(dts)...)); + ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_data_type_not_in( + function, file, line, tensor_info, std::forward(dt), std::forward(dts)...)); const size_t tensor_nc = tensor_info->num_channels(); - ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG_VAR(tensor_nc != num_channels, function, file, line, "Number of channels %zu. Required number of channels %zu", tensor_nc, num_channels); + ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG_VAR(tensor_nc != num_channels, function, file, line, + "Number of channels %zu. Required number of channels %zu", tensor_nc, + num_channels); return arm_compute::Status{}; } /** Return an error if the data type or the number of channels of the passed tensor does not match any of the data types and number of channels provided. @@ -865,17 +933,25 @@ inline arm_compute::Status error_on_data_type_channel_not_in(const char *functio * @return Status */ template -inline arm_compute::Status error_on_data_type_channel_not_in(const char *function, const char *file, const int line, - const ITensor *tensor, size_t num_channels, T &&dt, Ts &&... dts) +inline arm_compute::Status error_on_data_type_channel_not_in(const char *function, + const char *file, + const int line, + const ITensor *tensor, + size_t num_channels, + T &&dt, + Ts &&...dts) { ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor == nullptr, function, file, line); - ARM_COMPUTE_RETURN_ON_ERROR(error_on_data_type_channel_not_in(function, file, line, tensor->info(), num_channels, std::forward(dt), std::forward(dts)...)); + ARM_COMPUTE_RETURN_ON_ERROR(error_on_data_type_channel_not_in(function, file, line, tensor->info(), num_channels, + std::forward(dt), std::forward(dts)...)); return arm_compute::Status{}; } #define ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c, ...) \ - ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_data_type_channel_not_in(__func__, __FILE__, __LINE__, t, c, __VA_ARGS__)) + ARM_COMPUTE_ERROR_THROW_ON( \ + ::arm_compute::error_on_data_type_channel_not_in(__func__, __FILE__, __LINE__, t, c, __VA_ARGS__)) #define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c, ...) 
\ - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_data_type_channel_not_in(__func__, __FILE__, __LINE__, t, c, __VA_ARGS__)) + ARM_COMPUTE_RETURN_ON_ERROR( \ + ::arm_compute::error_on_data_type_channel_not_in(__func__, __FILE__, __LINE__, t, c, __VA_ARGS__)) /** Return an error if the data type of the passed tensor info is FP16 and FP16 extension is not supported by the device. * @@ -887,12 +963,12 @@ inline arm_compute::Status error_on_data_type_channel_not_in(const char *functio * * @return Status */ -inline arm_compute::Status error_on_unsupported_fp16(const char *function, const char *file, const int line, - const ITensorInfo *tensor_info, bool is_fp16_supported) +inline arm_compute::Status error_on_unsupported_fp16( + const char *function, const char *file, const int line, const ITensorInfo *tensor_info, bool is_fp16_supported) { ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_info == nullptr, function, file, line); - ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG((tensor_info->data_type() == DataType::F16 && !is_fp16_supported), - function, file, line, "FP16 not supported by the device"); + ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG((tensor_info->data_type() == DataType::F16 && !is_fp16_supported), function, + file, line, "FP16 not supported by the device"); return arm_compute::Status{}; } @@ -906,11 +982,12 @@ inline arm_compute::Status error_on_unsupported_fp16(const char *function, const * * @return Status */ -inline arm_compute::Status error_on_unsupported_fp16(const char *function, const char *file, const int line, - const ITensor *tensor, bool is_fp16_supported) +inline arm_compute::Status error_on_unsupported_fp16( + const char *function, const char *file, const int line, const ITensor *tensor, bool is_fp16_supported) { ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor == nullptr, function, file, line); - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_unsupported_fp16(function, file, line, tensor->info(), is_fp16_supported)); + ARM_COMPUTE_RETURN_ON_ERROR( + ::arm_compute::error_on_unsupported_fp16(function, file, line, tensor->info(), is_fp16_supported)); return arm_compute::Status{}; } @@ -923,8 +1000,8 @@ inline arm_compute::Status error_on_unsupported_fp16(const char *function, const * * @return Status */ -arm_compute::Status error_on_tensor_not_2d(const char *function, const char *file, const int line, - const ITensor *tensor); +arm_compute::Status +error_on_tensor_not_2d(const char *function, const char *file, const int line, const ITensor *tensor); /** Return an error if the tensor info is not 2D. * @@ -935,8 +1012,8 @@ arm_compute::Status error_on_tensor_not_2d(const char *function, const char *fil * * @return Status */ -arm_compute::Status error_on_tensor_not_2d(const char *function, const char *file, const int line, - const ITensorInfo *tensor); +arm_compute::Status +error_on_tensor_not_2d(const char *function, const char *file, const int line, const ITensorInfo *tensor); #define ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(t) \ ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_tensor_not_2d(__func__, __FILE__, __LINE__, t)) @@ -955,17 +1032,15 @@ arm_compute::Status error_on_tensor_not_2d(const char *function, const char *fil * @return Status */ template -inline arm_compute::Status error_on_channel_not_in(const char *function, const char *file, const int line, - T cn, T &&channel, Ts &&... 
channels) +inline arm_compute::Status +error_on_channel_not_in(const char *function, const char *file, const int line, T cn, T &&channel, Ts &&...channels) { ARM_COMPUTE_RETURN_ERROR_ON_LOC(cn == Channel::UNKNOWN, function, file, line); - const std::array channels_array{ { std::forward(channels)... } }; - ARM_COMPUTE_RETURN_ERROR_ON_LOC(channel != cn && std::none_of(channels_array.begin(), channels_array.end(), [&](const T & f) - { - return f == cn; - }), - function, file, line); + const std::array channels_array{{std::forward(channels)...}}; + ARM_COMPUTE_RETURN_ERROR_ON_LOC(channel != cn && std::none_of(channels_array.begin(), channels_array.end(), + [&](const T &f) { return f == cn; }), + function, file, line); return arm_compute::Status{}; } #define ARM_COMPUTE_ERROR_ON_CHANNEL_NOT_IN(c, ...) \ @@ -983,8 +1058,8 @@ inline arm_compute::Status error_on_channel_not_in(const char *function, const c * * @return Status */ -arm_compute::Status error_on_channel_not_in_known_format(const char *function, const char *file, const int line, - Format fmt, Channel cn); +arm_compute::Status +error_on_channel_not_in_known_format(const char *function, const char *file, const int line, Format fmt, Channel cn); #define ARM_COMPUTE_ERROR_ON_CHANNEL_NOT_IN_KNOWN_FORMAT(f, c) \ ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_channel_not_in_known_format(__func__, __FILE__, __LINE__, f, c)) #define ARM_COMPUTE_RETURN_ERROR_ON_CHANNEL_NOT_IN_KNOWN_FORMAT(f, c) \ @@ -999,8 +1074,8 @@ arm_compute::Status error_on_channel_not_in_known_format(const char *function, c * * @return Status */ -arm_compute::Status error_on_unconfigured_kernel(const char *function, const char *file, const int line, - const IKernel *kernel); +arm_compute::Status +error_on_unconfigured_kernel(const char *function, const char *file, const int line, const IKernel *kernel); #define ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(k) \ ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_unconfigured_kernel(__func__, __FILE__, __LINE__, k)) #define ARM_COMPUTE_RETURN_ERROR_ON_UNCONFIGURED_KERNEL(k) \ @@ -1017,8 +1092,12 @@ arm_compute::Status error_on_unconfigured_kernel(const char *function, const cha * * @return Status */ -arm_compute::Status error_on_invalid_subtensor(const char *function, const char *file, const int line, - const TensorShape &parent_shape, const Coordinates &coords, const TensorShape &shape); +arm_compute::Status error_on_invalid_subtensor(const char *function, + const char *file, + const int line, + const TensorShape &parent_shape, + const Coordinates &coords, + const TensorShape &shape); #define ARM_COMPUTE_ERROR_ON_INVALID_SUBTENSOR(p, c, s) \ ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_invalid_subtensor(__func__, __FILE__, __LINE__, p, c, s)) #define ARM_COMPUTE_RETURN_ERROR_ON_INVALID_SUBTENSOR(p, c, s) \ @@ -1034,11 +1113,16 @@ arm_compute::Status error_on_invalid_subtensor(const char *function, const char * * @return Status */ -arm_compute::Status error_on_invalid_subtensor_valid_region(const char *function, const char *file, const int line, - const ValidRegion &parent_valid_region, const ValidRegion &valid_region); +arm_compute::Status error_on_invalid_subtensor_valid_region(const char *function, + const char *file, + const int line, + const ValidRegion &parent_valid_region, + const ValidRegion &valid_region); #define ARM_COMPUTE_ERROR_ON_INVALID_SUBTENSOR_VALID_REGION(pv, sv) \ - ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_invalid_subtensor_valid_region(__func__, __FILE__, __LINE__, pv, sv)) + 
ARM_COMPUTE_ERROR_THROW_ON( \ + ::arm_compute::error_on_invalid_subtensor_valid_region(__func__, __FILE__, __LINE__, pv, sv)) #define ARM_COMPUTE_RETURN_ERROR_ON_INVALID_SUBTENSOR_VALID_REGION(pv, sv) \ - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_invalid_subtensor_valid_region(__func__, __FILE__, __LINE__, pv, sv)) -} + ARM_COMPUTE_RETURN_ON_ERROR( \ + ::arm_compute::error_on_invalid_subtensor_valid_region(__func__, __FILE__, __LINE__, pv, sv)) +} // namespace arm_compute #endif /* ARM_COMPUTE_VALIDATE_H*/ diff --git a/arm_compute/core/Version.h b/arm_compute/core/Version.h index a4d307950a..44d400bad8 100644 --- a/arm_compute/core/Version.h +++ b/arm_compute/core/Version.h @@ -28,7 +28,7 @@ /* Macro utilities */ #define ARM_COMPUTE_STRINGIFY2(s) #s -#define ARM_COMPUTE_STRINGIFY(s) ARM_COMPUTE_STRINGIFY2(s) +#define ARM_COMPUTE_STRINGIFY(s) ARM_COMPUTE_STRINGIFY2(s) #define ARM_COMPUTE_VERSION_STR \ ARM_COMPUTE_STRINGIFY(ARM_COMPUTE_VERSION_MAJOR) \ diff --git a/arm_compute/core/Window.h b/arm_compute/core/Window.h index 8ae859f4b3..4863b95045 100644 --- a/arm_compute/core/Window.h +++ b/arm_compute/core/Window.h @@ -24,15 +24,15 @@ #ifndef ARM_COMPUTE_WINDOW_H #define ARM_COMPUTE_WINDOW_H -#include -#include -#include - #include "arm_compute/core/Coordinates.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/ITensorInfo.h" #include "arm_compute/core/utils/math/Math.h" +#include +#include +#include + namespace arm_compute { /** Describe a multidimensional execution window. */ @@ -86,8 +86,7 @@ public: * @param[in] step Step between two elements of the dimension when iterating. * */ - constexpr Dimension(int start = 0, int end = 1, int step = 1) - : _start(start), _end(end), _step(step) + constexpr Dimension(int start = 0, int end = 1, int step = 1) : _start(start), _end(end), _step(step) { } Dimension(const Dimension &d) = default; @@ -373,7 +372,8 @@ public: * * @return Collapsed window. */ - Window collapse_if_possible(const Window &full_window, size_t first, size_t last, bool *has_collapsed = nullptr) const; + Window + collapse_if_possible(const Window &full_window, size_t first, size_t last, bool *has_collapsed = nullptr) const; /** Collapse the dimensions higher than @p first if possible. * @@ -441,7 +441,7 @@ private: * @return The first slice of the window. */ template - Window first_slice_window() const; + Window first_slice_window() const; /** Slide the passed window slice. 
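As a usage sketch for the Window API touched above (not part of the patch; sizes illustrative), building a 2D execution window and splitting it between two workers:

    // Dimension(start, end, step): num_iterations = (end - start) / step.
    arm_compute::Window win;
    win.set(0, arm_compute::Window::Dimension(0, 128, 1)); // X: 128 iterations
    win.set(1, arm_compute::Window::Dimension(0, 64, 1));  // Y: 64 iterations
    // Split along Y: worker ids 0 and 1 get Y in [0, 32) and [32, 64).
    const arm_compute::Window top    = win.split_window(1, 0, 2);
    const arm_compute::Window bottom = win.split_window(1, 1, 2);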
* diff --git a/arm_compute/core/Window.inl b/arm_compute/core/Window.inl index 5ee4b57145..d935507b1d 100644 --- a/arm_compute/core/Window.inl +++ b/arm_compute/core/Window.inl @@ -26,7 +26,7 @@ namespace arm_compute inline Window::Window(const Window &src) : _dims(), _is_broadcasted(utility::generate_array::value) { - for(size_t i = 0; i < Coordinates::num_max_dimensions; ++i) + for (size_t i = 0; i < Coordinates::num_max_dimensions; ++i) { set(i, src[i]); _is_broadcasted[i] = src.is_broadcasted(i); @@ -65,32 +65,34 @@ inline bool Window::is_broadcasted(size_t dimension) const return _is_broadcasted[dimension]; } -inline Window Window::collapse_if_possible(const Window &full_window, const size_t first, - const size_t last, bool *has_collapsed) const +inline Window Window::collapse_if_possible(const Window &full_window, + const size_t first, + const size_t last, + bool *has_collapsed) const { Window collapsed(*this); bool is_collapsable = true; int collapsed_end = _dims[first].end(); - for(size_t d = first + 1; is_collapsable && (d < last); ++d) + for (size_t d = first + 1; is_collapsable && (d < last); ++d) { // The _dims's dimension must match the full _dims dimension to be collapsable: - is_collapsable = (_dims[d].start() == 0) && (full_window[d].start() == 0) && (_dims[d].step() <= 1) - && (full_window[d].end() == _dims[d].end()); + is_collapsable = (_dims[d].start() == 0) && (full_window[d].start() == 0) && (_dims[d].step() <= 1) && + (full_window[d].end() == _dims[d].end()); collapsed_end *= _dims[d].end(); } - if(is_collapsable) + if (is_collapsable) { collapsed._dims.at(first).set_end(collapsed_end); - for(size_t d = first + 1; is_collapsable && (d < last); ++d) + for (size_t d = first + 1; is_collapsable && (d < last); ++d) { collapsed.set(d, Dimension()); } } - if(has_collapsed != nullptr) + if (has_collapsed != nullptr) { *has_collapsed = is_collapsable; } @@ -101,7 +103,7 @@ inline Window Window::collapse_if_possible(const Window &full_window, const size inline Window Window::shift_dimensions(unsigned int shift_value) const { Window shifted_window; - for(size_t n = 0; n < (Coordinates::num_max_dimensions - shift_value); n++) + for (size_t n = 0; n < (Coordinates::num_max_dimensions - shift_value); n++) { shifted_window.set(n, _dims[n + shift_value]); } @@ -120,9 +122,9 @@ inline Window Window::collapse(const Window &full_window, const size_t first, co inline Window Window::broadcast_if_dimension_le_one(const TensorShape &shape) const { Window broadcastWin(*this); - for(size_t d = 0; d < TensorShape::num_max_dimensions; ++d) + for (size_t d = 0; d < TensorShape::num_max_dimensions; ++d) { - if(shape[d] <= 1) + if (shape[d] <= 1) { broadcastWin.set_broadcasted(d); } @@ -142,7 +144,7 @@ inline void Window::adjust(size_t dimension, int adjust_value, bool is_at_start) ARM_COMPUTE_ERROR_ON(dimension >= Coordinates::num_max_dimensions); Window::Dimension &d = _dims[dimension]; - if(is_at_start) + if (is_at_start) { d = Window::Dimension(d.start() + adjust_value, d.end(), d.step()); } @@ -172,7 +174,7 @@ inline void Window::set_dimension_step(size_t dimension, int step) inline void Window::validate() const { - for(size_t i = 0; i < Coordinates::num_max_dimensions; ++i) + for (size_t i = 0; i < Coordinates::num_max_dimensions; ++i) { ARM_COMPUTE_ERROR_ON(_dims[i].end() < _dims[i].start()); ARM_COMPUTE_ERROR_ON((_dims[i].step() != 0) && (((_dims[i].end() - _dims[i].start()) % _dims[i].step()) != 0)); @@ -193,9 +195,9 @@ inline Window Window::split_window(size_t dimension, size_t id, 
size_t total) co Window out; - for(size_t d = 0; d < Coordinates::num_max_dimensions; ++d) + for (size_t d = 0; d < Coordinates::num_max_dimensions; ++d) { - if(d == dimension) + if (d == dimension) { int start = _dims[d].start(); int end = _dims[d].end(); @@ -207,7 +209,7 @@ inline Window Window::split_window(size_t dimension, size_t id, size_t total) co int it_start = work * id; - if(int(id) < rem) + if (int(id) < rem) { ++work; it_start += id; @@ -234,18 +236,18 @@ inline Window Window::split_window(size_t dimension, size_t id, size_t total) co template inline bool Window::slide_window_slice(Window &slice) const { - for(unsigned int n = window_dimension; n < Coordinates::num_max_dimensions; ++n) + for (unsigned int n = window_dimension; n < Coordinates::num_max_dimensions; ++n) { // Did we reach the end of this dimension? const int v = slice._dims[n].start() + 1; - if(v < _dims[n].end()) + if (v < _dims[n].end()) { // No: increment slice._dims[n] = Dimension(v, v + 1, 1); // Reset lower dimensions: - for(unsigned int lower = window_dimension; lower < n; ++lower) + for (unsigned int lower = window_dimension; lower < n; ++lower) { slice._dims[lower] = Dimension(_dims[lower].start(), _dims[lower].start() + 1, 1); } @@ -258,14 +260,14 @@ inline bool Window::slide_window_slice(Window &slice) const } template -inline Window Window::first_slice_window() const +inline Window Window::first_slice_window() const { Window slice; std::copy_n(_dims.begin(), window_dimension, slice._dims.begin()); //Initialise higher dimensions to be the first slice. - for(unsigned int n = window_dimension; n < Coordinates::num_max_dimensions; ++n) + for (unsigned int n = window_dimension; n < Coordinates::num_max_dimensions; ++n) { slice._dims[n] = Dimension(_dims[n].start(), _dims[n].start() + 1, 1); } @@ -275,7 +277,7 @@ inline Window Window::first_slice_window() const inline void Window::use_tensor_dimensions(const TensorShape &shape, size_t first_dimension) { - for(unsigned int n = first_dimension; n < shape.num_dimensions(); ++n) + for (unsigned int n = first_dimension; n < shape.num_dimensions(); ++n) { set(n, Window::Dimension(0, std::max(shape[n], static_cast(1)))); } @@ -284,7 +286,7 @@ inline void Window::use_tensor_dimensions(const TensorShape &shape, size_t first inline TensorShape Window::shape() const { TensorShape shape; - for(size_t d = 0; d < TensorShape::num_max_dimensions; ++d) + for (size_t d = 0; d < TensorShape::num_max_dimensions; ++d) { shape.set(d, (_dims[d].end() - _dims[d].start()) / _dims[d].step()); } @@ -294,7 +296,7 @@ inline TensorShape Window::shape() const inline size_t Window::num_iterations_total() const { size_t total = 1; - for(size_t d = 0; d < Coordinates::num_max_dimensions; ++d) + for (size_t d = 0; d < Coordinates::num_max_dimensions; ++d) { total *= num_iterations(d); } diff --git a/arm_compute/core/WindowIterator.h b/arm_compute/core/WindowIterator.h index b1e399c872..29302c410a 100644 --- a/arm_compute/core/WindowIterator.h +++ b/arm_compute/core/WindowIterator.h @@ -28,7 +28,6 @@ #include "arm_compute/core/ITensor.h" #include "arm_compute/core/Window.h" - namespace arm_compute { /** Convert an offset in window steps into absolute coordinates. 
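The id and remainder arithmetic of split_window above is easiest to verify with concrete numbers. A self-contained sketch of the same division scheme follows; the branch that advances it_start by rem for the ids past the remainder is not visible in this hunk and is assumed.

#include <cassert>

// Mirror of the work/rem/it_start computation in Window::split_window.
static void split(int num_iterations, int total, int id, int &work, int &it_start)
{
    const int rem = num_iterations % total;
    work          = num_iterations / total;
    it_start      = work * id;
    if (id < rem)
    {
        ++work; // the first `rem` ids take one extra iteration
        it_start += id;
    }
    else
    {
        it_start += rem; // assumed: skip past the extra iterations handed out above
    }
}

int main()
{
    int work = 0, start = 0;
    split(10, 3, 0, work, start); assert(work == 4 && start == 0); // iterations [0, 4)
    split(10, 3, 1, work, start); assert(work == 3 && start == 4); // iterations [4, 7)
    split(10, 3, 2, work, start); assert(work == 3 && start == 7); // iterations [7, 10)
    return 0;
}

Every iteration is covered exactly once, and only the first `rem` worker ids receive one extra.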
@@ -41,7 +40,7 @@ namespace arm_compute inline Coordinates convert_window_coord_to_position(const Window &w, const Coordinates &offset) { Coordinates position; - for(unsigned int i = 0; i < Coordinates::num_max_dimensions; ++i) + for (unsigned int i = 0; i < Coordinates::num_max_dimensions; ++i) { position.set(i, w[i].start() + offset[i] * w[i].step()); } @@ -165,7 +164,7 @@ public: template void iterate_3D(M &&on_new_row_size) { - while(_end.z() != _position.z()) + while (_end.z() != _position.z()) { iterate_2D_internal(on_new_row_size, _w.x().end() - _w.x().step(), _w.y().end() - _w.y().step()); _position[2] += _w.z().step(); @@ -212,7 +211,7 @@ private: void iterate_2D_internal(M &&on_new_row_size, int end_x, int end_y) { //Is there more than one row to process ? - if(end_y == _position.y()) + if (end_y == _position.y()) { // Both start and end belong to the same row: iterate_over_dim0(end_x + _w.x().step(), on_new_row_size); @@ -220,7 +219,7 @@ private: else { // Do we start from the beginning of the row ? - if(_w.x().start() != _position.x()) + if (_w.x().start() != _position.x()) { //Start in the middle of a row: process left-over X iterate_over_dim0(_w.x().end(), on_new_row_size); @@ -229,7 +228,7 @@ private: //Middle rows bool no_leftover = end_x + _w.x().step() == _w.x().end(); - if(no_leftover) + if (no_leftover) { //Switch to full row size: on_new_row_size(_w[0].start(), _w.x().end()); @@ -241,7 +240,7 @@ private: else { // Are there full rows to process ? - if(_position[1] != end_y) + if (_position[1] != end_y) { //Switch to full row size: on_new_row_size(_w[0].start(), _w.x().end()); @@ -261,7 +260,7 @@ private: */ void iterate_over_dim1(int end) { - for(; _position[1] != end; _position[1] += _w[1].step()) + for (; _position[1] != end; _position[1] += _w[1].step()) { _position[0] = _w[0].start(); iterate_over_dim0(_w[0].end()); @@ -288,7 +287,7 @@ private: { // Both start and end belong to the same row: ARM_COMPUTE_ERROR_ON(_position[0] > end); - for(; _position.x() < end; _position[0] += _w[0].step()) + for (; _position.x() < end; _position[0] += _w[0].step()) { _lambda_function(_position); } @@ -310,9 +309,10 @@ private: * @return A WindowIterator object. 
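Ahead of the definition of create_window_iterator just below, a hedged usage sketch: it assumes the start and end coordinates describe the first and last window-step positions to visit, as the constructor parameters and convert_window_coord_to_position above suggest.

#include "arm_compute/core/WindowIterator.h"

// Count every position a WindowIterator visits inside `w`.
size_t count_positions(const arm_compute::Window      &w,
                       const arm_compute::Coordinates &start,
                       const arm_compute::Coordinates &end)
{
    size_t visited = 0;
    auto   on_item = [&](const arm_compute::Coordinates & /* pos */) { ++visited; };
    auto   it      = arm_compute::create_window_iterator(w, start, end, on_item);
    // iterate_3D reports each new row extent; the row bounds are unused here.
    it.iterate_3D([](int /* row_start */, int /* row_end */) {});
    return visited;
}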
 */
template <typename L>
-WindowIterator<L> create_window_iterator(const Window &w, const Coordinates &start, const Coordinates &end, L &&lambda_function)
+WindowIterator<L>
+create_window_iterator(const Window &w, const Coordinates &start, const Coordinates &end, L &&lambda_function)
 {
     return WindowIterator<L>(w, start, end, std::move(lambda_function));
 }
-}
+} // namespace arm_compute
 #endif /*ARM_COMPUTE_WINDOW_ITERATOR_H*/
diff --git a/arm_compute/core/experimental/Types.h b/arm_compute/core/experimental/Types.h
index 8dd6812b58..63a3a1a1ec 100644
--- a/arm_compute/core/experimental/Types.h
+++ b/arm_compute/core/experimental/Types.h
@@ -92,24 +92,18 @@ struct MemoryInfo
 {
     MemoryInfo() = default;
 
-    MemoryInfo(int slot, size_t size, size_t alignment = 0) noexcept
-        : slot(slot),
-          size(size),
-          alignment(alignment)
+    MemoryInfo(int slot, size_t size, size_t alignment = 0) noexcept : slot(slot), size(size), alignment(alignment)
    {
    }
 
     MemoryInfo(int slot, MemoryLifetime lifetime, size_t size, size_t alignment = 0) noexcept
-        : slot(slot),
-          lifetime(lifetime),
-          size(size),
-          alignment(alignment)
+        : slot(slot), lifetime(lifetime), size(size), alignment(alignment)
    {
    }
 
     bool merge(int slot, size_t new_size, size_t new_alignment = 0) noexcept
     {
-        if(slot != this->slot)
+        if (slot != this->slot)
         {
             return false;
         }
@@ -120,10 +114,10 @@ struct MemoryInfo
         return true;
     }
 
-    int slot{ ACL_UNKNOWN };
-    MemoryLifetime lifetime{ MemoryLifetime::Temporary };
-    size_t size{ 0 };
-    size_t alignment{ 64 };
+    int slot{ACL_UNKNOWN};
+    MemoryLifetime lifetime{MemoryLifetime::Temporary};
+    size_t size{0};
+    size_t alignment{64};
 };
 
 using MemoryRequirements = std::vector<MemoryInfo>;
diff --git a/arm_compute/core/utils/ActivationFunctionUtils.h b/arm_compute/core/utils/ActivationFunctionUtils.h
index 1cb66da13d..c988efa256 100644
--- a/arm_compute/core/utils/ActivationFunctionUtils.h
+++ b/arm_compute/core/utils/ActivationFunctionUtils.h
@@ -37,5 +37,5 @@ namespace arm_compute
  * @return The string describing the activation function.
  */
 const std::string &string_from_activation_func(const ActivationFunction &act);
-}
+} // namespace arm_compute
 #endif /*ARM_COMPUTE_CORE_UTILS_ACTIVATIONFUNCTIONUTILS_H */
diff --git a/arm_compute/core/utils/DataLayoutUtils.h b/arm_compute/core/utils/DataLayoutUtils.h
index 399f55c63f..61839c9f91 100644
--- a/arm_compute/core/utils/DataLayoutUtils.h
+++ b/arm_compute/core/utils/DataLayoutUtils.h
@@ -36,5 +36,5 @@ namespace arm_compute
  * @return The string describing the data layout.
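A small sketch of the MemoryInfo::merge contract shown above, assuming MemoryInfo lives in arm_compute::experimental (as the file path suggests) and that the update lines elided from this hunk keep the larger of the two sizes and alignments:

#include "arm_compute/core/experimental/Types.h"

#include <cassert>

int main()
{
    arm_compute::experimental::MemoryInfo info(/* slot */ 0, /* size */ 256, /* alignment */ 16);
    assert(!info.merge(1, 512));    // different slot: merge is rejected
    assert(info.merge(0, 512, 32)); // same slot: merge succeeds and (assumed)
                                    // grows size/alignment to the larger value
    return 0;
}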
*/ const std::string &string_from_data_layout(DataLayout dl); -} +} // namespace arm_compute #endif /*ARM_COMPUTE_CORE_UTILS_DATALAYOUTUTILS_H */ diff --git a/arm_compute/core/utils/DataTypeUtils.h b/arm_compute/core/utils/DataTypeUtils.h index cbb409c8a1..7ea5eb7670 100644 --- a/arm_compute/core/utils/DataTypeUtils.h +++ b/arm_compute/core/utils/DataTypeUtils.h @@ -37,7 +37,7 @@ namespace arm_compute */ inline size_t data_size_from_type(DataType data_type) { - switch(data_type) + switch (data_type) { case DataType::U8: case DataType::S8: @@ -77,7 +77,7 @@ inline size_t data_size_from_type(DataType data_type) */ inline size_t element_size_from_data_type(DataType dt) { - switch(dt) + switch (dt) { case DataType::S8: case DataType::U8: @@ -114,7 +114,7 @@ inline size_t element_size_from_data_type(DataType dt) */ inline DataType data_type_from_format(Format format) { - switch(format) + switch (format) { case Format::U8: case Format::UV88: @@ -158,7 +158,7 @@ inline DataType data_type_from_format(Format format) */ inline DataType get_promoted_data_type(DataType dt) { - switch(dt) + switch (dt) { case DataType::U8: return DataType::U16; @@ -196,7 +196,7 @@ inline std::tuple get_min_max(DataType dt) { PixelValue min{}; PixelValue max{}; - switch(dt) + switch (dt) { case DataType::U8: case DataType::QASYMM8: @@ -303,7 +303,7 @@ inline ::std::istream &operator>>(::std::istream &stream, DataType &data_type) */ inline bool is_data_type_float(DataType dt) { - switch(dt) + switch (dt) { case DataType::F16: case DataType::F32: @@ -323,7 +323,7 @@ inline bool is_data_type_float(DataType dt) */ inline bool is_data_type_quantized(DataType dt) { - switch(dt) + switch (dt) { case DataType::QSYMM8: case DataType::QASYMM8: @@ -345,7 +345,7 @@ inline bool is_data_type_quantized(DataType dt) */ inline bool is_data_type_quantized_asymmetric(DataType dt) { - switch(dt) + switch (dt) { case DataType::QASYMM8: case DataType::QASYMM8_SIGNED: @@ -364,7 +364,7 @@ inline bool is_data_type_quantized_asymmetric(DataType dt) */ inline bool is_data_type_quantized_asymmetric_signed(DataType dt) { - switch(dt) + switch (dt) { case DataType::QASYMM8_SIGNED: return true; @@ -381,7 +381,7 @@ inline bool is_data_type_quantized_asymmetric_signed(DataType dt) */ inline bool is_data_type_quantized_symmetric(DataType dt) { - switch(dt) + switch (dt) { case DataType::QSYMM8: case DataType::QSYMM8_PER_CHANNEL: @@ -400,7 +400,7 @@ inline bool is_data_type_quantized_symmetric(DataType dt) */ inline bool is_data_type_quantized_per_channel(DataType dt) { - switch(dt) + switch (dt) { case DataType::QSYMM8_PER_CHANNEL: return true; @@ -420,12 +420,13 @@ inline bool is_data_type_quantized_per_channel(DataType dt) template bool check_value_range(T val, DataType dt, QuantizationInfo qinfo = QuantizationInfo()) { - switch(dt) + switch (dt) { case DataType::U8: { const auto val_u8 = static_cast(val); - return ((val_u8 == val) && val >= std::numeric_limits::lowest() && val <= std::numeric_limits::max()); + return ((val_u8 == val) && val >= std::numeric_limits::lowest() && + val <= std::numeric_limits::max()); } case DataType::QASYMM8: { @@ -436,29 +437,34 @@ bool check_value_range(T val, DataType dt, QuantizationInfo qinfo = Quantization case DataType::S8: { const auto val_s8 = static_cast(val); - return ((val_s8 == val) && val >= std::numeric_limits::lowest() && val <= std::numeric_limits::max()); + return ((val_s8 == val) && val >= std::numeric_limits::lowest() && + val <= std::numeric_limits::max()); } case DataType::U16: { const auto val_u16 
= static_cast(val); - return ((val_u16 == val) && val >= std::numeric_limits::lowest() && val <= std::numeric_limits::max()); + return ((val_u16 == val) && val >= std::numeric_limits::lowest() && + val <= std::numeric_limits::max()); } case DataType::S16: { const auto val_s16 = static_cast(val); - return ((val_s16 == val) && val >= std::numeric_limits::lowest() && val <= std::numeric_limits::max()); + return ((val_s16 == val) && val >= std::numeric_limits::lowest() && + val <= std::numeric_limits::max()); } case DataType::U32: { const auto val_d64 = static_cast(val); const auto val_u32 = static_cast(val); - return ((val_u32 == val_d64) && val_d64 >= std::numeric_limits::lowest() && val_d64 <= std::numeric_limits::max()); + return ((val_u32 == val_d64) && val_d64 >= std::numeric_limits::lowest() && + val_d64 <= std::numeric_limits::max()); } case DataType::S32: { const auto val_d64 = static_cast(val); const auto val_s32 = static_cast(val); - return ((val_s32 == val_d64) && val_d64 >= std::numeric_limits::lowest() && val_d64 <= std::numeric_limits::max()); + return ((val_s32 == val_d64) && val_d64 >= std::numeric_limits::lowest() && + val_d64 <= std::numeric_limits::max()); } case DataType::BFLOAT16: return (val >= bfloat16::lowest() && val <= bfloat16::max()); @@ -482,7 +488,7 @@ inline std::string cpu_impl_dt(const DataType &data_type) { std::string ret = ""; - switch(data_type) + switch (data_type) { case DataType::F32: ret = "fp32"; @@ -521,5 +527,5 @@ inline std::string cpu_impl_dt(const DataType &data_type) return ret; } -} +} // namespace arm_compute #endif /*ARM_COMPUTE_CORE_UTILS_DATATYPEUTILS_H */ diff --git a/arm_compute/core/utils/FormatUtils.h b/arm_compute/core/utils/FormatUtils.h index afb0f78255..a8e96bd361 100644 --- a/arm_compute/core/utils/FormatUtils.h +++ b/arm_compute/core/utils/FormatUtils.h @@ -37,7 +37,7 @@ namespace arm_compute */ inline size_t pixel_size_from_format(Format format) { - switch(format) + switch (format) { case Format::U8: return 1; @@ -77,7 +77,7 @@ inline size_t pixel_size_from_format(Format format) */ inline int plane_idx_from_channel(Format format, Channel channel) { - switch(format) + switch (format) { // Single planar formats have a single plane case Format::U8: @@ -99,7 +99,7 @@ inline int plane_idx_from_channel(Format format, Channel channel) case Format::NV21: { // Channel U and V share the same plane of format UV88 - switch(channel) + switch (channel) { case Channel::Y: return 0; @@ -114,7 +114,7 @@ inline int plane_idx_from_channel(Format format, Channel channel) case Format::IYUV: case Format::YUV444: { - switch(channel) + switch (channel) { case Channel::Y: return 0; @@ -142,11 +142,11 @@ inline int plane_idx_from_channel(Format format, Channel channel) */ inline int channel_idx_from_format(Format format, Channel channel) { - switch(format) + switch (format) { case Format::RGB888: { - switch(channel) + switch (channel) { case Channel::R: return 0; @@ -161,7 +161,7 @@ inline int channel_idx_from_format(Format format, Channel channel) } case Format::RGBA8888: { - switch(channel) + switch (channel) { case Channel::R: return 0; @@ -178,7 +178,7 @@ inline int channel_idx_from_format(Format format, Channel channel) } case Format::YUYV422: { - switch(channel) + switch (channel) { case Channel::Y: return 0; @@ -193,7 +193,7 @@ inline int channel_idx_from_format(Format format, Channel channel) } case Format::UYVY422: { - switch(channel) + switch (channel) { case Channel::Y: return 1; @@ -208,7 +208,7 @@ inline int channel_idx_from_format(Format 
format, Channel channel) } case Format::NV12: { - switch(channel) + switch (channel) { case Channel::Y: return 0; @@ -223,7 +223,7 @@ inline int channel_idx_from_format(Format format, Channel channel) } case Format::NV21: { - switch(channel) + switch (channel) { case Channel::Y: return 0; @@ -239,7 +239,7 @@ inline int channel_idx_from_format(Format format, Channel channel) case Format::YUV444: case Format::IYUV: { - switch(channel) + switch (channel) { case Channel::Y: return 0; @@ -266,7 +266,7 @@ inline int channel_idx_from_format(Format format, Channel channel) */ inline size_t num_planes_from_format(Format format) { - switch(format) + switch (format) { case Format::U8: case Format::S16: @@ -301,7 +301,7 @@ inline size_t num_planes_from_format(Format format) */ inline size_t num_channels_from_format(Format format) { - switch(format) + switch (format) { case Format::U8: case Format::U16: @@ -340,5 +340,5 @@ inline size_t num_channels_from_format(Format format) * @return The string describing the format. */ const std::string &string_from_format(Format format); -} +} // namespace arm_compute #endif /*ARM_COMPUTE_CORE_UTILS_FORMATUTILS_H */ diff --git a/arm_compute/core/utils/InterpolationPolicyUtils.h b/arm_compute/core/utils/InterpolationPolicyUtils.h index 79f6e3aa5f..8d4ae4321c 100644 --- a/arm_compute/core/utils/InterpolationPolicyUtils.h +++ b/arm_compute/core/utils/InterpolationPolicyUtils.h @@ -37,5 +37,5 @@ namespace arm_compute * @return The string describing the interpolation policy. */ const std::string &string_from_interpolation_policy(InterpolationPolicy policy); -} +} // namespace arm_compute #endif /*ARM_COMPUTE_CORE_UTILS_INTERPOLATIONPOLICYUTILS_H */ diff --git a/arm_compute/core/utils/StringUtils.h b/arm_compute/core/utils/StringUtils.h index 41f29b0901..c13cbaa334 100644 --- a/arm_compute/core/utils/StringUtils.h +++ b/arm_compute/core/utils/StringUtils.h @@ -61,5 +61,5 @@ std::string float_to_string_with_full_precision(float val); * @return std::string */ std::string join(const std::vector strings, const std::string &sep); -} +} // namespace arm_compute #endif /*ARM_COMPUTE_CORE_UTILS_STRINGUTILS_H */ diff --git a/arm_compute/core/utils/helpers/AdjustVecSize.h b/arm_compute/core/utils/helpers/AdjustVecSize.h index bbb3048b84..842e3b57d6 100644 --- a/arm_compute/core/utils/helpers/AdjustVecSize.h +++ b/arm_compute/core/utils/helpers/AdjustVecSize.h @@ -39,17 +39,17 @@ inline unsigned int adjust_vec_size(unsigned int vec_size, size_t dim0) { ARM_COMPUTE_ERROR_ON(vec_size > 16); - if((vec_size >= dim0) && (dim0 == 3)) + if ((vec_size >= dim0) && (dim0 == 3)) { return dim0; } - while(vec_size > dim0) + while (vec_size > dim0) { vec_size >>= 1; } return vec_size; } -} +} // namespace arm_compute #endif /*ARM_COMPUTE_UTILS_H */ diff --git a/arm_compute/core/utils/helpers/tensor_transform.h b/arm_compute/core/utils/helpers/tensor_transform.h index faa5b4433c..7a61fa192a 100644 --- a/arm_compute/core/utils/helpers/tensor_transform.h +++ b/arm_compute/core/utils/helpers/tensor_transform.h @@ -52,7 +52,8 @@ int calculate_stride_on_index(int index, Coordinates strides); * * @return Absolute start position of a given index */ -int calculate_start_on_index(TensorShape input_shape, int index, Coordinates starts, Coordinates strides, int32_t begin_mask); +int calculate_start_on_index( + TensorShape input_shape, int index, Coordinates starts, Coordinates strides, int32_t begin_mask); /** Returns the absolute end position of a given index for a strided slice operation * @@ -68,8 +69,13 
@@ int calculate_start_on_index(TensorShape input_shape, int index, Coordinates sta * * @return Absolute end position of a given index */ -int calculate_end_on_index(TensorShape input_shape, int index, int start_on_index, Coordinates ends, Coordinates strides, - int32_t end_mask = 0, int32_t shrink_axis_mask = 0); +int calculate_end_on_index(TensorShape input_shape, + int index, + int start_on_index, + Coordinates ends, + Coordinates strides, + int32_t end_mask = 0, + int32_t shrink_axis_mask = 0); /** Calculate start, end and stride coordinates for a strided slice * @@ -87,8 +93,12 @@ int calculate_end_on_index(TensorShape input_shape, int index, int start_on_inde * @return A tuple with */ std::tuple calculate_strided_slice_coords(TensorShape input_shape, - Coordinates starts, Coordinates ends, Coordinates strides, - int32_t begin_mask = 0, int32_t end_mask = 0, int32_t shrink_axis_mask = 0); + Coordinates starts, + Coordinates ends, + Coordinates strides, + int32_t begin_mask = 0, + int32_t end_mask = 0, + int32_t shrink_axis_mask = 0); /** Computes output shape of strided slice * @@ -109,9 +119,14 @@ std::tuple calculate_strided_slice_coords * * @return The output tensor shape */ -TensorShape compute_strided_slice_output_shape(TensorShape input_shape, Coordinates starts, Coordinates ends, Coordinates strides, - int32_t begin_mask = 0, int32_t end_mask = 0, int32_t shrink_axis_mask = 0, - bool return_unshrinked = false); +TensorShape compute_strided_slice_output_shape(TensorShape input_shape, + Coordinates starts, + Coordinates ends, + Coordinates strides, + int32_t begin_mask = 0, + int32_t end_mask = 0, + int32_t shrink_axis_mask = 0, + bool return_unshrinked = false); /** Constructs end mask in case we want to perform a slice operation using the strided slice interface * @@ -122,7 +137,7 @@ TensorShape compute_strided_slice_output_shape(TensorShape input_shape, Coordina * @return End mask */ int32_t construct_slice_end_mask(Coordinates ends); -} // namespace tensor_tranform +} // namespace tensor_transform } // namespace helpers } // namespace arm_compute #endif /* ARM_COMPUTE_UTILS_HELPERS_TENSOR_TRANSFORM_H */ diff --git a/arm_compute/core/utils/logging/FilePrinter.h b/arm_compute/core/utils/logging/FilePrinter.h index 0e5b84f084..a865aadddb 100644 --- a/arm_compute/core/utils/logging/FilePrinter.h +++ b/arm_compute/core/utils/logging/FilePrinter.h @@ -24,9 +24,8 @@ #ifndef ARM_COMPUTE_LOGGING_FILE_PRINTER_H #define ARM_COMPUTE_LOGGING_FILE_PRINTER_H -#include "arm_compute/core/utils/logging/IPrinter.h" - #include "arm_compute/core/utils/io/FileHandler.h" +#include "arm_compute/core/utils/logging/IPrinter.h" namespace arm_compute { diff --git a/arm_compute/core/utils/logging/Helpers.h b/arm_compute/core/utils/logging/Helpers.h index 5f8b948592..c3c2f0f0b8 100644 --- a/arm_compute/core/utils/logging/Helpers.h +++ b/arm_compute/core/utils/logging/Helpers.h @@ -25,6 +25,7 @@ #define ARM_COMPUTE_LOGGING_HELPERS_H #include "arm_compute/core/utils/logging/Types.h" + #include "support/ToolchainSupport.h" #include @@ -45,7 +46,7 @@ namespace logging * @return The formatted string */ template -inline std::string string_with_format(const std::string &fmt, Ts &&... args) +inline std::string string_with_format(const std::string &fmt, Ts &&...args) { size_t size = support::cpp11::snprintf(nullptr, 0, fmt.c_str(), args...) 
+ 1; auto char_str = std::make_unique(size); diff --git a/arm_compute/core/utils/logging/IPrinter.h b/arm_compute/core/utils/logging/IPrinter.h index 42dca58ea1..7fde4d9302 100644 --- a/arm_compute/core/utils/logging/IPrinter.h +++ b/arm_compute/core/utils/logging/IPrinter.h @@ -35,8 +35,7 @@ class Printer { public: /** Default Constructor */ - Printer() noexcept - : _mtx() + Printer() noexcept : _mtx() { } /** Prevent instances of this class from being copied */ diff --git a/arm_compute/core/utils/logging/LogMsgDecorators.h b/arm_compute/core/utils/logging/LogMsgDecorators.h index 9c9e62740f..66a8180e21 100644 --- a/arm_compute/core/utils/logging/LogMsgDecorators.h +++ b/arm_compute/core/utils/logging/LogMsgDecorators.h @@ -63,8 +63,7 @@ public: * * @param str Sting to append */ - StringDecorator(const std::string &str) - : _str(str) + StringDecorator(const std::string &str) : _str(str) { _str = angle_wrap_value(str); } @@ -103,7 +102,7 @@ private: auto time = std::chrono::system_clock::to_time_t(now); // TODO: use put_time for gcc > 4.9 - char buf[100] = { 0 }; + char buf[100] = {0}; std::strftime(buf, sizeof(buf), "%d-%m-%Y %I:%M:%S", std::localtime(&time)); return buf; } diff --git a/arm_compute/core/utils/logging/Logger.h b/arm_compute/core/utils/logging/Logger.h index 4fc9bb7dbf..608db39138 100644 --- a/arm_compute/core/utils/logging/Logger.h +++ b/arm_compute/core/utils/logging/Logger.h @@ -88,7 +88,7 @@ public: * @param[in] args Message arguments */ template - void log(LogLevel log_level, const std::string &fmt, Ts &&... args); + void log(LogLevel log_level, const std::string &fmt, Ts &&...args); /** Sets log level of the logger * * @warning Not thread-safe @@ -159,11 +159,11 @@ private: }; template -inline void Logger::log(LogLevel log_level, const std::string &fmt, Ts &&... args) +inline void Logger::log(LogLevel log_level, const std::string &fmt, Ts &&...args) { // Return if message shouldn't be logged // i.e. if log level does not match the logger's - if(!is_loggable(log_level)) + if (!is_loggable(log_level)) { return; } diff --git a/arm_compute/core/utils/logging/LoggerRegistry.h b/arm_compute/core/utils/logging/LoggerRegistry.h index 7c9931a260..4e52a10935 100644 --- a/arm_compute/core/utils/logging/LoggerRegistry.h +++ b/arm_compute/core/utils/logging/LoggerRegistry.h @@ -27,6 +27,7 @@ #include "arm_compute/core/utils/logging/Logger.h" #include "arm_compute/core/utils/logging/Printers.h" #include "arm_compute/core/utils/logging/Types.h" + #include "support/Mutex.h" #include @@ -54,8 +55,9 @@ public: * @param[in] log_level Logger's log level. Defaults to INFO * @param[in] printers Printers to attach to the system loggers. Defaults with a @ref StdPrinter. */ - void create_logger(const std::string &name, LogLevel log_level = LogLevel::INFO, - const std::vector> &printers = { std::make_shared() }); + void create_logger(const std::string &name, + LogLevel log_level = LogLevel::INFO, + const std::vector> &printers = {std::make_shared()}); /** Remove a logger * * @param name Logger's name @@ -74,16 +76,17 @@ public: * @param[in] printers (Optional) Printers to attach to the system loggers. Defaults with a @ref StdPrinter. 
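Taken together, the LoggerRegistry and Logger interfaces above are used as follows; this sketch relies only on create_logger, logger and the variadic log shown in this diff:

#include "arm_compute/core/utils/logging/LoggerRegistry.h"

void log_example(int num_kernels)
{
    using namespace arm_compute::logging;
    // Register a named logger (INFO level, default StdPrinter).
    LoggerRegistry::get().create_logger("EXAMPLE", LogLevel::INFO);
    auto logger = LoggerRegistry::get().logger("EXAMPLE");
    if (logger != nullptr)
    {
        logger->log(LogLevel::INFO, "configured %d kernels", num_kernels);
    }
}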
*/ void create_reserved_loggers(LogLevel log_level = LogLevel::INFO, - const std::vector> &printers = { std::make_shared() }); + const std::vector> &printers = { + std::make_shared()}); private: /** Default constructor */ LoggerRegistry(); private: - arm_compute::Mutex _mtx; + arm_compute::Mutex _mtx; std::unordered_map> _loggers; - static std::set _reserved_loggers; + static std::set _reserved_loggers; }; } // namespace logging } // namespace arm_compute diff --git a/arm_compute/core/utils/logging/Macros.h b/arm_compute/core/utils/logging/Macros.h index 0ab17c4464..4d5aa5fe2c 100644 --- a/arm_compute/core/utils/logging/Macros.h +++ b/arm_compute/core/utils/logging/Macros.h @@ -48,48 +48,48 @@ inline std::string signature_name(const std::string &pretty_func) do \ { \ auto __logger = arm_compute::logging::LoggerRegistry::get().logger(logger_name); \ - if(__logger != nullptr) \ + if (__logger != nullptr) \ { \ __logger->log(log_level, msg); \ } \ - } while(false) + } while (false) #define ARM_COMPUTE_LOG_MSG_WITH_FUNCNAME(logger_name, log_level, msg) \ do \ { \ auto __logger = arm_compute::logging::LoggerRegistry::get().logger(logger_name); \ - if(__logger != nullptr) \ + if (__logger != nullptr) \ { \ std::ostringstream s; \ s << ARM_COMPUTE_SIGNATURE_NAME << " : " << msg; \ __logger->log(log_level, s.str()); \ } \ - } while(false) + } while (false) #define ARM_COMPUTE_LOG_MSG_WITH_FORMAT(logger_name, log_level, fmt, ...) \ do \ { \ auto __logger = arm_compute::logging::LoggerRegistry::get().logger(logger_name); \ - if(__logger != nullptr) \ + if (__logger != nullptr) \ { \ size_t size = ::snprintf(nullptr, 0, fmt, __VA_ARGS__) + 1; \ auto char_str = std::make_unique(size); \ ::snprintf(char_str.get(), size, fmt, __VA_ARGS__); \ __logger->log(log_level, std::string(char_str.get(), char_str.get() + size - 1)); \ } \ - } while(false) + } while (false) #define ARM_COMPUTE_LOG_STREAM(logger_name, log_level, stream) \ do \ { \ auto __logger = arm_compute::logging::LoggerRegistry::get().logger(logger_name); \ - if(__logger != nullptr) \ + if (__logger != nullptr) \ { \ std::ostringstream s; \ s << stream; \ __logger->log(log_level, s.str()); \ } \ - } while(false) + } while (false) #else /* ARM_COMPUTE_LOGGING_ENABLED */ diff --git a/arm_compute/core/utils/logging/Types.h b/arm_compute/core/utils/logging/Types.h index f0ddae6c84..64c567b984 100644 --- a/arm_compute/core/utils/logging/Types.h +++ b/arm_compute/core/utils/logging/Types.h @@ -44,8 +44,7 @@ enum class LogLevel struct LogMsg { /** Default constructor */ - LogMsg() - : raw_(), log_level_(LogLevel::OFF) + LogMsg() : raw_(), log_level_(LogLevel::OFF) { } /** Construct a log message @@ -53,8 +52,7 @@ struct LogMsg * @param[in] msg Message to log. * @param[in] log_level Logging level. 
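The reformatted macros above wrap that null-checked pattern; a usage sketch follows (the macros expand to no-ops unless ARM_COMPUTE_LOGGING_ENABLED is defined, per the #else branch after this hunk):

#include "arm_compute/core/utils/logging/Macros.h"

void macro_example(int iterations)
{
    using arm_compute::logging::LogLevel;
    ARM_COMPUTE_LOG_MSG("EXAMPLE", LogLevel::INFO, "starting");
    ARM_COMPUTE_LOG_MSG_WITH_FORMAT("EXAMPLE", LogLevel::INFO, "running %d iterations", iterations);
    ARM_COMPUTE_LOG_STREAM("EXAMPLE", LogLevel::INFO, "done after " << iterations << " iterations");
}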
Default: OFF */ - LogMsg(std::string msg, LogLevel log_level = LogLevel::OFF) - : raw_(msg), log_level_(log_level) + LogMsg(std::string msg, LogLevel log_level = LogLevel::OFF) : raw_(msg), log_level_(log_level) { } diff --git a/arm_compute/core/utils/math/Math.h b/arm_compute/core/utils/math/Math.h index c1dce7ff08..e70337ba0f 100644 --- a/arm_compute/core/utils/math/Math.h +++ b/arm_compute/core/utils/math/Math.h @@ -67,5 +67,5 @@ inline auto floor_to_multiple(S value, T divisor) -> decltype((value / divisor) return (value / divisor) * divisor; } -} +} // namespace arm_compute #endif /*ARM_COMPUTE_UTILS_MATH_H */ diff --git a/arm_compute/core/utils/math/SafeOps.h b/arm_compute/core/utils/math/SafeOps.h index dc928a0e5d..ef8bcf7e14 100644 --- a/arm_compute/core/utils/math/SafeOps.h +++ b/arm_compute/core/utils/math/SafeOps.h @@ -25,6 +25,7 @@ #define ARM_COMPUTE_UTILS_MATH_SAFE_OPS #include "arm_compute/core/Error.h" + #include "support/AclRequires.h" #include @@ -51,11 +52,11 @@ T safe_integer_add(T val_a, T val_b) { T result = 0; - if((val_b > 0) && (val_a > std::numeric_limits::max() - val_b)) + if ((val_b > 0) && (val_a > std::numeric_limits::max() - val_b)) { result = std::numeric_limits::max(); } - else if((val_b < 0) && (val_a < std::numeric_limits::min() - val_b)) + else if ((val_b < 0) && (val_a < std::numeric_limits::min() - val_b)) { result = std::numeric_limits::min(); } @@ -83,11 +84,11 @@ T safe_integer_sub(T val_a, T val_b) { T result = 0; - if((val_b < 0) && (val_a > std::numeric_limits::max() + val_b)) + if ((val_b < 0) && (val_a > std::numeric_limits::max() + val_b)) { result = std::numeric_limits::max(); } - else if((val_b > 0) && (val_a < std::numeric_limits::min() + val_b)) + else if ((val_b > 0) && (val_a < std::numeric_limits::min() + val_b)) { result = std::numeric_limits::min(); } @@ -115,13 +116,13 @@ T safe_integer_mul(T val_a, T val_b) { T result = 0; - if(val_a > 0) + if (val_a > 0) { - if((val_b > 0) && (val_a > (std::numeric_limits::max() / val_b))) + if ((val_b > 0) && (val_a > (std::numeric_limits::max() / val_b))) { result = std::numeric_limits::max(); } - else if(val_b < (std::numeric_limits::min() / val_a)) + else if (val_b < (std::numeric_limits::min() / val_a)) { result = std::numeric_limits::min(); } @@ -132,11 +133,11 @@ T safe_integer_mul(T val_a, T val_b) } else { - if((val_b > 0) && (val_a < (std::numeric_limits::min() / val_b))) + if ((val_b > 0) && (val_a < (std::numeric_limits::min() / val_b))) { result = std::numeric_limits::max(); } - else if((val_a != 0) && (val_b < (std::numeric_limits::max() / val_a))) + else if ((val_a != 0) && (val_b < (std::numeric_limits::max() / val_a))) { result = std::numeric_limits::min(); } @@ -165,7 +166,7 @@ T safe_integer_div(T val_a, T val_b) { T result = 0; - if((val_b == 0) || ((val_a == std::numeric_limits::min()) && (val_b == -1))) + if ((val_b == 0) || ((val_a == std::numeric_limits::min()) && (val_b == -1))) { result = std::numeric_limits::min(); } @@ -176,7 +177,7 @@ T safe_integer_div(T val_a, T val_b) return result; } -} // namespace cast +} // namespace math } // namespace utils } // namespace arm_compute #endif /* ARM_COMPUTE_UTILS_MATH_SAFE_OPS */ diff --git a/arm_compute/core/utils/misc/InfoHelpers.h b/arm_compute/core/utils/misc/InfoHelpers.h index ced0d24b56..1d1b4ea8d7 100644 --- a/arm_compute/core/utils/misc/InfoHelpers.h +++ b/arm_compute/core/utils/misc/InfoHelpers.h @@ -53,10 +53,12 @@ inline bool is_relu(ActivationLayerInfo activation_info) */ inline bool is_relu6(ActivationLayerInfo 
activation_info) { - const bool is_lu_bounded_relu = activation_info.activation() == ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU - && activation_info.a() == 6.f && activation_info.b() == 0.f; - const bool is_bounded_relu = activation_info.activation() == ActivationLayerInfo::ActivationFunction::BOUNDED_RELU - && activation_info.a() == 6.f; + const bool is_lu_bounded_relu = + activation_info.activation() == ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU && + activation_info.a() == 6.f && activation_info.b() == 0.f; + const bool is_bounded_relu = + activation_info.activation() == ActivationLayerInfo::ActivationFunction::BOUNDED_RELU && + activation_info.a() == 6.f; return activation_info.enabled() && (is_lu_bounded_relu || is_bounded_relu); } @@ -68,34 +70,37 @@ inline bool is_relu6(ActivationLayerInfo activation_info) * */ template -inline void build_lstm_params_tensor_info(const LSTMParams &lstm_params, - LSTMParams *lstm_params_info) +inline void build_lstm_params_tensor_info(const LSTMParams &lstm_params, LSTMParams *lstm_params_info) { - if(lstm_params.has_peephole_opt()) + if (lstm_params.has_peephole_opt()) { ARM_COMPUTE_ERROR_ON_NULLPTR(lstm_params.cell_to_forget_weights(), lstm_params.cell_to_output_weights()); - lstm_params_info->set_peephole_params(lstm_params.cell_to_forget_weights()->info(), lstm_params.cell_to_output_weights()->info()); + lstm_params_info->set_peephole_params(lstm_params.cell_to_forget_weights()->info(), + lstm_params.cell_to_output_weights()->info()); } - if(lstm_params.has_projection()) + if (lstm_params.has_projection()) { ARM_COMPUTE_ERROR_ON_NULLPTR(lstm_params.projection_weights()); - lstm_params_info->set_projection_params(lstm_params.projection_weights()->info(), - lstm_params.projection_bias() != nullptr ? lstm_params.projection_bias()->info() : nullptr); + lstm_params_info->set_projection_params( + lstm_params.projection_weights()->info(), + lstm_params.projection_bias() != nullptr ? lstm_params.projection_bias()->info() : nullptr); } - if(!lstm_params.has_cifg_opt()) + if (!lstm_params.has_cifg_opt()) { - ARM_COMPUTE_ERROR_ON_NULLPTR(lstm_params.input_to_input_weights(), lstm_params.recurrent_to_input_weights(), lstm_params.input_gate_bias()); + ARM_COMPUTE_ERROR_ON_NULLPTR(lstm_params.input_to_input_weights(), lstm_params.recurrent_to_input_weights(), + lstm_params.input_gate_bias()); - ITensorInfo *cell_to_input_weights_info = (lstm_params.has_peephole_opt()) ? lstm_params.cell_to_input_weights()->info() : nullptr; - lstm_params_info->set_cifg_params(lstm_params.input_to_input_weights()->info(), lstm_params.recurrent_to_input_weights()->info(), - cell_to_input_weights_info, lstm_params.input_gate_bias()->info()); + ITensorInfo *cell_to_input_weights_info = + (lstm_params.has_peephole_opt()) ? 
lstm_params.cell_to_input_weights()->info() : nullptr; + lstm_params_info->set_cifg_params(lstm_params.input_to_input_weights()->info(), + lstm_params.recurrent_to_input_weights()->info(), cell_to_input_weights_info, + lstm_params.input_gate_bias()->info()); } - if(lstm_params.use_layer_norm()) + if (lstm_params.use_layer_norm()) { - ARM_COMPUTE_ERROR_ON_NULLPTR(lstm_params.forget_layer_norm_weights(), - lstm_params.output_layer_norm_weights(), + ARM_COMPUTE_ERROR_ON_NULLPTR(lstm_params.forget_layer_norm_weights(), lstm_params.output_layer_norm_weights(), lstm_params.cell_layer_norm_weights()); - if(!lstm_params.has_cifg_opt()) + if (!lstm_params.has_cifg_opt()) { ARM_COMPUTE_ERROR_ON_NULLPTR(lstm_params.input_layer_norm_weights()); } @@ -103,15 +108,14 @@ inline void build_lstm_params_tensor_info(const LSTMParams &lstm_params, ITensorInfo *forget_info = lstm_params.forget_layer_norm_weights()->info(); ITensorInfo *cell_info = lstm_params.cell_layer_norm_weights()->info(); ITensorInfo *output_info = lstm_params.output_layer_norm_weights()->info(); - ITensorInfo *input_info = lstm_params.has_cifg_opt() ? nullptr : lstm_params.input_layer_norm_weights()->info(); + ITensorInfo *input_info = lstm_params.has_cifg_opt() ? nullptr : lstm_params.input_layer_norm_weights()->info(); lstm_params_info->set_layer_normalization_params(input_info, forget_info, cell_info, output_info); } - lstm_params_info->set_matmul_scale_params(lstm_params.input_intermediate_scale(), - lstm_params.forget_intermediate_scale(), - lstm_params.cell_intermediate_scale(), - lstm_params.output_intermediate_scale()); + lstm_params_info->set_matmul_scale_params( + lstm_params.input_intermediate_scale(), lstm_params.forget_intermediate_scale(), + lstm_params.cell_intermediate_scale(), lstm_params.output_intermediate_scale()); lstm_params_info->set_hidden_state_params(lstm_params.hidden_state_zero(), lstm_params.hidden_state_scale()); } diff --git a/arm_compute/core/utils/misc/Macros.h b/arm_compute/core/utils/misc/Macros.h index de66b6a52f..fa861fa442 100644 --- a/arm_compute/core/utils/misc/Macros.h +++ b/arm_compute/core/utils/misc/Macros.h @@ -26,15 +26,16 @@ #if defined(__cplusplus) && (__cplusplus >= 201402L) -#define ARM_COMPUTE_DEPRECATED [[deprecated]] -#define ARM_COMPUTE_DEPRECATED_REL(rel) [[deprecated("Deprecated in : " #rel)]] +#define ARM_COMPUTE_DEPRECATED [[deprecated]] +#define ARM_COMPUTE_DEPRECATED_REL(rel) [[deprecated("Deprecated in : " #rel)]] #define ARM_COMPUTE_DEPRECATED_REL_REPLACE(rel, replace) [[deprecated("Deprecated in : " #rel " - Use : " #replace)]] #elif defined(__GNUC__) || defined(__clang__) -#define ARM_COMPUTE_DEPRECATED __attribute__((deprecated)) +#define ARM_COMPUTE_DEPRECATED __attribute__((deprecated)) #define ARM_COMPUTE_DEPRECATED_REL(rel) __attribute__((deprecated("Deprecated in : " #rel))) -#define ARM_COMPUTE_DEPRECATED_REL_REPLACE(rel, replace) __attribute__((deprecated("Deprecated in : " #rel " - Use : " #replace))) +#define ARM_COMPUTE_DEPRECATED_REL_REPLACE(rel, replace) \ + __attribute__((deprecated("Deprecated in : " #rel " - Use : " #replace))) #else // defined(__cplusplus) && (__cplusplus >= 201402L) diff --git a/arm_compute/core/utils/misc/ShapeCalculator.h b/arm_compute/core/utils/misc/ShapeCalculator.h index 4c2037ab8d..31362f1ac4 100644 --- a/arm_compute/core/utils/misc/ShapeCalculator.h +++ b/arm_compute/core/utils/misc/ShapeCalculator.h @@ -28,11 +28,10 @@ #include "arm_compute/core/ITensorInfo.h" #include "arm_compute/core/KernelDescriptors.h" #include 
"arm_compute/core/Utils.h" +#include "arm_compute/core/utils/helpers/tensor_transform.h" #include "arm_compute/function_info/ConvolutionInfo.h" #include "arm_compute/runtime/FunctionDescriptors.h" -#include "arm_compute/core/utils/helpers/tensor_transform.h" - #include namespace arm_compute @@ -57,12 +56,12 @@ inline TensorShape calculate_reduce_mean_shape(ITensorInfo *input, const Coordin convert_negative_axis(axis_local, input_dims); TensorShape out_shape = input->tensor_shape(); // Configure reshape layer if we want to drop the dimensions - if(!keep_dims) + if (!keep_dims) { // We have to sort the reduction axis vectors in order for remove_dimension // to work properly std::sort(axis_local.begin(), axis_local.begin() + reduction_ops); - for(int i = 0; i < reduction_ops; ++i) + for (int i = 0; i < reduction_ops; ++i) { out_shape.remove_dimension(axis_local[i] - i, false); } @@ -70,7 +69,7 @@ inline TensorShape calculate_reduce_mean_shape(ITensorInfo *input, const Coordin } else { - for(int i = 0; i < reduction_ops; ++i) + for (int i = 0; i < reduction_ops; ++i) { out_shape.set(axis_local[i], 1); } @@ -86,7 +85,10 @@ inline TensorShape calculate_reduce_mean_shape(ITensorInfo *input, const Coordin * * @return the calculated shape */ -inline TensorShape compute_vector_to_tensor_output_shape(const TensorShape &input, size_t conv_w, size_t conv_h, const DataLayout &data_layout) +inline TensorShape compute_vector_to_tensor_output_shape(const TensorShape &input, + size_t conv_w, + size_t conv_h, + const DataLayout &data_layout) { const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); @@ -128,10 +130,12 @@ inline TensorShape compute_reorg_output_shape(const ITensorInfo &input, int32_t const size_t idx_channel = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::CHANNEL); ARM_COMPUTE_ERROR_ON(stride <= 0); - ARM_COMPUTE_ERROR_ON_MSG((input.tensor_shape()[idx_width] % stride != 0), "The width of the input tensor must be a multiple of stride"); - ARM_COMPUTE_ERROR_ON_MSG((input.tensor_shape()[idx_height] % stride != 0), "The height of the input tensor must be a multiple of stride"); + ARM_COMPUTE_ERROR_ON_MSG((input.tensor_shape()[idx_width] % stride != 0), + "The width of the input tensor must be a multiple of stride"); + ARM_COMPUTE_ERROR_ON_MSG((input.tensor_shape()[idx_height] % stride != 0), + "The height of the input tensor must be a multiple of stride"); - TensorShape output_shape{ input.tensor_shape() }; + TensorShape output_shape{input.tensor_shape()}; output_shape.set(idx_width, output_shape[idx_width] / stride); output_shape.set(idx_height, output_shape[idx_height] / stride); @@ -148,7 +152,8 @@ inline TensorShape compute_reorg_output_shape(const ITensorInfo &input, int32_t * * @return the calculated shape of the reshaped weights */ -inline TensorShape compute_weights_reshaped_shape(const ITensorInfo &weights, bool has_bias = false, unsigned int num_groups = 1) +inline TensorShape +compute_weights_reshaped_shape(const ITensorInfo &weights, bool has_bias = false, unsigned int num_groups = 1) { // Number of groups greater than one are only supported for NCHW data layout, and the number of weights must be a multiple of it. 
ARM_COMPUTE_ERROR_ON(num_groups == 0); @@ -156,14 +161,14 @@ inline TensorShape compute_weights_reshaped_shape(const ITensorInfo &weights, bo ARM_COMPUTE_ERROR_ON((weights.dimension(3) % num_groups) != 0); // Calculate output shape - TensorShape weights_reshaped{ weights.tensor_shape() }; + TensorShape weights_reshaped{weights.tensor_shape()}; weights_reshaped.set(3, weights_reshaped[3] / num_groups); weights_reshaped.collapse(3); const size_t tmp_dim = weights_reshaped[0]; weights_reshaped.set(0, weights_reshaped[1]); weights_reshaped.set(1, tmp_dim + (has_bias ? 1 : 0)); - if(weights.num_dimensions() < 5) + if (weights.num_dimensions() < 5) { weights_reshaped.set(2, num_groups); } @@ -179,7 +184,9 @@ inline TensorShape compute_weights_reshaped_shape(const ITensorInfo &weights, bo * * @return the calculated shape */ -inline TensorShape compute_lhs_reshaped_shape(const ITensorInfo &a, const GEMMLHSMatrixInfo &lhs_info, bool reinterpret_input_as_3d = false) +inline TensorShape compute_lhs_reshaped_shape(const ITensorInfo &a, + const GEMMLHSMatrixInfo &lhs_info, + bool reinterpret_input_as_3d = false) { ARM_COMPUTE_ERROR_ON(lhs_info.m0 == 0); ARM_COMPUTE_ERROR_ON(lhs_info.k0 == 0); @@ -200,11 +207,11 @@ inline TensorShape compute_lhs_reshaped_shape(const ITensorInfo &a, const GEMMLH const unsigned int output_width = block_size * num_horiz_blocks * lhs_info.v0; const unsigned int output_height = std::ceil(num_vert_blocks / static_cast(lhs_info.v0)); - TensorShape lhs_shape{ a.tensor_shape() }; + TensorShape lhs_shape{a.tensor_shape()}; lhs_shape.set(0, output_width); lhs_shape.set(1, output_height); - if((reinterpret_input_as_3d) && (lhs_shape.num_dimensions() > 2)) + if ((reinterpret_input_as_3d) && (lhs_shape.num_dimensions() > 2)) { // When the data format is NHWC and the shapes are Nx1x1 // the tensor shape num_dimensions is automatically set to 1 instead of 3. @@ -244,7 +251,7 @@ inline TensorShape compute_rhs_reshaped_shape(const ITensorInfo &a, const GEMMRH const unsigned int output_width = block_size * num_vert_blocks * rhs_info.h0; const unsigned int output_height = std::ceil(num_horiz_blocks / static_cast(rhs_info.h0)); - TensorShape rhs_shape{ a.tensor_shape() }; + TensorShape rhs_shape{a.tensor_shape()}; rhs_shape.set(0, output_width); rhs_shape.set(1, output_height); @@ -259,14 +266,15 @@ inline TensorShape compute_rhs_reshaped_shape(const ITensorInfo &a, const GEMMRH * * @return the calculated shape */ -inline TensorShape compute_interleaved_shape(const ITensorInfo &a, int mult_interleave4x4_height = 1, bool reinterpret_input_as_3d = false) +inline TensorShape +compute_interleaved_shape(const ITensorInfo &a, int mult_interleave4x4_height = 1, bool reinterpret_input_as_3d = false) { // The interleaved output matrix will have the following shape: [ a_height * W, ceil(a_width / W) ] where W = 4 * mult_interleave4x4_height ARM_COMPUTE_ERROR_ON(mult_interleave4x4_height < 1); const int interleave_width = 4 * mult_interleave4x4_height; - TensorShape shape_interleaved_a{ a.tensor_shape() }; + TensorShape shape_interleaved_a{a.tensor_shape()}; shape_interleaved_a.set(0, a.dimension(0) * interleave_width); - if(reinterpret_input_as_3d) + if (reinterpret_input_as_3d) { const int M = a.dimension(1) * a.dimension(2); const int height = std::ceil(M / static_cast(interleave_width)); @@ -276,7 +284,7 @@ inline TensorShape compute_interleaved_shape(const ITensorInfo &a, int mult_inte // the tensor shape num_dimensions is automatically set to 1 instead of 3. 
// To avoid failures by removing a dimension that doesn't exist // check if the number of dimensions is greater than 2. - if(shape_interleaved_a.num_dimensions() > 2) + if (shape_interleaved_a.num_dimensions() > 2) { shape_interleaved_a.remove_dimension(2); } @@ -298,7 +306,7 @@ inline TensorShape compute_interleaved_shape(const ITensorInfo &a, int mult_inte inline TensorShape compute_transpose1xW_shape(const ITensorInfo &b) { // The transpose1xW output matrix will have the following shape: [ b_height * 16, ceil(b_width / 16.0f) ] - TensorShape shape_transposed1xW_b{ b.tensor_shape() }; + TensorShape shape_transposed1xW_b{b.tensor_shape()}; shape_transposed1xW_b.set(0, b.dimension(1) * 16); shape_transposed1xW_b.set(1, std::ceil(b.dimension(0) / 16.f)); @@ -318,7 +326,7 @@ inline TensorShape compute_transpose1xW_with_element_size_shape(const ITensorInf // The transpose1xW output matrix will have the following shape: // [ b_height * W, ceil(b_width / W) ] where W = (16 / element size of the tensor) * mult_transpose1xW_width ARM_COMPUTE_ERROR_ON(mult_transpose1xW_width < 1); - TensorShape shape_transposed1xW_b{ b.tensor_shape() }; + TensorShape shape_transposed1xW_b{b.tensor_shape()}; const size_t transpose_width = (16 / b.element_size()) * mult_transpose1xW_width; shape_transposed1xW_b.set(0, b.dimension(1) * transpose_width); shape_transposed1xW_b.set(1, static_cast(std::ceil(b.dimension(0) / static_cast(transpose_width)))); @@ -334,8 +342,8 @@ inline TensorShape compute_transpose1xW_with_element_size_shape(const ITensorInf */ inline TensorShape compute_reductionA_shape(const ITensorInfo &b) { - TensorShape shape_vector_sum_col{ b.tensor_shape() }; - if(shape_vector_sum_col.num_dimensions() > 1) + TensorShape shape_vector_sum_col{b.tensor_shape()}; + if (shape_vector_sum_col.num_dimensions() > 1) { shape_vector_sum_col.remove_dimension(1); } @@ -351,9 +359,9 @@ inline TensorShape compute_reductionA_shape(const ITensorInfo &b) */ inline TensorShape compute_reductionB_shape(const ITensorInfo &a) { - TensorShape shape_vector_sum_row{ a.tensor_shape() }; + TensorShape shape_vector_sum_row{a.tensor_shape()}; shape_vector_sum_row.set(Window::DimX, a.dimension(1)); - if(shape_vector_sum_row.num_dimensions() > 1) + if (shape_vector_sum_row.num_dimensions() > 1) { shape_vector_sum_row.remove_dimension(1); } @@ -370,7 +378,10 @@ inline TensorShape compute_reductionB_shape(const ITensorInfo &a) * * @return the calculated shape */ -inline TensorShape compute_col2im_shape(const ITensorInfo &input, const Size2D &convolved_dims, bool batch_size_on_z, unsigned int num_groups = 1) +inline TensorShape compute_col2im_shape(const ITensorInfo &input, + const Size2D &convolved_dims, + bool batch_size_on_z, + unsigned int num_groups = 1) { ARM_COMPUTE_ERROR_ON(num_groups == 0); ARM_COMPUTE_ERROR_ON(input.tensor_shape()[1] != (convolved_dims.area())); @@ -381,10 +392,10 @@ inline TensorShape compute_col2im_shape(const ITensorInfo &input, const Size2D & const int height_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); const int channel_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL); - TensorShape col2im_shape{ input.tensor_shape() }; + TensorShape col2im_shape{input.tensor_shape()}; // If batches start on 3rd dimension shift dimensions right by 1 to retain upper tensor shape, // as first three will be override by H,W,C data - if(batch_size_on_z && num_groups == 1) + if (batch_size_on_z && num_groups == 1) { col2im_shape.shift_right(1); } @@ -403,7 
+414,7 @@ inline TensorShape compute_col2im_shape(const ITensorInfo &input, const Size2D & */ inline TensorShape compute_transposed_shape(const ITensorInfo &input) { - TensorShape shape_transposed{ input.tensor_shape() }; + TensorShape shape_transposed{input.tensor_shape()}; shape_transposed.set(0, input.dimension(1), false); shape_transposed.set(1, input.dimension(0), false); @@ -419,10 +430,11 @@ inline TensorShape compute_transposed_shape(const ITensorInfo &input) * * @return the calculated shape */ -inline TensorShape compute_depthwise_convolution_shape(const ITensorInfo &input, const ITensorInfo &weights, const ConvolutionInfo &info) +inline TensorShape +compute_depthwise_convolution_shape(const ITensorInfo &input, const ITensorInfo &weights, const ConvolutionInfo &info) { - const TensorShape input_shape{ input.tensor_shape() }; - const TensorShape weights_shape{ weights.tensor_shape() }; + const TensorShape input_shape{input.tensor_shape()}; + const TensorShape weights_shape{weights.tensor_shape()}; const DataLayout data_layout = input.data_layout(); const int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); @@ -430,16 +442,16 @@ inline TensorShape compute_depthwise_convolution_shape(const ITensorInfo &input, const int channel_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL); const DataLayout weights_data_layout = weights.data_layout(); - const int weights_width_idx = get_data_layout_dimension_index(weights_data_layout, DataLayoutDimension::WIDTH); - const int weights_height_idx = get_data_layout_dimension_index(weights_data_layout, DataLayoutDimension::HEIGHT); + const int weights_width_idx = get_data_layout_dimension_index(weights_data_layout, DataLayoutDimension::WIDTH); + const int weights_height_idx = get_data_layout_dimension_index(weights_data_layout, DataLayoutDimension::HEIGHT); unsigned int output_width = 0; unsigned int output_height = 0; - std::tie(output_width, output_height) = scaled_dimensions(input_shape[width_idx], input_shape[height_idx], - weights_shape[weights_width_idx], weights_shape[weights_height_idx], - info.pad_stride_info, info.dilation); + std::tie(output_width, output_height) = + scaled_dimensions(input_shape[width_idx], input_shape[height_idx], weights_shape[weights_width_idx], + weights_shape[weights_height_idx], info.pad_stride_info, info.dilation); - TensorShape output_shape{ input_shape }; + TensorShape output_shape{input_shape}; output_shape.set(width_idx, output_width); output_shape.set(height_idx, output_height); output_shape.set(channel_idx, input_shape[channel_idx] * info.depth_multiplier); @@ -459,8 +471,13 @@ inline TensorShape compute_depthwise_convolution_shape(const ITensorInfo &input, * * @return the calculated shape */ -inline TensorShape compute_deconvolution_upsampled_shape(const ITensorInfo &input, const ITensorInfo &weights, unsigned int sx, unsigned int sy, - std::pair &out_dims, uint32_t &padx, uint32_t &pady) +inline TensorShape compute_deconvolution_upsampled_shape(const ITensorInfo &input, + const ITensorInfo &weights, + unsigned int sx, + unsigned int sy, + std::pair &out_dims, + uint32_t &padx, + uint32_t &pady) { const DataLayout data_layout = input.data_layout(); const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); @@ -491,10 +508,12 @@ inline TensorShape compute_deconvolution_upsampled_shape(const ITensorInfo &inpu * * @return the calculated shape */ -inline TensorShape compute_deconvolution_output_shape(const 
std::pair &out_dims, const ITensorInfo &input, const ITensorInfo &weights) +inline TensorShape compute_deconvolution_output_shape(const std::pair &out_dims, + const ITensorInfo &input, + const ITensorInfo &weights) { - const TensorShape input_shape{ input.tensor_shape() }; - const TensorShape weights_shape{ weights.tensor_shape() }; + const TensorShape input_shape{input.tensor_shape()}; + const TensorShape weights_shape{weights.tensor_shape()}; const DataLayout data_layout = input.data_layout(); const int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); @@ -502,7 +521,7 @@ inline TensorShape compute_deconvolution_output_shape(const std::pair 1 && input->data_layout() != DataLayout::NCHW); ARM_COMPUTE_ERROR_ON(num_groups > 1 && batch_size_on_z); - TensorShape output_shape{ input->tensor_shape() }; + TensorShape output_shape{input->tensor_shape()}; const DataLayout data_layout = input->data_layout(); const int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); const int height_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); const int channel_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL); - std::pair out_dims = scaled_dimensions(output_shape[width_idx], output_shape[height_idx], kernel_dims.width, kernel_dims.height, conv_info, dilation); - output_shape.set(0, ((output_shape[channel_idx] + input_pad_right) / num_groups * kernel_dims.area() + (has_bias ? 1 : 0))); // NOLINT + std::pair out_dims = scaled_dimensions( + output_shape[width_idx], output_shape[height_idx], kernel_dims.width, kernel_dims.height, conv_info, dilation); + output_shape.set(0, ((output_shape[channel_idx] + input_pad_right) / num_groups * kernel_dims.area() + + (has_bias ? 1 : 0))); // NOLINT output_shape.set(1, (out_dims.first * out_dims.second)); - if(batch_size_on_z && output_shape.num_dimensions() >= 3) + if (batch_size_on_z && output_shape.num_dimensions() >= 3) { output_shape.remove_dimension(2); } @@ -564,7 +591,7 @@ inline TensorShape compute_flatten_shape(const ITensorInfo *input) { // The output shape will be the flatten version of the input (i.e. [ width * height * channels, num_batches, ... ] ). Used for FlattenLayer and FullyConnectedLayer. - TensorShape output_shape{ input->tensor_shape() }; + TensorShape output_shape{input->tensor_shape()}; output_shape.collapse(3); @@ -586,7 +613,7 @@ inline TensorShape compute_softmax_shape(const ITensorInfo *input, size_t axis = // - [x,y,z,w] and axis 3 will return [x*y*z, w] TensorShape shape2D = input->tensor_shape(); - if(axis < input->num_dimensions()) + if (axis < input->num_dimensions()) { // Collapse from axis onward (this changes the shape) shape2D.collapse_from(axis); @@ -600,7 +627,7 @@ inline TensorShape compute_softmax_shape(const ITensorInfo *input, size_t axis = shape2D.collapse(shape2D.num_dimensions()); } - if(axis == 0) + if (axis == 0) { // If axis is zero the first dim should be one. 
Since // collapse is an inclusive operation we need to shift @@ -619,15 +646,17 @@ inline TensorShape compute_softmax_shape(const ITensorInfo *input, size_t axis = */ inline TensorShape compute_winograd_filter_transform_shape(const ITensorInfo &input, const WinogradInfo &winograd_info) { - TensorShape tensor_shape{ input.tensor_shape() }; + TensorShape tensor_shape{input.tensor_shape()}; const Size2D kernel_size = winograd_info.kernel_size; const Size2D output_tile_size = winograd_info.output_tile_size; - const Size2D input_tile_size = Size2D(output_tile_size.width + kernel_size.width - 1, output_tile_size.height + kernel_size.height - 1); + const Size2D input_tile_size = + Size2D(output_tile_size.width + kernel_size.width - 1, output_tile_size.height + kernel_size.height - 1); tensor_shape.remove_dimension(get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::WIDTH)); tensor_shape.set(Window::DimX, input.dimension(3)); - tensor_shape.set(Window::DimY, input.dimension(get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::CHANNEL))); + tensor_shape.set(Window::DimY, input.dimension(get_data_layout_dimension_index(input.data_layout(), + DataLayoutDimension::CHANNEL))); tensor_shape.set(Window::DimZ, input_tile_size.area()); return tensor_shape; @@ -645,23 +674,22 @@ inline TensorShape compute_winograd_input_transform_shape(const ITensorInfo &inp const PadStrideInfo conv_info = winograd_info.convolution_info; const Size2D kernel_size = winograd_info.kernel_size; const Size2D output_tile_size = winograd_info.output_tile_size; - const Size2D input_tile_size = Size2D(output_tile_size.width + kernel_size.width - 1, output_tile_size.height + kernel_size.height - 1); + const Size2D input_tile_size = + Size2D(output_tile_size.width + kernel_size.width - 1, output_tile_size.height + kernel_size.height - 1); const size_t idx_w = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::WIDTH); const size_t idx_h = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::HEIGHT); const size_t idx_c = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::CHANNEL); // Compute the number of output tiles along the x and y direction of size "output_tile_size" - const Size2D num_tiles = compute_winograd_convolution_tiles(Size2D(input.tensor_shape()[idx_w], input.tensor_shape()[idx_h]), - kernel_size, - output_tile_size, - conv_info); + const Size2D num_tiles = compute_winograd_convolution_tiles( + Size2D(input.tensor_shape()[idx_w], input.tensor_shape()[idx_h]), kernel_size, output_tile_size, conv_info); const unsigned int width = input.tensor_shape()[idx_c]; const unsigned int height = num_tiles.area(); const unsigned int depth = input_tile_size.area(); - TensorShape output_shape{ input.tensor_shape() }; + TensorShape output_shape{input.tensor_shape()}; output_shape.set(0, width); output_shape.set(1, height); output_shape.set(2, depth); @@ -684,12 +712,12 @@ inline TensorShape compute_winograd_output_transform_shape(const ITensorInfo &in const DataLayout data_layout = winograd_info.output_data_layout; // Compute output shape - unsigned int output_width = 0; - unsigned int output_height = 0; + unsigned int output_width = 0; + unsigned int output_height = 0; std::tie(output_width, output_height) = scaled_dimensions(input_dimensions.width, input_dimensions.height, kernel_size.width, kernel_size.height, conv_info); - TensorShape tensor_shape{ input.tensor_shape() }; + TensorShape tensor_shape{input.tensor_shape()}; // 
Output dimension const unsigned int out_w = output_width; @@ -712,7 +740,10 @@ inline TensorShape compute_winograd_output_transform_shape(const ITensorInfo &in * * @return the calculated shape */ -inline TensorShape compute_deep_convolution_shape(const TensorShape &input_shape, DataLayout input_data_layout, const TensorShape &weights_shape, const PadStrideInfo &conv_info) +inline TensorShape compute_deep_convolution_shape(const TensorShape &input_shape, + DataLayout input_data_layout, + const TensorShape &weights_shape, + const PadStrideInfo &conv_info) { const size_t idx_width = get_data_layout_dimension_index(input_data_layout, DataLayoutDimension::WIDTH); const size_t idx_height = get_data_layout_dimension_index(input_data_layout, DataLayoutDimension::HEIGHT); @@ -725,9 +756,10 @@ inline TensorShape compute_deep_convolution_shape(const TensorShape &input_shape const unsigned int weights_out_channel = weights_shape[3]; unsigned int output_width = 0; unsigned int output_height = 0; - std::tie(output_width, output_height) = scaled_dimensions(input_width, input_height, weights_width, weights_height, conv_info); + std::tie(output_width, output_height) = + scaled_dimensions(input_width, input_height, weights_width, weights_height, conv_info); - TensorShape output_shape{ input_shape }; + TensorShape output_shape{input_shape}; output_shape.set(idx_width, output_width); output_shape.set(idx_height, output_height); output_shape.set(idx_channel, weights_out_channel); @@ -743,7 +775,8 @@ inline TensorShape compute_deep_convolution_shape(const TensorShape &input_shape * * @return the calculated shape */ -inline TensorShape compute_deep_convolution_shape(const ITensorInfo &input, const ITensorInfo &weights, const PadStrideInfo &conv_info) +inline TensorShape +compute_deep_convolution_shape(const ITensorInfo &input, const ITensorInfo &weights, const PadStrideInfo &conv_info) { return compute_deep_convolution_shape(input.tensor_shape(), input.data_layout(), weights.tensor_shape(), conv_info); } @@ -758,7 +791,10 @@ inline TensorShape compute_deep_convolution_shape(const ITensorInfo &input, cons * * @return the calculated shape */ -inline TensorShape compute_indirect_buffer_shape(const TensorShape &input_shape, DataLayout input_data_layout, const TensorShape &weights_shape, const PadStrideInfo &conv_info, +inline TensorShape compute_indirect_buffer_shape(const TensorShape &input_shape, + DataLayout input_data_layout, + const TensorShape &weights_shape, + const PadStrideInfo &conv_info, const DirectConvComputeKernelInfo &desc) { ARM_COMPUTE_ERROR_ON_MSG(input_data_layout != DataLayout::NHWC, "The data layout can only be NHWC"); @@ -768,7 +804,8 @@ inline TensorShape compute_indirect_buffer_shape(const TensorShape &input_shape, const unsigned int kw = weights_shape[1]; const unsigned int kh = weights_shape[2]; - TensorShape output_conv2d_shape = compute_deep_convolution_shape(input_shape, input_data_layout, weights_shape, conv_info); + TensorShape output_conv2d_shape = + compute_deep_convolution_shape(input_shape, input_data_layout, weights_shape, conv_info); const unsigned int output_w = m0 * kw * kh; const unsigned int output_h = DIV_CEIL(output_conv2d_shape[1] * output_conv2d_shape[2], m0); @@ -785,7 +822,7 @@ inline TensorShape compute_indirect_buffer_shape(const TensorShape &input_shape, */ inline TensorShape compute_min_max_shape(const ITensorInfo *input) { - TensorShape output_shape{ input->tensor_shape() }; + TensorShape output_shape{input->tensor_shape()}; output_shape.set(Window::DimX, 2); 
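The convolution shape helpers above all reduce to the textbook relation out = (in + pad_left + pad_right - kernel) / stride + 1, evaluated per spatial dimension by scaled_dimensions(). A minimal sketch of a call to compute_deep_convolution_shape (illustrative only; the shapes, layout and padding are assumed values, not taken from this patch):

    #include "arm_compute/core/TensorShape.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/core/utils/misc/ShapeCalculator.h"

    using namespace arm_compute;

    // NCHW input of 32x32 with 3 channels: shape is ordered [W, H, C, ...].
    const TensorShape src(32U, 32U, 3U);
    // 16 filters of size 3x3x3: [Kw, Kh, Cin, Cout].
    const TensorShape weights(3U, 3U, 3U, 16U);
    // Stride 1 and one pixel of padding on each side.
    const PadStrideInfo conv_info(1, 1, 1, 1);

    const TensorShape dst =
        misc::shape_calculator::compute_deep_convolution_shape(src, DataLayout::NCHW, weights, conv_info);
    // dst is [32, 32, 16]: (32 + 1 + 1 - 3) / 1 + 1 = 32 per spatial dimension,
    // and the channel count comes from weights_shape[3].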
output_shape.remove_dimension(1); output_shape.remove_dimension(1); @@ -805,7 +842,7 @@ inline TensorShape compute_pool_shape(const ITensorInfo &input, PoolingLayerInfo int pooled_w = 0; int pooled_h = 0; - TensorShape output_shape{ input.tensor_shape() }; + TensorShape output_shape{input.tensor_shape()}; const bool is_global_pooling = pool_info.is_global_pooling; const int idx_width = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::WIDTH); @@ -815,9 +852,8 @@ inline TensorShape compute_pool_shape(const ITensorInfo &input, PoolingLayerInfo const int pool_size_x = is_global_pooling ? output_shape[idx_width] : pool_info.pool_size.width; const int pool_size_y = is_global_pooling ? output_shape[idx_height] : pool_info.pool_size.height; - std::tie(pooled_w, pooled_h) = scaled_dimensions_signed(input_width, input_height, - pool_size_x, pool_size_y, - pool_info.pad_stride_info); + std::tie(pooled_w, pooled_h) = + scaled_dimensions_signed(input_width, input_height, pool_size_x, pool_size_y, pool_info.pad_stride_info); ARM_COMPUTE_ERROR_ON_MSG((pooled_w < 1 || pooled_h < 1), "Calculated output dimension size is invalid"); @@ -850,8 +886,10 @@ inline TensorShape compute_unpool_shape(const ITensorInfo &input, PoolingLayerIn const int pad_bottom = pad_stride_info.pad_bottom(); TensorShape output_shape = input_shape; - const unsigned int out_width = (input_shape[idx_width] - 1) * stride_x - pad_left - pad_right + pool_info.pool_size.width; - const unsigned int out_height = (input_shape[idx_height] - 1) * stride_y - pad_top - pad_bottom + pool_info.pool_size.height; + const unsigned int out_width = + (input_shape[idx_width] - 1) * stride_x - pad_left - pad_right + pool_info.pool_size.width; + const unsigned int out_height = + (input_shape[idx_height] - 1) * stride_y - pad_top - pad_bottom + pool_info.pool_size.height; output_shape.set(idx_width, out_width); output_shape.set(idx_height, out_height); @@ -866,9 +904,10 @@ inline TensorShape compute_unpool_shape(const ITensorInfo &input, PoolingLayerIn * * @return the calculated shape */ -inline TensorShape compute_roi_align_shape(const ITensorInfo &input, const ITensorInfo &rois, ROIPoolingLayerInfo pool_info) +inline TensorShape +compute_roi_align_shape(const ITensorInfo &input, const ITensorInfo &rois, ROIPoolingLayerInfo pool_info) { - TensorShape output_shape{ input.tensor_shape() }; + TensorShape output_shape{input.tensor_shape()}; const unsigned int idx_width = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::WIDTH); const unsigned int idx_height = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::HEIGHT); @@ -889,7 +928,7 @@ inline TensorShape compute_roi_align_shape(const ITensorInfo &input, const ITens */ inline TensorShape compute_rnn_shape(const ITensorInfo *input, const unsigned int batch_size) { - TensorShape output_shape{ input->tensor_shape() }; + TensorShape output_shape{input->tensor_shape()}; output_shape.set(1, batch_size); return output_shape; @@ -904,15 +943,21 @@ inline TensorShape compute_rnn_shape(const ITensorInfo *input, const unsigned in * * @return the calculated shape */ -inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo &input1, bool is_interleaved_transposed, const GEMMReshapeInfo &reshape_info) +inline TensorShape compute_mm_shape(const ITensorInfo &input0, + const ITensorInfo &input1, + bool is_interleaved_transposed, + const GEMMReshapeInfo &reshape_info) { ARM_COMPUTE_ERROR_ON_MSG(input0.num_dimensions() > 4, "The 
number of dimensions for the matrix A must be <= 4"); - ARM_COMPUTE_ERROR_ON_MSG(is_interleaved_transposed && reshape_info.reinterpret_input_as_3d(), "The first input tensor cannot be reinterpreted as 3D if is_interleaved_transposed is true"); + ARM_COMPUTE_ERROR_ON_MSG( + is_interleaved_transposed && reshape_info.reinterpret_input_as_3d(), + "The first input tensor cannot be reinterpreted as 3D if is_interleaved_transposed is true"); const bool reinterpret_input_as_3d = reshape_info.reinterpret_input_as_3d(); const bool reinterpret_output_as_3d = reshape_info.depth_output_gemm3d() != 0; const int depth_output_gemm3d = reinterpret_output_as_3d ? reshape_info.depth_output_gemm3d() : 1; - const int m = reshape_info.reinterpret_input_as_3d() ? input0.dimension(1) * input0.dimension(2) : input0.dimension(1); + const int m = + reshape_info.reinterpret_input_as_3d() ? input0.dimension(1) * input0.dimension(2) : input0.dimension(1); // If the output of GEMM has to be reinterpreted as 3D, the number of input0 rows (M) is obtained collapsing the second and third // dimension of the output tensor @@ -921,7 +966,7 @@ inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo const int dim2 = reinterpret_input_as_3d ? input0.tensor_shape()[3] : input0.tensor_shape()[2]; const int dim3 = reinterpret_input_as_3d ? 1 : input0.tensor_shape()[3]; - TensorShape output_shape{ input0.tensor_shape() }; + TensorShape output_shape{input0.tensor_shape()}; output_shape.set(0, dim0); output_shape.set(1, dim1); @@ -940,7 +985,8 @@ inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo * * @return the calculated shape */ -inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo &input1, const GEMMReshapeInfo &gemm_info) +inline TensorShape +compute_mm_shape(const ITensorInfo &input0, const ITensorInfo &input1, const GEMMReshapeInfo &gemm_info) { ARM_COMPUTE_UNUSED(input1); ARM_COMPUTE_ERROR_ON_MSG(input0.num_dimensions() > 4, "The number of dimensions for the matrix A must be <= 4"); @@ -949,9 +995,9 @@ inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo const bool reinterpret_output_as_3d = gemm_info.depth_output_gemm3d() != 0; const int depth_output_gemm3d = reinterpret_output_as_3d ? gemm_info.depth_output_gemm3d() : 1; - TensorShape output_shape{ input0.tensor_shape() }; + TensorShape output_shape{input0.tensor_shape()}; - if(!reinterpret_input_as_3d && !reinterpret_output_as_3d) + if (!reinterpret_input_as_3d && !reinterpret_output_as_3d) { output_shape.set(0, gemm_info.n()); output_shape.set(1, gemm_info.m()); @@ -978,7 +1024,8 @@ inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo * * @return the calculated shape */ -inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo &input1, const GEMMKernelInfo &gemm_info) +inline TensorShape +compute_mm_shape(const ITensorInfo &input0, const ITensorInfo &input1, const GEMMKernelInfo &gemm_info) { ARM_COMPUTE_UNUSED(input1); ARM_COMPUTE_ERROR_ON_MSG(input0.num_dimensions() > 4, "The number of dimensions for the matrix A must be <= 4"); @@ -987,9 +1034,9 @@ inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo const bool reinterpret_output_as_3d = gemm_info.depth_output_gemm3d != 0; const unsigned int depth_output_gemm3d = reinterpret_output_as_3d ? 
gemm_info.depth_output_gemm3d : 1; - TensorShape output_shape{ input0.tensor_shape() }; + TensorShape output_shape{input0.tensor_shape()}; - if(!reinterpret_input_as_3d && !reinterpret_output_as_3d) + if (!reinterpret_input_as_3d && !reinterpret_output_as_3d) { output_shape.set(0, gemm_info.n); output_shape.set(1, gemm_info.m); @@ -1016,16 +1063,17 @@ inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo * * @return the calculated shape */ -inline TensorShape compute_matmul_shape(const TensorShape &input0, const TensorShape &input1, const MatMulKernelInfo &matmul_info) +inline TensorShape +compute_matmul_shape(const TensorShape &input0, const TensorShape &input1, const MatMulKernelInfo &matmul_info) { - TensorShape output_shape{ input0 }; + TensorShape output_shape{input0}; - if(matmul_info.adj_lhs) + if (matmul_info.adj_lhs) { output_shape.set(1, input0[0]); // The vertical (M) dimension } - if(matmul_info.adj_rhs) + if (matmul_info.adj_rhs) { output_shape.set(0, input1[1]); // The horizontal (N) dimension } @@ -1044,14 +1092,15 @@ inline TensorShape compute_matmul_shape(const TensorShape &input0, const TensorS * * @return the calculated shape */ -inline TensorShape compute_output_stage_shape(const ITensorInfo &input, unsigned int gemm_3d_depth = 1, bool batch_size_on_z = false) +inline TensorShape +compute_output_stage_shape(const ITensorInfo &input, unsigned int gemm_3d_depth = 1, bool batch_size_on_z = false) { ARM_COMPUTE_ERROR_ON(input.data_layout() != DataLayout::NHWC && gemm_3d_depth > 1); TensorShape output_shape = input.tensor_shape(); - if(gemm_3d_depth > 1) + if (gemm_3d_depth > 1) { - if(batch_size_on_z) + if (batch_size_on_z) { output_shape.shift_right(1); } @@ -1076,11 +1125,16 @@ inline TensorShape compute_output_stage_shape(const ITensorInfo &input, unsigned * @return the calculated shape */ inline TensorShape compute_strided_slice_shape(const ITensorInfo &input, - const Coordinates &starts, const Coordinates &ends, const Coordinates &strides, - int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask) + const Coordinates &starts, + const Coordinates &ends, + const Coordinates &strides, + int32_t begin_mask, + int32_t end_mask, + int32_t shrink_axis_mask) { using namespace arm_compute::helpers::tensor_transform; - return compute_strided_slice_output_shape(input.tensor_shape(), starts, ends, strides, begin_mask, end_mask, shrink_axis_mask); + return compute_strided_slice_output_shape(input.tensor_shape(), starts, ends, strides, begin_mask, end_mask, + shrink_axis_mask); } /** Calculate the slice output shape of a tensor @@ -1091,13 +1145,13 @@ inline TensorShape compute_strided_slice_shape(const ITensorInfo &input, * * @return the calculated shape */ -inline TensorShape compute_slice_shape(const TensorShape &input_shape, const Coordinates &starts, const Coordinates &ends) +inline TensorShape +compute_slice_shape(const TensorShape &input_shape, const Coordinates &starts, const Coordinates &ends) { using namespace arm_compute::helpers::tensor_transform; - return compute_strided_slice_output_shape(input_shape, - starts, ends, BiStrides(), - 0, construct_slice_end_mask(ends), 0); + return compute_strided_slice_output_shape(input_shape, starts, ends, BiStrides(), 0, construct_slice_end_mask(ends), + 0); } /** Calculate the batch to space output shape of a tensor @@ -1110,7 +1164,8 @@ inline TensorShape compute_slice_shape(const TensorShape &input_shape, const Coo * * @return the calculated shape */ -inline TensorShape 
compute_batch_to_space_shape(DataLayout data_layout, const TensorShape &input, int block_x, int block_y, const CropInfo &crop_info = CropInfo{}) +inline TensorShape compute_batch_to_space_shape( + DataLayout data_layout, const TensorShape &input, int block_x, int block_y, const CropInfo &crop_info = CropInfo{}) { ARM_COMPUTE_ERROR_ON(block_x < 1 || block_y < 1); @@ -1118,7 +1173,7 @@ inline TensorShape compute_batch_to_space_shape(DataLayout data_layout, const Te const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); const int idx_batch = get_data_layout_dimension_index(data_layout, DataLayoutDimension::BATCHES); - TensorShape output_shape{ input }; + TensorShape output_shape{input}; unsigned int new_width = input[idx_width] * static_cast<unsigned int>(block_x); unsigned int new_height = input[idx_height] * static_cast<unsigned int>(block_y); @@ -1152,7 +1207,7 @@ inline TensorShape compute_depth_to_space_shape(const TensorShape &input_shape, const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); const int idx_channel = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL); - TensorShape output_shape{ input_shape }; + TensorShape output_shape{input_shape}; output_shape.set(idx_width, input_shape[idx_width] * block); output_shape.set(idx_height, input_shape[idx_height] * block); output_shape.set(idx_channel, input_shape[idx_channel] / (block * block)); @@ -1173,10 +1228,10 @@ inline TensorShape compute_split_shape(const ITensorInfo *input, unsigned int ax TensorShape empty_shape; empty_shape.set(0, 0); - TensorShape out_shape{ input->tensor_shape() }; + TensorShape out_shape{input->tensor_shape()}; // Return empty shape if axis is invalid - if(axis > input->tensor_shape().num_dimensions()) + if (axis > input->tensor_shape().num_dimensions()) { return empty_shape; } @@ -1184,7 +1239,7 @@ inline TensorShape compute_split_shape(const ITensorInfo *input, unsigned int ax size_t axis_size = out_shape[axis]; // Return empty shape if num_split is not valid - if(axis_size % num_splits) + if (axis_size % num_splits) { return empty_shape; } @@ -1203,9 +1258,10 @@ inline TensorShape compute_split_shape(const ITensorInfo *input, unsigned int ax * * @return the calculated shape */ -inline TensorShape compute_space_to_batch_shape(const ITensorInfo *input, int block_x, int block_y, const Size2D &padding_left, const Size2D &padding_right) +inline TensorShape compute_space_to_batch_shape( + const ITensorInfo *input, int block_x, int block_y, const Size2D &padding_left, const Size2D &padding_right) { - TensorShape output_shape{ input->tensor_shape() }; + TensorShape output_shape{input->tensor_shape()}; const DataLayout data_layout = input->data_layout(); const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); @@ -1231,7 +1287,7 @@ inline TensorShape compute_space_to_batch_shape(const ITensorInfo *input, int bl */ inline TensorShape compute_space_to_depth_shape(const ITensorInfo *input, int32_t block_shape) { - TensorShape output_shape{ input->tensor_shape() }; + TensorShape output_shape{input->tensor_shape()}; const DataLayout data_layout = input->data_layout(); const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); @@ -1276,7 +1332,7 @@ inline TensorShape compute_prior_box_shape(const ITensorInfo &input, const Prior inline TensorShape compute_padded_shape(const TensorShape &input_shape, const PaddingList &padding) { TensorShape padded_shape = input_shape;
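For the split helper above, a short sketch under assumed shapes (the axis and num_splits arguments follow the compute_split_shape signature whose hunk header is truncated above):

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/utils/misc/ShapeCalculator.h"

    using namespace arm_compute;

    // A [224, 224, 64] tensor split four ways along the channel axis (axis 2).
    const TensorInfo  info(TensorShape(224U, 224U, 64U), 1, DataType::F32);
    const TensorShape per_split = misc::shape_calculator::compute_split_shape(&info, 2U, 4);
    // per_split is [224, 224, 16]. An out-of-range axis, or an axis size that
    // is not divisible by num_splits, yields the empty shape instead.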
- for(size_t dim = 0; dim < padding.size(); ++dim) + for (size_t dim = 0; dim < padding.size(); ++dim) { const auto &padding_pair = padding[dim]; const uint32_t shape_on_index = (padded_shape.num_dimensions() <= dim) ? 1 : input_shape[dim]; @@ -1295,7 +1351,7 @@ inline TensorShape compute_padded_shape(const TensorShape &input_shape, const Pa inline TensorShape compute_tiled_shape(const TensorShape &input_shape, const Multiples &multiples) { TensorShape tiled_shape = input_shape; - for(size_t dim = 0; dim < multiples.size(); ++dim) + for (size_t dim = 0; dim < multiples.size(); ++dim) { tiled_shape.set(dim, input_shape[dim] * multiples[dim]); } @@ -1312,9 +1368,9 @@ inline TensorShape compute_tiled_shape(const TensorShape &input_shape, const Mul */ inline TensorShape compute_reduced_shape(const TensorShape &input, unsigned int axis, bool keep_dims = true) { - TensorShape output_shape{ input }; + TensorShape output_shape{input}; - if(!keep_dims) + if (!keep_dims) { output_shape.remove_dimension(axis); } @@ -1407,14 +1463,14 @@ inline TensorShape calculate_concatenate_shape(const std::vector<T *> &input, si #if defined(ARM_COMPUTE_ASSERTS_ENABLED) // All dimensions must match except the axis one - for(unsigned int i = 0; i < MAX_DIMS; ++i) + for (unsigned int i = 0; i < MAX_DIMS; ++i) { - if(i == axis) + if (i == axis) { continue; } - for(const auto &tensor : input) + for (const auto &tensor : input) { ARM_COMPUTE_ERROR_ON(tensor == nullptr); const TensorShape shape = extract_shape(tensor); @@ -1425,7 +1481,7 @@ inline TensorShape calculate_concatenate_shape(const std::vector<T *> &input, si // Calculate output shape size_t new_size = 0; - for(const auto &tensor : input) + for (const auto &tensor : input) { const TensorShape shape = extract_shape(tensor); new_size += shape[axis]; @@ -1448,14 +1504,14 @@ inline TensorShape compute_stack_shape(const ITensorInfo &a, unsigned int axis, ARM_COMPUTE_ERROR_ON(axis > a.num_dimensions()); ARM_COMPUTE_ERROR_ON(a.num_dimensions() > 4); - TensorShape shape_out{ a.tensor_shape() }; + TensorShape shape_out{a.tensor_shape()}; shape_out.set(axis, num_tensors); unsigned int i_shift = 0; - for(unsigned int i = 0; i < a.num_dimensions(); ++i) + for (unsigned int i = 0; i < a.num_dimensions(); ++i) { - if(i == axis) + if (i == axis) { i_shift++; } @@ -1473,7 +1529,8 @@ inline TensorShape compute_stack_shape(const ITensorInfo &a, unsigned int axis, * * @return the calculated shape */ -inline TensorShape compute_conv3d_shape(const TensorShape &src, const TensorShape &weights, const Conv3dInfo &conv3d_info) +inline TensorShape +compute_conv3d_shape(const TensorShape &src, const TensorShape &weights, const Conv3dInfo &conv3d_info) { // Weight tensor shape indices (D H W Cin Cout) constexpr unsigned int weights_depth_dim = 4u; @@ -1488,7 +1545,7 @@ inline TensorShape compute_conv3d_shape(const TensorShape &src, const TensorShap constexpr unsigned int width_dim = 1u; constexpr unsigned int channel_dim = 0u; - TensorShape output_shape{ src }; + TensorShape output_shape{src}; const size_t pad_left = conv3d_info.padding.left; const size_t pad_right = conv3d_info.padding.right; const size_t pad_top = conv3d_info.padding.top; @@ -1506,17 +1563,41 @@ inline TensorShape compute_conv3d_shape(const TensorShape &src, const TensorShap int output_height_size = 0; int output_depth_size = 0; - switch(conv3d_info.round_type) + switch (conv3d_info.round_type) { case DimensionRoundingType::FLOOR: - output_width_size = static_cast<int>(std::floor((static_cast<float>(src[width_dim] + pad_left + pad_right - (dilation_x * (weights[weights_width_dim] - 1) + 1)) / stride_x) + 1)); - output_height_size = static_cast<int>(std::floor((static_cast<float>(src[height_dim] + pad_top + pad_bottom - (dilation_y * (weights[weights_height_dim] - 1) + 1)) / stride_y) + 1)); - output_depth_size = static_cast<int>(std::floor((static_cast<float>(src[depth_dim] + pad_front + pad_back - (dilation_z * (weights[weights_depth_dim] - 1) + 1)) / stride_z) + 1)); + output_width_size = + static_cast<int>(std::floor((static_cast<float>(src[width_dim] + pad_left + pad_right - + (dilation_x * (weights[weights_width_dim] - 1) + 1)) / + stride_x) + + 1)); + output_height_size = + static_cast<int>(std::floor((static_cast<float>(src[height_dim] + pad_top + pad_bottom - + (dilation_y * (weights[weights_height_dim] - 1) + 1)) / + stride_y) + + 1)); + output_depth_size = + static_cast<int>(std::floor((static_cast<float>(src[depth_dim] + pad_front + pad_back - + (dilation_z * (weights[weights_depth_dim] - 1) + 1)) / + stride_z) + + 1)); break; case DimensionRoundingType::CEIL: - output_width_size = static_cast<int>(std::ceil((static_cast<float>(src[width_dim] + pad_left + pad_right - (dilation_x * (weights[weights_width_dim] - 1) + 1)) / stride_x) + 1)); - output_height_size = static_cast<int>(std::ceil((static_cast<float>(src[height_dim] + pad_top + pad_bottom - (dilation_y * (weights[weights_height_dim] - 1) + 1)) / stride_y) + 1)); - output_depth_size = static_cast<int>(std::ceil((static_cast<float>(src[depth_dim] + pad_front + pad_back - (dilation_z * (weights[weights_depth_dim] - 1) + 1)) / stride_z) + 1)); + output_width_size = + static_cast<int>(std::ceil((static_cast<float>(src[width_dim] + pad_left + pad_right - + (dilation_x * (weights[weights_width_dim] - 1) + 1)) / + stride_x) + + 1)); + output_height_size = + static_cast<int>(std::ceil((static_cast<float>(src[height_dim] + pad_top + pad_bottom - + (dilation_y * (weights[weights_height_dim] - 1) + 1)) / + stride_y) + + 1)); + output_depth_size = + static_cast<int>(std::ceil((static_cast<float>(src[depth_dim] + pad_front + pad_back - + (dilation_z * (weights[weights_depth_dim] - 1) + 1)) / + stride_z) + + 1)); break; default: ARM_COMPUTE_ERROR("Unsupported rounding type"); @@ -1539,7 +1620,7 @@ inline TensorShape compute_conv3d_shape(const TensorShape &src, const TensorShap */ inline TensorShape compute_pool3d_shape(const TensorShape &src, Pooling3dLayerInfo pool3d_info) { - TensorShape output_shape{ src }; + TensorShape output_shape{src}; const auto data_layout = DataLayout::NDHWC; const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); @@ -1552,10 +1633,12 @@ inline TensorShape compute_pool3d_shape(const TensorShape &src, Pooling3dLayerIn int output_height = 0; int output_depth = 0; - std::tie(output_width, output_height, output_depth) = scaled_3d_dimensions_signed(src[idx_width], src[idx_height], src[idx_depth], pool_size_width, pool_size_height, - pool_size_depth, pool3d_info); + std::tie(output_width, output_height, output_depth) = + scaled_3d_dimensions_signed(src[idx_width], src[idx_height], src[idx_depth], pool_size_width, pool_size_height, + pool_size_depth, pool3d_info); - ARM_COMPUTE_ERROR_ON_MSG((output_width < 1 || output_height < 1 || output_depth < 1), "Calculated output dimension size is invalid"); + ARM_COMPUTE_ERROR_ON_MSG((output_width < 1 || output_height < 1 || output_depth < 1), + "Calculated output dimension size is invalid"); output_shape.set(idx_width, static_cast<size_t>(output_width)); output_shape.set(idx_height, static_cast<size_t>(output_height)); @@ -1576,7 +1659,8 @@ inline TensorShape compute_pool3d_shape(const TensorShape &src, 
Pooling3dLayerIn * * @return the calculated shape */ -inline TensorShape compute_gather_shape(const TensorShape &input_shape, const TensorShape &indices_shape, uint32_t actual_axis) +inline TensorShape +compute_gather_shape(const TensorShape &input_shape, const TensorShape &indices_shape, uint32_t actual_axis) { const auto input_num_dims = input_shape.num_dimensions(); const auto indices_num_dims = indices_shape.num_dimensions(); @@ -1587,22 +1671,23 @@ inline TensorShape compute_gather_shape(const TensorShape &input_shape, const Te TensorShape output_shape; size_t dim_no = 0; - for(; dim_no < actual_axis; ++dim_no) + for (; dim_no < actual_axis; ++dim_no) { output_shape.set(dim_no, input_shape[dim_no]); } - for(; dim_no < actual_axis + indices_num_dims; ++dim_no) + for (; dim_no < actual_axis + indices_num_dims; ++dim_no) { output_shape.set(dim_no, indices_shape[dim_no - actual_axis]); } - for(; dim_no < input_num_dims + indices_num_dims - 1; ++dim_no) + for (; dim_no < input_num_dims + indices_num_dims - 1; ++dim_no) { output_shape.set(dim_no, input_shape[dim_no + 1 - indices_num_dims]); } - ARM_COMPUTE_ERROR_ON(input_shape.total_size() * indices_shape.total_size() != output_shape.total_size() * input_shape[actual_axis]); + ARM_COMPUTE_ERROR_ON(input_shape.total_size() * indices_shape.total_size() != + output_shape.total_size() * input_shape[actual_axis]); return output_shape; } diff --git a/arm_compute/core/utils/misc/Traits.h b/arm_compute/core/utils/misc/Traits.h index 933922f63c..944fcb95f9 100644 --- a/arm_compute/core/utils/misc/Traits.h +++ b/arm_compute/core/utils/misc/Traits.h @@ -25,6 +25,7 @@ #define ARM_COMPUTE_UTILS_TRAITS_TRAITS_H #include "arm_compute/core/Types.h" + #include <type_traits> namespace arm_compute diff --git a/arm_compute/core/utils/misc/Utility.h b/arm_compute/core/utils/misc/Utility.h index e3e20d719f..22f10d74cc 100644 --- a/arm_compute/core/utils/misc/Utility.h +++ b/arm_compute/core/utils/misc/Utility.h @@ -44,7 +44,7 @@ struct index_sequence }; template <std::size_t N, std::size_t... S> -struct index_sequence_generator : index_sequence_generator < N - 1, N - 1, S... > +struct index_sequence_generator : index_sequence_generator<N - 1, N - 1, S...> { }; @@ -58,17 +58,17 @@ template <std::size_t N> using index_sequence_t = typename index_sequence_generator<N>::type; template <typename T, std::size_t N, T val, T... vals> -struct generate_array : generate_array < T, N - 1, val, val, vals... > +struct generate_array : generate_array<T, N - 1, val, val, vals...> { }; template <typename T, T val, T... vals> struct generate_array<T, 0, val, vals...> { - static constexpr std::array<T, sizeof...(vals)> value{ vals... }; + static constexpr std::array<T, sizeof...(vals)> value{vals...}; }; -template +template constexpr std::array<T, sizeof...(vals)> generate_array<T, 0, val, vals...>::value; /** @endcond */ @@ -79,7 +79,7 @@ template <std::size_t... S, typename Iterator, typename T = std::array<typename std::iterator_traits<Iterator>::value_type, sizeof...(S)>> T make_array(Iterator first, index_sequence<S...>) { - return T{ { first[S]... } }; + return T{{first[S]...}}; } } // namespace detail @@ -87,7 +87,7 @@ template <std::size_t N, typename Iterator> std::array<typename std::iterator_traits<Iterator>::value_type, N> make_array(Iterator first, Iterator last) { ARM_COMPUTE_UNUSED(last); - return detail::make_array(first, index_sequence_t<N> {}); + return detail::make_array(first, index_sequence_t<N>{}); } /** Performs clamping among a lower and upper value. * @@ -119,7 +119,7 @@ inline void for_each(F &&) * @param[in] args Remaining arguments */ template <typename F, typename T, typename... Ts> -inline void for_each(F &&func, T &&arg, Ts &&... args) +inline void for_each(F &&func, T &&arg, Ts &&...args) { func(std::forward<T>(arg)); for_each(std::forward<F>(func), std::forward<Ts>(args)...); } @@ -143,9 +143,11 @@ inline T &&foldl(F &&, T &&value) * @param[in] values Remaining arguments */ template <typename F, typename T, typename U, typename... Us> -inline auto foldl(F &&func, T &&initial, U &&value, Us &&... values) -> decltype(func(std::forward<T>(initial), std::forward<U>(value))) +inline auto foldl(F &&func, T &&initial, U &&value, Us &&...values) + -> decltype(func(std::forward<T>(initial), std::forward<U>(value))) { - return foldl(std::forward<F>(func), func(std::forward<T>(initial), std::forward<U>(value)), std::forward<Us>(values)...); + return foldl(std::forward<F>(func), func(std::forward<T>(initial), std::forward<U>(value)), + std::forward<Us>(values)...); } /** Perform an index sort of a given vector. * @@ -160,11 +162,7 @@ std::vector<size_t> sort_indices(const std::vector<T> &v) std::vector<size_t> idx(v.size()); std::iota(idx.begin(), idx.end(), 0); - std::sort(idx.begin(), idx.end(), - [&v](size_t i1, size_t i2) - { - return v[i1] < v[i2]; - }); + std::sort(idx.begin(), idx.end(), [&v](size_t i1, size_t i2) { return v[i1] < v[i2]; }); return idx; } @@ -178,7 +176,7 @@ std::vector<size_t> sort_indices(const std::vector<T> &v) */ inline bool endswith(const std::string &str, const std::string &suffix) { - if(str.size() < suffix.size()) + if (str.size() < suffix.size()) { return false; } @@ -205,10 +203,7 @@ inline bool check_aligned(void *ptr, const size_t alignment) */ inline std::string tolower(std::string string) { - std::transform(string.begin(), string.end(), string.begin(), [](unsigned char c) - { - return std::tolower(c); - }); + std::transform(string.begin(), string.end(), string.begin(), [](unsigned char c) { return std::tolower(c); }); return string; } @@ -227,7 +222,7 @@ inline std::string getenv(const std::string &env_name) return std::string{}; #else // BARE_METAL const auto env_chr = std::getenv(env_name.c_str()); - return env_chr == nullptr ? std::string{} : std::string{ env_chr }; + return env_chr == nullptr ? std::string{} : std::string{env_chr}; #endif // BARE_METAL } } // namespace utility diff --git a/arm_compute/core/utils/quantization/AsymmHelpers.h b/arm_compute/core/utils/quantization/AsymmHelpers.h index a15f3e5cde..2324fe1838 100644 --- a/arm_compute/core/utils/quantization/AsymmHelpers.h +++ b/arm_compute/core/utils/quantization/AsymmHelpers.h @@ -41,7 +41,10 @@ namespace quantization * * @return a status */ -Status calculate_quantized_multiplier(float multiplier, int32_t *quant_multiplier, int32_t *shift, bool ignore_epsilon = false); +Status calculate_quantized_multiplier(float multiplier, + int32_t *quant_multiplier, + int32_t *shift, + bool ignore_epsilon = false); /** Calculate quantized representation of multiplier with value less than one. * * @param[in] multiplier Real multiplier. * * @return a status */ -Status calculate_quantized_multiplier_less_than_one(float multiplier, int32_t *quant_multiplier, int32_t *right_shift, bool ignore_epsilon = false); +Status calculate_quantized_multiplier_less_than_one(float multiplier, + int32_t *quant_multiplier, + int32_t *right_shift, + bool ignore_epsilon = false); /** Calculate quantized representation of multiplier having value greater than one. * * @param[in] multiplier Real multiplier. 
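Fixed-point kernels cannot multiply by a float scale directly, which is why these helpers decompose the requantization multiplier into an integer mantissa plus a power-of-two shift. A usage sketch (the scale value is an assumed example, not taken from this patch):

    #include "arm_compute/core/utils/quantization/AsymmHelpers.h"

    using namespace arm_compute;

    int32_t quant_multiplier = 0;
    int32_t shift            = 0;
    // 0.0009765625f (2^-10) stands in for a typical requantization scale,
    // i.e. input_scale * weights_scale / output_scale.
    const Status status =
        quantization::calculate_quantized_multiplier(0.0009765625f, &quant_multiplier, &shift);
    // On success, quant_multiplier holds a Q0.31 fixed-point mantissa and shift
    // a power-of-two exponent; together they approximate the float multiplier,
    // so the kernel only needs integer multiplies and shifts.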
@@ -60,7 +66,8 @@ Status calculate_quantized_multiplier_less_than_one(float multiplier, int32_t *q * * @return a status */ -Status calculate_quantized_multiplier_greater_than_one(float multiplier, int32_t *quantized_multiplier, int32_t *left_shift); +Status +calculate_quantized_multiplier_greater_than_one(float multiplier, int32_t *quantized_multiplier, int32_t *left_shift); /** Calculate quantized representation of per-channel multipliers * @@ -71,9 +78,9 @@ Status calculate_quantized_multiplier_greater_than_one(float multiplier, int32_t * * @return a status */ -Status calculate_quantized_multipliers(const QuantizationInfo &iq_info, - const QuantizationInfo &wq_info, - const QuantizationInfo &oq_info, +Status calculate_quantized_multipliers(const QuantizationInfo &iq_info, + const QuantizationInfo &wq_info, + const QuantizationInfo &oq_info, GEMMLowpOutputStageInfo &stage_info); /** Get minimum and maximum values for the input quantized data type @@ -147,7 +154,10 @@ int32_t saturating_rounding_multiply_by_pow2(int32_t exponent, int32_t v); * @param[out] output_shift Shift for inverse square root * */ -void get_invsqrt_quantized_multiplier_exp(int32_t input, int32_t reverse_shift, int32_t &output_inv_sqrt, int32_t &output_shift); +void get_invsqrt_quantized_multiplier_exp(int32_t input, + int32_t reverse_shift, + int32_t &output_inv_sqrt, + int32_t &output_shift); } // namespace quantization } // namespace arm_compute -- cgit v1.2.1
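For the Utility.h helpers reformatted above, a short sort_indices sketch (assumed input values; not part of the patch):

    #include "arm_compute/core/utils/misc/Utility.h"

    #include <cstddef>
    #include <vector>

    using namespace arm_compute;

    const std::vector<float>  scores{0.3f, 0.1f, 0.2f};
    const std::vector<size_t> order = utility::sort_indices(scores);
    // order is {1, 2, 0}: the indices of `scores` in ascending value order;
    // the input vector itself is left unmodified.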