author     Felix Thomasmathibalan <felixjohnny.thomasmathibalan@arm.com>  2023-09-27 17:46:17 +0100
committer  felixjohnny.thomasmathibalan <felixjohnny.thomasmathibalan@arm.com>  2023-09-28 12:08:05 +0000
commit     afd38f0c617d6f89b2b4532c6c44f116617e2b6f (patch)
tree       03bc7d5a762099989b16a656fa8d397b490ed70e /compute_kernel_writer
parent     bdcb4c148ee2fdeaaddf4cf1e57bbb0de02bb894 (diff)
Apply clang-format on repository
Code is formatted as per a revised clang format configuration file
(not part of this delivery). Version 14.0.6 is used.

Exclusion List:
- files with .cl extension
- files that are not strictly C/C++ (e.g. Android.bp, Sconscript ...)

And the following directories
- compute_kernel_writer/validation/
- tests/
- include/
- src/core/NEON/kernels/convolution/
- src/core/NEON/kernels/arm_gemm/
- src/core/NEON/kernels/arm_conv/
- data/

There will be a follow up for formatting of .cl files and the files
under tests/ and compute_kernel_writer/validation/.

Signed-off-by: Felix Thomasmathibalan <felixjohnny.thomasmathibalan@arm.com>
Change-Id: Ib7eb1fcf4e7537b9feaefcfc15098a804a3fde0a
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10391
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
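Note on the formatting rules: the revised .clang-format file is explicitly not part of this
delivery, but the hunks below make most of its effects visible. The following is a minimal,
hypothetical sketch of a configuration (clang-format 14 option names) that would reproduce
them; every option here is inferred from the diff, not taken from the actual file:

    # Hypothetical .clang-format sketch (YAML) -- inferred from the hunks
    # below, NOT the shipped configuration, which is not in this delivery.
    Language: Cpp
    ColumnLimit: 120                       # long signatures wrap near column 120
    SpaceBeforeParens: ControlStatements   # while(false) -> while (false)
    Cpp11BracedListStyle: true             # { 0 } -> {0} in brace initializers
    SpaceAfterTemplateKeyword: true        # template<T> -> template <T>
    BinPackParameters: false               # one parameter per line when a signature wraps
    BinPackArguments: true                 # call arguments stay packed onto shared lines
    PointerAlignment: Right                # '&' and '*' bind to the variable name
    AlignConsecutiveDeclarations: true     # the whitespace-only hunks suggest column alignment
    AllowShortLambdasOnASingleLine: All    # [&] { ... } bodies collapse to one line
    IncludeBlocks: Regroup                 # blank lines inserted between include groups
    SortIncludes: CaseSensitive            # include reordering seen in several files

Under those assumptions, re-running clang-format 14.0.6 with -i --style=file over the
non-excluded sources should be a no-op on this tree; the grouping order of includes would
additionally depend on matching IncludeCategories entries, which cannot be inferred here.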
Diffstat (limited to 'compute_kernel_writer')
-rw-r--r--  compute_kernel_writer/include/ckw/Error.h  24
-rw-r--r--  compute_kernel_writer/include/ckw/Kernel.h  1
-rw-r--r--  compute_kernel_writer/include/ckw/KernelArgument.h  3
-rw-r--r--  compute_kernel_writer/include/ckw/KernelWriter.h  85
-rw-r--r--  compute_kernel_writer/include/ckw/TensorInfo.h  9
-rw-r--r--  compute_kernel_writer/include/ckw/TensorSampler.h  23
-rw-r--r--  compute_kernel_writer/include/ckw/TileInfo.h  2
-rw-r--r--  compute_kernel_writer/include/ckw/types/ConstantData.h  10
-rw-r--r--  compute_kernel_writer/include/ckw/types/MemoryOperation.h  10
-rw-r--r--  compute_kernel_writer/include/ckw/types/TensorSamplerTypes.h  4
-rw-r--r--  compute_kernel_writer/prototype/examples/add_exp_store.cpp  39
-rw-r--r--  compute_kernel_writer/prototype/examples/common/ExampleComponentArgument.cpp  8
-rw-r--r--  compute_kernel_writer/prototype/examples/common/ExampleComponentArgument.h  4
-rw-r--r--  compute_kernel_writer/prototype/examples/common/ExampleKernelWriter.cpp  13
-rw-r--r--  compute_kernel_writer/prototype/examples/common/ExampleScopedKernelWriter.cpp  1
-rw-r--r--  compute_kernel_writer/prototype/examples/writer_helper.cpp  31
-rw-r--r--  compute_kernel_writer/prototype/include/ckw/Error.h  7
-rw-r--r--  compute_kernel_writer/prototype/include/ckw/KernelArgument.h  3
-rw-r--r--  compute_kernel_writer/prototype/include/ckw/KernelWriter.h  32
-rw-r--r--  compute_kernel_writer/prototype/include/ckw/KernelWriterHelper.h  122
-rw-r--r--  compute_kernel_writer/prototype/include/ckw/OperandBase.h  1
-rw-r--r--  compute_kernel_writer/prototype/include/ckw/ScalarValue.h  8
-rw-r--r--  compute_kernel_writer/prototype/include/ckw/TensorInfo.h  8
-rw-r--r--  compute_kernel_writer/prototype/include/ckw/TensorOperand.h  26
-rw-r--r--  compute_kernel_writer/prototype/include/ckw/TensorTileSampler.h  56
-rw-r--r--  compute_kernel_writer/prototype/include/ckw/TileInfo.h  2
-rw-r--r--  compute_kernel_writer/prototype/include/ckw/types/Functions.h  20
-rw-r--r--  compute_kernel_writer/prototype/include/ckw/types/Operators.h  4
-rw-r--r--  compute_kernel_writer/prototype/include/ckw/types/TensorSamplerTypes.h  40
-rw-r--r--  compute_kernel_writer/prototype/src/Kernel.cpp  19
-rw-r--r--  compute_kernel_writer/prototype/src/KernelArgument.cpp  4
-rw-r--r--  compute_kernel_writer/prototype/src/KernelWriter.cpp  82
-rw-r--r--  compute_kernel_writer/prototype/src/OperandBase.cpp  3
-rw-r--r--  compute_kernel_writer/prototype/src/Prototype.h  690
-rw-r--r--  compute_kernel_writer/prototype/src/TensorOperand.cpp  14
-rw-r--r--  compute_kernel_writer/prototype/src/TensorTileSampler.cpp  60
-rw-r--r--  compute_kernel_writer/prototype/src/TileInfo.cpp  9
-rw-r--r--  compute_kernel_writer/prototype/src/TileOperand.cpp  28
-rw-r--r--  compute_kernel_writer/src/Error.cpp  6
-rw-r--r--  compute_kernel_writer/src/Helpers.cpp  6
-rw-r--r--  compute_kernel_writer/src/ITensorArgument.h  9
-rw-r--r--  compute_kernel_writer/src/ITensorComponent.h  1
-rw-r--r--  compute_kernel_writer/src/ITile.h  8
-rw-r--r--  compute_kernel_writer/src/Kernel.cpp  1
-rw-r--r--  compute_kernel_writer/src/KernelArgument.cpp  1
-rw-r--r--  compute_kernel_writer/src/KernelWriter.cpp  8
-rw-r--r--  compute_kernel_writer/src/Tensor3dMapper.cpp  26
-rw-r--r--  compute_kernel_writer/src/Tensor3dMapper.h  4
-rw-r--r--  compute_kernel_writer/src/TensorOperand.cpp  6
-rw-r--r--  compute_kernel_writer/src/TensorSampler.cpp  6
-rw-r--r--  compute_kernel_writer/src/TensorUtils.cpp  13
-rw-r--r--  compute_kernel_writer/src/TileInfo.cpp  9
-rw-r--r--  compute_kernel_writer/src/TileOperand.cpp  8
-rw-r--r--  compute_kernel_writer/src/TileView.h  10
-rw-r--r--  compute_kernel_writer/src/cl/CLHelpers.cpp  77
-rw-r--r--  compute_kernel_writer/src/cl/CLKernelWriter.cpp  282
-rw-r--r--  compute_kernel_writer/src/cl/CLKernelWriter.h  104
-rw-r--r--  compute_kernel_writer/src/cl/CLTensorArgument.cpp  50
-rw-r--r--  compute_kernel_writer/src/cl/CLTensorArgument.h  6
-rw-r--r--  compute_kernel_writer/src/cl/CLTensorComponent.cpp  9
-rw-r--r--  compute_kernel_writer/src/cl/CLTensorComponent.h  5
-rw-r--r--  compute_kernel_writer/src/cl/CLTile.cpp  41
-rw-r--r--  compute_kernel_writer/src/cl/CLTile.h  7
-rw-r--r--  compute_kernel_writer/src/cl/helpers/CLMemoryOpBufferHelper.cpp  79
-rw-r--r--  compute_kernel_writer/src/cl/helpers/CLMemoryOpBufferHelper.h  23
-rw-r--r--  compute_kernel_writer/src/cl/helpers/CLMemoryOpImage2dHelper.cpp  51
-rw-r--r--  compute_kernel_writer/src/cl/helpers/CLMemoryOpImage2dHelper.h  16
-rw-r--r--  compute_kernel_writer/src/cl/helpers/ICLMemoryOpHelper.h  21
-rw-r--r--  compute_kernel_writer/src/types/ConstantData.cpp  49
69 files changed, 1390 insertions, 1057 deletions
diff --git a/compute_kernel_writer/include/ckw/Error.h b/compute_kernel_writer/include/ckw/Error.h
index 7da9544b9e..6b80778957 100644
--- a/compute_kernel_writer/include/ckw/Error.h
+++ b/compute_kernel_writer/include/ckw/Error.h
@@ -53,7 +53,7 @@ create_error_msg(const std::string &file, const std::string &func, const std::st
const std::string arg2(std::to_string(__LINE__)); \
const std::string arg3(msg); \
std::runtime_error(create_error_msg(arg0, arg1, arg2, arg3)); \
- } while(false)
+ } while (false)
/** Mark the variables as unused.
*
@@ -74,16 +74,16 @@ inline void ignore_unused(T &&...)
*
* @param[in] msg The error message.
*/
-#define CKW_THROW_MSG(msg) \
- do \
- { \
- const std::string file(__FILE__); \
- const std::string func(__func__); \
- const std::string line(std::to_string(__LINE__)); \
- const std::string message(msg); \
- \
+#define CKW_THROW_MSG(msg) \
+ do \
+ { \
+ const std::string file(__FILE__); \
+ const std::string func(__func__); \
+ const std::string line(std::to_string(__LINE__)); \
+ const std::string message(msg); \
+ \
throw std::runtime_error(ckw::create_error_msg(file, func, line, message)); \
- } while(false)
+ } while (false)
#ifdef COMPUTE_KERNEL_WRITER_ASSERTS_ENABLED
@@ -95,11 +95,11 @@ inline void ignore_unused(T &&...)
#define CKW_ASSERT_MSG(cond, msg) \
do \
{ \
- if(!(cond)) \
+ if (!(cond)) \
{ \
CKW_THROW_MSG(msg); \
} \
- } while(false)
+ } while (false)
#else // COMPUTE_KERNEL_WRITER_ASSERTS_ENABLED
diff --git a/compute_kernel_writer/include/ckw/Kernel.h b/compute_kernel_writer/include/ckw/Kernel.h
index dc0cad5503..f9b7bbb82e 100644
--- a/compute_kernel_writer/include/ckw/Kernel.h
+++ b/compute_kernel_writer/include/ckw/Kernel.h
@@ -26,6 +26,7 @@
#define CKW_INCLUDE_CKW_KERNEL_H
#include "ckw/KernelArgument.h"
+
#include <string>
#include <vector>
diff --git a/compute_kernel_writer/include/ckw/KernelArgument.h b/compute_kernel_writer/include/ckw/KernelArgument.h
index 530e2920eb..7e9bcbf1ee 100644
--- a/compute_kernel_writer/include/ckw/KernelArgument.h
+++ b/compute_kernel_writer/include/ckw/KernelArgument.h
@@ -27,6 +27,7 @@
#include "ckw/types/TensorComponentType.h"
#include "ckw/types/TensorStorageType.h"
+
#include <cstdint>
namespace ckw
@@ -90,7 +91,7 @@ private:
TensorComponentType tensor_component_type;
};
- SubId _sub_id{ 0 };
+ SubId _sub_id{0};
};
} // namespace ckw
diff --git a/compute_kernel_writer/include/ckw/KernelWriter.h b/compute_kernel_writer/include/ckw/KernelWriter.h
index 15c99fe652..0d739e859a 100644
--- a/compute_kernel_writer/include/ckw/KernelWriter.h
+++ b/compute_kernel_writer/include/ckw/KernelWriter.h
@@ -115,7 +115,8 @@ public:
* @param[in] first The first source tile.
* @param[in] second The second source tile.
*/
- virtual void op_binary(const TileOperand &dst, BinaryOp op, const TileOperand &first, const TileOperand &second) = 0;
+ virtual void
+ op_binary(const TileOperand &dst, BinaryOp op, const TileOperand &first, const TileOperand &second) = 0;
/** Write ternary expression statement: `<dst> = <op>(<first>, <second>, <third>);`.
*
@@ -125,7 +126,11 @@ public:
* @param[in] second The second source tile.
* @param[in] third The third source tile.
*/
- virtual void op_ternary(const TileOperand &dst, TernaryOp op, const TileOperand &first, const TileOperand &second, const TileOperand &third) = 0;
+ virtual void op_ternary(const TileOperand &dst,
+ TernaryOp op,
+ const TileOperand &first,
+ const TileOperand &second,
+ const TileOperand &third) = 0;
// =============================================================================================
// Flow control
@@ -138,7 +143,8 @@ public:
* @param[in] rhs The RHS tile of the condition.
* @param[in] body The function that writes the body of the if block.
*/
- virtual void op_if(const TileOperand &lhs, BinaryOp op, const TileOperand &rhs, const std::function<void()> &body) = 0;
+ virtual void
+ op_if(const TileOperand &lhs, BinaryOp op, const TileOperand &rhs, const std::function<void()> &body) = 0;
/** Write else-if block: `else if(<lhs> <op> <rhs>) { <body> }`.
*
@@ -147,7 +153,8 @@ public:
* @param[in] rhs The RHS tile of the condition.
* @param[in] body The function that writes the body of the else-if block.
*/
- virtual void op_else_if(const TileOperand &lhs, BinaryOp op, const TileOperand &rhs, const std::function<void()> &body) = 0;
+ virtual void
+ op_else_if(const TileOperand &lhs, BinaryOp op, const TileOperand &rhs, const std::function<void()> &body) = 0;
/** Write an else block: `else { <body> }`.
*
@@ -165,10 +172,13 @@ public:
* @param[in] update_value The value which is updated at every iteration.
* @param[in] body The function that writes the body of the for-loop block.
*/
- virtual void op_for_loop(
- const TileOperand &var, BinaryOp cond_op, const TileOperand &cond_value,
- const TileOperand &update_var, AssignmentOp update_op, const TileOperand &update_value,
- const std::function<void()> &body) = 0;
+ virtual void op_for_loop(const TileOperand &var,
+ BinaryOp cond_op,
+ const TileOperand &cond_value,
+ const TileOperand &update_var,
+ AssignmentOp update_op,
+ const TileOperand &update_value,
+ const std::function<void()> &body) = 0;
/** Write the return statement. */
virtual void op_return() = 0;
@@ -271,9 +281,13 @@ public:
* @param[in] z z-coordinate
* @param[in] batch batch
*/
- virtual void op_load(
- const TileOperand &tile_op, const TensorOperand &tensor_op, TensorSampler &sampler,
- const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch) = 0;
+ virtual void op_load(const TileOperand &tile_op,
+ const TensorOperand &tensor_op,
+ TensorSampler &sampler,
+ const TileOperand &x,
+ const TileOperand &y,
+ const TileOperand &z,
+ const TileOperand &batch) = 0;
/** Load the data from the tensor memory to the tile in a dilated way using the sampling information.
*
@@ -282,27 +296,41 @@ public:
* @param[in] dilation_x Dilation while reading in x-dimension
* @param[in] dilation_y Dilation while reading in y-dimension
*/
- virtual void op_load_dilated(
- const TileOperand &tile_op, const TensorOperand &tensor_op, TensorSampler &sampler,
- const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch,
- const TileOperand &dilation_x, const TileOperand &dilation_y) = 0;
+ virtual void op_load_dilated(const TileOperand &tile_op,
+ const TensorOperand &tensor_op,
+ TensorSampler &sampler,
+ const TileOperand &x,
+ const TileOperand &y,
+ const TileOperand &z,
+ const TileOperand &batch,
+ const TileOperand &dilation_x,
+ const TileOperand &dilation_y) = 0;
/** Store the data to the tensor memory from the tile using the sampling information.
*
* Similar to @ref KernelWriter::op_load()
*/
- virtual void op_store(
- const TensorOperand &tensor_op, const TileOperand &tile_op, TensorSampler &sampler,
- const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch) = 0;
+ virtual void op_store(const TensorOperand &tensor_op,
+ const TileOperand &tile_op,
+ TensorSampler &sampler,
+ const TileOperand &x,
+ const TileOperand &y,
+ const TileOperand &z,
+ const TileOperand &batch) = 0;
/** Store the data to the tensor memory from the tile in a dilated way using the sampling information.
*
* Similar to @ref KernelWriter::op_load_dilated()
*/
- virtual void op_store_dilated(
- const TensorOperand &tensor_op, const TileOperand &tile_op, TensorSampler &sampler,
- const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch,
- const TileOperand &dilation_x, const TileOperand &dilation_y) = 0;
+ virtual void op_store_dilated(const TensorOperand &tensor_op,
+ const TileOperand &tile_op,
+ TensorSampler &sampler,
+ const TileOperand &x,
+ const TileOperand &y,
+ const TileOperand &z,
+ const TileOperand &batch,
+ const TileOperand &dilation_x,
+ const TileOperand &dilation_y) = 0;
/** Load the data from the tensor memory to the tile using the indirect buffer approach and respecting the sampling information.
*
@@ -314,8 +342,13 @@ public:
* @param[in] z z-coordinate
* @param[in] batch batch
*/
- virtual void op_load_indirect(const TileOperand &tile_op, const TensorOperand &tensor_op, TensorSampler &sampler,
- const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch_op) = 0;
+ virtual void op_load_indirect(const TileOperand &tile_op,
+ const TensorOperand &tensor_op,
+ TensorSampler &sampler,
+ const TileOperand &x,
+ const TileOperand &y,
+ const TileOperand &z,
+ const TileOperand &batch_op) = 0;
protected:
// =============================================================================================
@@ -373,8 +406,8 @@ protected:
static DataType get_data_type(const ConstantData &data);
private:
- int32_t _id_space{ 0 };
- int32_t _last_created_id_space{ 0 };
+ int32_t _id_space{0};
+ int32_t _last_created_id_space{0};
};
} // namespace ckw
diff --git a/compute_kernel_writer/include/ckw/TensorInfo.h b/compute_kernel_writer/include/ckw/TensorInfo.h
index 87cf7c1426..5c87cb5b12 100644
--- a/compute_kernel_writer/include/ckw/TensorInfo.h
+++ b/compute_kernel_writer/include/ckw/TensorInfo.h
@@ -27,6 +27,7 @@
#include "ckw/types/DataType.h"
#include "ckw/types/TensorDataLayout.h"
+
#include <array>
#include <cstdint>
@@ -85,10 +86,10 @@ public:
int32_t id() const;
private:
- TensorShape _shape{ { 0 } };
- DataType _dt{ DataType::Unknown };
- TensorDataLayout _dl{ TensorDataLayout::Unknown };
- int32_t _id{ -1 };
+ TensorShape _shape{{0}};
+ DataType _dt{DataType::Unknown};
+ TensorDataLayout _dl{TensorDataLayout::Unknown};
+ int32_t _id{-1};
};
} // namespace ckw
diff --git a/compute_kernel_writer/include/ckw/TensorSampler.h b/compute_kernel_writer/include/ckw/TensorSampler.h
index 1b51636edb..117e8de2cf 100644
--- a/compute_kernel_writer/include/ckw/TensorSampler.h
+++ b/compute_kernel_writer/include/ckw/TensorSampler.h
@@ -25,8 +25,8 @@
#ifndef CKW_INCLUDE_CKW_TENSORSAMPLER_H
#define CKW_INCLUDE_CKW_TENSORSAMPLER_H
-#include "ckw/types/TensorStorageType.h"
#include "ckw/types/TensorSamplerTypes.h"
+#include "ckw/types/TensorStorageType.h"
namespace ckw
{
@@ -53,12 +53,11 @@ public:
* @param[in] address_mode_y The address mode of the y dimension.
* @param[in] address_mode_z The address mode of the z dimension.
*/
- TensorSampler(
- TensorStorageType storage,
- TensorSamplerFormat format,
- TensorSamplerAddressModeX address_mode_x,
- TensorSamplerAddressModeY address_mode_y,
- TensorSamplerAddressModeZ address_mode_z);
+ TensorSampler(TensorStorageType storage,
+ TensorSamplerFormat format,
+ TensorSamplerAddressModeX address_mode_x,
+ TensorSamplerAddressModeY address_mode_y,
+ TensorSamplerAddressModeZ address_mode_z);
/** Get the storage for the tensor */
TensorStorageType storage() const;
@@ -91,11 +90,11 @@ public:
TensorSampler &address_mode_z(TensorSamplerAddressModeZ address_mode_z);
private:
- TensorStorageType _storage { TensorStorageType::BufferUint8Ptr };
- TensorSamplerFormat _format { TensorSamplerFormat::Unknown };
- TensorSamplerAddressModeX _address_mode_x { TensorSamplerAddressModeX::Unknown };
- TensorSamplerAddressModeY _address_mode_y { TensorSamplerAddressModeY::Unknown };
- TensorSamplerAddressModeZ _address_mode_z { TensorSamplerAddressModeZ::Unknown };
+ TensorStorageType _storage{TensorStorageType::BufferUint8Ptr};
+ TensorSamplerFormat _format{TensorSamplerFormat::Unknown};
+ TensorSamplerAddressModeX _address_mode_x{TensorSamplerAddressModeX::Unknown};
+ TensorSamplerAddressModeY _address_mode_y{TensorSamplerAddressModeY::Unknown};
+ TensorSamplerAddressModeZ _address_mode_z{TensorSamplerAddressModeZ::Unknown};
};
} // namespace ckw
diff --git a/compute_kernel_writer/include/ckw/TileInfo.h b/compute_kernel_writer/include/ckw/TileInfo.h
index b8094f79bf..678bb7aaf6 100644
--- a/compute_kernel_writer/include/ckw/TileInfo.h
+++ b/compute_kernel_writer/include/ckw/TileInfo.h
@@ -83,7 +83,7 @@ public:
DataType data_type() const;
private:
- DataType _dt{ DataType::Unknown };
+ DataType _dt{DataType::Unknown};
TileShape _shape{};
};
diff --git a/compute_kernel_writer/include/ckw/types/ConstantData.h b/compute_kernel_writer/include/ckw/types/ConstantData.h
index 95425b2c65..7708818ca8 100644
--- a/compute_kernel_writer/include/ckw/types/ConstantData.h
+++ b/compute_kernel_writer/include/ckw/types/ConstantData.h
@@ -45,12 +45,12 @@ class KernelWriter;
class ConstantData
{
- using String = std::string;
+ using String = std::string;
using StringVector = std::vector<String>;
public:
/** Templated constructor */
- template<typename T>
+ template <typename T>
ConstantData(std::initializer_list<std::initializer_list<T>> values, DataType data_type);
private:
@@ -60,14 +60,14 @@ private:
*
* @return true if user provided data type and the template type are conformant
*/
- template<typename T>
+ template <typename T>
bool validate(DataType data_type);
/** Get the constant data as a 2d vector of string values
*
* @return a 2d vector of strings that has the string-converted values
*/
- const std::vector<StringVector>& values() const;
+ const std::vector<StringVector> &values() const;
/** Get the underlying data type of the constant values
*
@@ -81,7 +81,7 @@ private:
private:
// Data members
std::vector<StringVector> _values{};
- DataType _data_type{};
+ DataType _data_type{};
};
} // namespace ckw
diff --git a/compute_kernel_writer/include/ckw/types/MemoryOperation.h b/compute_kernel_writer/include/ckw/types/MemoryOperation.h
index 0466b82df2..f93f60c51a 100644
--- a/compute_kernel_writer/include/ckw/types/MemoryOperation.h
+++ b/compute_kernel_writer/include/ckw/types/MemoryOperation.h
@@ -27,11 +27,11 @@
namespace ckw
{
- enum class MemoryOperation
- {
- Load = 1,
- Store = 2
- };
+enum class MemoryOperation
+{
+ Load = 1,
+ Store = 2
+};
} // namespace ckw
#endif /* CKW_INCLUDE_CKW_TYPES_MEMORYOPERATION */
diff --git a/compute_kernel_writer/include/ckw/types/TensorSamplerTypes.h b/compute_kernel_writer/include/ckw/types/TensorSamplerTypes.h
index 43dce1d4e4..512d0b4501 100644
--- a/compute_kernel_writer/include/ckw/types/TensorSamplerTypes.h
+++ b/compute_kernel_writer/include/ckw/types/TensorSamplerTypes.h
@@ -75,8 +75,8 @@ enum class TensorSamplerAddressModeY : int32_t
*/
enum class TensorSamplerAddressModeZ : int32_t
{
- Unknown = 0,
- None = 1,
+ Unknown = 0,
+ None = 1,
};
} // namespace ckw
diff --git a/compute_kernel_writer/prototype/examples/add_exp_store.cpp b/compute_kernel_writer/prototype/examples/add_exp_store.cpp
index 6a9884543c..2b640ca01b 100644
--- a/compute_kernel_writer/prototype/examples/add_exp_store.cpp
+++ b/compute_kernel_writer/prototype/examples/add_exp_store.cpp
@@ -32,7 +32,6 @@
#include "common/ExampleComponentArgument.h"
#include "common/ExampleKernelWriter.h"
#include "common/ExampleScopedKernelWriter.h"
-
#include <iostream>
#include <vector>
@@ -78,14 +77,14 @@ void op_binary_elementwise(ExampleScopedKernelWriter writer, std::vector<Example
auto dst = operands.at(2);
// Load the LHS and RHS tile and prepare the tensor sampler.
- if(!lhs->has_tile() && !rhs->has_tile())
+ if (!lhs->has_tile() && !rhs->has_tile())
{
const auto sampler = create_simple_sampler(writer);
writer->op_load_once(lhs, sampler);
writer->op_load_once(rhs, sampler);
}
- else if(lhs->has_tile())
+ else if (lhs->has_tile())
{
const auto &sampler = lhs->tile_sampler();
writer->op_load_once(rhs, sampler);
@@ -101,7 +100,7 @@ void op_binary_elementwise(ExampleScopedKernelWriter writer, std::vector<Example
const auto &sampler = lhs->tile_sampler();
// Prepare the output tile.
- if(!dst->has_tile())
+ if (!dst->has_tile())
{
auto &tile = writer->declare_tile("dst_tile", lhs_tile.tile_info());
dst->init_virtual_tensor(tile, sampler);
@@ -119,7 +118,7 @@ void op_exp(ExampleScopedKernelWriter writer, std::vector<ExampleComponentArgume
auto dst = operands.at(1);
// Load the source tile and prepare the sampler.
- if(!src->has_tile())
+ if (!src->has_tile())
{
const auto sampler = create_simple_sampler(writer);
writer->op_load_once(src, sampler);
@@ -129,7 +128,7 @@ void op_exp(ExampleScopedKernelWriter writer, std::vector<ExampleComponentArgume
const auto &sampler = src->tile_sampler();
// Prepare the output tile.
- if(!dst->has_tile())
+ if (!dst->has_tile())
{
auto &tile = writer->declare_tile("dst_tile", src_tile.tile_info());
dst->init_virtual_tensor(tile, sampler);
@@ -160,34 +159,38 @@ int main()
ExampleScopedKernelWriter writer(&root_writer);
- const TensorInfo src0_info(DataType::Fp32, TensorShape({ 3, 10, 20, 1, 1 }), TensorDataLayout::Nhwc, 0);
- const TensorInfo src1_info(DataType::Fp32, TensorShape({ 3, 10, 20, 1, 1 }), TensorDataLayout::Nhwc, 1);
- const TensorInfo dst_info(DataType::Fp32, TensorShape({ 3, 10, 20, 1, 1 }), TensorDataLayout::Nhwc, 2);
+ const TensorInfo src0_info(DataType::Fp32, TensorShape({3, 10, 20, 1, 1}), TensorDataLayout::Nhwc, 0);
+ const TensorInfo src1_info(DataType::Fp32, TensorShape({3, 10, 20, 1, 1}), TensorDataLayout::Nhwc, 1);
+ const TensorInfo dst_info(DataType::Fp32, TensorShape({3, 10, 20, 1, 1}), TensorDataLayout::Nhwc, 2);
- ExampleComponentArgument src0(writer->declare_tensor_argument("src0", src0_info, TensorStorageType::BufferUint8Ptr));
- ExampleComponentArgument src1(writer->declare_tensor_argument("src1", src1_info, TensorStorageType::BufferUint8Ptr));
+ ExampleComponentArgument src0(
+ writer->declare_tensor_argument("src0", src0_info, TensorStorageType::BufferUint8Ptr));
+ ExampleComponentArgument src1(
+ writer->declare_tensor_argument("src1", src1_info, TensorStorageType::BufferUint8Ptr));
ExampleComponentArgument dst(writer->declare_tensor_argument("dst", dst_info, TensorStorageType::BufferUint8Ptr));
ExampleComponentArgument ans;
- op_binary_elementwise(writer, { &src0, &src1, &ans });
- op_exp(writer, { &ans, &ans });
- op_store(writer, { &ans, &dst });
+ op_binary_elementwise(writer, {&src0, &src1, &ans});
+ op_exp(writer, {&ans, &ans});
+ op_store(writer, {&ans, &dst});
const auto arguments = kernel.arguments();
std::cout << "\n====================\nArguments:\n====================\n";
- for(auto &arg : arguments)
+ for (auto &arg : arguments)
{
- switch(arg.type())
+ switch (arg.type())
{
case ckw::KernelArgument::Type::TensorStorage:
- std::cout << "* Tensor storage: ID = " << arg.id() << ", type = " << std::hex << "0x" << static_cast<uint32_t>(arg.tensor_storage_type()) << std::dec << "\n";
+ std::cout << "* Tensor storage: ID = " << arg.id() << ", type = " << std::hex << "0x"
+ << static_cast<uint32_t>(arg.tensor_storage_type()) << std::dec << "\n";
break;
case ckw::KernelArgument::Type::TensorComponent:
- std::cout << "* Tensor component: ID = " << arg.id() << ", type = " << std::hex << "0x" << static_cast<uint32_t>(arg.tensor_component_type()) << std::dec << "\n";
+ std::cout << "* Tensor component: ID = " << arg.id() << ", type = " << std::hex << "0x"
+ << static_cast<uint32_t>(arg.tensor_component_type()) << std::dec << "\n";
break;
default:
diff --git a/compute_kernel_writer/prototype/examples/common/ExampleComponentArgument.cpp b/compute_kernel_writer/prototype/examples/common/ExampleComponentArgument.cpp
index 5a2ec526cc..55223dae0e 100644
--- a/compute_kernel_writer/prototype/examples/common/ExampleComponentArgument.cpp
+++ b/compute_kernel_writer/prototype/examples/common/ExampleComponentArgument.cpp
@@ -23,19 +23,19 @@
*/
#include "ExampleComponentArgument.h"
+
#include "ckw/Error.h"
ExampleComponentArgument::ExampleComponentArgument()
{
}
-ExampleComponentArgument::ExampleComponentArgument(ckw::TensorOperand &tensor)
- : _tensor(&tensor)
+ExampleComponentArgument::ExampleComponentArgument(ckw::TensorOperand &tensor) : _tensor(&tensor)
{
}
-ExampleComponentArgument &
-ExampleComponentArgument::init_virtual_tensor(ckw::TileOperand &tile, const ckw::TensorTileSampler &tile_sampler)
+ExampleComponentArgument &ExampleComponentArgument::init_virtual_tensor(ckw::TileOperand &tile,
+ const ckw::TensorTileSampler &tile_sampler)
{
CKW_ASSERT(_tile == nullptr);
diff --git a/compute_kernel_writer/prototype/examples/common/ExampleComponentArgument.h b/compute_kernel_writer/prototype/examples/common/ExampleComponentArgument.h
index 9fdc50ba08..0e029b1157 100644
--- a/compute_kernel_writer/prototype/examples/common/ExampleComponentArgument.h
+++ b/compute_kernel_writer/prototype/examples/common/ExampleComponentArgument.h
@@ -104,8 +104,8 @@ public:
const ckw::TensorTileSampler &tile_sampler() const;
private:
- ckw::TensorOperand *_tensor{ nullptr };
- ckw::TileOperand *_tile{ nullptr };
+ ckw::TensorOperand *_tensor{nullptr};
+ ckw::TileOperand *_tile{nullptr};
ckw::TensorTileSampler _tile_sampler{};
};
diff --git a/compute_kernel_writer/prototype/examples/common/ExampleKernelWriter.cpp b/compute_kernel_writer/prototype/examples/common/ExampleKernelWriter.cpp
index 6b9f244735..1734ce8823 100644
--- a/compute_kernel_writer/prototype/examples/common/ExampleKernelWriter.cpp
+++ b/compute_kernel_writer/prototype/examples/common/ExampleKernelWriter.cpp
@@ -23,26 +23,27 @@
*/
#include "ExampleKernelWriter.h"
-#include "ExampleComponentArgument.h"
+
#include "ckw/Error.h"
#include "ckw/TileInfo.h"
-ExampleKernelWriter::ExampleKernelWriter(ckw::Kernel &kernel)
- : KernelWriter(kernel)
+#include "ExampleComponentArgument.h"
+
+ExampleKernelWriter::ExampleKernelWriter(ckw::Kernel &kernel) : KernelWriter(kernel)
{
}
void ExampleKernelWriter::op_load_once(ExampleComponentArgument *tensor_or_tile, const ckw::TensorTileSampler &sampler)
{
- if(!tensor_or_tile->has_tile())
+ if (!tensor_or_tile->has_tile())
{
CKW_ASSERT(tensor_or_tile->has_tensor());
auto &tensor = tensor_or_tile->tensor();
const auto tile_name = tensor.name() + "_tile";
- auto &tile = declare_tile(tile_name.c_str(),
- ckw::TileInfo(tensor.data_type(), sampler.height(), sampler.width()));
+ auto &tile =
+ declare_tile(tile_name.c_str(), ckw::TileInfo(tensor.data_type(), sampler.height(), sampler.width()));
op_load(tile, tensor, sampler);
diff --git a/compute_kernel_writer/prototype/examples/common/ExampleScopedKernelWriter.cpp b/compute_kernel_writer/prototype/examples/common/ExampleScopedKernelWriter.cpp
index 7c44fa8749..784d5ffb96 100644
--- a/compute_kernel_writer/prototype/examples/common/ExampleScopedKernelWriter.cpp
+++ b/compute_kernel_writer/prototype/examples/common/ExampleScopedKernelWriter.cpp
@@ -23,6 +23,7 @@
*/
#include "ExampleScopedKernelWriter.h"
+
#include "ExampleKernelWriter.h"
ExampleScopedKernelWriter::ExampleScopedKernelWriter(ExampleKernelWriter *writer)
diff --git a/compute_kernel_writer/prototype/examples/writer_helper.cpp b/compute_kernel_writer/prototype/examples/writer_helper.cpp
index ccef92dcdf..8623afbf50 100644
--- a/compute_kernel_writer/prototype/examples/writer_helper.cpp
+++ b/compute_kernel_writer/prototype/examples/writer_helper.cpp
@@ -23,14 +23,14 @@
*/
#include "ckw/KernelWriter.h"
-#include "../include/ckw/KernelWriterHelper.h"
#include "ckw/TensorTileSampler.h"
+#include "../include/ckw/KernelWriterHelper.h"
#include <iostream>
using namespace ckw;
-TensorTileSampler create_simple_sampler(KernelWriter& writer)
+TensorTileSampler create_simple_sampler(KernelWriter &writer)
{
TensorTileSampler sampler;
@@ -65,11 +65,11 @@ TensorTileSampler create_simple_sampler(KernelWriter& writer)
int main()
{
- Kernel kernel("test", GpuTargetLanguage::OpenCL);
+ Kernel kernel("test", GpuTargetLanguage::OpenCL);
KernelWriterHelper<KernelWriter> writer(kernel);
- const TensorInfo src_info(DataType::Fp32, TensorShape({ 1, 1, 1, 1, 1 }), TensorDataLayout::Nhwc, 0);
- const TensorInfo dst_info(DataType::Fp32, TensorShape({ 1, 1, 1, 1, 1 }), TensorDataLayout::Nhwc, 1);
+ const TensorInfo src_info(DataType::Fp32, TensorShape({1, 1, 1, 1, 1}), TensorDataLayout::Nhwc, 0);
+ const TensorInfo dst_info(DataType::Fp32, TensorShape({1, 1, 1, 1, 1}), TensorDataLayout::Nhwc, 1);
auto &src_tensor = writer.declare_tensor_argument("src", src_info);
auto &dst_tensor = writer.declare_tensor_argument("dst", dst_info);
@@ -77,27 +77,24 @@ int main()
const auto sampler = create_simple_sampler(writer);
auto &src = writer.declare_tile("src_tile", TileInfo(src_tensor.data_type(), sampler.height(), sampler.width()));
- auto &other = writer.declare_tile("other_tile", TileInfo(src_tensor.data_type(), sampler.height(), sampler.width()));
+ auto &other =
+ writer.declare_tile("other_tile", TileInfo(src_tensor.data_type(), sampler.height(), sampler.width()));
auto &dst = writer.declare_tile("dst_tile", TileInfo(src_tensor.data_type(), sampler.height(), sampler.width()));
writer.op_load(src, src_tensor, sampler);
writer.op_load(other, src_tensor, sampler);
writer.op_load(dst, dst_tensor, sampler);
- auto test = dst ^ src ^ other;
+ auto test = dst ^ src ^ other;
auto other_test = logical_and(dst, src, other);
writer.op_assign(dst, logical_and(dst, src, other));
writer.op_assign(dst, test);
writer.op_assign(dst, other_test);
writer.op_assign(dst, operator^(operator^(dst, src), other));
- writer.op_if(exp(src) == dst, [&]{
- writer.op_binary_expression(dst, src, BinaryOp::Add, src);
- }).op_else_if(exp(src) > dst, [&]{
- writer.op_binary_expression(dst, src, BinaryOp::Add, src);
- }).op_else([&] {
- writer.op_assign(dst, src);
- });
+ writer.op_if(exp(src) == dst, [&] { writer.op_binary_expression(dst, src, BinaryOp::Add, src); })
+ .op_else_if(exp(src) > dst, [&] { writer.op_binary_expression(dst, src, BinaryOp::Add, src); })
+ .op_else([&] { writer.op_assign(dst, src); });
writer.op_assign(dst, src + src * src);
writer.op_assign(dst, src * max(src, dst) + src);
@@ -106,13 +103,11 @@ int main()
writer.op_assign(dst, src ^ dst);
writer.op_assign(dst, ~src);
- writer.op_for_loop(dst < src, dst += src, [&]{
- writer.op_assign(dst, src + dst);
- });
+ writer.op_for_loop(dst < src, dst += src, [&] { writer.op_assign(dst, src + dst); });
writer.op_assign(dst += src);
writer.op_assign(dst += exp(src));
std::cout << "======== KERNEL ========" << std::endl;
std::cout << writer.generate_code() << std::endl;
-}
\ No newline at end of file
+}
diff --git a/compute_kernel_writer/prototype/include/ckw/Error.h b/compute_kernel_writer/prototype/include/ckw/Error.h
index b18944eac5..aab713c817 100644
--- a/compute_kernel_writer/prototype/include/ckw/Error.h
+++ b/compute_kernel_writer/prototype/include/ckw/Error.h
@@ -39,11 +39,11 @@ namespace ckw
#define CKW_ASSERT_MSG(cond, msg) \
do \
{ \
- if(!(cond)) \
+ if (!(cond)) \
{ \
throw ::std::runtime_error(msg); \
} \
- } while(false)
+ } while (false)
/** If the condition is not met, throw an std::runtime_error.
*
@@ -56,8 +56,7 @@ namespace ckw
* @param[in] precond The condition if is met requires the consequence must also be met.
* @param[in] cond The condition that is expected to be true if the precondition is true.
*/
-#define CKW_ASSERT_IF(precond, cond) \
- CKW_ASSERT_MSG(!(precond) || ((precond) && (cond)), #precond " |-> " #cond)
+#define CKW_ASSERT_IF(precond, cond) CKW_ASSERT_MSG(!(precond) || ((precond) && (cond)), #precond " |-> " #cond)
/** Mark the variables as unused.
*
diff --git a/compute_kernel_writer/prototype/include/ckw/KernelArgument.h b/compute_kernel_writer/prototype/include/ckw/KernelArgument.h
index af8bcde634..3384a20aef 100644
--- a/compute_kernel_writer/prototype/include/ckw/KernelArgument.h
+++ b/compute_kernel_writer/prototype/include/ckw/KernelArgument.h
@@ -26,6 +26,7 @@
#define CKW_PROTOTYPE_INCLUDE_CKW_KERNELARGUMENT_H
#include "ckw/TensorInfo.h"
+
#include <cstdint>
namespace ckw
@@ -98,7 +99,7 @@ private:
TensorComponentType tensor_component_type;
};
- SubId _sub_id{ 0 };
+ SubId _sub_id{0};
};
} // namespace ckw
diff --git a/compute_kernel_writer/prototype/include/ckw/KernelWriter.h b/compute_kernel_writer/prototype/include/ckw/KernelWriter.h
index fdb5fedc59..f9e0066f91 100644
--- a/compute_kernel_writer/prototype/include/ckw/KernelWriter.h
+++ b/compute_kernel_writer/prototype/include/ckw/KernelWriter.h
@@ -94,7 +94,9 @@ public:
*
* @return The @ref TensorOperand object.
*/
- TensorOperand &declare_tensor_argument(const std::string &name, const TensorInfo &info, TensorStorageType storage_type = TensorStorageType::BufferUint8Ptr);
+ TensorOperand &declare_tensor_argument(const std::string &name,
+ const TensorInfo &info,
+ TensorStorageType storage_type = TensorStorageType::BufferUint8Ptr);
/** Declare a compile-time constant scalar argument.
*
@@ -134,7 +136,10 @@ public:
* @param[in] sampler The tensor sampling information.
* @param[in] dilation_y Dilation in the Y dimension.
*/
- void op_load(TileOperand &tile, const TensorOperand &tensor, const TensorTileSampler &sampler, const TileOperand &dilation_y = TileOperand("dil_y", 1));
+ void op_load(TileOperand &tile,
+ const TensorOperand &tensor,
+ const TensorTileSampler &sampler,
+ const TileOperand &dilation_y = TileOperand("dil_y", 1));
/** Load the data from the tensor memory to the tile using the indirect buffer approach and respective of the sampling information.
*
@@ -221,7 +226,10 @@ public:
* @param[in] first The first argument tile.
* @param[in] second The second argument tile.
*/
- void op_binary_elementwise_function(const TileOperand &dst, BinaryFunction func, const TileOperand &first, const TileOperand &second);
+ void op_binary_elementwise_function(const TileOperand &dst,
+ BinaryFunction func,
+ const TileOperand &first,
+ const TileOperand &second);
/** Write function applied to scalar value: `<dst> = <func>(<first>, <second>, <third>);`.
*
@@ -231,7 +239,11 @@ public:
* @param[in] second The second argument tile.
* @param[in] third The third argument tile.
*/
- void op_ternary_elementwise_function(const TileOperand &dst, TernaryFunction func, const TileOperand &first, const TileOperand &second, const TileOperand &third);
+ void op_ternary_elementwise_function(const TileOperand &dst,
+ TernaryFunction func,
+ const TileOperand &first,
+ const TileOperand &second,
+ const TileOperand &third);
/** Write if-statement: `if(<lhs> <op> <rhs>) { <body> }`.
*
@@ -267,7 +279,13 @@ public:
* @param[in, out] update_value The value which is updated at every iteration.
* @param[in] body The body of the for-loop.
*/
- void op_for_loop(const TileOperand &var_name, BinaryOp cond_op, const TileOperand &cond_value_name, const TileOperand &update_var_name, AssignmentOp update_op, const TileOperand &update_value_name, const std::function<void()> &body);
+ void op_for_loop(const TileOperand &var_name,
+ BinaryOp cond_op,
+ const TileOperand &cond_value_name,
+ const TileOperand &update_var_name,
+ AssignmentOp update_op,
+ const TileOperand &update_value_name,
+ const std::function<void()> &body);
/** Write the return statement: `return;`
*/
@@ -311,8 +329,8 @@ private:
::std::unique_ptr<prototype::GpuKernelWriterAttribute> _impl_attr;
::std::unique_ptr<prototype::IGpuKernelWriter> _impl;
- int32_t _id_space{ 0 };
- int32_t _max_id_space{ 0 };
+ int32_t _id_space{0};
+ int32_t _max_id_space{0};
};
} // namespace ckw
diff --git a/compute_kernel_writer/prototype/include/ckw/KernelWriterHelper.h b/compute_kernel_writer/prototype/include/ckw/KernelWriterHelper.h
index a8be859680..3ba079bbc2 100644
--- a/compute_kernel_writer/prototype/include/ckw/KernelWriterHelper.h
+++ b/compute_kernel_writer/prototype/include/ckw/KernelWriterHelper.h
@@ -32,8 +32,6 @@
#include <iostream>
#include <type_traits>
-#include <iostream>
-
/*
* By including this header file you will be able to supplement the default
* Compute Kernel Writer API with additional syntax to help ease the use of CKW.
@@ -154,7 +152,9 @@ struct can_be_assigned<TileOperand &> : ::std::true_type
* @tparam TLeft The type of the destination of the assignment.
* @tparam TRight The type of the source assigned to the destination.
*/
-template <typename TLeft, typename TRight, typename = ::std::enable_if<can_be_operand<TRight>::value && can_be_assigned<TLeft>::value>>
+template <typename TLeft,
+ typename TRight,
+ typename = ::std::enable_if<can_be_operand<TRight>::value && can_be_assigned<TLeft>::value>>
struct Assignment
{
TLeft lhs;
@@ -173,7 +173,7 @@ struct Assignment
template <typename TLeft, typename TRight>
inline Assignment<TLeft, TRight> operator+=(TLeft &&lhs, TRight &&rhs)
{
- return Assignment<TLeft, TRight>{ std::forward<TLeft>(lhs), std::forward<TRight>(rhs), AssignmentOp::Increment };
+ return Assignment<TLeft, TRight>{std::forward<TLeft>(lhs), std::forward<TRight>(rhs), AssignmentOp::Increment};
}
/** Represents the expression: `\p lhs -= \p rhs`.
@@ -187,7 +187,7 @@ inline Assignment<TLeft, TRight> operator+=(TLeft &&lhs, TRight &&rhs)
template <typename TLeft, typename TRight>
inline Assignment<TLeft, TRight> operator-=(TLeft &&lhs, TRight &&rhs)
{
- return Assignment<TLeft, TRight>{ std::forward<TLeft>(lhs), std::forward<TRight>(rhs), AssignmentOp::Decrement };
+ return Assignment<TLeft, TRight>{std::forward<TLeft>(lhs), std::forward<TRight>(rhs), AssignmentOp::Decrement};
}
// ==================================================
@@ -221,7 +221,7 @@ struct can_be_operand<UnaryExpression<TLeft>> : ::std::true_type
template <typename TSrc>
inline UnaryExpression<TSrc> operator!(TSrc &&src)
{
- return UnaryExpression<TSrc>{ std::forward<TSrc>(src), UnaryOp::LogicalNot };
+ return UnaryExpression<TSrc>{std::forward<TSrc>(src), UnaryOp::LogicalNot};
}
/** Represents the expression: `~\p src`.
@@ -233,7 +233,7 @@ inline UnaryExpression<TSrc> operator!(TSrc &&src)
template <typename TSrc>
inline UnaryExpression<TSrc> operator~(TSrc &&src)
{
- return UnaryExpression<TSrc>{ std::forward<TSrc>(src), UnaryOp::BitwiseNot };
+ return UnaryExpression<TSrc>{std::forward<TSrc>(src), UnaryOp::BitwiseNot};
}
// ==================================================
@@ -247,7 +247,9 @@ inline UnaryExpression<TSrc> operator~(TSrc &&src)
* @tparam TLeft The type of the left argument of the expression.
* @tparam TRight The type of the right argument of the expression.
*/
-template <typename TLeft, typename TRight, typename = ::std::enable_if_t<can_be_operand<TLeft>::value && can_be_operand<TRight>::value>>
+template <typename TLeft,
+ typename TRight,
+ typename = ::std::enable_if_t<can_be_operand<TLeft>::value && can_be_operand<TRight>::value>>
struct BinaryExpression
{
TLeft lhs;
@@ -271,7 +273,7 @@ struct can_be_operand<BinaryExpression<TLeft, TRight>> : ::std::true_type
template <typename TLeft, typename TRight>
inline BinaryExpression<TLeft, TRight> operator+(TLeft &&lhs, TRight &&rhs)
{
- return BinaryExpression<TLeft, TRight>{ std::forward<TLeft>(lhs), std::forward<TRight>(rhs), BinaryOp::Add };
+ return BinaryExpression<TLeft, TRight>{std::forward<TLeft>(lhs), std::forward<TRight>(rhs), BinaryOp::Add};
}
/** Represents the expression: `\p lhs - \p rhs`.
@@ -285,7 +287,7 @@ inline BinaryExpression<TLeft, TRight> operator+(TLeft &&lhs, TRight &&rhs)
template <typename TLeft, typename TRight>
inline BinaryExpression<TLeft, TRight> operator-(TLeft &&lhs, TRight &&rhs)
{
- return BinaryExpression<TLeft, TRight>{ std::forward<TLeft>(lhs), std::forward<TRight>(rhs), BinaryOp::Sub };
+ return BinaryExpression<TLeft, TRight>{std::forward<TLeft>(lhs), std::forward<TRight>(rhs), BinaryOp::Sub};
}
/** Represents the expression: `\p lhs * \p rhs`.
@@ -299,7 +301,7 @@ inline BinaryExpression<TLeft, TRight> operator-(TLeft &&lhs, TRight &&rhs)
template <typename TLeft, typename TRight>
inline BinaryExpression<TLeft, TRight> operator*(TLeft &&lhs, TRight &&rhs)
{
- return BinaryExpression<TLeft, TRight>{ std::forward<TLeft>(lhs), std::forward<TRight>(rhs), BinaryOp::Mul };
+ return BinaryExpression<TLeft, TRight>{std::forward<TLeft>(lhs), std::forward<TRight>(rhs), BinaryOp::Mul};
}
/** Represents the expression: `\p lhs / \p rhs`.
@@ -313,7 +315,7 @@ inline BinaryExpression<TLeft, TRight> operator*(TLeft &&lhs, TRight &&rhs)
template <typename TLeft, typename TRight>
inline BinaryExpression<TLeft, TRight> operator/(TLeft &&lhs, TRight &&rhs)
{
- return BinaryExpression<TLeft, TRight>{ std::forward<TLeft>(lhs), std::forward<TRight>(rhs), BinaryOp::Div };
+ return BinaryExpression<TLeft, TRight>{std::forward<TLeft>(lhs), std::forward<TRight>(rhs), BinaryOp::Div};
}
/** Represents the expression: `\p lhs % \p rhs`.
@@ -327,7 +329,7 @@ inline BinaryExpression<TLeft, TRight> operator/(TLeft &&lhs, TRight &&rhs)
template <typename TLeft, typename TRight>
inline BinaryExpression<TLeft, TRight> operator%(TLeft &&lhs, TRight &&rhs)
{
- return BinaryExpression<TLeft, TRight>{ std::forward<TLeft>(lhs), std::forward<TRight>(rhs), BinaryOp::Mod };
+ return BinaryExpression<TLeft, TRight>{std::forward<TLeft>(lhs), std::forward<TRight>(rhs), BinaryOp::Mod};
}
/** Represents the expression: `\p lhs == \p rhs`.
@@ -341,7 +343,7 @@ inline BinaryExpression<TLeft, TRight> operator%(TLeft &&lhs, TRight &&rhs)
template <typename TLeft, typename TRight>
inline BinaryExpression<TLeft, TRight> operator==(TLeft &&lhs, TRight &&rhs)
{
- return BinaryExpression<TLeft, TRight>{ std::forward<TLeft>(lhs), std::forward<TRight>(rhs), BinaryOp::Equal };
+ return BinaryExpression<TLeft, TRight>{std::forward<TLeft>(lhs), std::forward<TRight>(rhs), BinaryOp::Equal};
}
/** Represents the expression: `\p lhs < \p rhs`.
@@ -355,7 +357,7 @@ inline BinaryExpression<TLeft, TRight> operator==(TLeft &&lhs, TRight &&rhs)
template <typename TLeft, typename TRight>
inline BinaryExpression<TLeft, TRight> operator<(TLeft &&lhs, TRight &&rhs)
{
- return BinaryExpression<TLeft, TRight>{ std::forward<TLeft>(lhs), std::forward<TRight>(rhs), BinaryOp::Less };
+ return BinaryExpression<TLeft, TRight>{std::forward<TLeft>(lhs), std::forward<TRight>(rhs), BinaryOp::Less};
}
/** Represents the expression: `\p lhs <= \p rhs`.
@@ -369,7 +371,7 @@ inline BinaryExpression<TLeft, TRight> operator<(TLeft &&lhs, TRight &&rhs)
template <typename TLeft, typename TRight>
inline BinaryExpression<TLeft, TRight> operator<=(TLeft &&lhs, TRight &&rhs)
{
- return BinaryExpression<TLeft, TRight>{ std::forward<TLeft>(lhs), std::forward<TRight>(rhs), BinaryOp::LessEqual };
+ return BinaryExpression<TLeft, TRight>{std::forward<TLeft>(lhs), std::forward<TRight>(rhs), BinaryOp::LessEqual};
}
/** Represents the expression: `\p lhs > \p rhs`.
@@ -383,7 +385,7 @@ inline BinaryExpression<TLeft, TRight> operator<=(TLeft &&lhs, TRight &&rhs)
template <typename TLeft, typename TRight>
inline BinaryExpression<TLeft, TRight> operator>(TLeft &&lhs, TRight &&rhs)
{
- return BinaryExpression<TLeft, TRight>{ std::forward<TLeft>(lhs), std::forward<TRight>(rhs), BinaryOp::Greater };
+ return BinaryExpression<TLeft, TRight>{std::forward<TLeft>(lhs), std::forward<TRight>(rhs), BinaryOp::Greater};
}
/** Represents the expression: `\p lhs >= \p rhs`.
@@ -397,7 +399,7 @@ inline BinaryExpression<TLeft, TRight> operator>(TLeft &&lhs, TRight &&rhs)
template <typename TLeft, typename TRight>
inline BinaryExpression<TLeft, TRight> operator>=(TLeft &&lhs, TRight &&rhs)
{
- return BinaryExpression<TLeft, TRight>{ std::forward<TLeft>(lhs), std::forward<TRight>(rhs), BinaryOp::GreaterEqual };
+ return BinaryExpression<TLeft, TRight>{std::forward<TLeft>(lhs), std::forward<TRight>(rhs), BinaryOp::GreaterEqual};
}
/** Represents the expression: `\p lhs ^ \p rhs`.
@@ -411,7 +413,7 @@ inline BinaryExpression<TLeft, TRight> operator>=(TLeft &&lhs, TRight &&rhs)
template <typename TLeft, typename TRight>
inline BinaryExpression<TLeft, TRight> operator^(TLeft &&lhs, TRight &&rhs)
{
- return BinaryExpression<TLeft, TRight>{ std::forward<TLeft>(lhs), std::forward<TRight>(rhs), BinaryOp::BitwiseXOR };
+ return BinaryExpression<TLeft, TRight>{std::forward<TLeft>(lhs), std::forward<TRight>(rhs), BinaryOp::BitwiseXOR};
}
/** Represents the expression: `\p lhs && \p rhs`.
@@ -425,7 +427,7 @@ inline BinaryExpression<TLeft, TRight> operator^(TLeft &&lhs, TRight &&rhs)
template <typename TLeft, typename TRight>
inline BinaryExpression<TLeft, TRight> logical_and(TLeft &&lhs, TRight &&rhs)
{
- return BinaryExpression<TLeft, TRight>{ std::forward<TLeft>(lhs), std::forward<TRight>(rhs), BinaryOp::LogicalAnd };
+ return BinaryExpression<TLeft, TRight>{std::forward<TLeft>(lhs), std::forward<TRight>(rhs), BinaryOp::LogicalAnd};
}
/** Represents the expression: `\p lhs && \p rhs`.
@@ -440,7 +442,7 @@ template <typename TLeft, typename TRight, typename... TOps>
inline BinaryExpression<BinaryExpression<TLeft, TRight>, TOps...> logical_and(TLeft &&lhs, TRight &&rhs, TOps &&...ops)
{
return logical_and(
- BinaryExpression<TLeft, TRight>{ std::forward<TLeft>(lhs), std::forward<TRight>(rhs), BinaryOp::LogicalAnd },
+ BinaryExpression<TLeft, TRight>{std::forward<TLeft>(lhs), std::forward<TRight>(rhs), BinaryOp::LogicalAnd},
std::forward<TOps>(ops)...);
}
@@ -455,7 +457,7 @@ inline BinaryExpression<BinaryExpression<TLeft, TRight>, TOps...> logical_and(TL
template <typename TLeft, typename TRight>
inline BinaryExpression<TLeft, TRight> logical_or(TLeft &&lhs, TRight &&rhs)
{
- return BinaryExpression<TLeft, TRight>{ std::forward<TLeft>(lhs), std::forward<TRight>(rhs), BinaryOp::LogicalOr };
+ return BinaryExpression<TLeft, TRight>{std::forward<TLeft>(lhs), std::forward<TRight>(rhs), BinaryOp::LogicalOr};
}
/** Represents the expression: `\p lhs || \p rhs`.
@@ -470,7 +472,7 @@ template <typename TLeft, typename TRight, typename... TOps>
inline BinaryExpression<BinaryExpression<TLeft, TRight>, TOps...> logical_or(TLeft &&lhs, TRight &&rhs, TOps &&...ops)
{
return logical_or(
- BinaryExpression<TLeft, TRight>{ std::forward<TLeft>(lhs), std::forward<TRight>(rhs), BinaryOp::LogicalOr },
+ BinaryExpression<TLeft, TRight>{std::forward<TLeft>(lhs), std::forward<TRight>(rhs), BinaryOp::LogicalOr},
std::forward<TOps>(ops)...);
}
@@ -505,7 +507,7 @@ struct can_be_operand<UnaryElementwiseFunction<TLeft>> : ::std::true_type
template <typename TSrc>
UnaryElementwiseFunction<TSrc> exp(TSrc &&src)
{
- return UnaryElementwiseFunction<TSrc>{ std::forward<TSrc>(src), UnaryFunction::Exp };
+ return UnaryElementwiseFunction<TSrc>{std::forward<TSrc>(src), UnaryFunction::Exp};
}
/** Represents the expression: `tanh(\p src)`.
@@ -517,7 +519,7 @@ UnaryElementwiseFunction<TSrc> exp(TSrc &&src)
template <typename TSrc>
UnaryElementwiseFunction<TSrc> tanh(TSrc &&src)
{
- return UnaryElementwiseFunction<TSrc>{ std::forward<TSrc>(src), UnaryFunction::Tanh };
+ return UnaryElementwiseFunction<TSrc>{std::forward<TSrc>(src), UnaryFunction::Tanh};
}
/** Represents the expression: `sqrt(\p src)`.
@@ -529,7 +531,7 @@ UnaryElementwiseFunction<TSrc> tanh(TSrc &&src)
template <typename TSrc>
UnaryElementwiseFunction<TSrc> sqrt(TSrc &&src)
{
- return UnaryElementwiseFunction<TSrc>{ std::forward<TSrc>(src), UnaryFunction::Sqrt };
+ return UnaryElementwiseFunction<TSrc>{std::forward<TSrc>(src), UnaryFunction::Sqrt};
}
/** Represents the expression: `erf(\p src)`.
@@ -541,7 +543,7 @@ UnaryElementwiseFunction<TSrc> sqrt(TSrc &&src)
template <typename TSrc>
UnaryElementwiseFunction<TSrc> erf(TSrc &&src)
{
- return UnaryElementwiseFunction<TSrc>{ std::forward<TSrc>(src), UnaryFunction::Erf };
+ return UnaryElementwiseFunction<TSrc>{std::forward<TSrc>(src), UnaryFunction::Erf};
}
/** Represents the expression: `fabs(\p src)`.
@@ -553,7 +555,7 @@ UnaryElementwiseFunction<TSrc> erf(TSrc &&src)
template <typename TSrc>
UnaryElementwiseFunction<TSrc> fabs(TSrc &&src)
{
- return UnaryElementwiseFunction<TSrc>{ std::forward<TSrc>(src), UnaryFunction::Fabs };
+ return UnaryElementwiseFunction<TSrc>{std::forward<TSrc>(src), UnaryFunction::Fabs};
}
/** Represents the expression: `log(\p src)`.
@@ -565,7 +567,7 @@ UnaryElementwiseFunction<TSrc> fabs(TSrc &&src)
template <typename TSrc>
UnaryElementwiseFunction<TSrc> log(TSrc &&src)
{
- return UnaryElementwiseFunction<TSrc>{ std::forward<TSrc>(src), UnaryFunction::Log };
+ return UnaryElementwiseFunction<TSrc>{std::forward<TSrc>(src), UnaryFunction::Log};
}
/** Represents the expression: `round(\p src)`.
@@ -577,7 +579,7 @@ UnaryElementwiseFunction<TSrc> log(TSrc &&src)
template <typename TSrc>
UnaryElementwiseFunction<TSrc> round(TSrc &&src)
{
- return UnaryElementwiseFunction<TSrc>{ std::forward<TSrc>(src), UnaryFunction::Round };
+ return UnaryElementwiseFunction<TSrc>{std::forward<TSrc>(src), UnaryFunction::Round};
}
/** Represents the expression: `sizeof(\p src)`.
@@ -589,7 +591,7 @@ UnaryElementwiseFunction<TSrc> round(TSrc &&src)
template <typename TSrc>
UnaryElementwiseFunction<TSrc> sizeOf(TSrc &&src)
{
- return UnaryElementwiseFunction<TSrc>{ std::forward<TSrc>(src), UnaryFunction::SizeOf };
+ return UnaryElementwiseFunction<TSrc>{std::forward<TSrc>(src), UnaryFunction::SizeOf};
}
// ==================================================
@@ -603,7 +605,9 @@ UnaryElementwiseFunction<TSrc> sizeOf(TSrc &&src)
* @tparam TFirst The type of the left argument of the function.
* @tparam TSecond The type of the right argument of the function.
*/
-template <typename TFirst, typename TSecond, typename = ::std::enable_if<can_be_operand<TFirst>::value && can_be_operand<TSecond>::value>>
+template <typename TFirst,
+ typename TSecond,
+ typename = ::std::enable_if<can_be_operand<TFirst>::value && can_be_operand<TSecond>::value>>
struct BinaryElementwiseFunction
{
TFirst first;
@@ -627,7 +631,8 @@ struct can_be_operand<BinaryElementwiseFunction<TFirst, TSecond>> : ::std::true_
template <typename TFirst, typename TSecond>
BinaryElementwiseFunction<TFirst, TSecond> max(TFirst &&first, TSecond &&second)
{
- return BinaryElementwiseFunction<TFirst, TSecond>{ std::forward<TFirst>(first), std::forward<TSecond>(second), BinaryFunction::Max };
+ return BinaryElementwiseFunction<TFirst, TSecond>{std::forward<TFirst>(first), std::forward<TSecond>(second),
+ BinaryFunction::Max};
}
/** Represents the function call: `min(\p first, \p second)`.
@@ -641,7 +646,8 @@ BinaryElementwiseFunction<TFirst, TSecond> max(TFirst &&first, TSecond &&second)
template <typename TFirst, typename TSecond>
BinaryElementwiseFunction<TFirst, TSecond> min(TFirst &&first, TSecond &&second)
{
- return BinaryElementwiseFunction<TFirst, TSecond>{ std::forward<TFirst>(first), std::forward<TSecond>(second), BinaryFunction::Min };
+ return BinaryElementwiseFunction<TFirst, TSecond>{std::forward<TFirst>(first), std::forward<TSecond>(second),
+ BinaryFunction::Min};
}
// ==================================================
@@ -656,7 +662,11 @@ BinaryElementwiseFunction<TFirst, TSecond> min(TFirst &&first, TSecond &&second)
* @tparam TSecond The type of the second argument to the function.
* @tparam TThird The type of the third argument to the function.
*/
-template <typename TFirst, typename TSecond, typename TThird, typename = ::std::enable_if<can_be_operand<TFirst>::value && can_be_operand<TSecond>::value && can_be_operand<TThird>::value>>
+template <typename TFirst,
+ typename TSecond,
+ typename TThird,
+ typename = ::std::enable_if<can_be_operand<TFirst>::value && can_be_operand<TSecond>::value &&
+ can_be_operand<TThird>::value>>
struct TernaryElementwiseFunction
{
TFirst first;
@@ -683,7 +693,9 @@ struct can_be_operand<TernaryElementwiseFunction<TFirst, TSecond, TThird>> : ::s
template <typename TFirst, typename TSecond, typename TThird>
TernaryElementwiseFunction<TFirst, TSecond, TThird> select(TFirst &&first, TSecond &&second, TThird &&third)
{
- return TernaryElementwiseFunction<TFirst, TSecond, TThird>{ std::forward<TFirst>(first), std::forward<TSecond>(second), std::forward<TThird>(third), TernaryFunction::Select };
+ return TernaryElementwiseFunction<TFirst, TSecond, TThird>{std::forward<TFirst>(first),
+ std::forward<TSecond>(second),
+ std::forward<TThird>(third), TernaryFunction::Select};
}
/** Helper class used to extend a KernelWriter with additional functionality
@@ -715,7 +727,8 @@ public:
* @param[in] cond The BinaryExpression representing the condition.
* @param[in] body The body of the if-statement.
*/
- KernelWriterHelper<TWriter> &op_if(const BinaryExpression<TileOperand &, TileOperand &> &cond, const std::function<void()> &body)
+ KernelWriterHelper<TWriter> &op_if(const BinaryExpression<TileOperand &, TileOperand &> &cond,
+ const std::function<void()> &body)
{
TWriter::op_if(cond.lhs, cond.opcode, cond.rhs, body);
return *this;
@@ -730,7 +743,8 @@ public:
* @param[in] body The body of the if-statement.
*/
template <typename TRight>
- KernelWriterHelper<TWriter> &op_if(const BinaryExpression<TileOperand &, TRight> &cond, const std::function<void()> &body)
+ KernelWriterHelper<TWriter> &op_if(const BinaryExpression<TileOperand &, TRight> &cond,
+ const std::function<void()> &body)
{
auto &tmp1 = declare_temp_tile(cond.lhs.tile_info());
op_assign(tmp1, cond.rhs);
@@ -747,7 +761,8 @@ public:
* @param[in] body The body of the if-statement.
*/
template <typename TLeft>
- KernelWriterHelper<TWriter> &op_if(const BinaryExpression<TLeft, TileOperand &> &cond, const std::function<void()> &body)
+ KernelWriterHelper<TWriter> &op_if(const BinaryExpression<TLeft, TileOperand &> &cond,
+ const std::function<void()> &body)
{
auto &tmp1 = declare_temp_tile(cond.rhs.tile_info());
op_assign(tmp1, cond.lhs);
@@ -766,7 +781,8 @@ public:
* @param[in] cond The BinaryExpression representing the condition.
* @param[in] body The body of the else-if-statement.
*/
- KernelWriterHelper<TWriter> &op_else_if(const BinaryExpression<TileOperand &, TileOperand &> &cond, const std::function<void()> &body)
+ KernelWriterHelper<TWriter> &op_else_if(const BinaryExpression<TileOperand &, TileOperand &> &cond,
+ const std::function<void()> &body)
{
TWriter::op_else_if(cond.lhs, cond.opcode, cond.rhs, body);
return *this;
@@ -781,7 +797,8 @@ public:
* @param[in] body The body of the else-if-statement.
*/
template <typename TRight>
- KernelWriterHelper<TWriter> &op_else_if(const BinaryExpression<TileOperand &, TRight> &cond, const std::function<void()> &body)
+ KernelWriterHelper<TWriter> &op_else_if(const BinaryExpression<TileOperand &, TRight> &cond,
+ const std::function<void()> &body)
{
auto &tmp1 = declare_temp_tile(cond.lhs.tile_info());
op_assign(tmp1, cond.rhs);
@@ -798,7 +815,8 @@ public:
* @param[in] body The body of the else-if-statement.
*/
template <typename TLeft>
- KernelWriterHelper<TWriter> &op_else_if(const BinaryExpression<TLeft, TileOperand &> &cond, const std::function<void()> &body)
+ KernelWriterHelper<TWriter> &op_else_if(const BinaryExpression<TLeft, TileOperand &> &cond,
+ const std::function<void()> &body)
{
auto &tmp1 = declare_temp_tile(cond.rhs.tile_info());
op_assign(tmp1, cond.lhs);
@@ -823,7 +841,9 @@ public:
* @param[in] updater The Assignment representing the updater.
* @param[in] body The body of the for-loop.
*/
- void op_for_loop(const BinaryExpression<TileOperand &, TileOperand &> &cond, const Assignment<TileOperand &, TileOperand &> &updater, const std::function<void()> &body)
+ void op_for_loop(const BinaryExpression<TileOperand &, TileOperand &> &cond,
+ const Assignment<TileOperand &, TileOperand &> &updater,
+ const std::function<void()> &body)
{
TWriter::op_for_loop(cond.lhs, cond.opcode, cond.rhs, updater.lhs, updater.opcode, updater.rhs, body);
}
@@ -1029,7 +1049,8 @@ public:
* @param[in] dst The tile which is assigned to.
* @param[in] exp The TernaryElementwiseFunction representing the expression to be evaluated and assigned.
*/
- void op_assign(const TileOperand &dst, const TernaryElementwiseFunction<TileOperand &, TileOperand &, TileOperand &> &exp)
+ void op_assign(const TileOperand &dst,
+ const TernaryElementwiseFunction<TileOperand &, TileOperand &, TileOperand &> &exp)
{
TWriter::op_ternary_elementwise_function(dst, exp.opcode, exp.first, exp.second, exp.third);
}
@@ -1169,11 +1190,11 @@ public:
*/
void op_assign(const Assignment<TileOperand &, TileOperand &> &exp)
{
- if(exp.opcode == AssignmentOp::Increment)
+ if (exp.opcode == AssignmentOp::Increment)
{
TWriter::op_binary_expression(exp.lhs, exp.lhs, BinaryOp::Add, exp.rhs);
}
- else if(exp.opcode == AssignmentOp::Decrement)
+ else if (exp.opcode == AssignmentOp::Decrement)
{
TWriter::op_binary_expression(exp.lhs, exp.lhs, BinaryOp::Sub, exp.rhs);
}
@@ -1192,7 +1213,7 @@ public:
{
auto &tmp1 = declare_temp_tile(exp.lhs.tile_info());
op_assign(tmp1, exp.rhs);
- op_assign(Assignment<TileOperand &, TileOperand &>{ exp.lhs, tmp1, exp.opcode });
+ op_assign(Assignment<TileOperand &, TileOperand &>{exp.lhs, tmp1, exp.opcode});
}
private:
@@ -1241,11 +1262,8 @@ private:
template <typename... TOps, typename = ::std::enable_if_t<std::is_same<TOps..., TileInfo>::value>>
TileInfo get_largest_size(const TileInfo &first, const TileInfo &second, const TOps &...ops)
{
- TileInfo largest = {
- first.data_type(),
- std::max(first.width(), second.width()),
- std::max(first.height(), second.height())
- };
+ TileInfo largest = {first.data_type(), std::max(first.width(), second.width()),
+ std::max(first.height(), second.height())};
return get_largest_size(largest, ops...);
}
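
For orientation, a minimal usage sketch of the reflowed KernelWriterHelper API above. Assumptions, since they fall outside this hunk: `writer` is a KernelWriterHelper over a concrete writer; the tiles `i`, `n`, `step`, `acc`, `zero` were declared beforehand; BinaryExpression is brace-initialized as {lhs, rhs, opcode}, mirroring the Assignment initialization shown later in this file; and enumerator names BinaryOp::Less / BinaryOp::Greater are assumed:

    // Emits roughly: for (; i < n; i += step) { if (acc > zero) { ... } }
    writer.op_for_loop(
        BinaryExpression<TileOperand &, TileOperand &>{i, n, BinaryOp::Less},        // condition
        Assignment<TileOperand &, TileOperand &>{i, step, AssignmentOp::Increment},  // updater
        [&]()
        {
            writer.op_if(BinaryExpression<TileOperand &, TileOperand &>{acc, zero, BinaryOp::Greater},
                         [&]() { /* then-branch operations */ });
        });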
diff --git a/compute_kernel_writer/prototype/include/ckw/OperandBase.h b/compute_kernel_writer/prototype/include/ckw/OperandBase.h
index 06d9f82756..9842127339 100644
--- a/compute_kernel_writer/prototype/include/ckw/OperandBase.h
+++ b/compute_kernel_writer/prototype/include/ckw/OperandBase.h
@@ -26,6 +26,7 @@
#define CKW_PROTOTYPE_INCLUDE_CKW_OPERANDBASE_H
#include "ckw/types/DataType.h"
+
#include <string>
namespace ckw
diff --git a/compute_kernel_writer/prototype/include/ckw/ScalarValue.h b/compute_kernel_writer/prototype/include/ckw/ScalarValue.h
index 16c3f6d441..2a9c42acc8 100644
--- a/compute_kernel_writer/prototype/include/ckw/ScalarValue.h
+++ b/compute_kernel_writer/prototype/include/ckw/ScalarValue.h
@@ -59,9 +59,9 @@ public:
_size = sizeof(T);
- if(::std::is_integral<T>::value)
+ if (::std::is_integral<T>::value)
{
- if(::std::is_signed<T>::value)
+ if (::std::is_signed<T>::value)
{
_type = Type::INT;
_value.i64 = value;
@@ -90,9 +90,9 @@ public:
CKW_ASSERT(::std::is_integral<T>::value || ::std::is_floating_point<T>::value);
CKW_ASSERT(sizeof(T) >= _size);
- if(::std::is_integral<T>::value)
+ if (::std::is_integral<T>::value)
{
- if(::std::is_signed<T>::value)
+ if (::std::is_signed<T>::value)
{
CKW_ASSERT(_type == Type::INT || _type == Type::UINT);
CKW_ASSERT_IF(_type == Type::UINT, sizeof(T) > _size);
diff --git a/compute_kernel_writer/prototype/include/ckw/TensorInfo.h b/compute_kernel_writer/prototype/include/ckw/TensorInfo.h
index 55f8101a53..24da7dc8ab 100644
--- a/compute_kernel_writer/prototype/include/ckw/TensorInfo.h
+++ b/compute_kernel_writer/prototype/include/ckw/TensorInfo.h
@@ -143,10 +143,10 @@ public:
int32_t id() const;
private:
- TensorShape _shape{ { 0 } };
- DataType _dt{ DataType::Unknown };
- TensorDataLayout _dl{ TensorDataLayout::Unknown };
- int32_t _id{ -1 };
+ TensorShape _shape{{0}};
+ DataType _dt{DataType::Unknown};
+ TensorDataLayout _dl{TensorDataLayout::Unknown};
+ int32_t _id{-1};
};
} // namespace ckw
diff --git a/compute_kernel_writer/prototype/include/ckw/TensorOperand.h b/compute_kernel_writer/prototype/include/ckw/TensorOperand.h
index 6d88932c66..c221b449fa 100644
--- a/compute_kernel_writer/prototype/include/ckw/TensorOperand.h
+++ b/compute_kernel_writer/prototype/include/ckw/TensorOperand.h
@@ -139,21 +139,21 @@ private:
TensorInfo _info;
TensorStorageType _storage_type;
- TileOperand *_tile{ nullptr };
+ TileOperand *_tile{nullptr};
TensorTileSampler _tile_sampler{};
- ::std::unique_ptr<TensorComponentOperand> _stride1{ nullptr };
- ::std::unique_ptr<TensorComponentOperand> _stride2{ nullptr };
- ::std::unique_ptr<TensorComponentOperand> _stride3{ nullptr };
- ::std::unique_ptr<TensorComponentOperand> _stride4{ nullptr };
- ::std::unique_ptr<TensorComponentOperand> _dim0{ nullptr };
- ::std::unique_ptr<TensorComponentOperand> _dim1{ nullptr };
- ::std::unique_ptr<TensorComponentOperand> _dim2{ nullptr };
- ::std::unique_ptr<TensorComponentOperand> _dim3{ nullptr };
- ::std::unique_ptr<TensorComponentOperand> _dim4{ nullptr };
- ::std::unique_ptr<TensorComponentOperand> _dim1_dim2{ nullptr };
- ::std::unique_ptr<TensorComponentOperand> _dim1_dim2_dim3{ nullptr };
- ::std::unique_ptr<TensorComponentOperand> _offset_first_element_in_bytes{ nullptr };
+ ::std::unique_ptr<TensorComponentOperand> _stride1{nullptr};
+ ::std::unique_ptr<TensorComponentOperand> _stride2{nullptr};
+ ::std::unique_ptr<TensorComponentOperand> _stride3{nullptr};
+ ::std::unique_ptr<TensorComponentOperand> _stride4{nullptr};
+ ::std::unique_ptr<TensorComponentOperand> _dim0{nullptr};
+ ::std::unique_ptr<TensorComponentOperand> _dim1{nullptr};
+ ::std::unique_ptr<TensorComponentOperand> _dim2{nullptr};
+ ::std::unique_ptr<TensorComponentOperand> _dim3{nullptr};
+ ::std::unique_ptr<TensorComponentOperand> _dim4{nullptr};
+ ::std::unique_ptr<TensorComponentOperand> _dim1_dim2{nullptr};
+ ::std::unique_ptr<TensorComponentOperand> _dim1_dim2_dim3{nullptr};
+ ::std::unique_ptr<TensorComponentOperand> _offset_first_element_in_bytes{nullptr};
};
// =================================================================================================
diff --git a/compute_kernel_writer/prototype/include/ckw/TensorTileSampler.h b/compute_kernel_writer/prototype/include/ckw/TensorTileSampler.h
index e1bf0c52b8..606dec3535 100644
--- a/compute_kernel_writer/prototype/include/ckw/TensorTileSampler.h
+++ b/compute_kernel_writer/prototype/include/ckw/TensorTileSampler.h
@@ -26,6 +26,7 @@
#define CKW_PROTOTYPE_INCLUDE_CKW_TENSORTILESAMPLER_H
#include "ckw/types/TensorSamplerTypes.h"
+
#include <functional>
namespace ckw
@@ -55,12 +56,14 @@ public:
* @param[in] address_mode_y The address mode of the y dimension.
* @param[in] address_mode_z The address mode of the z dimension.
*/
- TensorTileSampler(
- TileOperand &x, TileOperand &y, TileOperand &z, TileOperand &b,
- TensorSamplerFormat format,
- TensorSamplerAddressModeX address_mode_x,
- TensorSamplerAddressModeY address_mode_y,
- TensorSamplerAddressModeZ address_mode_z);
+ TensorTileSampler(TileOperand &x,
+ TileOperand &y,
+ TileOperand &z,
+ TileOperand &b,
+ TensorSamplerFormat format,
+ TensorSamplerAddressModeX address_mode_x,
+ TensorSamplerAddressModeY address_mode_y,
+ TensorSamplerAddressModeZ address_mode_z);
/** Initialize a new instance of @ref TensorTileSampler class.
*
@@ -75,13 +78,16 @@ public:
* @param[in] address_mode_y The address mode of the y dimension.
* @param[in] address_mode_z The address mode of the z dimension.
*/
- TensorTileSampler(
- TileOperand &x, TileOperand &y, TileOperand &z, TileOperand &b,
- int32_t height, int32_t width,
- TensorSamplerFormat format,
- TensorSamplerAddressModeX address_mode_x,
- TensorSamplerAddressModeY address_mode_y,
- TensorSamplerAddressModeZ address_mode_z);
+ TensorTileSampler(TileOperand &x,
+ TileOperand &y,
+ TileOperand &z,
+ TileOperand &b,
+ int32_t height,
+ int32_t width,
+ TensorSamplerFormat format,
+ TensorSamplerAddressModeX address_mode_x,
+ TensorSamplerAddressModeY address_mode_y,
+ TensorSamplerAddressModeZ address_mode_z);
/** Get the coordinate in the x dimension. */
const TileOperand &x() const;
@@ -144,18 +150,18 @@ public:
TensorTileSampler &address_mode_z(TensorSamplerAddressModeZ address_mode_z);
private:
- TileOperand *_x{ nullptr };
- TileOperand *_y{ nullptr };
- TileOperand *_z{ nullptr };
- TileOperand *_b{ nullptr };
-
- int32_t _height{ 0 };
- int32_t _width{ 0 };
-
- TensorSamplerFormat _format{ TensorSamplerFormat::Unknown };
- TensorSamplerAddressModeX _address_mode_x{ TensorSamplerAddressModeX::Unknown };
- TensorSamplerAddressModeY _address_mode_y{ TensorSamplerAddressModeY::Unknown };
- TensorSamplerAddressModeZ _address_mode_z{ TensorSamplerAddressModeZ::Unknown };
+ TileOperand *_x{nullptr};
+ TileOperand *_y{nullptr};
+ TileOperand *_z{nullptr};
+ TileOperand *_b{nullptr};
+
+ int32_t _height{0};
+ int32_t _width{0};
+
+ TensorSamplerFormat _format{TensorSamplerFormat::Unknown};
+ TensorSamplerAddressModeX _address_mode_x{TensorSamplerAddressModeX::Unknown};
+ TensorSamplerAddressModeY _address_mode_y{TensorSamplerAddressModeY::Unknown};
+ TensorSamplerAddressModeZ _address_mode_z{TensorSamplerAddressModeZ::Unknown};
};
} // namespace ckw
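
A short construction sketch for the second (height/width) overload above. The coordinate tiles `x`, `y`, `z`, `b` are hypothetical and must be declared beforehand; note that `height` precedes `width` in the parameter list:

    TensorTileSampler sampler(x, y, z, b,
                              4, 16, // height, then width
                              TensorSamplerFormat::C_WH_1,
                              TensorSamplerAddressModeX::None,
                              TensorSamplerAddressModeY::ClampToBorder,
                              TensorSamplerAddressModeZ::None);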
diff --git a/compute_kernel_writer/prototype/include/ckw/TileInfo.h b/compute_kernel_writer/prototype/include/ckw/TileInfo.h
index de9e47af2b..e0d064169e 100644
--- a/compute_kernel_writer/prototype/include/ckw/TileInfo.h
+++ b/compute_kernel_writer/prototype/include/ckw/TileInfo.h
@@ -83,7 +83,7 @@ public:
DataType data_type() const;
private:
- DataType _dt{ DataType::Unknown };
+ DataType _dt{DataType::Unknown};
TileShape _shape{};
};
diff --git a/compute_kernel_writer/prototype/include/ckw/types/Functions.h b/compute_kernel_writer/prototype/include/ckw/types/Functions.h
index bc1f85c188..c6afaa0ac8 100644
--- a/compute_kernel_writer/prototype/include/ckw/types/Functions.h
+++ b/compute_kernel_writer/prototype/include/ckw/types/Functions.h
@@ -32,14 +32,14 @@ namespace ckw
enum class UnaryFunction : int32_t
{
- Exp = 0x0000,
- Tanh = 0x0001,
- Sqrt = 0x0002,
- Erf = 0x0003,
- Fabs = 0x0004,
- Log = 0x0006,
- Round = 0x0007,
- Floor = 0x0008,
+ Exp = 0x0000,
+ Tanh = 0x0001,
+ Sqrt = 0x0002,
+ Erf = 0x0003,
+ Fabs = 0x0004,
+ Log = 0x0006,
+ Round = 0x0007,
+ Floor = 0x0008,
// Misc
SizeOf = 0x0009,
@@ -47,8 +47,8 @@ enum class UnaryFunction : int32_t
enum class BinaryFunction : int32_t
{
- Min = 0x0000,
- Max = 0x0001,
+ Min = 0x0000,
+ Max = 0x0001,
};
enum class TernaryFunction : int32_t
diff --git a/compute_kernel_writer/prototype/include/ckw/types/Operators.h b/compute_kernel_writer/prototype/include/ckw/types/Operators.h
index 43241170a5..b560996837 100644
--- a/compute_kernel_writer/prototype/include/ckw/types/Operators.h
+++ b/compute_kernel_writer/prototype/include/ckw/types/Operators.h
@@ -69,8 +69,8 @@ enum class BinaryOp : int32_t
enum class AssignmentOp : int32_t
{
// Unary
- Increment = 0x0000, // +=
- Decrement = 0x0001, // -=
+ Increment = 0x0000, // +=
+ Decrement = 0x0001, // -=
};
} // namespace ckw
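
These two enumerators are consumed by the KernelWriterHelper::op_assign overload earlier in this diff, which lowers them to plain binary expressions. An illustrative restatement of that mapping (not library code):

    // dst += rhs  becomes  dst = dst + rhs;   dst -= rhs  becomes  dst = dst - rhs
    inline BinaryOp to_binary_op(AssignmentOp op)
    {
        return op == AssignmentOp::Increment ? BinaryOp::Add : BinaryOp::Sub;
    }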
diff --git a/compute_kernel_writer/prototype/include/ckw/types/TensorSamplerTypes.h b/compute_kernel_writer/prototype/include/ckw/types/TensorSamplerTypes.h
index 836bd13c95..63405a0764 100644
--- a/compute_kernel_writer/prototype/include/ckw/types/TensorSamplerTypes.h
+++ b/compute_kernel_writer/prototype/include/ckw/types/TensorSamplerTypes.h
@@ -39,34 +39,38 @@ enum class TensorSamplerFormat : int32_t
enum class TensorSamplerAddressModeX : int32_t
{
- Unknown = 0,
- None = 1, // The user guarantees that the X coordinate is always in-bound
- OverlappingMin = 2 // (FIXED shapes only) Reduce the load/store length when x == 0 (MIN). The load length will be width % original length
- // Leftover elements can be handled using overlapping. This involves processing some of the elements in the array twice.
+ Unknown = 0,
+ None = 1, // The user guarantees that the X coordinate is always in-bound
+ OverlappingMin =
+ 2 // (FIXED shapes only) Reduce the load/store length when x == 0 (MIN). The load length will be width % original length
+ // Leftover elements can be handled using overlapping. This involves processing some of the elements in the array twice.
};
enum class TensorSamplerAddressModeY : int32_t
{
- Unknown = 0,
- None = 1, // The user guarantees that the Y coordinate is always in-bound
- OverlappingMin = 2, // (FIXED shapes only) Reduce the load/store length when x == 0 (MIN). The load length will be width % original length
- Skip = 3, // Skip the read/write
- SkipMinEdgeOnly = 4, // Skip greater than or equal to max only. The user guarantees that the Y coordinate is always >= 0
- SkipMaxEdgeOnly = 5, // Skip less than 0 only
- ClampToNearest = 6, // Clamp the coordinate to nearest edge (0 or max value allowed on Y)
- ClampToMinEdgeOnly = 7, // Clamp the negative coordinate to 0 only. Therefore, we expect Y to be always < MAX
- ClampToMaxEdgeOnly = 8, // Clamp the coordinate to the max value allowed on Y only. We expect Y to be always >= 0
- ClampToBorder = 9, // Clamp to border which always has 0 value
+ Unknown = 0,
+ None = 1, // The user guarantees that the Y coordinate is always in-bound
+ OverlappingMin =
+ 2, // (FIXED shapes only) Reduce the load/store length when x == 0 (MIN). The load length will be width % original length
+ Skip = 3, // Skip the read/write
+ SkipMinEdgeOnly =
+ 4, // Skip greater than or equal to max only. The user guarantees that the Y coordinate is always >= 0
+ SkipMaxEdgeOnly = 5, // Skip less than 0 only
+ ClampToNearest = 6, // Clamp the coordinate to nearest edge (0 or max value allowed on Y)
+ ClampToMinEdgeOnly = 7, // Clamp the negative coordinate to 0 only. Therefore, we expect Y to be always < MAX
+ ClampToMaxEdgeOnly = 8, // Clamp the coordinate to the max value allowed on Y only. We expect Y to be always >= 0
+ ClampToBorder = 9, // Clamp to border which always has 0 value
ClampToBorderMinEdgeOnly = 10,
ClampToBorderMaxEdgeOnly = 11
};
enum class TensorSamplerAddressModeZ : int32_t
{
- Unknown = 0,
- None = 1, // The user guarantees that the Y coordinate is always in-bound
- Skip = 3, // Skip the read/write
- SkipMinEdgeOnly = 4, // Skip greater than or equal to max only. The user guarantees that the Y coordinate is always >= 0
+ Unknown = 0,
+ None = 1, // The user guarantees that the Y coordinate is always in-bound
+ Skip = 3, // Skip the read/write
+ SkipMinEdgeOnly =
+ 4, // Skip greater than or equal to max only. The user guarantees that the Y coordinate is always >= 0
SkipMaxEdgeOnly = 5, // Skip less than 0 only
ClampToNearest = 6, // Clamp the coordinate to nearest edge (0 or max value allowed on Y)
ClampToMinEdgeOnly = 7, // Clamp the negative coordinate to 0 only. Therefore, we expect Y to be always < MAX
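
A worked example of the OverlappingMin arithmetic described in the comments above, with hypothetical numbers (row width 10, vector length 4): the access at coordinate 0 is shrunk to width % 4 = 2 elements, and the remaining full-length accesses start at 2 and 6, covering the row exactly. The alternative the comment mentions is to keep full-length accesses and re-process some elements by overlapping.

    #include <cstdint>

    constexpr int32_t width    = 10, vec_len = 4; // hypothetical values
    constexpr int32_t leftover = width % vec_len; // 2: reduced length used at coordinate 0
    static_assert(leftover + 2 * vec_len == width, "short first access plus full accesses cover the row");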
diff --git a/compute_kernel_writer/prototype/src/Kernel.cpp b/compute_kernel_writer/prototype/src/Kernel.cpp
index 095ac879f1..6228ed17d0 100644
--- a/compute_kernel_writer/prototype/src/Kernel.cpp
+++ b/compute_kernel_writer/prototype/src/Kernel.cpp
@@ -23,24 +23,27 @@
*/
#include "ckw/Kernel.h"
+
#include "ckw/TensorOperand.h"
#include "ckw/types/GpuTargetLanguage.h"
+
#include "src/Prototype.h"
namespace ckw
{
-Kernel::Kernel(GpuTargetLanguage language)
- : Kernel{"unnamed", language}
+Kernel::Kernel(GpuTargetLanguage language) : Kernel{"unnamed", language}
{
}
Kernel::Kernel(const char *name, GpuTargetLanguage language)
- : _name(name), _kernel(std::make_unique<prototype::GpuKernelWriterDataHolder>(language)), _operands{}, _tensor_id_operands{}
+ : _name(name),
+ _kernel(std::make_unique<prototype::GpuKernelWriterDataHolder>(language)),
+ _operands{},
+ _tensor_id_operands{}
{
}
-
Kernel::~Kernel()
{
}
@@ -50,7 +53,7 @@ const std::string &Kernel::name() const
return _name;
}
-void Kernel::name(const std::string& name)
+void Kernel::name(const std::string &name)
{
_name = name;
}
@@ -60,14 +63,14 @@ std::vector<KernelArgument> Kernel::arguments() const
const auto impl_args = _kernel->arguments.tensor_argument_declarations();
- for(auto tensor_arg : impl_args)
+ for (auto tensor_arg : impl_args)
{
auto tensor = _tensor_id_operands.at(tensor_arg->format().id);
arguments.push_back(*tensor);
- for(auto component_arg : tensor_arg->component_declarations())
+ for (auto component_arg : tensor_arg->component_declarations())
{
- switch(component_arg)
+ switch (component_arg)
{
case TensorComponentType::OffsetFirstElement:
arguments.push_back(tensor->offset_first_element_in_bytes());
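
The loop above flattens each tensor into its storage argument followed by the component arguments its kernel actually references. A consumption sketch (KernelArgument's accessors beyond construction are not shown in this diff, so the body is left as a placeholder):

    for (const KernelArgument &arg : kernel.arguments())
    {
        // arg is either a tensor storage argument or one of its trailing
        // components (strides, dims, offset of the first element, ...).
    }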
diff --git a/compute_kernel_writer/prototype/src/KernelArgument.cpp b/compute_kernel_writer/prototype/src/KernelArgument.cpp
index 2b4d7c8cee..24ace28eb3 100644
--- a/compute_kernel_writer/prototype/src/KernelArgument.cpp
+++ b/compute_kernel_writer/prototype/src/KernelArgument.cpp
@@ -23,14 +23,14 @@
*/
#include "ckw/KernelArgument.h"
+
#include "ckw/Error.h"
#include "ckw/TensorOperand.h"
namespace ckw
{
-KernelArgument::KernelArgument(TensorOperand &tensor)
- : _type(Type::TensorStorage), _id(tensor.info().id())
+KernelArgument::KernelArgument(TensorOperand &tensor) : _type(Type::TensorStorage), _id(tensor.info().id())
{
_sub_id.tensor_storage_type = tensor.storage_type();
}
diff --git a/compute_kernel_writer/prototype/src/KernelWriter.cpp b/compute_kernel_writer/prototype/src/KernelWriter.cpp
index 5c9a16ee33..9f58d9fefa 100644
--- a/compute_kernel_writer/prototype/src/KernelWriter.cpp
+++ b/compute_kernel_writer/prototype/src/KernelWriter.cpp
@@ -23,9 +23,11 @@
*/
#include "ckw/KernelWriter.h"
+
#include "ckw/Error.h"
#include "ckw/TensorInfo.h"
#include "ckw/TensorOperand.h"
+
#include "src/Prototype.h"
#include <sstream>
@@ -38,7 +40,7 @@ namespace
inline prototype::TensorInfo create_impl_tensor_info(const TensorInfo &info)
{
- return prototype::TensorInfo{ info.shape(), info.data_type(), info.data_layout(), info.id() };
+ return prototype::TensorInfo{info.shape(), info.data_type(), info.data_layout(), info.id()};
}
} // namespace
@@ -86,7 +88,8 @@ int32_t KernelWriter::next_id_space()
// Tensor and tile declaration
// =================================================================================================
-TensorOperand &KernelWriter::declare_tensor_argument(const std::string &name, const TensorInfo &info, TensorStorageType storage_type)
+TensorOperand &
+KernelWriter::declare_tensor_argument(const std::string &name, const TensorInfo &info, TensorStorageType storage_type)
{
const auto var_name = generate_variable_name(name);
@@ -120,13 +123,11 @@ TileOperand &KernelWriter::declare_tile_operand(std::unique_ptr<TileOperand> ope
auto &operand = _kernel->register_operand(std::move(operand_ptr));
const auto &name = operand.name();
- if(!operand.is_constant())
+ if (!operand.is_constant())
{
const auto &info = operand.tile_info();
- _impl->declare_tile(
- name,
- prototype::TileInfo(info.data_type(), info.width(), info.height()));
+ _impl->declare_tile(name, prototype::TileInfo(info.data_type(), info.width(), info.height()));
}
else
{
@@ -140,16 +141,15 @@ TileOperand &KernelWriter::declare_tile_operand(std::unique_ptr<TileOperand> ope
// Load and store
// =================================================================================================
-void KernelWriter::op_load(TileOperand &tile, const TensorOperand &tensor, const TensorTileSampler &sampler, const TileOperand &dilation_y)
+void KernelWriter::op_load(TileOperand &tile,
+ const TensorOperand &tensor,
+ const TensorTileSampler &sampler,
+ const TileOperand &dilation_y)
{
prototype::TensorOperand impl_tensor(
tensor.name(),
- prototype::GpuSampler{
- sampler.format(),
- prototype::to_gpu_tensor_storage(tensor.storage_type()),
- sampler.address_mode_x(),
- sampler.address_mode_y(),
- sampler.address_mode_z() });
+ prototype::GpuSampler{sampler.format(), prototype::to_gpu_tensor_storage(tensor.storage_type()),
+ sampler.address_mode_x(), sampler.address_mode_y(), sampler.address_mode_z()});
auto impl_x = sampler.x().create_impl_operand(_impl.get());
auto impl_y = sampler.y().create_impl_operand(_impl.get());
@@ -167,12 +167,8 @@ void KernelWriter::op_load_indirect(TileOperand &tile, const TensorOperand &tens
{
prototype::TensorOperand impl_tensor(
tensor.name(),
- prototype::GpuSampler{
- sampler.format(),
- prototype::to_gpu_tensor_storage(tensor.storage_type()),
- sampler.address_mode_x(),
- sampler.address_mode_y(),
- sampler.address_mode_z() });
+ prototype::GpuSampler{sampler.format(), prototype::to_gpu_tensor_storage(tensor.storage_type()),
+ sampler.address_mode_x(), sampler.address_mode_y(), sampler.address_mode_z()});
auto impl_x = sampler.x().create_impl_operand(_impl.get());
auto impl_y = sampler.y().create_impl_operand(_impl.get());
@@ -194,12 +190,8 @@ void KernelWriter::util_get_indirect_buffer(TileOperand &tile,
{
prototype::TensorOperand impl_tensor(
tensor.name(),
- prototype::GpuSampler{
- sampler.format(),
- prototype::to_gpu_tensor_storage(tensor.storage_type()),
- sampler.address_mode_x(),
- sampler.address_mode_y(),
- sampler.address_mode_z() });
+ prototype::GpuSampler{sampler.format(), prototype::to_gpu_tensor_storage(tensor.storage_type()),
+ sampler.address_mode_x(), sampler.address_mode_y(), sampler.address_mode_z()});
auto impl_x = x.create_impl_operand(_impl.get());
auto impl_y = y.create_impl_operand(_impl.get());
@@ -215,12 +207,8 @@ void KernelWriter::op_store(TensorOperand &tensor, const TileOperand &tile, cons
{
prototype::TensorOperand impl_tensor(
tensor.name(),
- prototype::GpuSampler{
- sampler.format(),
- prototype::to_gpu_tensor_storage(tensor.storage_type()),
- sampler.address_mode_x(),
- sampler.address_mode_y(),
- sampler.address_mode_z() });
+ prototype::GpuSampler{sampler.format(), prototype::to_gpu_tensor_storage(tensor.storage_type()),
+ sampler.address_mode_x(), sampler.address_mode_y(), sampler.address_mode_z()});
auto impl_src = tile.create_impl_operand(_impl.get());
auto impl_x = sampler.x().create_impl_operand(_impl.get());
auto impl_y = sampler.y().create_impl_operand(_impl.get());
@@ -250,7 +238,10 @@ void KernelWriter::op_cast_expression(const TileOperand &dst, const TileOperand
_impl->op_cast_expression(impl_dst, impl_src, policy);
}
-void KernelWriter::op_binary_expression(const TileOperand &dst, const TileOperand &lhs, BinaryOp op, const TileOperand &rhs)
+void KernelWriter::op_binary_expression(const TileOperand &dst,
+ const TileOperand &lhs,
+ BinaryOp op,
+ const TileOperand &rhs)
{
auto impl_lhs = lhs.create_impl_operand(_impl.get());
auto impl_rhs = rhs.create_impl_operand(_impl.get());
@@ -275,7 +266,10 @@ void KernelWriter::op_unary_elementwise_function(const TileOperand &dst, UnaryFu
_impl->op_unary_elementwise_function(impl_dst, opcode, impl_src);
}
-void KernelWriter::op_binary_elementwise_function(const TileOperand &dst, BinaryFunction opcode, const TileOperand &first, const TileOperand &second)
+void KernelWriter::op_binary_elementwise_function(const TileOperand &dst,
+ BinaryFunction opcode,
+ const TileOperand &first,
+ const TileOperand &second)
{
auto impl_dst = dst.create_impl_operand(_impl.get());
auto impl_first = first.create_impl_operand(_impl.get());
@@ -284,7 +278,11 @@ void KernelWriter::op_binary_elementwise_function(const TileOperand &dst, Binary
_impl->op_binary_elementwise_function(impl_dst, opcode, impl_first, impl_second);
}
-void KernelWriter::op_ternary_elementwise_function(const TileOperand &dst, TernaryFunction opcode, const TileOperand &first, const TileOperand &second, const TileOperand &third)
+void KernelWriter::op_ternary_elementwise_function(const TileOperand &dst,
+ TernaryFunction opcode,
+ const TileOperand &first,
+ const TileOperand &second,
+ const TileOperand &third)
{
auto impl_dst = dst.create_impl_operand(_impl.get());
auto impl_first = first.create_impl_operand(_impl.get());
@@ -305,7 +303,10 @@ void KernelWriter::op_if(const TileOperand &lhs, BinaryOp op, const TileOperand
_impl->compound_statement_end();
}
-void KernelWriter::op_else_if(const TileOperand &lhs, BinaryOp op, const TileOperand &rhs, const std::function<void()> &body)
+void KernelWriter::op_else_if(const TileOperand &lhs,
+ BinaryOp op,
+ const TileOperand &rhs,
+ const std::function<void()> &body)
{
auto impl_lhs = lhs.create_impl_operand(_impl.get());
auto impl_rhs = rhs.create_impl_operand(_impl.get());
@@ -324,14 +325,21 @@ void KernelWriter::op_else(const std::function<void()> &body)
_impl->compound_statement_end();
}
-void KernelWriter::op_for_loop(const TileOperand &var_name, BinaryOp cond_op, const TileOperand &cond_value_name, const TileOperand &update_var_name, AssignmentOp update_op, const TileOperand &update_value_name, const std::function<void()> &body)
+void KernelWriter::op_for_loop(const TileOperand &var_name,
+ BinaryOp cond_op,
+ const TileOperand &cond_value_name,
+ const TileOperand &update_var_name,
+ AssignmentOp update_op,
+ const TileOperand &update_value_name,
+ const std::function<void()> &body)
{
auto impl_var_name = var_name.create_impl_operand(_impl.get());
auto impl_cond_value_name = cond_value_name.create_impl_operand(_impl.get());
auto impl_update_var_name = update_var_name.create_impl_operand(_impl.get());
auto impl_update_value_name = update_value_name.create_impl_operand(_impl.get());
- _impl->op_for_loop_header(impl_var_name, cond_op, impl_cond_value_name, impl_update_var_name, update_op, impl_update_value_name);
+ _impl->op_for_loop_header(impl_var_name, cond_op, impl_cond_value_name, impl_update_var_name, update_op,
+ impl_update_value_name);
_impl->compound_statement_begin();
body();
_impl->compound_statement_end();
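
A usage sketch for the reflowed op_for_loop signature above, emitting a loop of the form `for (; i < n; i += step)`. The tiles are hypothetical and BinaryOp::Less is an assumed enumerator name (the full BinaryOp list is not part of this diff):

    writer.op_for_loop(i, BinaryOp::Less, n,             // condition: i < n
                       i, AssignmentOp::Increment, step, // updater:   i += step
                       [&]() { /* loop body operations */ });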
diff --git a/compute_kernel_writer/prototype/src/OperandBase.cpp b/compute_kernel_writer/prototype/src/OperandBase.cpp
index 59cf846cc7..e0617fdc06 100644
--- a/compute_kernel_writer/prototype/src/OperandBase.cpp
+++ b/compute_kernel_writer/prototype/src/OperandBase.cpp
@@ -27,8 +27,7 @@
namespace ckw
{
-OperandBase::OperandBase(const std::string &name)
- : _name(name)
+OperandBase::OperandBase(const std::string &name) : _name(name)
{
}
diff --git a/compute_kernel_writer/prototype/src/Prototype.h b/compute_kernel_writer/prototype/src/Prototype.h
index eb9d7198a9..433eef9e7b 100644
--- a/compute_kernel_writer/prototype/src/Prototype.h
+++ b/compute_kernel_writer/prototype/src/Prototype.h
@@ -25,12 +25,21 @@
#ifndef CKW_PROTOTYPE_SRC_PROTOTYPE_H
#define CKW_PROTOTYPE_SRC_PROTOTYPE_H
+#include "ckw/Error.h"
+#include "ckw/TensorInfo.h"
+#include "ckw/types/ConvertPolicy.h"
+#include "ckw/types/DataType.h"
+#include "ckw/types/Functions.h"
+#include "ckw/types/GpuTargetLanguage.h"
+#include "ckw/types/Operators.h"
+#include "ckw/types/TensorSamplerTypes.h"
+
#include <algorithm>
#include <array>
#include <cassert> // assert (to be removed)
#include <chrono>
#include <cmath>
-#include <cstdint> // int32_t
+#include <cstdint> // int32_t
#include <functional>
#include <iostream> // cout (to be removed)
#include <map>
@@ -40,15 +49,6 @@
#include <unordered_map>
#include <vector>
-#include "ckw/Error.h"
-#include "ckw/TensorInfo.h"
-#include "ckw/types/ConvertPolicy.h"
-#include "ckw/types/DataType.h"
-#include "ckw/types/Functions.h"
-#include "ckw/types/GpuTargetLanguage.h"
-#include "ckw/types/Operators.h"
-#include "ckw/types/TensorSamplerTypes.h"
-
namespace ckw
{
namespace prototype
@@ -83,21 +83,21 @@ enum class GpuExtensions
struct TensorInfo
{
- TensorShape shape{ { 0 } };
- DataType data_type{ DataType::Unknown };
- TensorDataLayout data_layout{ TensorDataLayout::Nhwc };
- int32_t id{ -1 };
+ TensorShape shape{{0}};
+ DataType data_type{DataType::Unknown};
+ TensorDataLayout data_layout{TensorDataLayout::Nhwc};
+ int32_t id{-1};
};
struct ComponentAttribute
{
- GpuCompilationSpeed compilation_speed{ GpuCompilationSpeed::Fast };
- bool overwrite_tile{ true };
+ GpuCompilationSpeed compilation_speed{GpuCompilationSpeed::Fast};
+ bool overwrite_tile{true};
};
inline std::string data_type_to_cl_type(DataType dt)
{
- switch(dt)
+ switch (dt)
{
case DataType::Fp32:
return "float";
@@ -125,7 +125,7 @@ inline std::string data_type_to_cl_type(DataType dt)
inline int32_t width_to_cl_vector_size(int32_t width)
{
- switch(width)
+ switch (width)
{
case 1:
return 1;
@@ -160,7 +160,7 @@ inline std::string get_cl_data_type(DataType dt, int32_t width)
std::string data_type;
int32_t w = width_to_cl_vector_size(width);
data_type += data_type_to_cl_type(dt);
- if(w != 1)
+ if (w != 1)
{
data_type += std::to_string(w);
}
@@ -169,7 +169,7 @@ inline std::string get_cl_data_type(DataType dt, int32_t width)
inline std::string to_opencl_store(int32_t vector_length)
{
- if(vector_length != 1)
+ if (vector_length != 1)
{
return "vstore" + std::to_string(vector_length) + "(";
}
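
For quick reference, the strings produced by the two helpers above (the Fp32 mapping appears earlier in this file; the vector_length == 1 branch of to_opencl_store lies outside this hunk):

    get_cl_data_type(DataType::Fp32, 4); // "float4", i.e. "float" plus the vector width, since w != 1
    get_cl_data_type(DataType::Fp32, 1); // "float", no suffix when w == 1
    to_opencl_store(4);                  // "vstore4("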
@@ -185,24 +185,21 @@ struct TileInfo
{
}
- TileInfo(DataType dt)
- : dt(dt), w(1), h(1)
+ TileInfo(DataType dt) : dt(dt), w(1), h(1)
{
}
- TileInfo(DataType dt, int32_t width)
- : dt(dt), w(width), h(1)
+ TileInfo(DataType dt, int32_t width) : dt(dt), w(width), h(1)
{
}
- TileInfo(DataType dt, int32_t width, int32_t height)
- : dt(dt), w(width), h(height)
+ TileInfo(DataType dt, int32_t width, int32_t height) : dt(dt), w(width), h(height)
{
}
- DataType dt{ DataType::Unknown }; // Data type of the tile
- int32_t w{ 0 }; // Width (i.e. c0 - portion of the channels)
- int32_t h{ 0 }; // Height (i.e. s0 - portion of the spatial dimensions)
+ DataType dt{DataType::Unknown}; // Data type of the tile
+ int32_t w{0}; // Width (i.e. c0 - portion of the channels)
+ int32_t h{0}; // Height (i.e. s0 - portion of the spatial dimensions)
};
inline std::ostream &operator<<(std::ostream &o, const TileInfo &a)
@@ -213,14 +210,14 @@ inline std::ostream &operator<<(std::ostream &o, const TileInfo &a)
struct DataTypeAsString
{
- std::string str{ "" };
- DataType dt{ DataType::Unknown };
- int32_t size{ 1 };
+ std::string str{""};
+ DataType dt{DataType::Unknown};
+ int32_t size{1};
};
struct ValueAsString
{
- std::string str{ "" };
+ std::string str{""};
DataTypeAsString type{};
};
@@ -276,8 +273,8 @@ public:
virtual bool need_declaration() const = 0;
protected:
- TileInfo _format{}; // Tile format
- std::string _basename{ "" }; // Tile name
+ TileInfo _format{}; // Tile format
+ std::string _basename{""}; // Tile name
};
// A tile is a collection of variables used to express a 2D data. The variables are vectors in the GPU context.
@@ -329,7 +326,7 @@ public:
t.type.size = 1;
// Check required because, when the width is 1, we cannot use .s0
- if(_format.w != 1)
+ if (_format.w != 1)
{
// Automatic broadcasting
t.str += ".s" + std::to_string(x);
@@ -360,10 +357,10 @@ public:
t.type.dt = _format.dt;
t.type.size = width;
- if(_format.w != 1)
+ if (_format.w != 1)
{
t.str += ".s";
- for(int i = 0; i < width; ++i)
+ for (int i = 0; i < width; ++i)
{
t.str += to_scalar_hex(x_start + i);
}
@@ -374,7 +371,7 @@ public:
std::vector<ValueAsString> underlying_source_variables() const override
{
std::vector<ValueAsString> vars;
- for(int32_t y = 0; y < _format.h; ++y)
+ for (int32_t y = 0; y < _format.h; ++y)
{
ValueAsString t;
t.str = build_variable_name(y);
@@ -401,7 +398,7 @@ private:
{
std::string var_name = _basename;
- if(_format.h == 1)
+ if (_format.h == 1)
{
return var_name;
}
@@ -416,7 +413,7 @@ private:
std::string to_scalar_hex(int32_t x) const
{
- switch(x)
+ switch (x)
{
case 0:
case 1:
@@ -461,9 +458,9 @@ public:
_data = std::vector<std::vector<std::string>>(_format.h, std::vector<std::string>(_format.w));
- for(int32_t y = 0; y < _format.h; ++y)
+ for (int32_t y = 0; y < _format.h; ++y)
{
- for(int32_t x = 0; x < _format.w; ++x)
+ for (int32_t x = 0; x < _format.w; ++x)
{
_data[y][x] = in[y][x];
}
@@ -501,20 +498,20 @@ public:
t.type.dt = _format.dt;
t.type.size = width;
- if(width > 1)
+ if (width > 1)
{
t.str += "((" + get_cl_data_type(_format.dt, width) + ")(";
}
int32_t x = x_start;
- for(; x < width - 1; ++x)
+ for (; x < width - 1; ++x)
{
t.str += scalar(x, y).str;
t.str += ", ";
}
t.str += scalar(x, y).str;
- if(width > 1)
+ if (width > 1)
{
t.str += "))";
}
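
The branch above assembles an OpenCL C vector literal from the stored scalar strings. For a 4-wide Fp32 constant row it yields the following, assuming get_cl_data_type(DataType::Fp32, 4) returns "float4" as shown earlier in this file:

    // Access over a 4-wide constant row {"1.0f", "2.0f", "3.0f", "4.0f"}:
    std::string wide   = "((float4)(1.0f, 2.0f, 3.0f, 4.0f))"; // width > 1 path
    std::string scalar = "1.0f";                               // width == 1: bare scalar string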
@@ -526,9 +523,9 @@ public:
{
std::vector<ValueAsString> vars;
- for(int32_t y = 0; y < _format.h; ++y)
+ for (int32_t y = 0; y < _format.h; ++y)
{
- for(int32_t x = 0; x < _format.w; ++x)
+ for (int32_t x = 0; x < _format.w; ++x)
{
ValueAsString t;
t.str = _data[y][x];
@@ -572,7 +569,7 @@ enum class TensorComponentGroup : int32_t
inline std::string to_string(TensorComponentType x)
{
- switch(x)
+ switch (x)
{
case TensorComponentType::Unknown:
return "Unknown";
@@ -672,7 +669,7 @@ enum class GpuTensorStorage : int32_t
inline GpuTensorStorage to_gpu_tensor_storage(TensorStorageType s)
{
- switch(s)
+ switch (s)
{
case TensorStorageType::Unknown:
return GpuTensorStorage::Unknown;
@@ -694,7 +691,7 @@ inline GpuTensorStorage to_gpu_tensor_storage(TensorStorageType s)
inline TensorStorageType to_tensor_storage(GpuTensorStorage s)
{
- switch(s)
+ switch (s)
{
case GpuTensorStorage::Unknown:
return TensorStorageType::Unknown;
@@ -755,23 +752,23 @@ public:
// Methods to override
std::string component(TensorComponentType x) override
{
- if((static_cast<int32_t>(x) & static_cast<int32_t>(TensorComponentGroup::Constant)))
+ if ((static_cast<int32_t>(x) & static_cast<int32_t>(TensorComponentGroup::Constant)))
{
int32_t idx = static_cast<int32_t>(x) & static_cast<int32_t>(TensorComponentIndex::IndexMask);
return std::to_string(idx - 1);
}
- if(_return_by_value_when_possible)
+ if (_return_by_value_when_possible)
{
- if((static_cast<int32_t>(x) & static_cast<int32_t>(TensorComponentGroup::Dimension)))
+ if ((static_cast<int32_t>(x) & static_cast<int32_t>(TensorComponentGroup::Dimension)))
{
int32_t idx = static_cast<int32_t>(x) & static_cast<int32_t>(TensorComponentIndex::IndexMask);
return std::to_string(_format.shape[idx]);
}
- if((static_cast<int32_t>(x) & static_cast<int32_t>(TensorComponentGroup::FoldedDimension)))
+ if ((static_cast<int32_t>(x) & static_cast<int32_t>(TensorComponentGroup::FoldedDimension)))
{
- switch(x)
+ switch (x)
{
case TensorComponentType::Dim1xDim2:
return std::to_string(_format.shape[1] * _format.shape[2]);
@@ -784,7 +781,7 @@ public:
}
}
- if(std::find(_components_required.begin(), _components_required.end(), x) == _components_required.end())
+ if (std::find(_components_required.begin(), _components_required.end(), x) == _components_required.end())
{
_components_required.push_back(x);
}
@@ -804,7 +801,7 @@ public:
std::string storage(GpuTensorStorage x) override
{
- if(std::find(_storage_required.begin(), _storage_required.end(), x) == _storage_required.end())
+ if (std::find(_storage_required.begin(), _storage_required.end(), x) == _storage_required.end())
{
_storage_required.push_back(x);
}
@@ -814,7 +811,7 @@ public:
std::string storage_type_declaration(GpuTensorStorage x) const override
{
- switch(x)
+ switch (x)
{
case GpuTensorStorage::BufferUint8Ptr:
return "__global uchar*";
@@ -848,7 +845,7 @@ private:
{
std::string var_name = _basename;
- switch(x)
+ switch (x)
{
case GpuTensorStorage::BufferUint8Ptr:
return var_name + "_ptr";
@@ -870,7 +867,7 @@ private:
{
std::string var_name = _basename;
- switch(x)
+ switch (x)
{
case TensorComponentType::OffsetFirstElement:
return var_name + "_offset_first_element";
@@ -900,9 +897,9 @@ private:
return var_name;
}
- bool _return_by_value_when_possible{ false };
- std::vector<GpuTensorStorage> _storage_required{};
- std::vector<TensorComponentType> _components_required{};
+ bool _return_by_value_when_possible{false};
+ std::vector<GpuTensorStorage> _storage_required{};
+ std::vector<TensorComponentType> _components_required{};
};
/**
@@ -930,16 +927,16 @@ public:
struct RegistryTileTableEntry
{
- RegistryLevel registry_level{ 0 };
- std::unique_ptr<IVectorTile> tile_object{ nullptr };
+ RegistryLevel registry_level{0};
+ std::unique_ptr<IVectorTile> tile_object{nullptr};
};
struct RegistryTileTypeTableEntry
{
- RegistryTileType tile_type{ RegistryTileType::Tile };
+ RegistryTileType tile_type{RegistryTileType::Tile};
RegistryTileName tile_name{};
- RegistryIdSpace registry_idspace{ 0 };
- RegistryLevel registry_level{ 0 };
+ RegistryIdSpace registry_idspace{0};
+ RegistryLevel registry_level{0};
};
using RegistryTileTable = std::map<RegistryIdSpace, std::map<RegistryTileName, RegistryTileTableEntry>>;
@@ -1002,7 +999,7 @@ public:
auto it = _frags.begin();
- while(it != _frags.end())
+ while (it != _frags.end())
{
x.push_back(it->first);
@@ -1026,7 +1023,7 @@ public:
// First check whether a tile with the same name exists
IVectorTile *result = (*this)[key_var_name];
assert(result == nullptr);
- if(result == nullptr)
+ if (result == nullptr)
{
std::unique_ptr<ClTile> tile = std::make_unique<ClTile>(var_name, format);
@@ -1058,7 +1055,7 @@ public:
// First check whether a tile with the same name exists
IVectorTile *result = (*this)[key_var_name];
assert(result == nullptr);
- if(result == nullptr)
+ if (result == nullptr)
{
std::unique_ptr<ClTile> tile = std::make_unique<ClTile>(var_name, format);
_frags[key_IdSpace][key_var_name].tile_object = std::move(tile);
@@ -1090,7 +1087,7 @@ public:
// First check whether a tile with the same name exists
IVectorTile *result = (*this)[key_var_name];
assert(result == nullptr);
- if(result == nullptr)
+ if (result == nullptr)
{
std::unique_ptr<ClConstantTile> tile = std::make_unique<ClConstantTile>(in, dt);
_frags[key_IdSpace][key_var_name].tile_object = std::move(tile);
@@ -1123,7 +1120,7 @@ public:
// First check whether a tile with the same name exists
IVectorTile *result = (*this)[key_var_name];
assert(result == nullptr);
- if(result == nullptr)
+ if (result == nullptr)
{
std::unique_ptr<ClConstantTile> tile = std::make_unique<ClConstantTile>(in, dt);
_frags[key_IdSpace][key_var_name].tile_object = std::move(tile);
@@ -1153,10 +1150,10 @@ public:
IVectorTile *result = nullptr;
auto search_IdSpace = _frags.find(key_IdSpace);
- if(search_IdSpace != _frags.end())
+ if (search_IdSpace != _frags.end())
{
auto search_tile = _frags[key_IdSpace].find(key_var_name);
- if(search_tile != _frags[key_IdSpace].end())
+ if (search_tile != _frags[key_IdSpace].end())
{
result = search_tile->second.tile_object.get();
assert(result != nullptr);
@@ -1224,7 +1221,7 @@ public:
std::map<RegistryTileName, RegistryTileTypeTableEntry>::iterator it = _frag_types[IdSpace].begin();
- while(it != _frag_types[IdSpace].end())
+ while (it != _frag_types[IdSpace].end())
{
// The following line should be enabled. However, we cannot at this stage
// because it is used to retrieve the output tile produced by each component.
@@ -1259,9 +1256,9 @@ public:
// Remove all variables in the local scope
std::map<RegistryTileName, RegistryTileTableEntry>::iterator it = _frags[_IdSpace].begin();
- while(it != _frags[_IdSpace].end())
+ while (it != _frags[_IdSpace].end())
{
- if(it->second.registry_level == _registry_level)
+ if (it->second.registry_level == _registry_level)
{
it = _frags[_IdSpace].erase(it);
}
@@ -1273,9 +1270,9 @@ public:
std::map<RegistryTileName, RegistryTileTypeTableEntry>::iterator it_type = _frag_types[_IdSpace].begin();
- while(it_type != _frag_types[_IdSpace].end())
+ while (it_type != _frag_types[_IdSpace].end())
{
- if(it_type->second.registry_level == _registry_level)
+ if (it_type->second.registry_level == _registry_level)
{
it_type = _frag_types[_IdSpace].erase(it_type);
}
@@ -1302,7 +1299,7 @@ private:
std::string generate_tile_name(const std::string &name)
{
assert(_IdSpace >= 0);
- if(_registry_level == 0)
+ if (_registry_level == 0)
{
return "_G" + std::to_string(_IdSpace) + "_" + name;
}
@@ -1314,10 +1311,10 @@ private:
RegistryTileTable _frags{};
RegistryTileTypeTable _frag_types{};
- RegistryLevel _registry_level{ 0 };
- RegistryIdSpace _IdSpace{ -1 };
- int32_t _anonymous_frag_count{ 0 }; // Counter used to create the anonymous tiles
- GpuTargetLanguage _language{ GpuTargetLanguage::Unknown }; // Gpu programming language
+ RegistryLevel _registry_level{0};
+ RegistryIdSpace _IdSpace{-1};
+ int32_t _anonymous_frag_count{0}; // Counter used to create the anonymous tiles
+ GpuTargetLanguage _language{GpuTargetLanguage::Unknown}; // Gpu programming language
};
using TensorEntry = std::unique_ptr<IGpuTensorArgument>;
@@ -1388,7 +1385,7 @@ public:
auto it = _refs.begin();
- while(it != _refs.end())
+ while (it != _refs.end())
{
x.push_back(it->first);
@@ -1420,12 +1417,12 @@ public:
// Check whether a tensor with that tensorID exists
auto result = _tensor_arguments.find(tensor_id);
- if(result == _tensor_arguments.end())
+ if (result == _tensor_arguments.end())
{
// It means that we haven't added a tensor with that tensor_id yet. Create an IGpuTensorArgument before creating the reference
- std::unique_ptr<ClTensorArgument> arg = std::make_unique<ClTensorArgument>(var_name, x,
- return_by_value_when_possible);
- _tensor_arguments[tensor_id] = std::move(arg);
+ std::unique_ptr<ClTensorArgument> arg =
+ std::make_unique<ClTensorArgument>(var_name, x, return_by_value_when_possible);
+ _tensor_arguments[tensor_id] = std::move(arg);
}
_refs[key_IdSpace][key_var_name] = tensor_id;
@@ -1445,15 +1442,15 @@ public:
IGpuTensorArgument *result = nullptr;
auto search_IdSpace = _refs.find(key_IdSpace);
- if(search_IdSpace != _refs.end())
+ if (search_IdSpace != _refs.end())
{
auto search_tensor_id = _refs[key_IdSpace].find(key_var_name);
- if(search_tensor_id != _refs[key_IdSpace].end())
+ if (search_tensor_id != _refs[key_IdSpace].end())
{
const int32_t tensor_id = search_tensor_id->second;
auto search_tensor_argument = _tensor_arguments.find(tensor_id);
- if(search_tensor_argument != _tensor_arguments.end())
+ if (search_tensor_argument != _tensor_arguments.end())
{
result = search_tensor_argument->second.get();
}
@@ -1475,7 +1472,7 @@ public:
auto it = _tensor_arguments.begin();
- while(it != _tensor_arguments.end())
+ while (it != _tensor_arguments.end())
{
args.push_back(it->second.get());
it++;
@@ -1499,7 +1496,7 @@ public:
auto search_IdSpace = _refs.find(key_IdSpace);
- if(search_IdSpace != _refs.end())
+ if (search_IdSpace != _refs.end())
{
auto search_tensor_id = _refs[key_IdSpace].find(key_var_name);
@@ -1527,7 +1524,7 @@ public:
auto search_IdSpace = _refs.find(key_IdSpace);
- if(search_IdSpace != _refs.end())
+ if (search_IdSpace != _refs.end())
{
auto search_tensor_id = _refs[key_IdSpace].find(key_var_name);
@@ -1550,8 +1547,8 @@ private:
std::map<int32_t, TensorEntry> _tensor_arguments{};
std::map<int32_t, std::map<std::string, int32_t>> _refs{};
- int32_t _IdSpace{ -1 };
- GpuTargetLanguage _language{ GpuTargetLanguage::Unknown }; // Gpu programming language
+ int32_t _IdSpace{-1};
+ GpuTargetLanguage _language{GpuTargetLanguage::Unknown}; // Gpu programming language
};
enum class OpType : int32_t
@@ -1563,7 +1560,7 @@ enum class OpType : int32_t
inline std::string to_string(AssignmentOp op)
{
- switch(op)
+ switch (op)
{
case AssignmentOp::Decrement:
return "-=";
@@ -1577,7 +1574,7 @@ inline std::string to_string(AssignmentOp op)
inline std::string to_string(UnaryOp op)
{
- switch(op)
+ switch (op)
{
case UnaryOp::LogicalNot:
return "!";
@@ -1593,7 +1590,7 @@ inline std::string to_string(UnaryOp op)
inline std::string to_string(BinaryOp op)
{
- switch(op)
+ switch (op)
{
case BinaryOp::Add:
return "+";
@@ -1629,7 +1626,7 @@ inline std::string to_string(BinaryOp op)
inline std::string binary_op_string(BinaryOp op)
{
- switch(op)
+ switch (op)
{
case BinaryOp::Add:
return "add";
@@ -1698,13 +1695,12 @@ struct ScalarTileCoord
{
}
- ScalarTileCoord(int32_t x0, int32_t y0)
- : x(x0), y(y0)
+ ScalarTileCoord(int32_t x0, int32_t y0) : x(x0), y(y0)
{
}
- int32_t x{ -1 };
- int32_t y{ -1 };
+ int32_t x{-1};
+ int32_t y{-1};
};
/**
@@ -1768,7 +1764,7 @@ public:
private:
std::string _str{};
- OperandType _type{ OperandType::Unknown };
+ OperandType _type{OperandType::Unknown};
ScalarTileCoord _coord{};
};
@@ -1778,16 +1774,15 @@ struct GpuSampler
{
GpuSampler() = default;
- TensorSamplerFormat format{ TensorSamplerFormat::Unknown };
- GpuSamplerTensorStorage storage{ GpuSamplerTensorStorage::Unknown };
- TensorSamplerAddressModeX address_mode_x{ TensorSamplerAddressModeX::Unknown };
- TensorSamplerAddressModeY address_mode_y{ TensorSamplerAddressModeY::Unknown };
- TensorSamplerAddressModeZ address_mode_z{ TensorSamplerAddressModeZ::Unknown };
+ TensorSamplerFormat format{TensorSamplerFormat::Unknown};
+ GpuSamplerTensorStorage storage{GpuSamplerTensorStorage::Unknown};
+ TensorSamplerAddressModeX address_mode_x{TensorSamplerAddressModeX::Unknown};
+ TensorSamplerAddressModeY address_mode_y{TensorSamplerAddressModeY::Unknown};
+ TensorSamplerAddressModeZ address_mode_z{TensorSamplerAddressModeZ::Unknown};
};
-inline GpuSampler
-create_simple_sampler(const TensorInfo *tensor_info_id, GpuSampler sampler, int32_t step_x, int32_t step_y,
- int32_t step_z)
+inline GpuSampler create_simple_sampler(
+ const TensorInfo *tensor_info_id, GpuSampler sampler, int32_t step_x, int32_t step_y, int32_t step_z)
{
CKW_UNUSED(step_x, step_y, step_z);
@@ -1804,7 +1799,7 @@ create_simple_sampler(const TensorInfo *tensor_info_id, GpuSampler sampler, int3
int32_t dim_y = 0;
int32_t dim_z = 0;
- switch(sampler.format)
+ switch (sampler.format)
{
case TensorSamplerFormat::C_W_H:
dim_x = tensor[0];
@@ -1822,19 +1817,19 @@ create_simple_sampler(const TensorInfo *tensor_info_id, GpuSampler sampler, int3
break;
}
- if(dim_x == 1)
+ if (dim_x == 1)
{
assert(step_x == 1);
dst_sampler.address_mode_x = TensorSamplerAddressModeX::None;
}
- if(dim_y == 1)
+ if (dim_y == 1)
{
assert(step_y == 1);
dst_sampler.address_mode_y = TensorSamplerAddressModeY::None;
}
- if(dim_z == 1)
+ if (dim_z == 1)
{
assert(step_z == 1);
dst_sampler.address_mode_z = TensorSamplerAddressModeZ::None;
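
The three checks above encode one rule: a size-1 dimension accessed with step 1 can never go out of bounds, so its address mode is relaxed to None and no boundary-handling code is generated for it. An illustrative restatement (not library code):

    inline bool needs_boundary_handling(int32_t dim)
    {
        return dim != 1; // size-1 dimensions are always in-bounds (the step is asserted to be 1)
    }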
@@ -1858,8 +1853,12 @@ public:
 * @param[in] step_y Increment step in the Y direction. Not necessarily the same as the tile's m0!
 * @param[in] step_z Increment step in the Z direction. Not necessarily the same as the tile's d0!
*/
- void initialize(const TensorInfo *tensor_info_id, GpuSamplerTensorStorage tensor_storage,
- TensorSamplerFormat tensor_format, int32_t step_x, int32_t step_y, int32_t step_z)
+ void initialize(const TensorInfo *tensor_info_id,
+ GpuSamplerTensorStorage tensor_storage,
+ TensorSamplerFormat tensor_format,
+ int32_t step_x,
+ int32_t step_y,
+ int32_t step_z)
{
assert(_is_initialized == false);
@@ -1908,13 +1907,13 @@ private:
sampler.address_mode_z = TensorSamplerAddressModeZ::None;
// In the case of texture, we do not need any special checks at the border
- if(tensor_storage == GpuSamplerTensorStorage::BufferUint8Ptr)
+ if (tensor_storage == GpuSamplerTensorStorage::BufferUint8Ptr)
{
int32_t dim_x = 0;
int32_t dim_y = 0;
int32_t dim_z = 0;
- switch(tensor_format)
+ switch (tensor_format)
{
case TensorSamplerFormat::C_W_H:
dim_x = tensor[0];
@@ -1932,17 +1931,17 @@ private:
break;
}
- if((dim_x % _step_x) != 0 && dim_x != 1)
+ if ((dim_x % _step_x) != 0 && dim_x != 1)
{
sampler.address_mode_x = TensorSamplerAddressModeX::OverlappingMin;
}
- if((dim_y % _step_y) != 0 && dim_y != 1)
+ if ((dim_y % _step_y) != 0 && dim_y != 1)
{
sampler.address_mode_y = TensorSamplerAddressModeY::ClampToMaxEdgeOnly;
}
- if((dim_z % _step_z) != 0 && dim_z != 1)
+ if ((dim_z % _step_z) != 0 && dim_z != 1)
{
sampler.address_mode_z = TensorSamplerAddressModeZ::ClampToMaxEdgeOnly;
}
@@ -1952,11 +1951,11 @@ private:
}
GpuSampler _sampler{}; // GpuSampler
- int32_t _step_x{ 1 };
- int32_t _step_y{ 1 };
- int32_t _step_z{ 1 };
- const TensorInfo *_tensor_info_id{ nullptr };
- bool _is_initialized{ false };
+ int32_t _step_x{1};
+ int32_t _step_y{1};
+ int32_t _step_z{1};
+ const TensorInfo *_tensor_info_id{nullptr};
+ bool _is_initialized{false};
};
/**
@@ -1965,8 +1964,7 @@ private:
class TensorOperand
{
public:
- TensorOperand(const std::string &val, GpuSampler sampler)
- : _str(val), _sampler(sampler)
+ TensorOperand(const std::string &val, GpuSampler sampler) : _str(val), _sampler(sampler)
{
}
@@ -2050,9 +2048,9 @@ private:
struct LWS
{
- int32_t x{ 1 };
- int32_t y{ 1 };
- int32_t z{ 1 };
+ int32_t x{1};
+ int32_t y{1};
+ int32_t z{1};
};
/**
@@ -2062,8 +2060,7 @@ struct LWS
class OperandUnpacker
{
public:
- OperandUnpacker(GpuTileRegistry &tiles, GpuTensorArgumentRegistry &arguments)
- : _tiles(tiles), _arguments(arguments)
+ OperandUnpacker(GpuTileRegistry &tiles, GpuTensorArgumentRegistry &arguments) : _tiles(tiles), _arguments(arguments)
{
// Increase the level of the stack to allocate possible temporary tiles
_tiles.increment_registry_level();
@@ -2078,26 +2075,26 @@ public:
IVectorTile *unpack(const Operand &src)
{
// Get the tile
- if(src.type() == OperandType::Tile)
+ if (src.type() == OperandType::Tile)
{
assert(_tiles.has_tile(src.value()));
return _tiles[src.value()];
}
// Create an anonymous tile with a constant
- else if(static_cast<int32_t>(src.type()) & 0x00001000)
+ else if (static_cast<int32_t>(src.type()) & 0x00001000)
{
- if(src.type() == OperandType::ScalarTile)
+ if (src.type() == OperandType::ScalarTile)
{
ScalarTileCoord coord = src.scalar_tile_coordinate();
assert(_tiles.has_tile(src.value()));
assert(coord.x >= 0);
assert(coord.y >= 0);
auto val = _tiles[src.value()]->scalar(coord.x, coord.y);
- return _tiles.insert({ { { val.str } } }, val.type.dt);
+ return _tiles.insert({{{val.str}}}, val.type.dt);
}
else
{
- return _tiles.insert({ { { src.value() } } }, to_tile_data_type(src.type()));
+ return _tiles.insert({{{src.value()}}}, to_tile_data_type(src.type()));
}
}
// Create an anonymous tile with the tensor component
@@ -2107,7 +2104,7 @@ public:
auto x = _arguments[src.value()];
const std::string val = x->component(to_tensor_component(src.type()));
const DataType dt = x->component_data_type();
- return _tiles.insert({ { { val } } }, dt);
+ return _tiles.insert({{{val}}}, dt);
}
}
@@ -2119,7 +2116,7 @@ private:
TensorComponentType to_tensor_component(OperandType x)
{
- switch(x)
+ switch (x)
{
case OperandType::TensorDim0:
return TensorComponentType::Dim0;
@@ -2163,8 +2160,7 @@ private:
class TensorOperandUnpacker
{
public:
- TensorOperandUnpacker(GpuTensorArgumentRegistry &arguments)
- : _arguments(arguments){};
+ TensorOperandUnpacker(GpuTensorArgumentRegistry &arguments) : _arguments(arguments){};
IGpuTensorArgument *unpack(const TensorOperand &src)
{
@@ -2191,9 +2187,11 @@ struct GpuKernel
std::string config_id{}; // Unique id, required for the tuning stage
std::vector<LWS> list_lws{}; // LWS to test, required for the tuning stage
// Dispatch stage
- GpuOutputSampler output_sampler{}; // GpuOutputSampler, required for the dispatch stage
- std::vector<std::pair<int32_t, GpuTensorStorage>> list_tensor_storages; // List of tensor storages, required for the dispatch stage
- std::vector<std::pair<int32_t, TensorComponentType>> list_tensor_components; // List of tensor components (width, stride,..), required for the dispatch stage)
+ GpuOutputSampler output_sampler{}; // GpuOutputSampler, required for the dispatch stage
+ std::vector<std::pair<int32_t, GpuTensorStorage>>
+ list_tensor_storages; // List of tensor storages, required for the dispatch stage
+ std::vector<std::pair<int32_t, TensorComponentType>>
+ list_tensor_components; // List of tensor components (width, stride, ...), required for the dispatch stage
};
// Generate all extension pragmas (hardcoded for now)
@@ -2234,13 +2232,13 @@ inline std::string generate_code(GpuKernelWriterDataHolder &in, const std::strin
auto tensor_args = in.arguments.tensor_argument_declarations();
- for(auto &i : tensor_args)
+ for (auto &i : tensor_args)
{
// For each tensor used, get the storage and tensor components
auto storages = i->storage_declarations();
auto components = i->component_declarations();
- for(auto &y : storages)
+ for (auto &y : storages)
{
std::string str;
str += i->storage_type_declaration(y);
@@ -2249,7 +2247,7 @@ inline std::string generate_code(GpuKernelWriterDataHolder &in, const std::strin
arg_str.push_back(str);
}
- for(auto &y : components)
+ for (auto &y : components)
{
std::string str;
str += i->component_type_declaration();
@@ -2259,10 +2257,10 @@ inline std::string generate_code(GpuKernelWriterDataHolder &in, const std::strin
}
}
- for(size_t i = 0; i < arg_str.size(); ++i)
+ for (size_t i = 0; i < arg_str.size(); ++i)
{
code += arg_str[i];
- if(i + 1 < arg_str.size())
+ if (i + 1 < arg_str.size())
{
code += ",\n";
}
@@ -2284,13 +2282,12 @@ inline std::string generate_code(GpuKernelWriterDataHolder &in, const std::strin
class GpuTensor3dMapper
{
public:
- GpuTensor3dMapper(IGpuTensorArgument *tensor, GpuSampler sampler)
- : _sampler(sampler), _tensor(tensor){};
+ GpuTensor3dMapper(IGpuTensorArgument *tensor, GpuSampler sampler) : _sampler(sampler), _tensor(tensor){};
std::string tensor_component_x() const
{
const auto format = _sampler.format;
- switch(format)
+ switch (format)
{
case TensorSamplerFormat::C_WH_1:
case TensorSamplerFormat::C_W_H:
@@ -2305,7 +2302,7 @@ public:
std::string tensor_component_y() const
{
const auto format = _sampler.format;
- switch(format)
+ switch (format)
{
case TensorSamplerFormat::C_WH_1:
return _tensor->component(TensorComponentType::Dim1xDim2);
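
In the C_WH_1 format the W and H axes are addressed as one flattened Y axis, which is why the folded Dim1xDim2 component is returned above. With a hypothetical shape of C = 32, W = 20, H = 10:

    int32_t folded_y_extent = 20 * 10; // == Dim1xDim2 == 200; Y ranges over the whole W*H plane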
@@ -2321,7 +2318,7 @@ public:
std::string tensor_component_z() const
{
const auto format = _sampler.format;
- switch(format)
+ switch (format)
{
case TensorSamplerFormat::C_WH_1:
return "1";
@@ -2337,7 +2334,7 @@ public:
std::string tensor_component_stride_y() const
{
const auto format = _sampler.format;
- switch(format)
+ switch (format)
{
case TensorSamplerFormat::C_WH_1:
case TensorSamplerFormat::C_W_H:
@@ -2352,7 +2349,7 @@ public:
std::string tensor_component_stride_z() const
{
const auto format = _sampler.format;
- switch(format)
+ switch (format)
{
case TensorSamplerFormat::C_WH_1:
return "0";
@@ -2368,7 +2365,7 @@ public:
std::string tensor_component_stride_batch() const
{
const auto format = _sampler.format;
- switch(format)
+ switch (format)
{
case TensorSamplerFormat::C_WH_1:
case TensorSamplerFormat::C_W_H:
@@ -2384,7 +2381,7 @@ public:
{
auto t = _tensor->format();
const auto format = _sampler.format;
- switch(format)
+ switch (format)
{
case TensorSamplerFormat::C_WH_1:
case TensorSamplerFormat::C_W_H:
@@ -2400,7 +2397,7 @@ public:
{
auto t = _tensor->format();
const auto format = _sampler.format;
- switch(format)
+ switch (format)
{
case TensorSamplerFormat::C_WH_1:
return (t.shape[1] * t.shape[2]) == 1;
@@ -2417,7 +2414,7 @@ public:
{
auto t = _tensor->format();
const auto format = _sampler.format;
- switch(format)
+ switch (format)
{
case TensorSamplerFormat::C_WH_1:
return true;
@@ -2434,7 +2431,7 @@ public:
{
auto t = _tensor->format();
const auto format = _sampler.format;
- switch(format)
+ switch (format)
{
case TensorSamplerFormat::C_WH_1:
case TensorSamplerFormat::C_W_H:
@@ -2463,7 +2460,7 @@ private:
struct GpuKernelWriterAttribute
{
- bool return_tensor_component_by_value{ false };
+ bool return_tensor_component_by_value{false};
};
enum class RoundingMode
@@ -2489,7 +2486,8 @@ public:
virtual void declare_tile(const std::string &name, const TileInfo &info) = 0;
- virtual void declare_const_tile(const std::string &name, const std::vector<std::vector<std::string>> &in, DataType dt) = 0;
+ virtual void
+ declare_const_tile(const std::string &name, const std::vector<std::vector<std::string>> &in, DataType dt) = 0;
virtual void write_text(const std::string &x) = 0;
@@ -2498,48 +2496,82 @@ public:
virtual void compound_statement_end() = 0;
// Operations
- virtual void op_get_global_id(const Operand &dst_var, int32_t dim) = 0;
+ virtual void op_get_global_id(const Operand &dst_var, int32_t dim) = 0;
- virtual void op_get_global_coord(const Operand &dst, const Operand &step, const TensorOperand &tensor, int32_t dim) = 0;
+ virtual void
+ op_get_global_coord(const Operand &dst, const Operand &step, const TensorOperand &tensor, int32_t dim) = 0;
- virtual void op_get_global_batch(const Operand &dst, const TensorOperand &tensor) = 0;
+ virtual void op_get_global_batch(const Operand &dst, const TensorOperand &tensor) = 0;
- virtual void op_get_global_size(const Operand &dst_var, int32_t dim) = 0;
+ virtual void op_get_global_size(const Operand &dst_var, int32_t dim) = 0;
- virtual void op_unary_expression(const Operand &dst, UnaryOp op, const Operand &src) = 0;
+ virtual void op_unary_expression(const Operand &dst, UnaryOp op, const Operand &src) = 0;
- virtual void op_binary_expression(const Operand &dst, const Operand &lhs, BinaryOp op, const Operand &rhs) = 0;
+ virtual void op_binary_expression(const Operand &dst, const Operand &lhs, BinaryOp op, const Operand &rhs) = 0;
- virtual void op_assign(const Operand &dst_name, const Operand &src_name) = 0;
+ virtual void op_assign(const Operand &dst_name, const Operand &src_name) = 0;
- virtual void op_unary_elementwise_function(const Operand &dst_name, UnaryFunction func, const Operand &src_name) = 0;
+ virtual void
+ op_unary_elementwise_function(const Operand &dst_name, UnaryFunction func, const Operand &src_name) = 0;
- virtual void op_binary_elementwise_function(const Operand &dst_name, BinaryFunction func, const Operand &first_name, const Operand &second_name) = 0;
+ virtual void op_binary_elementwise_function(const Operand &dst_name,
+ BinaryFunction func,
+ const Operand &first_name,
+ const Operand &second_name) = 0;
- virtual void op_ternary_elementwise_function(const Operand &dst_name, TernaryFunction func, const Operand &first_name, const Operand &second_name, const Operand &third_name) = 0;
+ virtual void op_ternary_elementwise_function(const Operand &dst_name,
+ TernaryFunction func,
+ const Operand &first_name,
+ const Operand &second_name,
+ const Operand &third_name) = 0;
- virtual void op_if_header(const Operand &lhs, BinaryOp op, const Operand &rhs) = 0;
+ virtual void op_if_header(const Operand &lhs, BinaryOp op, const Operand &rhs) = 0;
- virtual void op_else_if_header(const Operand &lhs, BinaryOp op, const Operand &rhs) = 0;
+ virtual void op_else_if_header(const Operand &lhs, BinaryOp op, const Operand &rhs) = 0;
- virtual void op_else_header() = 0;
+ virtual void op_else_header() = 0;
- virtual void op_for_loop_header(const Operand &var_name, BinaryOp cond_op, const Operand &cond_value, const Operand &update_var, AssignmentOp update_op, const Operand &update_value) = 0;
+ virtual void op_for_loop_header(const Operand &var_name,
+ BinaryOp cond_op,
+ const Operand &cond_value,
+ const Operand &update_var,
+ AssignmentOp update_op,
+ const Operand &update_value) = 0;
- virtual void op_load_indirect(const TensorOperand &tensor, const Operand &dst, const Operand &x, const Operand &y_indirect, const Operand &z, const Operand &b = Operand("0", OperandType::ScalarInt32)) = 0;
+ virtual void op_load_indirect(const TensorOperand &tensor,
+ const Operand &dst,
+ const Operand &x,
+ const Operand &y_indirect,
+ const Operand &z,
+ const Operand &b = Operand("0", OperandType::ScalarInt32)) = 0;
- virtual void op_load_immediate(const TensorOperand &tensor, const Operand &dst, const Operand &x, const Operand &y, const Operand &z, const Operand &b = Operand("0", OperandType::ScalarInt32), const Operand &dilation_y = Operand("1", OperandType::ScalarInt32)) = 0;
+ virtual void op_load_immediate(const TensorOperand &tensor,
+ const Operand &dst,
+ const Operand &x,
+ const Operand &y,
+ const Operand &z,
+ const Operand &b = Operand("0", OperandType::ScalarInt32),
+ const Operand &dilation_y = Operand("1", OperandType::ScalarInt32)) = 0;
- virtual void op_store_immediate(const TensorOperand &tensor, const Operand &src, const Operand &x, const Operand &y, const Operand &z, const Operand &b = Operand("0", OperandType::ScalarInt32)) = 0;
+ virtual void op_store_immediate(const TensorOperand &tensor,
+ const Operand &src,
+ const Operand &x,
+ const Operand &y,
+ const Operand &z,
+ const Operand &b = Operand("0", OperandType::ScalarInt32)) = 0;
- virtual void op_cast_expression(const Operand &dst, const Operand &src, ConvertPolicy policy) = 0;
+ virtual void op_cast_expression(const Operand &dst, const Operand &src, ConvertPolicy policy) = 0;
- virtual void op_return() = 0;
+ virtual void op_return() = 0;
// Utils
// It is the process of converting
- virtual void util_get_indirect_buffer(const Operand &dst, const TensorOperand &tensor, const Operand &x,
- const Operand &y, const Operand &x_off, const Operand &y_off) = 0;
+ virtual void util_get_indirect_buffer(const Operand &dst,
+ const TensorOperand &tensor,
+ const Operand &x,
+ const Operand &y,
+ const Operand &x_off,
+ const Operand &y_off) = 0;
};
enum class GpuLoadStoreType
@@ -2586,12 +2618,11 @@ public:
ClLoadStoreBufferHelperWriter &operator=(const ClLoadStoreBufferHelperWriter &) = default;
- static bool
- validate(IGpuKernelWriter *x, GpuTensor3dMapper mapper, GpuLoadStoreType type, IVectorTile *dst)
+ static bool validate(IGpuKernelWriter *x, GpuTensor3dMapper mapper, GpuLoadStoreType type, IVectorTile *dst)
{
CKW_UNUSED(x, type, dst);
- if(mapper.gpu_sampler().storage != GpuSamplerTensorStorage::BufferUint8Ptr)
+ if (mapper.gpu_sampler().storage != GpuSamplerTensorStorage::BufferUint8Ptr)
{
return false;
}
@@ -2675,10 +2706,10 @@ public:
out_of_bound_finalize_y(dst);
// The left over load/store will be written in the finalize stage
- if(_ls_width_part.size() != 0)
+ if (_ls_width_part.size() != 0)
{
int32_t w = 0;
- for(auto &p : _ls_width_part)
+ for (auto &p : _ls_width_part)
{
const std::string dst0 = _dst->vector(w, p, idx_y).str;
const std::string coord_x = _coord_x + " + " + std::to_string(w);
@@ -2698,8 +2729,8 @@ public:
}
private:
- IVectorTile *_dst{ nullptr };
- int32_t _ls_width_full{ 0 };
+ IVectorTile *_dst{nullptr};
+ int32_t _ls_width_full{0};
std::vector<int32_t> _ls_width_part{};
std::vector<std::pair<std::pair<std::string, std::string>, std::string>> _leftovers_x{};
std::string _coord_x{};
@@ -2709,13 +2740,13 @@ private:
void out_of_bound_initialize_x(std::string &coord)
{
- if(_mapper.gpu_sampler().address_mode_x == TensorSamplerAddressModeX::OverlappingMin)
+ if (_mapper.gpu_sampler().address_mode_x == TensorSamplerAddressModeX::OverlappingMin)
{
auto tensor_format = _mapper.tensor_argument()->format();
auto shape = tensor_format.shape;
_ls_width_part = decompose_leftover_ls_vector_width(shape[0] % _ls_width_full);
- if(_ls_width_part.size() != 0)
+ if (_ls_width_part.size() != 0)
{
_writer->write_text("if(" + coord + " > 0)\n");
_writer->compound_statement_begin();
@@ -2725,16 +2756,16 @@ private:
void out_of_bound_finalize_x()
{
- if(_mapper.gpu_sampler().address_mode_x == TensorSamplerAddressModeX::OverlappingMin)
+ if (_mapper.gpu_sampler().address_mode_x == TensorSamplerAddressModeX::OverlappingMin)
{
- if(_ls_width_part.size() != 0)
+ if (_ls_width_part.size() != 0)
{
_writer->compound_statement_end();
_writer->write_text("else\n");
_writer->compound_statement_begin();
out_of_bound_initialize_z(_coord_orig_z);
- for(auto &i : _leftovers_x)
+ for (auto &i : _leftovers_x)
{
out_of_bound_initialize_y(i.first.second);
_writer->write_text(i.second);
@@ -2753,7 +2784,7 @@ private:
const auto address_mode_y = _mapper.gpu_sampler().address_mode_y;
- switch(address_mode_y)
+ switch (address_mode_y)
{
case TensorSamplerAddressModeY::Skip:
case TensorSamplerAddressModeY::ClampToBorder:
@@ -2799,7 +2830,7 @@ private:
{
const auto address_mode_y = _mapper.gpu_sampler().address_mode_y;
- switch(address_mode_y)
+ switch (address_mode_y)
{
case TensorSamplerAddressModeY::ClampToBorder:
case TensorSamplerAddressModeY::ClampToBorderMaxEdgeOnly:
@@ -2816,7 +2847,7 @@ private:
assert(false);
}
- switch(address_mode_y)
+ switch (address_mode_y)
{
case TensorSamplerAddressModeY::ClampToBorder:
case TensorSamplerAddressModeY::ClampToBorderMinEdgeOnly:
@@ -2841,7 +2872,7 @@ private:
const auto address_mode_z = _mapper.gpu_sampler().address_mode_z;
- switch(address_mode_z)
+ switch (address_mode_z)
{
case TensorSamplerAddressModeZ::Skip:
max = _mapper.tensor_component_z();
@@ -2880,7 +2911,7 @@ private:
{
const auto address_mode_z = _mapper.gpu_sampler().address_mode_z;
- switch(address_mode_z)
+ switch (address_mode_z)
{
case TensorSamplerAddressModeZ::Skip:
case TensorSamplerAddressModeZ::SkipMinEdgeOnly:
@@ -2899,7 +2930,7 @@ private:
{
std::vector<int32_t> x;
- switch(ls_leftover_vector_width)
+ switch (ls_leftover_vector_width)
{
case 0:
break;
@@ -2961,13 +2992,13 @@ private:
return x;
}
- std::string to_ls_buffer(GpuLoadStoreType type, int32_t vector_width, const std::string &data,
- const std::string &address)
+ std::string
+ to_ls_buffer(GpuLoadStoreType type, int32_t vector_width, const std::string &data, const std::string &address)
{
- switch(type)
+ switch (type)
{
case GpuLoadStoreType::Load:
- if(vector_width != 1)
+ if (vector_width != 1)
{
return data + " = vload" + std::to_string(vector_width) + "(0, " + address + ")";
}
@@ -2977,7 +3008,7 @@ private:
}
break;
case GpuLoadStoreType::Store:
- if(vector_width != 1)
+ if (vector_width != 1)
{
return "vstore" + std::to_string(vector_width) + "(" + data + ", 0, " + address + ")";
}
@@ -2993,25 +3024,25 @@ private:
}
}
- std::string to_ls_buffer_address(const std::string &x, const std::string &y, const std::string &z,
- const std::string &b) const
+ std::string
+ to_ls_buffer_address(const std::string &x, const std::string &y, const std::string &z, const std::string &b) const
{
- auto tensor_storage = static_cast<GpuTensorStorage>(_mapper.gpu_sampler().storage);
+ auto tensor_storage = static_cast<GpuTensorStorage>(_mapper.gpu_sampler().storage);
assert(tensor_storage == GpuTensorStorage::BufferUint8Ptr);
- const std::string ptr_buf = _mapper.tensor_argument()->storage(tensor_storage);
- const std::string dst_type = get_cl_data_type(_dst->format().dt, 1);
+ const std::string ptr_buf = _mapper.tensor_argument()->storage(tensor_storage);
+ const std::string dst_type = get_cl_data_type(_dst->format().dt, 1);
std::string address;
address += "(__global ";
address += dst_type;
address += "*)(";
address += ptr_buf;
- if(x != "0" && (_mapper.is_one_component_x() != true))
+ if (x != "0" && (_mapper.is_one_component_x() != true))
{
address += " + (";
address += x + ") * sizeof(" + dst_type + ")";
}
- if(y != "0")
+ if (y != "0")
{
const std::string stride_y = _mapper.tensor_component_stride_y();
address += " + (";
@@ -3019,7 +3050,7 @@ private:
address += " * ";
address += stride_y;
}
- if(z != "0" && (_mapper.is_one_component_z() != true))
+ if (z != "0" && (_mapper.is_one_component_z() != true))
{
const std::string stride_z = _mapper.tensor_component_stride_z();
address += " + (";
@@ -3027,7 +3058,7 @@ private:
address += " * ";
address += stride_z;
}
- if(b != "0" && (_mapper.is_one_component_batch() != true))
+ if (b != "0" && (_mapper.is_one_component_batch() != true))
{
const std::string stride_b = _mapper.tensor_component_stride_batch();
address += " + (";
@@ -3043,32 +3074,32 @@ private:
class ClLoadStoreImage2dHelperWriter : public IGpuLoadStoreHelperWriter
{
public:
- static bool
- validate(IGpuKernelWriter *x, const GpuTensor3dMapper &mapper, GpuLoadStoreType type, IVectorTile *dst)
+ static bool validate(IGpuKernelWriter *x, const GpuTensor3dMapper &mapper, GpuLoadStoreType type, IVectorTile *dst)
{
CKW_UNUSED(x);
- if(dst->format().w != 4)
+ if (dst->format().w != 4)
{
return false;
}
- if(mapper.gpu_sampler().address_mode_x != TensorSamplerAddressModeX::None)
+ if (mapper.gpu_sampler().address_mode_x != TensorSamplerAddressModeX::None)
{
return false;
}
- if(mapper.gpu_sampler().address_mode_z != TensorSamplerAddressModeZ::None)
+ if (mapper.gpu_sampler().address_mode_z != TensorSamplerAddressModeZ::None)
{
return false;
}
- if(mapper.gpu_sampler().storage != GpuSamplerTensorStorage::Image2dReadOnly && type == GpuLoadStoreType::Load)
+ if (mapper.gpu_sampler().storage != GpuSamplerTensorStorage::Image2dReadOnly && type == GpuLoadStoreType::Load)
{
return false;
}
- if(mapper.gpu_sampler().storage != GpuSamplerTensorStorage::Image2dWriteOnly && type == GpuLoadStoreType::Store)
+ if (mapper.gpu_sampler().storage != GpuSamplerTensorStorage::Image2dWriteOnly &&
+ type == GpuLoadStoreType::Store)
{
return false;
}
- if((dst->format().dt != DataType::Fp32) && (dst->format().dt != DataType::Fp16))
+ if ((dst->format().dt != DataType::Fp32) && (dst->format().dt != DataType::Fp16))
{
return false;
}
@@ -3134,8 +3165,8 @@ public:
}
private:
- IVectorTile *_dst{ nullptr };
- int32_t _ls_width_full{ 0 };
+ IVectorTile *_dst{nullptr};
+ int32_t _ls_width_full{0};
std::string _coord_x{};
std::string _coord_z{};
std::string _coord_b{};
@@ -3146,7 +3177,7 @@ private:
const auto address_mode_y = _mapper.gpu_sampler().address_mode_y;
- switch(address_mode_y)
+ switch (address_mode_y)
{
case TensorSamplerAddressModeY::Skip:
max = _mapper.tensor_component_y();
@@ -3182,7 +3213,7 @@ private:
const auto address_mode_y = _mapper.gpu_sampler().address_mode_y;
- switch(address_mode_y)
+ switch (address_mode_y)
{
case TensorSamplerAddressModeY::Skip:
case TensorSamplerAddressModeY::SkipMinEdgeOnly:
@@ -3195,16 +3226,19 @@ private:
}
};
- std::string to_ls_image2d(GpuLoadStoreType type, int32_t vector_width, const std::string &data,
- const std::string &sampler, const std::string &coord)
+ std::string to_ls_image2d(GpuLoadStoreType type,
+ int32_t vector_width,
+ const std::string &data,
+ const std::string &sampler,
+ const std::string &coord)
{
CKW_UNUSED(vector_width);
auto tensor_storage = static_cast<GpuTensorStorage>(_mapper.gpu_sampler().storage);
const std::string image2d_obj = _mapper.tensor_argument()->storage(tensor_storage);
- const std::string post_fix = _dst->format().dt == DataType::Fp32 ? "f" : "h";
+ const std::string post_fix = _dst->format().dt == DataType::Fp32 ? "f" : "h";
- switch(type)
+ switch (type)
{
case GpuLoadStoreType::Load:
return data + " = read_image" + post_fix + "(" + image2d_obj + ", " + sampler + ", " + coord + ")";
@@ -3223,7 +3257,7 @@ private:
{
const auto address_mode_y = _mapper.gpu_sampler().address_mode_y;
- switch(address_mode_y)
+ switch (address_mode_y)
{
case TensorSamplerAddressModeY::None:
return "CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST";
@@ -3245,17 +3279,17 @@ private:
}
}
- std::string to_ls_image2d_coord(const std::string &x, const std::string &y, const std::string &z,
- const std::string &b) const
+ std::string
+ to_ls_image2d_coord(const std::string &x, const std::string &y, const std::string &z, const std::string &b) const
{
std::string coord_x = "(" + x + ") >> 2";
std::string coord_y = "(";
- if(y != "0")
+ if (y != "0")
{
coord_y += y;
}
- if(z != "0" && (_mapper.is_one_component_z() != true))
+ if (z != "0" && (_mapper.is_one_component_z() != true))
{
const std::string dim = _mapper.tensor_component_y();
coord_y += " + (";
@@ -3263,7 +3297,7 @@ private:
coord_y += " * ";
coord_y += dim;
}
- if(b != "0" && (_mapper.is_one_component_batch() != true))
+ if (b != "0" && (_mapper.is_one_component_batch() != true))
{
const std::string dim0 = _mapper.tensor_component_y();
const std::string dim1 = _mapper.tensor_component_z();
@@ -3292,7 +3326,7 @@ public:
create(IGpuKernelWriter *x, const GpuTensor3dMapper &mapper, GpuLoadStoreType type)
{
const auto tensor_storage = mapper.gpu_sampler().storage;
- switch(tensor_storage)
+ switch (tensor_storage)
{
case GpuSamplerTensorStorage::BufferUint8Ptr:
return std::make_unique<ClLoadStoreBufferHelperWriter>(x, mapper, type);
@@ -3352,14 +3386,14 @@ public:
IVectorTile *x = _data->tiles[name];
- for(auto &t : x->underlying_source_variables())
+ for (auto &t : x->underlying_source_variables())
{
_data->code += t.type.str + " " + t.str + ";\n";
}
}
- void declare_const_tile(const std::string &name, const std::vector<std::vector<std::string>> &in,
- DataType dt) override
+ void
+ declare_const_tile(const std::string &name, const std::vector<std::vector<std::string>> &in, DataType dt) override
{
assert(_data->tiles[name] == nullptr);
_data->tiles.insert(name, in, dt);
@@ -3387,7 +3421,8 @@ public:
{
assert(dst_var.type() == OperandType::Tile);
assert(_data->tiles.has_tile(dst_var.value()));
- assert(_data->tiles[dst_var.value()]->format().w == 1 && _data->tiles[dst_var.value()]->format().h == 1); // It must be a scalar variable
+ assert(_data->tiles[dst_var.value()]->format().w == 1 &&
+ _data->tiles[dst_var.value()]->format().h == 1); // It must be a scalar variable
auto var = _data->tiles[dst_var.value()];
@@ -3397,8 +3432,10 @@ public:
_data->code += ");\n";
};
- void op_get_global_coord(const Operand &o_dst, const Operand &o_step, const TensorOperand &o_tensor,
- int32_t dim) override
+ void op_get_global_coord(const Operand &o_dst,
+ const Operand &o_step,
+ const TensorOperand &o_tensor,
+ int32_t dim) override
{
OperandUnpacker operands(_data->tiles, _data->arguments);
auto dst = operands.unpack(o_dst);
@@ -3412,17 +3449,17 @@ public:
GpuTensor3dMapper mapper(tensor, gpu_sampler);
- switch(dim)
+ switch (dim)
{
case 0:
- if(mapper.is_one_component_x())
+ if (mapper.is_one_component_x())
{
_data->code += dst->scalar(0, 0).str;
_data->code += " = 0;\n";
}
else
{
- if(mapper.gpu_sampler().address_mode_x == TensorSamplerAddressModeX::OverlappingMin)
+ if (mapper.gpu_sampler().address_mode_x == TensorSamplerAddressModeX::OverlappingMin)
{
// Validation: Check: fixed tensor shape
// TO BE CHANGED
@@ -3441,14 +3478,14 @@ public:
}
break;
case 1:
- if(mapper.is_one_component_y())
+ if (mapper.is_one_component_y())
{
_data->code += dst->scalar(0, 0).str;
_data->code += " = 0;\n";
}
else
{
- if(mapper.gpu_sampler().address_mode_y == TensorSamplerAddressModeY::OverlappingMin)
+ if (mapper.gpu_sampler().address_mode_y == TensorSamplerAddressModeY::OverlappingMin)
{
}
else
@@ -3461,7 +3498,7 @@ public:
}
break;
case 2:
- if(mapper.is_one_component_z())
+ if (mapper.is_one_component_z())
{
_data->code += dst->scalar(0, 0).str;
_data->code += " = 0;\n";
@@ -3490,7 +3527,7 @@ public:
GpuTensor3dMapper mapper(tensor, gpu_sampler);
- if(mapper.is_one_component_batch())
+ if (mapper.is_one_component_batch())
{
_data->code += dst->scalar(0, 0).str;
_data->code += " = 0;\n";
@@ -3506,7 +3543,8 @@ public:
{
assert(dst_var.type() == OperandType::Tile);
assert(_data->tiles.has_tile(dst_var.value()));
- assert(_data->tiles[dst_var.value()]->format().w == 1 && _data->tiles[dst_var.value()]->format().h == 1); // It must be a scalar variable
+ assert(_data->tiles[dst_var.value()]->format().w == 1 &&
+ _data->tiles[dst_var.value()]->format().h == 1); // It must be a scalar variable
auto var = _data->tiles[dst_var.value()];
@@ -3532,7 +3570,7 @@ public:
const std::string src_prefix = broadcast_src_x ? "(" + dt + ")" : "";
// Broadcasting on Y is automatic
- for(int32_t y = 0; y < dst_h; ++y)
+ for (int32_t y = 0; y < dst_h; ++y)
{
_data->code += dst->vector(y).str;
_data->code += " = ";
@@ -3542,7 +3580,9 @@ public:
}
}
- void op_binary_expression(const Operand &dst_name, const Operand &lhs_name, BinaryOp op,
+ void op_binary_expression(const Operand &dst_name,
+ const Operand &lhs_name,
+ BinaryOp op,
const Operand &rhs_name) override
{
OperandUnpacker operands(_data->tiles, _data->arguments);
@@ -3556,14 +3596,14 @@ public:
const int32_t lhs_w = lhs->format().w;
const int32_t rhs_w = rhs->format().w;
- if(op == BinaryOp::MatMul_Nt_T)
+ if (op == BinaryOp::MatMul_Nt_T)
{
assert((dst->format().dt == DataType::Fp32) || (dst->format().dt == DataType::Fp16));
- for(int32_t y = 0; y < dst_h; ++y)
+ for (int32_t y = 0; y < dst_h; ++y)
{
- for(int32_t x = 0; x < dst_w; ++x)
+ for (int32_t x = 0; x < dst_w; ++x)
{
- for(int32_t k = 0; k < lhs_w; ++k)
+ for (int32_t k = 0; k < lhs_w; ++k)
{
_data->code += dst->scalar(x, y).str;
_data->code += " = fma(";
@@ -3583,12 +3623,14 @@ public:
const bool broadcast_lhs_x = dst_w != 1 && lhs_w == 1;
const bool broadcast_rhs_x = dst_w != 1 && rhs_w == 1;
- const std::string lhs_prefix = broadcast_lhs_x ? "(" + dst->underlying_source_variables()[0].type.str + ")" : "";
- const std::string rhs_prefix = broadcast_rhs_x ? "(" + dst->underlying_source_variables()[0].type.str + ")" : "";
- const std::string op_str = to_string(op);
+ const std::string lhs_prefix =
+ broadcast_lhs_x ? "(" + dst->underlying_source_variables()[0].type.str + ")" : "";
+ const std::string rhs_prefix =
+ broadcast_rhs_x ? "(" + dst->underlying_source_variables()[0].type.str + ")" : "";
+ const std::string op_str = to_string(op);
// Broadcasting on Y is automatic
- for(int32_t y = 0; y < dst_h; ++y)
+ for (int32_t y = 0; y < dst_h; ++y)
{
_data->code += dst->vector(y).str;
_data->code += " = ";
@@ -3607,13 +3649,13 @@ public:
const IVectorTile *src = operands.unpack(o_src);
const IVectorTile *dst = operands.unpack(o_dst);
// const int32_t dst_w = dst->format().w;
- const int32_t dst_h = dst->format().h;
- const std::string dt = dst->underlying_source_variables()[0].type.str;
- const bool is_float = (dst->format().dt == DataType::Fp32) || (dst->format().dt == DataType::Fp16);
- const std::string sat = ((policy == ConvertPolicy::Saturate && !is_float) ? "_sat" : "");
+ const int32_t dst_h = dst->format().h;
+ const std::string dt = dst->underlying_source_variables()[0].type.str;
+ const bool is_float = (dst->format().dt == DataType::Fp32) || (dst->format().dt == DataType::Fp16);
+ const std::string sat = ((policy == ConvertPolicy::Saturate && !is_float) ? "_sat" : "");
// Broadcasting on Y is automatic
- for(int32_t y = 0; y < dst_h; ++y)
+ for (int32_t y = 0; y < dst_h; ++y)
{
_data->code += dst->vector(y).str;
_data->code += " = convert_" + dt + sat + "(";
@@ -3638,7 +3680,7 @@ public:
const std::string src_prefix = broadcast_src_x ? "(" + dt + ")" : "";
// Broadcasting on Y is automatic
- for(int32_t y = 0; y < dst_h; ++y)
+ for (int32_t y = 0; y < dst_h; ++y)
{
_data->code += dst->vector(y).str;
_data->code += " = ";
@@ -3647,8 +3689,7 @@ public:
}
}
- void
- op_unary_elementwise_function(const Operand &dst_name, UnaryFunction func, const Operand &src_name) override
+ void op_unary_elementwise_function(const Operand &dst_name, UnaryFunction func, const Operand &src_name) override
{
OperandUnpacker operands(_data->tiles, _data->arguments);
const IVectorTile *src = operands.unpack(src_name);
@@ -3665,12 +3706,12 @@ public:
const std::string src_prefix = "(" + dt + ")";
// Broadcasting on Y is automatic
- for(int32_t y = 0; y < dst_h; ++y)
+ for (int32_t y = 0; y < dst_h; ++y)
{
_data->code += dst->vector(y).str;
_data->code += " = ";
- switch(func)
+ switch (func)
{
case UnaryFunction::Exp:
_data->code += "exp(";
@@ -3708,7 +3749,10 @@ public:
}
}
- void op_binary_elementwise_function(const Operand &dst_name, BinaryFunction func, const Operand &first_name, const Operand &second_name) override
+ void op_binary_elementwise_function(const Operand &dst_name,
+ BinaryFunction func,
+ const Operand &first_name,
+ const Operand &second_name) override
{
OperandUnpacker operands(_data->tiles, _data->arguments);
const IVectorTile *first = operands.unpack(first_name);
@@ -3726,12 +3770,12 @@ public:
const bool is_float = (datatype.dt == DataType::Fp32 || datatype.dt == DataType::Fp16);
// Broadcasting on Y is automatic
- for(int32_t y = 0; y < dst_h; ++y)
+ for (int32_t y = 0; y < dst_h; ++y)
{
_data->code += dst->vector(y).str;
_data->code += " = ";
- switch(func)
+ switch (func)
{
case BinaryFunction::Min:
_data->code += is_float ? "fmin(" : "min(";
@@ -3750,7 +3794,11 @@ public:
}
}
- void op_ternary_elementwise_function(const Operand &dst_name, TernaryFunction func, const Operand &first_name, const Operand &second_name, const Operand &third_name) override
+ void op_ternary_elementwise_function(const Operand &dst_name,
+ TernaryFunction func,
+ const Operand &first_name,
+ const Operand &second_name,
+ const Operand &third_name) override
{
OperandUnpacker operands(_data->tiles, _data->arguments);
const IVectorTile *first = operands.unpack(first_name);
@@ -3758,8 +3806,8 @@ public:
const IVectorTile *third = operands.unpack(third_name);
const IVectorTile *dst = operands.unpack(dst_name);
- const int32_t dst_h = dst->format().h;
- const std::string dt = dst->underlying_source_variables()[0].type.str;
+ const int32_t dst_h = dst->format().h;
+ const std::string dt = dst->underlying_source_variables()[0].type.str;
// Always perform an explicit cast. See similar comments in op_unary_elementwise_function
const std::string first_prefix = "(" + dt + ")";
@@ -3767,12 +3815,12 @@ public:
const std::string third_prefix = "(" + dt + ")";
// Broadcasting on Y is automatic
- for(int32_t y = 0; y < dst_h; ++y)
+ for (int32_t y = 0; y < dst_h; ++y)
{
_data->code += dst->vector(y).str;
_data->code += " = ";
- switch(func)
+ switch (func)
{
case TernaryFunction::Select:
_data->code += "select(";
@@ -3822,7 +3870,12 @@ public:
_data->code += "else\n";
}
- void op_for_loop_header(const Operand& var_name, BinaryOp cond_op, const Operand& cond_value_name, const Operand &update_var_name, AssignmentOp update_op, const Operand& update_value_name) override
+ void op_for_loop_header(const Operand &var_name,
+ BinaryOp cond_op,
+ const Operand &cond_value_name,
+ const Operand &update_var_name,
+ AssignmentOp update_op,
+ const Operand &update_value_name) override
{
OperandUnpacker operands(_data->tiles, _data->arguments);
const IVectorTile *var = operands.unpack(var_name);
@@ -3850,9 +3903,13 @@ public:
_data->code += "\n";
}
- void op_load_immediate(const TensorOperand &o_tensor, const Operand &o_dst, const Operand &o_x,
- const Operand &o_y, const Operand &o_z, const Operand &o_batch_idx,
- const Operand &dilation_y) override
+ void op_load_immediate(const TensorOperand &o_tensor,
+ const Operand &o_dst,
+ const Operand &o_x,
+ const Operand &o_y,
+ const Operand &o_z,
+ const Operand &o_batch_idx,
+ const Operand &dilation_y) override
{
OperandUnpacker operands(_data->tiles, _data->arguments);
@@ -3875,10 +3932,10 @@ public:
// Initialize the constant part
load_writer->initialize(dst, x, z, b);
- for(int i = 0; i < dst->format().h; ++i)
+ for (int i = 0; i < dst->format().h; ++i)
{
std::string coord_y = y->scalar(0, 0).str + " + " + std::to_string(i);
- if(dil_y->scalar(0, 0).str != "1")
+ if (dil_y->scalar(0, 0).str != "1")
{
coord_y += " * " + dil_y->scalar(0, 0).str;
}
@@ -3888,9 +3945,12 @@ public:
load_writer->finalize();
}
- void op_load_indirect(const TensorOperand &o_tensor, const Operand &o_dst, const Operand &o_x,
- const Operand &o_indirect_h, const Operand &o_z,
- const Operand &o_batch_idx) override
+ void op_load_indirect(const TensorOperand &o_tensor,
+ const Operand &o_dst,
+ const Operand &o_x,
+ const Operand &o_indirect_h,
+ const Operand &o_z,
+ const Operand &o_batch_idx) override
{
OperandUnpacker operands(_data->tiles, _data->arguments);
@@ -3912,7 +3972,7 @@ public:
// Initialize the constant part
load_writer->initialize(dst, x, z, b);
- for(int i = 0; i < dst->format().h; ++i)
+ for (int i = 0; i < dst->format().h; ++i)
{
load_writer->write(std::make_pair(i, y_ind->scalar(0, i).str));
}
@@ -3920,9 +3980,12 @@ public:
load_writer->finalize();
}
- void op_store_immediate(const TensorOperand &tensor_name, const Operand &src_name, const Operand &x_name,
- const Operand &y_name, const Operand &z_name,
- const Operand &batch_index_name) override
+ void op_store_immediate(const TensorOperand &tensor_name,
+ const Operand &src_name,
+ const Operand &x_name,
+ const Operand &y_name,
+ const Operand &z_name,
+ const Operand &batch_index_name) override
{
OperandUnpacker operands(_data->tiles, _data->arguments);
@@ -3946,7 +4009,7 @@ public:
int32_t tile_h = src->format().h;
- for(int m0 = tile_h - 1; m0 >= 0; m0--)
+ for (int m0 = tile_h - 1; m0 >= 0; m0--)
{
store_writer->write(std::make_pair(m0, y->scalar(0, 0).str + " + " + std::to_string(m0)));
}
@@ -3959,8 +4022,12 @@ public:
_data->code += "return;\n";
}
- void util_get_indirect_buffer(const Operand &o_dst, const TensorOperand &o_tensor, const Operand &o_x,
- const Operand &o_y, const Operand &o_x_off, const Operand &o_y_off) override
+ void util_get_indirect_buffer(const Operand &o_dst,
+ const TensorOperand &o_tensor,
+ const Operand &o_x,
+ const Operand &o_y,
+ const Operand &o_x_off,
+ const Operand &o_y_off) override
{
OperandUnpacker operands(_data->tiles, _data->arguments);
const IVectorTile *dst = operands.unpack(o_dst);
@@ -4002,7 +4069,7 @@ public:
declare_tile("_y_s", TileInfo(DataType::Int32));
auto x_s = operands.unpack(Operand("_x_s"));
auto y_s = operands.unpack(Operand("_y_s"));
- for(int i = 0; i < dst->format().h; ++i)
+ for (int i = 0; i < dst->format().h; ++i)
{
// x_s = (xi_0 + x_k);
// y_s = (yi_0 + y_k);
@@ -4060,8 +4127,8 @@ public:
}
private:
- GpuKernelWriterDataHolder *_data{ nullptr };
- GpuKernelWriterAttribute *_attr{ nullptr };
+ GpuKernelWriterDataHolder *_data{nullptr};
+ GpuKernelWriterAttribute *_attr{nullptr};
};
/** IGpuKernelWriter factory class */
@@ -4074,10 +4141,9 @@ public:
*
* @return IGpuKernelWriter
*/
- static std::unique_ptr<IGpuKernelWriter>
- create(GpuKernelWriterAttribute *attr, GpuKernelWriterDataHolder *x)
+ static std::unique_ptr<IGpuKernelWriter> create(GpuKernelWriterAttribute *attr, GpuKernelWriterDataHolder *x)
{
- switch(x->programming_language())
+ switch (x->programming_language())
{
case GpuTargetLanguage::OpenCL:
return std::make_unique<ClKernelWriter>(attr, x);
@@ -4094,9 +4160,9 @@ adjust_step(TensorSamplerFormat tensor_format, int32_t step, const TensorInfo *t
{
auto tensor = tensor_info_id->shape;
- int32_t dim[3] = { 0 };
+ int32_t dim[3] = {0};
- switch(tensor_format)
+ switch (tensor_format)
{
case TensorSamplerFormat::C_W_H:
dim[0] = tensor[0];
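
All of the hunks in this file follow the same mechanical pattern: clang-format 14 adds a space after control-flow keywords (switch (, if (, for (), tightens braced initializers ({0} instead of { 0 }), and, once a declaration exceeds the column limit, breaks it into one aligned parameter per line. The configuration file itself is not part of this delivery, so as a hedged illustration only, here is a small self-contained C++ snippet written in the post-format style; none of these names exist in the repository:

#include <cstdint>
#include <string>

enum class Mode
{
    A,
    B
};

// Long signatures are broken to one parameter per line, with names aligned.
std::string describe(Mode        mode,
                     int32_t     value,
                     const char *name)
{
    int32_t acc{0}; // tight braced initializer, no inner spaces

    switch (mode) // space between the keyword and '('
    {
        case Mode::A:
            acc = value;
            break;
        case Mode::B:
            acc = -value;
            break;
        default:
            break;
    }
    return std::string(name) + ": " + std::to_string(acc);
}
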
diff --git a/compute_kernel_writer/prototype/src/TensorOperand.cpp b/compute_kernel_writer/prototype/src/TensorOperand.cpp
index c6725d3b26..d1aefbbb71 100644
--- a/compute_kernel_writer/prototype/src/TensorOperand.cpp
+++ b/compute_kernel_writer/prototype/src/TensorOperand.cpp
@@ -23,10 +23,12 @@
*/
#include "ckw/TensorOperand.h"
+
#include "ckw/Error.h"
#include "ckw/Kernel.h"
#include "ckw/TensorInfo.h"
#include "ckw/TileOperand.h"
+
#include "src/Prototype.h"
namespace ckw
@@ -35,9 +37,11 @@ namespace ckw
namespace
{
-TensorComponentOperand &get_or_create_component(TensorOperand &tensor, std::unique_ptr<TensorComponentOperand> &ptr, TensorComponentType component)
+TensorComponentOperand &get_or_create_component(TensorOperand &tensor,
+ std::unique_ptr<TensorComponentOperand> &ptr,
+ TensorComponentType component)
{
- if(ptr == nullptr)
+ if (ptr == nullptr)
{
ptr = std::make_unique<TensorComponentOperand>(tensor, component);
}
@@ -59,7 +63,7 @@ TensorOperand::TensorOperand(const std::string &name, const TensorInfo &info, Te
prototype::Operand TensorOperand::create_impl_operand(prototype::IGpuKernelWriter *writer) const
{
CKW_UNUSED(writer);
- return { name() };
+ return {name()};
}
const TensorInfo &TensorOperand::info() const
@@ -206,9 +210,9 @@ TensorComponentType TensorComponentOperand::component_type() const
prototype::Operand TensorComponentOperand::create_impl_operand(prototype::IGpuKernelWriter *writer) const
{
CKW_UNUSED(writer);
- prototype::OperandType type{ prototype::OperandType::Unknown };
+ prototype::OperandType type{prototype::OperandType::Unknown};
- switch(_component)
+ switch (_component)
{
case TensorComponentType::OffsetFirstElement:
type = prototype::OperandType::TensorDataOffset;
diff --git a/compute_kernel_writer/prototype/src/TensorTileSampler.cpp b/compute_kernel_writer/prototype/src/TensorTileSampler.cpp
index 28e54df3a5..bf9f946ce8 100644
--- a/compute_kernel_writer/prototype/src/TensorTileSampler.cpp
+++ b/compute_kernel_writer/prototype/src/TensorTileSampler.cpp
@@ -23,6 +23,7 @@
*/
#include "ckw/TensorTileSampler.h"
+
#include "ckw/TileOperand.h"
#include "ckw/types/TensorSamplerTypes.h"
@@ -33,24 +34,47 @@ TensorTileSampler::TensorTileSampler()
{
}
-TensorTileSampler::TensorTileSampler(
- TileOperand &x, TileOperand &y, TileOperand &z, TileOperand &b,
- TensorSamplerFormat format,
- TensorSamplerAddressModeX address_mode_x,
- TensorSamplerAddressModeY address_mode_y,
- TensorSamplerAddressModeZ address_mode_z)
- : _x(&x), _y(&y), _z(&z), _b(&b), _height(0), _width(0), _format(format), _address_mode_x(address_mode_x), _address_mode_y(address_mode_y), _address_mode_z(address_mode_z)
-{
-}
-
-TensorTileSampler::TensorTileSampler(
- TileOperand &x, TileOperand &y, TileOperand &z, TileOperand &b,
- int32_t height, int32_t width,
- TensorSamplerFormat format,
- TensorSamplerAddressModeX address_mode_x,
- TensorSamplerAddressModeY address_mode_y,
- TensorSamplerAddressModeZ address_mode_z)
- : _x(&x), _y(&y), _z(&z), _b(&b), _height(height), _width(width), _format(format), _address_mode_x(address_mode_x), _address_mode_y(address_mode_y), _address_mode_z(address_mode_z)
+TensorTileSampler::TensorTileSampler(TileOperand &x,
+ TileOperand &y,
+ TileOperand &z,
+ TileOperand &b,
+ TensorSamplerFormat format,
+ TensorSamplerAddressModeX address_mode_x,
+ TensorSamplerAddressModeY address_mode_y,
+ TensorSamplerAddressModeZ address_mode_z)
+ : _x(&x),
+ _y(&y),
+ _z(&z),
+ _b(&b),
+ _height(0),
+ _width(0),
+ _format(format),
+ _address_mode_x(address_mode_x),
+ _address_mode_y(address_mode_y),
+ _address_mode_z(address_mode_z)
+{
+}
+
+TensorTileSampler::TensorTileSampler(TileOperand &x,
+ TileOperand &y,
+ TileOperand &z,
+ TileOperand &b,
+ int32_t height,
+ int32_t width,
+ TensorSamplerFormat format,
+ TensorSamplerAddressModeX address_mode_x,
+ TensorSamplerAddressModeY address_mode_y,
+ TensorSamplerAddressModeZ address_mode_z)
+ : _x(&x),
+ _y(&y),
+ _z(&z),
+ _b(&b),
+ _height(height),
+ _width(width),
+ _format(format),
+ _address_mode_x(address_mode_x),
+ _address_mode_y(address_mode_y),
+ _address_mode_z(address_mode_z)
{
}
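
The reflowed constructor above makes the full ten-parameter signature easy to scan. As a rough usage sketch (the tile names and enum choices are assumptions, not code from this commit, though every type and value appears elsewhere in this diff), it could be invoked like this:

#include "ckw/TensorTileSampler.h"
#include "ckw/TileOperand.h"
#include "ckw/types/TensorSamplerTypes.h"

using namespace ckw;

TensorTileSampler make_example_sampler()
{
    // The sampler stores pointers to the coordinate operands (see _x(&x)
    // above), so they must outlive it; statics keep the sketch self-contained.
    static TileOperand x("gid_x", DataType::Int32);
    static TileOperand y("gid_y", DataType::Int32);
    static TileOperand z("gid_z", DataType::Int32);
    static TileOperand b("gid_b", DataType::Int32);

    return TensorTileSampler(x, y, z, b, /* height */ 4, /* width */ 4,
                             TensorSamplerFormat::C_W_H,
                             TensorSamplerAddressModeX::None,
                             TensorSamplerAddressModeY::ClampToBorder,
                             TensorSamplerAddressModeZ::Skip);
}
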
diff --git a/compute_kernel_writer/prototype/src/TileInfo.cpp b/compute_kernel_writer/prototype/src/TileInfo.cpp
index 66d8cb1620..273266eedc 100644
--- a/compute_kernel_writer/prototype/src/TileInfo.cpp
+++ b/compute_kernel_writer/prototype/src/TileInfo.cpp
@@ -26,18 +26,15 @@
namespace ckw
{
-TileInfo::TileInfo(DataType dt)
- : _dt(dt), _shape({ { 1, 1 } })
+TileInfo::TileInfo(DataType dt) : _dt(dt), _shape({{1, 1}})
{
}
-TileInfo::TileInfo(DataType dt, int32_t w)
- : _dt(dt), _shape({ { w, 1 } })
+TileInfo::TileInfo(DataType dt, int32_t w) : _dt(dt), _shape({{w, 1}})
{
}
-TileInfo::TileInfo(DataType dt, int32_t h, int32_t w)
- : _dt(dt), _shape({ { w, h } })
+TileInfo::TileInfo(DataType dt, int32_t h, int32_t w) : _dt(dt), _shape({{w, h}})
{
}
diff --git a/compute_kernel_writer/prototype/src/TileOperand.cpp b/compute_kernel_writer/prototype/src/TileOperand.cpp
index 0eb2ca6a64..e09c833d96 100644
--- a/compute_kernel_writer/prototype/src/TileOperand.cpp
+++ b/compute_kernel_writer/prototype/src/TileOperand.cpp
@@ -23,47 +23,43 @@
*/
#include "ckw/TileOperand.h"
+
#include "ckw/Error.h"
+
#include "src/Prototype.h"
namespace ckw
{
TileOperand::TileOperand(const std::string &name, const TileInfo &info)
- : OperandBase(name),
- _info(info),
- _value{ std::vector<std::string>{ "0" } },
- _constant(false)
+ : OperandBase(name), _info(info), _value{std::vector<std::string>{"0"}}, _constant(false)
{
}
TileOperand::TileOperand(const std::string &name, DataType data_type)
- : OperandBase(name),
- _info(TileInfo{ data_type }),
- _value{ std::vector<std::string>{ "0" } },
- _constant(false)
+ : OperandBase(name), _info(TileInfo{data_type}), _value{std::vector<std::string>{"0"}}, _constant(false)
{
}
TileOperand::TileOperand(const std::string &name, int32_t value)
: OperandBase(name),
- _info(TileInfo{ DataType::Int32 }),
- _value{ std::vector<std::string>{ std::to_string(value) } },
+ _info(TileInfo{DataType::Int32}),
+ _value{std::vector<std::string>{std::to_string(value)}},
_constant(true)
{
}
TileOperand::TileOperand(const std::string &name, float value)
: OperandBase(name),
- _info(TileInfo{ DataType::Fp32 }),
- _value{ std::vector<std::string>{ std::to_string(value) } },
+ _info(TileInfo{DataType::Fp32}),
+ _value{std::vector<std::string>{std::to_string(value)}},
_constant(true)
{
}
TileOperand::TileOperand(const std::string &name, const TileContainer &vals, DataType dt)
: OperandBase(name),
- _info(TileInfo{ dt, static_cast<int32_t>(vals.size()), static_cast<int32_t>(vals[0].size()) }),
+ _info(TileInfo{dt, static_cast<int32_t>(vals.size()), static_cast<int32_t>(vals[0].size())}),
_value(vals),
_constant(true)
{
@@ -73,11 +69,11 @@ prototype::Operand TileOperand::create_impl_operand(prototype::IGpuKernelWriter
{
CKW_UNUSED(writer);
- if(_constant)
+ if (_constant)
{
- if(is_scalar())
+ if (is_scalar())
{
- switch(_info.data_type())
+ switch (_info.data_type())
{
case DataType::Int32:
return prototype::Operand(_value[0][0], prototype::OperandType::ScalarInt32);
diff --git a/compute_kernel_writer/src/Error.cpp b/compute_kernel_writer/src/Error.cpp
index c5dae2eb75..e1e4bffcec 100644
--- a/compute_kernel_writer/src/Error.cpp
+++ b/compute_kernel_writer/src/Error.cpp
@@ -28,8 +28,8 @@
namespace ckw
{
-std::string create_error_msg(const std::string &file, const std::string &func, const std::string &line,
- const std::string &msg)
+std::string
+create_error_msg(const std::string &file, const std::string &func, const std::string &line, const std::string &msg)
{
std::string err;
err += "[COMPUTE_KERNEL_WRITER][ERROR]:";
@@ -38,4 +38,4 @@ std::string create_error_msg(const std::string &file, const std::string &func, c
err += " " + msg;
return err;
}
-} // namespace ckw
\ No newline at end of file
+} // namespace ckw
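
For context, create_error_msg stitches the four strings onto the [COMPUTE_KERNEL_WRITER][ERROR] prefix shown above. A minimal sketch of a call site, assuming the function is declared in ckw/Error.h as the error macros suggest:

#include "ckw/Error.h"

#include <string>

const std::string err =
    ckw::create_error_msg(__FILE__, __func__, std::to_string(__LINE__), "Unsupported data type");
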
diff --git a/compute_kernel_writer/src/Helpers.cpp b/compute_kernel_writer/src/Helpers.cpp
index 799f79a187..82d4c4e917 100644
--- a/compute_kernel_writer/src/Helpers.cpp
+++ b/compute_kernel_writer/src/Helpers.cpp
@@ -22,15 +22,15 @@
* SOFTWARE.
*/
-#include "ckw/Error.h"
-
#include "src/Helpers.h"
+#include "ckw/Error.h"
+
namespace ckw
{
std::string dec_to_hex_as_string(int32_t dec)
{
- switch(dec)
+ switch (dec)
{
case 0:
case 1:
diff --git a/compute_kernel_writer/src/ITensorArgument.h b/compute_kernel_writer/src/ITensorArgument.h
index 838bd40f85..ece45a4dc4 100644
--- a/compute_kernel_writer/src/ITensorArgument.h
+++ b/compute_kernel_writer/src/ITensorArgument.h
@@ -28,6 +28,7 @@
#include "ckw/TensorInfo.h"
#include "ckw/types/TensorComponentType.h"
#include "ckw/types/TensorStorageType.h"
+
#include "src/ITile.h"
#include <string>
@@ -41,8 +42,8 @@ class ITensorComponent;
/** Tensor storage variable */
struct TensorStorageVariable
{
- std::string val{ "" }; /** Tensor storage as a string */
- TensorStorageType type{ TensorStorageType::Unknown }; /** Tensor storage type */
+ std::string val{""}; /** Tensor storage as a string */
+ TensorStorageType type{TensorStorageType::Unknown}; /** Tensor storage type */
};
/** Tensor argument base class.
@@ -83,8 +84,8 @@ public:
}
protected:
- TensorInfo _info{}; // Tensor info
- std::string _basename{ "" }; // Tensor name
+ TensorInfo _info{}; // Tensor info
+ std::string _basename{""}; // Tensor name
};
/** Tensor component argument base class */
diff --git a/compute_kernel_writer/src/ITensorComponent.h b/compute_kernel_writer/src/ITensorComponent.h
index e2775b62b0..f9c9d8fd81 100644
--- a/compute_kernel_writer/src/ITensorComponent.h
+++ b/compute_kernel_writer/src/ITensorComponent.h
@@ -26,6 +26,7 @@
#define CKW_SRC_ITENSORCOMPONENT_H
#include "ckw/types/TensorComponentType.h"
+
#include "src/ITile.h"
namespace ckw
diff --git a/compute_kernel_writer/src/ITile.h b/compute_kernel_writer/src/ITile.h
index 73b7315fb5..8eaac5ac12 100644
--- a/compute_kernel_writer/src/ITile.h
+++ b/compute_kernel_writer/src/ITile.h
@@ -37,15 +37,15 @@ using TileContainer = std::vector<std::vector<std::string>>;
/** Tile descriptor which reports the underlying datatype and vector length */
struct TileVariableDescriptor
{
- DataType dt{ DataType::Unknown }; /** Data type */
- int32_t len{ 1 }; /** Number of elements in a single variable. For example, 1 for scalar */
+ DataType dt{DataType::Unknown}; /** Data type */
+ int32_t len{1}; /** Number of elements in a single variable. For example, 1 for scalar */
};
/** Tile variable */
struct TileVariable
{
- std::string str{ "" }; /** Tile variable as a string */
- TileVariableDescriptor desc{}; /** Tile value descriptor which reports the datatype and vector length */
+ std::string str{""}; /** Tile variable as a string */
+ TileVariableDescriptor desc{}; /** Tile value descriptor which reports the datatype and vector length */
};
/** Interface to provide support for scalar access for a Tile.
diff --git a/compute_kernel_writer/src/Kernel.cpp b/compute_kernel_writer/src/Kernel.cpp
index bfb0f46300..12389b3816 100644
--- a/compute_kernel_writer/src/Kernel.cpp
+++ b/compute_kernel_writer/src/Kernel.cpp
@@ -23,6 +23,7 @@
*/
#include "ckw/Kernel.h"
+
#include "ckw/types/TargetLanguage.h"
namespace ckw
diff --git a/compute_kernel_writer/src/KernelArgument.cpp b/compute_kernel_writer/src/KernelArgument.cpp
index a31ca1757b..a640d36507 100644
--- a/compute_kernel_writer/src/KernelArgument.cpp
+++ b/compute_kernel_writer/src/KernelArgument.cpp
@@ -23,6 +23,7 @@
*/
#include "ckw/KernelArgument.h"
+
#include "ckw/Error.h"
namespace ckw
diff --git a/compute_kernel_writer/src/KernelWriter.cpp b/compute_kernel_writer/src/KernelWriter.cpp
index 0bea1200b7..a478231c09 100644
--- a/compute_kernel_writer/src/KernelWriter.cpp
+++ b/compute_kernel_writer/src/KernelWriter.cpp
@@ -23,14 +23,16 @@
*/
#include "ckw/KernelWriter.h"
+
#include "ckw/Error.h"
#include "ckw/TileOperand.h"
#include "ckw/types/TargetArchitecture.h"
#include "ckw/types/TargetLanguage.h"
-#include "src/TileView.h"
+
#include "src/cl/CLKernelWriter.h"
#include "src/cl/CLTensorArgument.h"
#include "src/cl/CLTile.h"
+#include "src/TileView.h"
#include <tuple>
@@ -42,7 +44,7 @@ KernelWriter::~KernelWriter() = default;
std::unique_ptr<KernelWriter> KernelWriter::create_instance(TargetArchitecture architecture, TargetLanguage language)
{
CKW_UNUSED(architecture);
- switch(language)
+ switch (language)
{
case TargetLanguage::OpenCL:
// Currently this is the oldest and the only supported GPU architecture.
@@ -95,7 +97,7 @@ TileOperand KernelWriter::create_tile_operand(ITile &tile)
std::tuple<ITile &, TileArea> KernelWriter::get_tile(const TileOperand &operand)
{
- return { *operand._tile, { operand._row_start, operand._row_end, operand._col_start, operand._col_end } };
+ return {*operand._tile, {operand._row_start, operand._row_end, operand._col_start, operand._col_end}};
}
TensorOperand KernelWriter::create_tensor_operand(ITensor &tensor)
diff --git a/compute_kernel_writer/src/Tensor3dMapper.cpp b/compute_kernel_writer/src/Tensor3dMapper.cpp
index 7384b924da..acef6412a4 100644
--- a/compute_kernel_writer/src/Tensor3dMapper.cpp
+++ b/compute_kernel_writer/src/Tensor3dMapper.cpp
@@ -26,19 +26,19 @@
#include "ckw/Error.h"
#include "ckw/types/TensorSamplerTypes.h"
+
#include "src/ITensor.h"
#include "src/ITile.h"
namespace ckw
{
-Tensor3dMapper::Tensor3dMapper(ITensor *tensor, TensorSamplerFormat format)
- : _tensor(tensor), _format(format)
+Tensor3dMapper::Tensor3dMapper(ITensor *tensor, TensorSamplerFormat format) : _tensor(tensor), _format(format)
{
}
TileVariable Tensor3dMapper::dim_x() const
{
- switch(_format)
+ switch (_format)
{
case TensorSamplerFormat::Dim0_Dim1xDim2_1:
case TensorSamplerFormat::Dim0_Dim1_Dim2:
@@ -51,7 +51,7 @@ TileVariable Tensor3dMapper::dim_x() const
TileVariable Tensor3dMapper::dim_y() const
{
- switch(_format)
+ switch (_format)
{
case TensorSamplerFormat::Dim0_Dim1xDim2_1:
return _tensor->component(TensorComponentType::Dim1xDim2).scalar(0, 0);
@@ -67,10 +67,10 @@ TileVariable Tensor3dMapper::dim_z() const
{
TileVariable dim_one;
- switch(_format)
+ switch (_format)
{
case TensorSamplerFormat::Dim0_Dim1xDim2_1:
- dim_one = _tensor->component(TensorComponentType::Dim3).scalar(0, 0);
+ dim_one = _tensor->component(TensorComponentType::Dim3).scalar(0, 0);
dim_one.str = "1";
return dim_one;
case TensorSamplerFormat::Dim0_Dim1_Dim2:
@@ -85,7 +85,7 @@ TileVariable Tensor3dMapper::dim_batch() const
{
TileVariable dim_one;
- switch(_format)
+ switch (_format)
{
case TensorSamplerFormat::Dim0_Dim1xDim2_1:
case TensorSamplerFormat::Dim0_Dim1_Dim2:
@@ -98,7 +98,7 @@ TileVariable Tensor3dMapper::dim_batch() const
TileVariable Tensor3dMapper::stride_x() const
{
- switch(_format)
+ switch (_format)
{
case TensorSamplerFormat::Dim0_Dim1xDim2_1:
case TensorSamplerFormat::Dim0_Dim1_Dim2:
@@ -111,7 +111,7 @@ TileVariable Tensor3dMapper::stride_x() const
TileVariable Tensor3dMapper::stride_y() const
{
- switch(_format)
+ switch (_format)
{
case TensorSamplerFormat::Dim0_Dim1xDim2_1:
case TensorSamplerFormat::Dim0_Dim1_Dim2:
@@ -126,10 +126,10 @@ TileVariable Tensor3dMapper::stride_z() const
{
TileVariable stride_zero;
- switch(_format)
+ switch (_format)
{
case TensorSamplerFormat::Dim0_Dim1xDim2_1:
- stride_zero = _tensor->component(TensorComponentType::Stride3).scalar(0, 0);
+ stride_zero = _tensor->component(TensorComponentType::Stride3).scalar(0, 0);
stride_zero.str = "0";
return stride_zero;
case TensorSamplerFormat::Dim0_Dim1_Dim2:
@@ -142,7 +142,7 @@ TileVariable Tensor3dMapper::stride_z() const
TileVariable Tensor3dMapper::stride_batch() const
{
- switch(_format)
+ switch (_format)
{
case TensorSamplerFormat::Dim0_Dim1xDim2_1:
case TensorSamplerFormat::Dim0_Dim1_Dim2:
@@ -152,4 +152,4 @@ TileVariable Tensor3dMapper::stride_batch() const
return _tensor->component(TensorComponentType::Unknown).scalar(0, 0);
}
}
-} // namespace ckw
\ No newline at end of file
+} // namespace ckw
diff --git a/compute_kernel_writer/src/Tensor3dMapper.h b/compute_kernel_writer/src/Tensor3dMapper.h
index fa68ac2d15..e94b595193 100644
--- a/compute_kernel_writer/src/Tensor3dMapper.h
+++ b/compute_kernel_writer/src/Tensor3dMapper.h
@@ -74,8 +74,8 @@ public:
TileVariable stride_batch() const;
private:
- ITensor *_tensor;
- TensorSamplerFormat _format;
+ ITensor *_tensor;
+ TensorSamplerFormat _format;
};
} // namespace ckw
diff --git a/compute_kernel_writer/src/TensorOperand.cpp b/compute_kernel_writer/src/TensorOperand.cpp
index 5ad24c6276..bf11d0d332 100644
--- a/compute_kernel_writer/src/TensorOperand.cpp
+++ b/compute_kernel_writer/src/TensorOperand.cpp
@@ -23,13 +23,13 @@
*/
#include "ckw/TensorOperand.h"
+
#include "src/ITensor.h"
namespace ckw
{
-TensorOperand::TensorOperand(ITensor &tensor)
- : _tensor(tensor)
+TensorOperand::TensorOperand(ITensor &tensor) : _tensor(tensor)
{
}
@@ -108,4 +108,4 @@ TileOperand TensorOperand::offset_first_element_in_bytes()
return TileOperand(_tensor.component(TensorComponentType::OffsetFirstElement));
}
-} // namespace ckw
\ No newline at end of file
+} // namespace ckw
diff --git a/compute_kernel_writer/src/TensorSampler.cpp b/compute_kernel_writer/src/TensorSampler.cpp
index 2ee8df4bca..91d5af2fd0 100644
--- a/compute_kernel_writer/src/TensorSampler.cpp
+++ b/compute_kernel_writer/src/TensorSampler.cpp
@@ -32,7 +32,11 @@ TensorSampler::TensorSampler(TensorStorageType storage,
TensorSamplerAddressModeX address_mode_x,
TensorSamplerAddressModeY address_mode_y,
TensorSamplerAddressModeZ address_mode_z)
- : _storage(storage), _format(format), _address_mode_x(address_mode_x), _address_mode_y(address_mode_y), _address_mode_z(address_mode_z)
+ : _storage(storage),
+ _format(format),
+ _address_mode_x(address_mode_x),
+ _address_mode_y(address_mode_y),
+ _address_mode_z(address_mode_z)
{
}
diff --git a/compute_kernel_writer/src/TensorUtils.cpp b/compute_kernel_writer/src/TensorUtils.cpp
index 24836092d4..17fc9547ae 100644
--- a/compute_kernel_writer/src/TensorUtils.cpp
+++ b/compute_kernel_writer/src/TensorUtils.cpp
@@ -23,6 +23,7 @@
*/
#include "src/TensorUtils.h"
+
#include "ckw/Error.h"
#include "ckw/TensorInfo.h"
#include "ckw/types/TensorComponentType.h"
@@ -31,10 +32,10 @@ namespace ckw
{
TensorComponentType get_tensor_dimension(TensorDataLayout layout, TensorDataLayoutComponent component)
{
- switch(layout)
+ switch (layout)
{
case TensorDataLayout::Nhwc:
- switch(component)
+ switch (component)
{
case TensorDataLayoutComponent::C:
return TensorComponentType::Dim0;
@@ -49,7 +50,7 @@ TensorComponentType get_tensor_dimension(TensorDataLayout layout, TensorDataLayo
return TensorComponentType::Unknown;
}
case TensorDataLayout::Ndhwc:
- switch(component)
+ switch (component)
{
case TensorDataLayoutComponent::C:
return TensorComponentType::Dim0;
@@ -73,10 +74,10 @@ TensorComponentType get_tensor_dimension(TensorDataLayout layout, TensorDataLayo
TensorComponentType get_tensor_stride(TensorDataLayout layout, TensorDataLayoutComponent component)
{
- switch(layout)
+ switch (layout)
{
case TensorDataLayout::Nhwc:
- switch(component)
+ switch (component)
{
case TensorDataLayoutComponent::C:
return TensorComponentType::Stride0;
@@ -91,7 +92,7 @@ TensorComponentType get_tensor_stride(TensorDataLayout layout, TensorDataLayoutC
return TensorComponentType::Unknown;
}
case TensorDataLayout::Ndhwc:
- switch(component)
+ switch (component)
{
case TensorDataLayoutComponent::C:
return TensorComponentType::Stride0;
diff --git a/compute_kernel_writer/src/TileInfo.cpp b/compute_kernel_writer/src/TileInfo.cpp
index 66d8cb1620..273266eedc 100644
--- a/compute_kernel_writer/src/TileInfo.cpp
+++ b/compute_kernel_writer/src/TileInfo.cpp
@@ -26,18 +26,15 @@
namespace ckw
{
-TileInfo::TileInfo(DataType dt)
- : _dt(dt), _shape({ { 1, 1 } })
+TileInfo::TileInfo(DataType dt) : _dt(dt), _shape({{1, 1}})
{
}
-TileInfo::TileInfo(DataType dt, int32_t w)
- : _dt(dt), _shape({ { w, 1 } })
+TileInfo::TileInfo(DataType dt, int32_t w) : _dt(dt), _shape({{w, 1}})
{
}
-TileInfo::TileInfo(DataType dt, int32_t h, int32_t w)
- : _dt(dt), _shape({ { w, h } })
+TileInfo::TileInfo(DataType dt, int32_t h, int32_t w) : _dt(dt), _shape({{w, h}})
{
}
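
One detail worth noting in these constructors: the arguments arrive as (data type, height, width), but the shape is stored width-first as {{w, h}}. A one-line sketch of a call, with illustrative values:

#include "ckw/TileInfo.h"

using namespace ckw;

// 2 rows x 4 columns: arguments are (dt, h, w); internally _shape = {4, 2}.
const TileInfo tile_f32(DataType::Fp32, /* h */ 2, /* w */ 4);
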
diff --git a/compute_kernel_writer/src/TileOperand.cpp b/compute_kernel_writer/src/TileOperand.cpp
index 3dfa2b8b2b..865ef85a13 100644
--- a/compute_kernel_writer/src/TileOperand.cpp
+++ b/compute_kernel_writer/src/TileOperand.cpp
@@ -23,7 +23,9 @@
*/
#include "ckw/TileOperand.h"
+
#include "ckw/Error.h"
+
#include "src/ITile.h"
namespace ckw
@@ -34,7 +36,8 @@ TileOperand::TileOperand(ITile &tile)
{
}
-TileOperand::TileOperand(const TileOperand &operand, int32_t row_start, int32_t row_end, int32_t col_start, int32_t col_end)
+TileOperand::TileOperand(
+ const TileOperand &operand, int32_t row_start, int32_t row_end, int32_t col_start, int32_t col_end)
: _tile(operand._tile), _row_start(row_start), _row_end(row_end), _col_start(col_start), _col_end(col_end)
{
CKW_ASSERT(row_start >= 0 && row_start < _tile->info().height());
@@ -50,7 +53,8 @@ TileOperand TileOperand::tile(int32_t row_start, int32_t row_end, int32_t col_st
CKW_ASSERT(col_start >= 0 && _col_start + col_start < _col_end);
CKW_ASSERT(col_end > col_start && _col_start + col_end <= _col_end);
- return TileOperand(*this, _row_start + row_start, _row_start + row_end, _col_start + col_start, _col_start + col_end);
+ return TileOperand(*this, _row_start + row_start, _row_start + row_end, _col_start + col_start,
+ _col_start + col_end);
}
TileOperand TileOperand::row(int32_t row) const
diff --git a/compute_kernel_writer/src/TileView.h b/compute_kernel_writer/src/TileView.h
index e0d034fa8d..50ae66b389 100644
--- a/compute_kernel_writer/src/TileView.h
+++ b/compute_kernel_writer/src/TileView.h
@@ -27,6 +27,7 @@
#include "ckw/Error.h"
#include "ckw/types/DataType.h"
+
#include "src/ITile.h"
#include <cstdint>
@@ -81,8 +82,7 @@ public:
*
* @param[in] tile The tile object.
*/
- TileView(const T &tile)
- : _tile(&tile), _area(0, tile.info().height(), 0, tile.info().width())
+ TileView(const T &tile) : _tile(&tile), _area(0, tile.info().height(), 0, tile.info().width())
{
}
@@ -91,8 +91,7 @@ public:
* @param[in] tile The tile object.
* @param[in] area The rectangular active area.
*/
- TileView(const T &tile, const TileArea &area)
- : _tile(&tile), _area(area)
+ TileView(const T &tile, const TileArea &area) : _tile(&tile), _area(area)
{
}
@@ -176,7 +175,8 @@ public:
/** Get whether the tile view refers to the whole tile. */
bool is_full_tile() const
{
- return row_start() == 0 && row_end() == _tile->info().height() && col_start() == 0 && col_end() == _tile->info().width();
+ return row_start() == 0 && row_end() == _tile->info().height() && col_start() == 0 &&
+ col_end() == _tile->info().width();
}
private:
diff --git a/compute_kernel_writer/src/cl/CLHelpers.cpp b/compute_kernel_writer/src/cl/CLHelpers.cpp
index ff4408b1a3..8e4a932764 100644
--- a/compute_kernel_writer/src/cl/CLHelpers.cpp
+++ b/compute_kernel_writer/src/cl/CLHelpers.cpp
@@ -28,6 +28,7 @@
#include "ckw/types/DataType.h"
#include "ckw/types/Operators.h"
#include "ckw/types/TensorStorageType.h"
+
#include "src/types/DataTypeHelpers.h"
namespace ckw
@@ -35,7 +36,7 @@ namespace ckw
bool cl_validate_vector_length(int32_t len)
{
bool valid_vector_length = true;
- if(len < 1 || len > 16 || (len > 4 && len < 8) || (len > 8 && len < 16))
+ if (len < 1 || len > 16 || (len > 4 && len < 8) || (len > 8 && len < 16))
{
valid_vector_length = false;
}
@@ -44,14 +45,14 @@ bool cl_validate_vector_length(int32_t len)
std::string cl_get_variable_datatype_as_string(DataType dt, int32_t len)
{
- if(cl_validate_vector_length(len) == false)
+ if (cl_validate_vector_length(len) == false)
{
CKW_THROW_MSG("Unsupported vector length");
return "";
}
std::string res;
- switch(dt)
+ switch (dt)
{
case DataType::Fp32:
res += "float";
@@ -85,7 +86,7 @@ std::string cl_get_variable_datatype_as_string(DataType dt, int32_t len)
return "";
}
- if(len > 1)
+ if (len > 1)
{
res += std::to_string(len);
}
@@ -95,7 +96,7 @@ std::string cl_get_variable_datatype_as_string(DataType dt, int32_t len)
int32_t cl_round_up_to_nearest_valid_vector_width(int32_t width)
{
- switch(width)
+ switch (width)
{
case 1:
return 1;
@@ -128,7 +129,7 @@ int32_t cl_round_up_to_nearest_valid_vector_width(int32_t width)
std::string cl_get_variable_storagetype_as_string(TensorStorageType storage)
{
std::string res;
- switch(storage)
+ switch (storage)
{
case TensorStorageType::BufferUint8Ptr:
res += "__global uchar*";
@@ -148,7 +149,7 @@ std::string cl_get_variable_storagetype_as_string(TensorStorageType storage)
std::string cl_get_assignment_op_as_string(AssignmentOp op)
{
- switch(op)
+ switch (op)
{
case AssignmentOp::Increment:
return "+=";
@@ -163,34 +164,34 @@ std::string cl_get_assignment_op_as_string(AssignmentOp op)
std::tuple<bool, std::string> cl_get_unary_op(UnaryOp op)
{
- switch(op)
+ switch (op)
{
case UnaryOp::LogicalNot:
- return { false, "!" };
+ return {false, "!"};
case UnaryOp::BitwiseNot:
- return { false, "~" };
+ return {false, "~"};
case UnaryOp::Exp:
- return { true, "exp" };
+ return {true, "exp"};
case UnaryOp::Tanh:
- return { true, "tanh" };
+ return {true, "tanh"};
case UnaryOp::Sqrt:
- return { true, "sqrt" };
+ return {true, "sqrt"};
case UnaryOp::Erf:
- return { true, "erf" };
+ return {true, "erf"};
case UnaryOp::Fabs:
- return { true, "fabs" };
+ return {true, "fabs"};
case UnaryOp::Log:
- return { true, "log" };
+ return {true, "log"};
case UnaryOp::Round:
- return { true, "round" };
+ return {true, "round"};
default:
CKW_THROW_MSG("Unsupported unary operation!");
@@ -201,52 +202,52 @@ std::tuple<bool, std::string> cl_get_binary_op(BinaryOp op, DataType data_type)
{
const auto is_float = is_data_type_float(data_type);
- switch(op)
+ switch (op)
{
case BinaryOp::Add:
- return { false, "+" };
+ return {false, "+"};
case BinaryOp::Sub:
- return { false, "-" };
+ return {false, "-"};
case BinaryOp::Mul:
- return { false, "*" };
+ return {false, "*"};
case BinaryOp::Div:
- return { false, "/" };
+ return {false, "/"};
case BinaryOp::Mod:
- return { false, "%" };
+ return {false, "%"};
case BinaryOp::Equal:
- return { false, "==" };
+ return {false, "=="};
case BinaryOp::Less:
- return { false, "<" };
+ return {false, "<"};
case BinaryOp::LessEqual:
- return { false, "<=" };
+ return {false, "<="};
case BinaryOp::Greater:
- return { false, ">" };
+ return {false, ">"};
case BinaryOp::GreaterEqual:
- return { false, ">=" };
+ return {false, ">="};
case BinaryOp::LogicalAnd:
- return { false, "&&" };
+ return {false, "&&"};
case BinaryOp::LogicalOr:
- return { false, "||" };
+ return {false, "||"};
case BinaryOp::BitwiseXOR:
- return { false, "^" };
+ return {false, "^"};
case BinaryOp::Min:
- return { true, is_float ? "fmin" : "min" };
+ return {true, is_float ? "fmin" : "min"};
case BinaryOp::Max:
- return { true, is_float ? "fmax" : "max" };
+ return {true, is_float ? "fmax" : "max"};
default:
CKW_THROW_MSG("Unsupported binary operator/function!");
@@ -255,13 +256,13 @@ std::tuple<bool, std::string> cl_get_binary_op(BinaryOp op, DataType data_type)
std::tuple<bool, std::string> cl_get_ternary_op(TernaryOp op)
{
- switch(op)
+ switch (op)
{
case TernaryOp::Select:
- return { true, "select" };
+ return {true, "select"};
case TernaryOp::Clamp:
- return { true, "clamp" };
+ return {true, "clamp"};
default:
CKW_THROW_MSG("Unsupported ternary function!");
@@ -273,7 +274,7 @@ std::string cl_data_type_rounded_up_to_valid_vector_width(DataType dt, int32_t w
std::string data_type;
const int32_t w = cl_round_up_to_nearest_valid_vector_width(width);
data_type += cl_get_variable_datatype_as_string(dt, 1);
- if(w != 1)
+ if (w != 1)
{
data_type += std::to_string(w);
}
@@ -284,7 +285,7 @@ std::vector<int32_t> cl_decompose_vector_width(int32_t vector_width)
{
std::vector<int32_t> x;
- switch(vector_width)
+ switch (vector_width)
{
case 0:
break;
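
These helpers return a {is_function, op-string} tuple so the caller can choose between function-call and infix syntax when emitting OpenCL C. A hedged consumption sketch (the emit_binary_rhs name and the string plumbing are assumptions, not repository code):

#include "ckw/types/DataType.h"
#include "ckw/types/Operators.h"

#include "src/cl/CLHelpers.h"

#include <string>
#include <tuple>

std::string emit_binary_rhs(const std::string &lhs, const std::string &rhs)
{
    const auto [is_function, op] = ckw::cl_get_binary_op(ckw::BinaryOp::Min, ckw::DataType::Fp32);

    // Function-style ops (fmin/min, fmax/max) wrap their operands; everything
    // else is emitted as a plain infix operator.
    return is_function ? op + "(" + lhs + ", " + rhs + ")" : lhs + " " + op + " " + rhs;
}
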
diff --git a/compute_kernel_writer/src/cl/CLKernelWriter.cpp b/compute_kernel_writer/src/cl/CLKernelWriter.cpp
index 2db9c139b7..62e6853a7a 100644
--- a/compute_kernel_writer/src/cl/CLKernelWriter.cpp
+++ b/compute_kernel_writer/src/cl/CLKernelWriter.cpp
@@ -31,14 +31,15 @@
#include "ckw/types/DataType.h"
#include "ckw/types/MemoryOperation.h"
#include "ckw/types/TargetLanguage.h"
-#include "src/ITensorComponent.h"
-#include "src/TileView.h"
+
#include "src/cl/CLHelpers.h"
#include "src/cl/CLTensorArgument.h"
#include "src/cl/CLTile.h"
#include "src/cl/helpers/CLMemoryOpBufferHelper.h"
#include "src/cl/helpers/CLMemoryOpImage2dHelper.h"
#include "src/cl/helpers/ICLMemoryOpHelper.h"
+#include "src/ITensorComponent.h"
+#include "src/TileView.h"
#include "src/types/DataTypeHelpers.h"
#include <algorithm>
@@ -63,14 +64,14 @@ std::unique_ptr<Kernel> CLKernelWriter::emit_kernel(const std::string &name)
// Create the list of arguments.
std::vector<KernelArgument> arguments;
- for(const auto &tensor : _tensors)
+ for (const auto &tensor : _tensors)
{
const auto tensor_id = tensor->info().id();
const auto storages = tensor->storages();
const auto components = tensor->components();
- for(const auto &storage : storages)
+ for (const auto &storage : storages)
{
code += cl_get_variable_storagetype_as_string(storage.type);
code += " ";
@@ -80,7 +81,7 @@ std::unique_ptr<Kernel> CLKernelWriter::emit_kernel(const std::string &name)
arguments.emplace_back(tensor_id, storage.type);
}
- for(const auto &component : components)
+ for (const auto &component : components)
{
const auto &tile = component->tile();
const auto &tile_info = tile.info();
@@ -96,7 +97,7 @@ std::unique_ptr<Kernel> CLKernelWriter::emit_kernel(const std::string &name)
}
}
- if(code.size() >= 2 && code[code.size() - 2] == ',' && code[code.size() - 1] == '\n')
+ if (code.size() >= 2 && code[code.size() - 2] == ',' && code[code.size() - 1] == '\n')
{
// Remove the last comma in the argument list.
code.pop_back();
@@ -127,11 +128,12 @@ void CLKernelWriter::op_assign(const TileOperand &dst, const TileOperand &src)
const std::string src_prefix = broadcast_src_x ? "(" + data_type_str + ")" : "";
CKW_ASSERT_MSG(src_view.data_type() == dst_view.data_type(), "Source and destination type must match.");
- CKW_ASSERT_MSG(src_view.height() == dst_h || src_view.height() == 1, "Tile height must match or source is broadcasting in y dimension.");
+ CKW_ASSERT_MSG(src_view.height() == dst_h || src_view.height() == 1,
+ "Tile height must match or source is broadcasting in y dimension.");
CKW_ASSERT_MSG(src_w == dst_w || src_w == 1, "Tile width must match or source is broadcasting in x dimension.");
// Broadcasting on y dimension is automatic (see CLTile::vector).
- for(int32_t y = 0; y < dst_h; ++y)
+ for (int32_t y = 0; y < dst_h; ++y)
{
append_code(dst_view.vector(y).str, " = ", src_prefix, src_view.vector(y).str, ";\n");
}
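
Concretely, assigning a 1x1 source to a 2x4 fp32 destination emits one assignment per destination row; the "(float4)" prefix performs the x-broadcast, while the clamped row lookup in CLTile::vector() performs the y-broadcast. A tiny reproduction of the emitted text (tile names illustrative):

    #include <iostream>
    #include <string>

    int main()
    {
        const std::string src_prefix = "(float4)"; // x-broadcast cast, as above
        for (int y = 0; y < 2; ++y)                // dst_h == 2
            std::cout << "dst__" << y << " = " << src_prefix << "src;\n";
        // Prints: dst__0 = (float4)src;
        //         dst__1 = (float4)src;
    }
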
@@ -158,13 +160,15 @@ void CLKernelWriter::op_cast(const TileOperand &dst, const TileOperand &src, Con
const std::string prefix = broadcast_x ? "(" + dst_type_str + ")" : "";
CKW_ASSERT_MSG(src_view.data_type() != dst_view.data_type(), "Source and destination type must be different.");
- CKW_ASSERT_MSG(src_view.height() == dst_h || src_view.height() == 1, "Tile height must match or source is broadcasting in y dimension.");
+ CKW_ASSERT_MSG(src_view.height() == dst_h || src_view.height() == 1,
+ "Tile height must match or source is broadcasting in y dimension.");
CKW_ASSERT_MSG(src_w == dst_w || src_w == 1, "Tile width must match or source is broadcasting in x dimension.");
// Broadcasting on y dimension is automatic (see CLTile::vector).
- for(int32_t y = 0; y < dst_h; ++y)
+ for (int32_t y = 0; y < dst_h; ++y)
{
- append_code(dst_view.vector(y).str, " = ", prefix, "convert_", convert_type_str, sat, "(", src_view.vector(y).str, ");\n");
+ append_code(dst_view.vector(y).str, " = ", prefix, "convert_", convert_type_str, sat, "(",
+ src_view.vector(y).str, ");\n");
}
}
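
The cast path differs only in routing through OpenCL's convert_ builtins; for a 1x4 fp32 source cast to int32 with saturation, the loop above would emit something like:

    #include <iostream>

    int main()
    {
        // Single-row expansion of the op_cast loop (names illustrative):
        std::cout << "dst__0 = convert_int4_sat(src__0);\n";
    }
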
@@ -189,11 +193,12 @@ void CLKernelWriter::op_unary(const TileOperand &dst, UnaryOp op, const TileOper
const auto op_suffix = op_is_func ? ")" : "";
CKW_ASSERT_MSG(src_view.data_type() == dst_view.data_type(), "Source and destination type must match.");
- CKW_ASSERT_MSG(src_view.height() == dst_h || src_view.height() == 1, "Tile height must match or source is broadcasting in y dimension.");
+ CKW_ASSERT_MSG(src_view.height() == dst_h || src_view.height() == 1,
+ "Tile height must match or source is broadcasting in y dimension.");
CKW_ASSERT_MSG(src_w == dst_w || src_w == 1, "Tile width must match or source is broadcasting in x dimension.");
// Broadcasting on y dimension is automatic (see CLTile::vector).
- for(int32_t y = 0; y < dst_h; ++y)
+ for (int32_t y = 0; y < dst_h; ++y)
{
append_code(dst_view.vector(y).str, " = ", src_prefix, op_prefix, src_view.vector(y).str, op_suffix, ";\n");
}
@@ -214,27 +219,28 @@ void CLKernelWriter::op_binary(const TileOperand &dst, BinaryOp op, const TileOp
CKW_ASSERT_MSG(lhs_view.data_type() == rhs_view.data_type(), "LHS and RHS type must match.");
- CKW_ASSERT_MSG(lhs_view.height() == dst_h || lhs_view.height() == 1, "LHS tile height must match or source is broadcasting in y dimension.");
- CKW_ASSERT_MSG(rhs_view.height() == dst_h || rhs_view.height() == 1, "RHS tile height must match or source is broadcasting in y dimension.");
+ CKW_ASSERT_MSG(lhs_view.height() == dst_h || lhs_view.height() == 1,
+ "LHS tile height must match or source is broadcasting in y dimension.");
+ CKW_ASSERT_MSG(rhs_view.height() == dst_h || rhs_view.height() == 1,
+ "RHS tile height must match or source is broadcasting in y dimension.");
- CKW_ASSERT_MSG(lhs_w == dst_w || lhs_w == 1, "LHS tile width must match destination or LHS is broadcasting in x dimension.");
- CKW_ASSERT_MSG(rhs_w == dst_w || rhs_w == 1, "RHS tile width must match destination or RHS is broadcasting in x dimension.");
+ CKW_ASSERT_MSG(lhs_w == dst_w || lhs_w == 1,
+ "LHS tile width must match destination or LHS is broadcasting in x dimension.");
+ CKW_ASSERT_MSG(rhs_w == dst_w || rhs_w == 1,
+ "RHS tile width must match destination or RHS is broadcasting in x dimension.");
- if(op == BinaryOp::MatMul_Nt_T)
+ if (op == BinaryOp::MatMul_Nt_T)
{
CKW_ASSERT(is_data_type_float(data_type));
- for(int32_t y = 0; y < dst_h; ++y)
+ for (int32_t y = 0; y < dst_h; ++y)
{
- for(int32_t x = 0; x < dst_w; ++x)
+ for (int32_t x = 0; x < dst_w; ++x)
{
- for(int32_t k = 0; k < lhs_w; ++k)
+ for (int32_t k = 0; k < lhs_w; ++k)
{
- append_code(
- dst_view.scalar(x, y).str, " = fma(",
- lhs_view.scalar(k, y).str, ", ",
- rhs_view.scalar(k, x).str, ", ",
- dst_view.scalar(x, y).str, ");\n");
+ append_code(dst_view.scalar(x, y).str, " = fma(", lhs_view.scalar(k, y).str, ", ",
+ rhs_view.scalar(k, x).str, ", ", dst_view.scalar(x, y).str, ");\n");
}
}
}
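
The MatMul_Nt_T branch unrolls a per-element multiply-accumulate over the common extent lhs_w. A quick numeric check of what the emitted fma chain computes, namely dst += lhs * rhs^T with both operands indexed by k along x:

    #include <cstdio>

    int main()
    {
        const float lhs[2][3] = {{1, 2, 3}, {4, 5, 6}}; // 2 rows (y), lhs_w == 3 (k)
        const float rhs[2][3] = {{1, 0, 1}, {0, 1, 0}}; // 2 rows (x), same k extent
        float dst[2][2] = {};
        for (int y = 0; y < 2; ++y)
            for (int x = 0; x < 2; ++x)
                for (int k = 0; k < 3; ++k)
                    dst[y][x] += lhs[y][k] * rhs[x][k]; // mirrors each fma(...) statement
        std::printf("%g %g\n%g %g\n", dst[0][0], dst[0][1], dst[1][0], dst[1][1]);
        // Prints: 4 2
        //         10 5
    }
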
@@ -258,14 +264,16 @@ void CLKernelWriter::op_binary(const TileOperand &dst, BinaryOp op, const TileOp
const std::string op_suffix = op_is_func ? ");\n" : ";\n";
// Broadcasting on y dimension is automatic (see CLTile::vector).
- for(int32_t y = 0; y < dst_h; ++y)
+ for (int32_t y = 0; y < dst_h; ++y)
{
- append_code(dst_view.vector(y).str, op_prefix, lhs_prefix, lhs_view.vector(y).str, op_separator, rhs_prefix, rhs_view.vector(y).str, op_suffix);
+ append_code(dst_view.vector(y).str, op_prefix, lhs_prefix, lhs_view.vector(y).str, op_separator, rhs_prefix,
+ rhs_view.vector(y).str, op_suffix);
}
}
}
-void CLKernelWriter::op_ternary(const TileOperand &dst, TernaryOp op, const TileOperand &first, const TileOperand &second, const TileOperand &third)
+void CLKernelWriter::op_ternary(
+ const TileOperand &dst, TernaryOp op, const TileOperand &first, const TileOperand &second, const TileOperand &third)
{
const auto dst_view = to_cl_tile_view(dst);
const auto first_view = to_cl_tile_view(first);
@@ -297,37 +305,42 @@ void CLKernelWriter::op_ternary(const TileOperand &dst, TernaryOp op, const Tile
CKW_ASSERT_MSG(second_view.data_type() == dst_view.data_type(), "2nd source and destination type must match.");
CKW_ASSERT_MSG(third_view.data_type() == dst_view.data_type(), "3rd source and destination type must match.");
- CKW_ASSERT_MSG(first_view.height() == dst_h || first_view.height() == 1, "1st tile height must match or source is broadcasting in y dimension.");
- CKW_ASSERT_MSG(second_view.height() == dst_h || second_view.height() == 1, "2nd tile height must match or source is broadcasting in y dimension.");
- CKW_ASSERT_MSG(third_view.height() == dst_h || third_view.height() == 1, "3rd tile height must match or source is broadcasting in y dimension.");
+ CKW_ASSERT_MSG(first_view.height() == dst_h || first_view.height() == 1,
+ "1st tile height must match or source is broadcasting in y dimension.");
+ CKW_ASSERT_MSG(second_view.height() == dst_h || second_view.height() == 1,
+ "2nd tile height must match or source is broadcasting in y dimension.");
+ CKW_ASSERT_MSG(third_view.height() == dst_h || third_view.height() == 1,
+ "3rd tile height must match or source is broadcasting in y dimension.");
- CKW_ASSERT_MSG(first_w == dst_w || first_w == 1, "1st tile width must match or source is broadcasting in x dimension.");
- CKW_ASSERT_MSG(second_w == dst_w || second_w == 1, "2nd tile width must match or source is broadcasting in x dimension.");
- CKW_ASSERT_MSG(third_w == dst_w || third_w == 1, "3rd tile width must match or source is broadcasting in x dimension.");
+ CKW_ASSERT_MSG(first_w == dst_w || first_w == 1,
+ "1st tile width must match or source is broadcasting in x dimension.");
+ CKW_ASSERT_MSG(second_w == dst_w || second_w == 1,
+ "2nd tile width must match or source is broadcasting in x dimension.");
+ CKW_ASSERT_MSG(third_w == dst_w || third_w == 1,
+ "3rd tile width must match or source is broadcasting in x dimension.");
// Broadcasting on y dimension is automatic (see CLTile::vector).
- for(int32_t y = 0; y < dst_h; ++y)
+ for (int32_t y = 0; y < dst_h; ++y)
{
- append_code(
- dst_view.vector(y).str, " = ", op_name, "(",
- first_prefix, first_view.vector(y).str, ", ",
- second_prefix, second_view.vector(y).str, ", ",
- third_prefix, third_view.vector(y).str, ");\n");
+ append_code(dst_view.vector(y).str, " = ", op_name, "(", first_prefix, first_view.vector(y).str, ", ",
+ second_prefix, second_view.vector(y).str, ", ", third_prefix, third_view.vector(y).str, ");\n");
}
}
-void CLKernelWriter::op_if_generic(const TileOperand &lhs, BinaryOp op, const TileOperand &rhs, const std::function<void()> &body, bool is_else_if)
+void CLKernelWriter::op_if_generic(
+ const TileOperand &lhs, BinaryOp op, const TileOperand &rhs, const std::function<void()> &body, bool is_else_if)
{
const auto lhs_view = to_cl_tile_view(lhs);
const auto rhs_view = to_cl_tile_view(rhs);
const auto op_name = std::get<1>(cl_get_binary_op(op, lhs_view.data_type()));
- CKW_ASSERT(op == BinaryOp::Less || op == BinaryOp::LessEqual || op == BinaryOp::Equal || op == BinaryOp::GreaterEqual || op == BinaryOp::Greater);
+ CKW_ASSERT(op == BinaryOp::Less || op == BinaryOp::LessEqual || op == BinaryOp::Equal ||
+ op == BinaryOp::GreaterEqual || op == BinaryOp::Greater);
CKW_ASSERT(lhs_view.is_scalar());
CKW_ASSERT(rhs_view.is_scalar());
- if(is_else_if)
+ if (is_else_if)
{
append_code("else ");
}
@@ -337,12 +350,18 @@ void CLKernelWriter::op_if_generic(const TileOperand &lhs, BinaryOp op, const Ti
append_code("}\n");
}
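
The generated control flow is plain OpenCL C: one helper writes both forms, merely prepending "else " when is_else_if is set. A sketch of the emitted shape for the comparison a < b (the exact condition text falls between these hunks and is assumed):

    #include <iostream>

    int main()
    {
        const bool is_else_if = true;
        if (is_else_if)
            std::cout << "else ";   // mirrors append_code("else ") above
        std::cout << "if (a < b)\n{\n";
        std::cout << "}\n";         // written after the body, as in the hunk
    }
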
-void CLKernelWriter::op_if(const TileOperand &lhs, BinaryOp op, const TileOperand &rhs, const std::function<void()> &body)
+void CLKernelWriter::op_if(const TileOperand &lhs,
+ BinaryOp op,
+ const TileOperand &rhs,
+ const std::function<void()> &body)
{
op_if_generic(lhs, op, rhs, body, false /* is_else_if */);
}
-void CLKernelWriter::op_else_if(const TileOperand &lhs, BinaryOp op, const TileOperand &rhs, const std::function<void()> &body)
+void CLKernelWriter::op_else_if(const TileOperand &lhs,
+ BinaryOp op,
+ const TileOperand &rhs,
+ const std::function<void()> &body)
{
op_if_generic(lhs, op, rhs, body, true /* is_else_if */);
}
@@ -354,10 +373,13 @@ void CLKernelWriter::op_else(const std::function<void()> &body)
append_code("}\n");
}
-void CLKernelWriter::op_for_loop(
- const TileOperand &var, BinaryOp cond_op, const TileOperand &cond_value,
- const TileOperand &update_var, AssignmentOp update_op, const TileOperand &update_value,
- const std::function<void()> &body)
+void CLKernelWriter::op_for_loop(const TileOperand &var,
+ BinaryOp cond_op,
+ const TileOperand &cond_value,
+ const TileOperand &update_var,
+ AssignmentOp update_op,
+ const TileOperand &update_value,
+ const std::function<void()> &body)
{
const auto var_view = to_cl_tile_view(var);
const auto cond_value_view = to_cl_tile_view(cond_value);
@@ -373,11 +395,12 @@ void CLKernelWriter::op_for_loop(
CKW_ASSERT(update_var_view.data_type() == update_value_view.data_type());
const auto cond_op_name = std::get<1>(cl_get_binary_op(cond_op, var_view.data_type()));
- CKW_ASSERT(cond_op == BinaryOp::Less || cond_op == BinaryOp::LessEqual || cond_op == BinaryOp::Equal || cond_op == BinaryOp::GreaterEqual || cond_op == BinaryOp::Greater);
+ CKW_ASSERT(cond_op == BinaryOp::Less || cond_op == BinaryOp::LessEqual || cond_op == BinaryOp::Equal ||
+ cond_op == BinaryOp::GreaterEqual || cond_op == BinaryOp::Greater);
- append_code(
- "for (; ", var_view.scalar(0, 0).str, " ", cond_op_name, " ", cond_value_view.scalar(0, 0).str, "; ",
- update_var_view.scalar(0, 0).str, " ", cl_get_assignment_op_as_string(update_op), " ", update_value_view.scalar(0, 0).str, ")\n{\n");
+ append_code("for (; ", var_view.scalar(0, 0).str, " ", cond_op_name, " ", cond_value_view.scalar(0, 0).str, "; ",
+ update_var_view.scalar(0, 0).str, " ", cl_get_assignment_op_as_string(update_op), " ",
+ update_value_view.scalar(0, 0).str, ")\n{\n");
write_body(body);
append_code("}\n");
}
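
Note the empty init clause in the assembled loop header: the counter is a pre-declared tile variable. For var "i", condition "< n" and update "i += 4", the append_code call above yields:

    #include <iostream>

    int main()
    {
        // Header text built by the append_code call above (operands illustrative):
        std::cout << "for (; i < n; i += 4)\n{\n";
        std::cout << "}\n"; // closes the loop after write_body(body)
    }
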
@@ -404,7 +427,7 @@ void CLKernelWriter::op_print(const std::string &prefix, const std::vector<TileO
std::string format_code;
std::string args_code;
- for(auto &op : operands)
+ for (auto &op : operands)
{
const auto tile_view = to_cl_tile_view(op);
@@ -416,12 +439,12 @@ void CLKernelWriter::op_print(const std::string &prefix, const std::vector<TileO
// Construct the format specifier to print out one row of the tile.
std::string row_format("%");
- if(width > 1)
+ if (width > 1)
{
row_format += "v" + std::to_string(width);
}
- switch(data_type)
+ switch (data_type)
{
case DataType::Fp32:
row_format += "hlg";
@@ -452,7 +475,7 @@ void CLKernelWriter::op_print(const std::string &prefix, const std::vector<TileO
CKW_THROW_MSG("Unsupported data type!");
}
- if(width > 1)
+ if (width > 1)
{
row_format = "[" + row_format + "]";
}
@@ -460,14 +483,14 @@ void CLKernelWriter::op_print(const std::string &prefix, const std::vector<TileO
// Construct the format specifier for the printf statement.
format_code += name + " = ";
- if(height == 1)
+ if (height == 1)
{
format_code += row_format;
}
else
{
format_code += "[" + row_format;
- for(int32_t row = 1; row < height; ++row)
+ for (int32_t row = 1; row < height; ++row)
{
format_code += ", " + row_format;
}
@@ -477,7 +500,7 @@ void CLKernelWriter::op_print(const std::string &prefix, const std::vector<TileO
format_code += "\\n";
// Construct the variable arguments for the printf statement.
- for(int32_t row = 0; row < height; ++row)
+ for (int32_t row = 0; row < height; ++row)
{
args_code += ", " + tile_view.vector(row).str;
}
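
Tracing the format construction for a 2x4 fp32 tile named "t" gives a feel for the output; the closing "]" of the outer list falls between the hunks above and is assumed here:

    #include <iostream>
    #include <string>

    int main()
    {
        const int height = 2, width = 4;
        std::string row_format = "%v" + std::to_string(width) + "hlg"; // Fp32 row
        row_format = "[" + row_format + "]";                           // width > 1
        std::string format = "t = [" + row_format;
        for (int row = 1; row < height; ++row)
            format += ", " + row_format;
        format += "]\\n"; // assumed closing bracket, then the literal \n
        std::string args;
        for (int row = 0; row < height; ++row)
            args += ", t__" + std::to_string(row);
        std::cout << format << '\n' << args << '\n';
        // t = [[%v4hlg], [%v4hlg]]\n
        // , t__0, t__1
    }
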
@@ -527,19 +550,14 @@ TileOperand CLKernelWriter::declare_tile(const std::string &name, const TileInfo
const int32_t width = tile_info.width();
const DataType data_type = tile_info.data_type();
- CKW_ASSERT_MSG(
- std::find_if(
- _tiles.begin(), _tiles.end(),
- [=](const std::unique_ptr<CLTile> &e)
- {
- return e->name() == fullname;
- })
- == _tiles.end(),
- "There is already a tile with name: " + fullname);
+ CKW_ASSERT_MSG(std::find_if(_tiles.begin(), _tiles.end(),
+ [=](const std::unique_ptr<CLTile> &e)
+ { return e->name() == fullname; }) == _tiles.end(),
+ "There is already a tile with name: " + fullname);
auto tile = std::make_unique<CLTile>(fullname, tile_info);
- for(int32_t row = 0; row < height; ++row)
+ for (int32_t row = 0; row < height; ++row)
{
const std::string cl_type = cl_get_variable_datatype_as_string(data_type, width);
append_code(cl_type, " ", tile->vector(row).str, ";\n");
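
So declaring a 2x8 fp16 tile named "t" appends one vector declaration per row; the fullname may additionally carry a writer-specific prefix not shown here:

    #include <iostream>

    int main()
    {
        // Declarations emitted by the loop above, following the "__<row>"
        // suffix convention of CLTile::create_var_name:
        for (int row = 0; row < 2; ++row)
            std::cout << "half8 t__" << row << ";\n";
    }
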
@@ -578,40 +596,40 @@ TileView<CLTile> CLKernelWriter::to_cl_tile_view(const TileOperand &operand) con
{
bool found = false;
- for(const auto &t : _tiles)
+ for (const auto &t : _tiles)
{
- if(&tile == t.get())
+ if (&tile == t.get())
{
found = true;
break;
}
}
- for(const auto &t : _constant_tiles)
+ for (const auto &t : _constant_tiles)
{
- if(&tile == t.get())
+ if (&tile == t.get())
{
found = true;
break;
}
}
- if(!found)
+ if (!found)
{
- for(const auto &t : _tensors)
+ for (const auto &t : _tensors)
{
const auto components = t->components();
- for(const auto component : components)
+ for (const auto component : components)
{
- if(&tile == &component->tile())
+ if (&tile == &component->tile())
{
found = true;
break;
}
}
- if(found)
+ if (found)
{
break;
}
@@ -622,66 +640,106 @@ TileView<CLTile> CLKernelWriter::to_cl_tile_view(const TileOperand &operand) con
}
#endif // COMPUTE_KERNEL_WRITER_ASSERTS_ENABLED
- return { static_cast<CLTile &>(tile), area };
+ return {static_cast<CLTile &>(tile), area};
}
-void CLKernelWriter::op_load(const TileOperand &tile_op, const TensorOperand &tensor_op, TensorSampler &sampler,
- const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch)
+void CLKernelWriter::op_load(const TileOperand &tile_op,
+ const TensorOperand &tensor_op,
+ TensorSampler &sampler,
+ const TileOperand &x,
+ const TileOperand &y,
+ const TileOperand &z,
+ const TileOperand &batch)
{
- const CLTile dilation_x({ { "1" } }, DataType::Int32);
- const CLTile dilation_y({ { "1" } }, DataType::Int32);
+ const CLTile dilation_x({{"1"}}, DataType::Int32);
+ const CLTile dilation_y({{"1"}}, DataType::Int32);
- op_load_store(MemoryOperation::Load, tile_op, tensor_op, sampler, x, y, z, batch, dilation_x, dilation_y, false /* indirect buffer */);
+ op_load_store(MemoryOperation::Load, tile_op, tensor_op, sampler, x, y, z, batch, dilation_x, dilation_y,
+ false /* indirect buffer */);
}
-void CLKernelWriter::op_load_dilated(const TileOperand &tile_op, const TensorOperand &tensor_op, TensorSampler &sampler,
- const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch,
- const TileOperand &dilation_x, const TileOperand &dilation_y)
+void CLKernelWriter::op_load_dilated(const TileOperand &tile_op,
+ const TensorOperand &tensor_op,
+ TensorSampler &sampler,
+ const TileOperand &x,
+ const TileOperand &y,
+ const TileOperand &z,
+ const TileOperand &batch,
+ const TileOperand &dilation_x,
+ const TileOperand &dilation_y)
{
const auto dil_x_view = to_cl_tile_view(dilation_x);
const auto dil_y_view = to_cl_tile_view(dilation_y);
- op_load_store(MemoryOperation::Load, tile_op, tensor_op, sampler, x, y, z, batch, dil_x_view, dil_y_view, false /* indirect buffer */);
+ op_load_store(MemoryOperation::Load, tile_op, tensor_op, sampler, x, y, z, batch, dil_x_view, dil_y_view,
+ false /* indirect buffer */);
}
-void CLKernelWriter::op_store(const TensorOperand &tensor_op, const TileOperand &tile_op, TensorSampler &sampler,
- const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch)
+void CLKernelWriter::op_store(const TensorOperand &tensor_op,
+ const TileOperand &tile_op,
+ TensorSampler &sampler,
+ const TileOperand &x,
+ const TileOperand &y,
+ const TileOperand &z,
+ const TileOperand &batch)
{
- const CLTile dilation_x({ { "1" } }, DataType::Int32);
- const CLTile dilation_y({ { "1" } }, DataType::Int32);
+ const CLTile dilation_x({{"1"}}, DataType::Int32);
+ const CLTile dilation_y({{"1"}}, DataType::Int32);
- op_load_store(MemoryOperation::Store, tile_op, tensor_op, sampler, x, y, z, batch, dilation_x, dilation_y, false /* indirect buffer */);
+ op_load_store(MemoryOperation::Store, tile_op, tensor_op, sampler, x, y, z, batch, dilation_x, dilation_y,
+ false /* indirect buffer */);
}
-void CLKernelWriter::op_store_dilated(const TensorOperand &tensor_op, const TileOperand &tile_op, TensorSampler &sampler,
- const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch,
- const TileOperand &dilation_x, const TileOperand &dilation_y)
+void CLKernelWriter::op_store_dilated(const TensorOperand &tensor_op,
+ const TileOperand &tile_op,
+ TensorSampler &sampler,
+ const TileOperand &x,
+ const TileOperand &y,
+ const TileOperand &z,
+ const TileOperand &batch,
+ const TileOperand &dilation_x,
+ const TileOperand &dilation_y)
{
const auto dil_x_view = to_cl_tile_view(dilation_x);
const auto dil_y_view = to_cl_tile_view(dilation_y);
- op_load_store(MemoryOperation::Store, tile_op, tensor_op, sampler, x, y, z, batch, dil_x_view, dil_y_view, false /* indirect buffer */);
+ op_load_store(MemoryOperation::Store, tile_op, tensor_op, sampler, x, y, z, batch, dil_x_view, dil_y_view,
+ false /* indirect buffer */);
}
-void CLKernelWriter::op_load_indirect(const TileOperand &tile_op, const TensorOperand &tensor_op, TensorSampler &sampler,
- const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch)
+void CLKernelWriter::op_load_indirect(const TileOperand &tile_op,
+ const TensorOperand &tensor_op,
+ TensorSampler &sampler,
+ const TileOperand &x,
+ const TileOperand &y,
+ const TileOperand &z,
+ const TileOperand &batch)
{
- const CLTile dilation_x({ { "1" } }, DataType::Int32);
- const CLTile dilation_y({ { "1" } }, DataType::Int32);
+ const CLTile dilation_x({{"1"}}, DataType::Int32);
+ const CLTile dilation_y({{"1"}}, DataType::Int32);
- op_load_store(MemoryOperation::Load, tile_op, tensor_op, sampler, x, y, z, batch, dilation_x, dilation_y, true /* indirect buffer */);
+ op_load_store(MemoryOperation::Load, tile_op, tensor_op, sampler, x, y, z, batch, dilation_x, dilation_y,
+ true /* indirect buffer */);
}
-void CLKernelWriter::op_load_store(MemoryOperation op, const TileOperand &tile_op, const TensorOperand &tensor_op, TensorSampler &sampler,
- const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch,
- const TileView<CLTile> &dilation_x, const TileView<CLTile> &dilation_y, bool indirect_buffer)
+void CLKernelWriter::op_load_store(MemoryOperation op,
+ const TileOperand &tile_op,
+ const TensorOperand &tensor_op,
+ TensorSampler &sampler,
+ const TileOperand &x,
+ const TileOperand &y,
+ const TileOperand &z,
+ const TileOperand &batch,
+ const TileView<CLTile> &dilation_x,
+ const TileView<CLTile> &dilation_y,
+ bool indirect_buffer)
{
CKW_UNUSED(dilation_x);
CKW_ASSERT(dilation_x.is_scalar());
CKW_ASSERT(dilation_y.is_scalar());
CKW_ASSERT(dilation_x.scalar(0, 0).str == "((int)(1))"); // Dilation in x dimension is not implemented yet
- if(indirect_buffer)
+ if (indirect_buffer)
{
CKW_ASSERT(dilation_y.scalar(0, 0).str == "((int)(1))" && dilation_x.scalar(0, 0).str == "((int)(1))");
}
@@ -689,7 +747,7 @@ void CLKernelWriter::op_load_store(MemoryOperation op, const TileOperand &tile_o
ITensor &tensor = get_tensor(tensor_op);
std::unique_ptr<ICLMemoryOpHelper> helper;
- switch(sampler.storage())
+ switch (sampler.storage())
{
case TensorStorageType::BufferUint8Ptr:
helper = std::make_unique<CLMemoryOpBufferHelper>(this, &tensor, &sampler, op);
@@ -717,13 +775,13 @@ void CLKernelWriter::op_load_store(MemoryOperation op, const TileOperand &tile_o
helper->initialize(&tile, &x_tile, &z_tile, &batch_tile);
- for(int row = 0; row < tile.info().height(); ++row)
+ for (int row = 0; row < tile.info().height(); ++row)
{
- if(!indirect_buffer)
+ if (!indirect_buffer)
{
std::string coord_y = y_tile.scalar(0, 0).str + " + " + std::to_string(row);
- if(dilation_y.scalar(0, 0).str != "((int)(1))")
+ if (dilation_y.scalar(0, 0).str != "((int)(1))")
{
coord_y += " * " + dilation_y.scalar(0, 0).str;
}
diff --git a/compute_kernel_writer/src/cl/CLKernelWriter.h b/compute_kernel_writer/src/cl/CLKernelWriter.h
index d7cf24d5e6..6485bae512 100644
--- a/compute_kernel_writer/src/cl/CLKernelWriter.h
+++ b/compute_kernel_writer/src/cl/CLKernelWriter.h
@@ -26,6 +26,7 @@
#define CKW_SRC_CL_CLKERNELWRITER_H
#include "ckw/KernelWriter.h"
+
#include "src/TileView.h"
#include <memory>
@@ -73,7 +74,11 @@ public:
void op_binary(const TileOperand &dst, BinaryOp op, const TileOperand &first, const TileOperand &second) override;
- void op_ternary(const TileOperand &dst, TernaryOp op, const TileOperand &first, const TileOperand &second, const TileOperand &third) override;
+ void op_ternary(const TileOperand &dst,
+ TernaryOp op,
+ const TileOperand &first,
+ const TileOperand &second,
+ const TileOperand &third) override;
// =============================================================================================
// Flow control
@@ -81,14 +86,18 @@ public:
void op_if(const TileOperand &lhs, BinaryOp op, const TileOperand &rhs, const std::function<void()> &body) override;
- void op_else_if(const TileOperand &lhs, BinaryOp op, const TileOperand &rhs, const std::function<void()> &body) override;
+ void
+ op_else_if(const TileOperand &lhs, BinaryOp op, const TileOperand &rhs, const std::function<void()> &body) override;
void op_else(const std::function<void()> &body) override;
- void op_for_loop(
- const TileOperand &var, BinaryOp cond_op, const TileOperand &cond_value,
- const TileOperand &update_var, AssignmentOp update_op, const TileOperand &update_value,
- const std::function<void()> &body) override;
+ void op_for_loop(const TileOperand &var,
+ BinaryOp cond_op,
+ const TileOperand &cond_value,
+ const TileOperand &update_var,
+ AssignmentOp update_op,
+ const TileOperand &update_value,
+ const std::function<void()> &body) override;
void op_return() override;
@@ -132,26 +141,49 @@ public:
// Memory Operations
// =============================================================================================
- void op_load(
- const TileOperand &tile_op, const TensorOperand &tensor_op, TensorSampler &sampler,
- const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch) override;
-
- void op_load_dilated(
- const TileOperand &tile_op, const TensorOperand &tensor_op, TensorSampler &sampler,
- const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch,
- const TileOperand &dilation_x, const TileOperand &dilation_y) override;
-
- void op_store(
- const TensorOperand &tensor_op, const TileOperand &tile_op, TensorSampler &sampler,
- const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch) override;
-
- void op_store_dilated(
- const TensorOperand &tensor_op, const TileOperand &tile_op, TensorSampler &sampler,
- const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch,
- const TileOperand &dilation_x, const TileOperand &dilation_y) override;
-
- void op_load_indirect(const TileOperand &tile_op, const TensorOperand &tensor_op, TensorSampler &sampler,
- const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch) override;
+ void op_load(const TileOperand &tile_op,
+ const TensorOperand &tensor_op,
+ TensorSampler &sampler,
+ const TileOperand &x,
+ const TileOperand &y,
+ const TileOperand &z,
+ const TileOperand &batch) override;
+
+ void op_load_dilated(const TileOperand &tile_op,
+ const TensorOperand &tensor_op,
+ TensorSampler &sampler,
+ const TileOperand &x,
+ const TileOperand &y,
+ const TileOperand &z,
+ const TileOperand &batch,
+ const TileOperand &dilation_x,
+ const TileOperand &dilation_y) override;
+
+ void op_store(const TensorOperand &tensor_op,
+ const TileOperand &tile_op,
+ TensorSampler &sampler,
+ const TileOperand &x,
+ const TileOperand &y,
+ const TileOperand &z,
+ const TileOperand &batch) override;
+
+ void op_store_dilated(const TensorOperand &tensor_op,
+ const TileOperand &tile_op,
+ TensorSampler &sampler,
+ const TileOperand &x,
+ const TileOperand &y,
+ const TileOperand &z,
+ const TileOperand &batch,
+ const TileOperand &dilation_x,
+ const TileOperand &dilation_y) override;
+
+ void op_load_indirect(const TileOperand &tile_op,
+ const TensorOperand &tensor_op,
+ TensorSampler &sampler,
+ const TileOperand &x,
+ const TileOperand &y,
+ const TileOperand &z,
+ const TileOperand &batch) override;
protected:
/** Return a tile view containing a reference to @ref CLTile object and the active area.
@@ -181,9 +213,17 @@ protected:
// For helper functions
private:
/** Helper method to consolidate all load/store logic in this class */
- void op_load_store(MemoryOperation op, const TileOperand &tile_op, const TensorOperand &tensor_op, TensorSampler &sampler,
- const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch,
- const TileView<CLTile> &dilation_x, const TileView<CLTile> &dilation_y, bool indirect_buffer);
+ void op_load_store(MemoryOperation op,
+ const TileOperand &tile_op,
+ const TensorOperand &tensor_op,
+ TensorSampler &sampler,
+ const TileOperand &x,
+ const TileOperand &y,
+ const TileOperand &z,
+ const TileOperand &batch,
+ const TileView<CLTile> &dilation_x,
+ const TileView<CLTile> &dilation_y,
+ bool indirect_buffer);
/** This function is the generic function to write both `if` and `else if` blocks.
*
@@ -195,7 +235,11 @@ private:
* @param[in] body The function that writes the body of the else-if block.
* @param[in] is_else_if True if this is an `else if` block, otherwise this is an `if` block.
*/
- void op_if_generic(const TileOperand &lhs, BinaryOp op, const TileOperand &rhs, const std::function<void()> &body, bool is_else_if);
+ void op_if_generic(const TileOperand &lhs,
+ BinaryOp op,
+ const TileOperand &rhs,
+ const std::function<void()> &body,
+ bool is_else_if);
// For attributes
private:
diff --git a/compute_kernel_writer/src/cl/CLTensorArgument.cpp b/compute_kernel_writer/src/cl/CLTensorArgument.cpp
index 7d4dc958df..e53de2830d 100644
--- a/compute_kernel_writer/src/cl/CLTensorArgument.cpp
+++ b/compute_kernel_writer/src/cl/CLTensorArgument.cpp
@@ -23,11 +23,13 @@
*/
#include "src/cl/CLTensorArgument.h"
+
#include "ckw/Error.h"
-#include "src/ITensorArgument.h"
-#include "src/ITensorComponent.h"
+
#include "src/cl/CLHelpers.h"
#include "src/cl/CLTensorComponent.h"
+#include "src/ITensorArgument.h"
+#include "src/ITensorComponent.h"
#include "src/types/TensorComponentType.h"
#include <algorithm>
@@ -48,25 +50,23 @@ CLTensorComponent &CLTensorArgument::cl_component(TensorComponentType x)
{
// Return the component if it has already been created.
{
- const auto it = std::find_if(
- _components_used.begin(), _components_used.end(),
- [=](const std::unique_ptr<CLTensorComponent> &item)
- {
- return item->component_type() == x;
- });
+ const auto it =
+ std::find_if(_components_used.begin(), _components_used.end(),
+ [=](const std::unique_ptr<CLTensorComponent> &item) { return item->component_type() == x; });
- if(it != _components_used.end())
+ if (it != _components_used.end())
{
return **it;
}
}
- if(_return_dims_by_value)
+ if (_return_dims_by_value)
{
uint32_t component_type = static_cast<uint32_t>(x);
- const bool is_dimension = (component_type & static_cast<uint32_t>(TensorComponentBitmask::Dimension)) != 0;
- const bool is_folded_dimensions = (component_type & static_cast<uint32_t>(TensorComponentBitmask::FoldedDimensions)) != 0;
+ const bool is_dimension = (component_type & static_cast<uint32_t>(TensorComponentBitmask::Dimension)) != 0;
+ const bool is_folded_dimensions =
+ (component_type & static_cast<uint32_t>(TensorComponentBitmask::FoldedDimensions)) != 0;
constexpr auto bitmask_all = static_cast<uint32_t>(TensorComponentIndexBitmask::All);
constexpr auto bitmask_index_0 = static_cast<uint32_t>(TensorComponentIndexBitmask::Index0);
@@ -83,16 +83,16 @@ CLTensorComponent &CLTensorArgument::cl_component(TensorComponentType x)
CKW_ASSERT(bitmask_index_2 == bitmask_index_3 >> 4);
// If we have a dimension or folded dimensions, we can return the corresponding value if it is not dynamic (not equal to -1)
- if(is_dimension == true || is_folded_dimensions == true)
+ if (is_dimension == true || is_folded_dimensions == true)
{
component_type = component_type & bitmask_all;
int32_t idx = 1;
- for(int32_t i = 0; i < tensor_component_index_max_count; ++i)
+ for (int32_t i = 0; i < tensor_component_index_max_count; ++i)
{
uint32_t dim_idx = component_type & bitmask_index_0;
- if(dim_idx == 0)
+ if (dim_idx == 0)
{
// Stop at the first nibble containing 0
break;
@@ -104,7 +104,7 @@ CLTensorComponent &CLTensorArgument::cl_component(TensorComponentType x)
// Get the dimension value
const int32_t dim_val = _info.shape()[dim_idx];
- if(dim_val == kDynamicTensorDimensionValue)
+ if (dim_val == kDynamicTensorDimensionValue)
{
// We cannot return the dimension by value if it is dynamic.
// Therefore, force the idx variable to kDynamicTensorDimensionValue and break the loop.
@@ -118,7 +118,7 @@ CLTensorComponent &CLTensorArgument::cl_component(TensorComponentType x)
component_type >>= 4;
}
- if(idx != kDynamicTensorDimensionValue)
+ if (idx != kDynamicTensorDimensionValue)
{
_components_used.emplace_back(std::make_unique<CLTensorComponent>(*this, x, idx));
@@ -141,14 +141,10 @@ TensorStorageVariable &CLTensorArgument::storage(TensorStorageType x)
{
// Return the storage if it has already been created.
{
- const auto it = std::find_if(
- _storages_used.begin(), _storages_used.end(),
- [=](const TensorStorageVariable &item)
- {
- return item.type == x;
- });
+ const auto it = std::find_if(_storages_used.begin(), _storages_used.end(),
+ [=](const TensorStorageVariable &item) { return item.type == x; });
- if(it != _storages_used.end())
+ if (it != _storages_used.end())
{
return *it;
}
@@ -167,7 +163,7 @@ std::string CLTensorArgument::create_storage_name(TensorStorageType x) const
{
std::string var_name = _basename;
- switch(x)
+ switch (x)
{
case TensorStorageType::BufferUint8Ptr:
var_name += "_ptr";
@@ -198,9 +194,9 @@ std::vector<const ITensorComponent *> CLTensorArgument::components() const
{
std::vector<const ITensorComponent *> components;
- for(const auto &component : _components_used)
+ for (const auto &component : _components_used)
{
- if(component->is_assignable())
+ if (component->is_assignable())
{
components.push_back(component.get());
}
diff --git a/compute_kernel_writer/src/cl/CLTensorArgument.h b/compute_kernel_writer/src/cl/CLTensorArgument.h
index 4cbbee21ee..35df51422e 100644
--- a/compute_kernel_writer/src/cl/CLTensorArgument.h
+++ b/compute_kernel_writer/src/cl/CLTensorArgument.h
@@ -26,7 +26,9 @@
#include "ckw/types/TensorComponentType.h"
#include "ckw/types/TensorStorageType.h"
+
#include "src/ITensor.h"
+
#include <memory>
#include <string>
#include <vector>
@@ -67,7 +69,7 @@ public:
* unlike @ref CLTensorComponent::component which is for the public API and only returns
* a reference to a generic @ref ITile object.
*/
- CLTensorComponent& cl_component(TensorComponentType component_type);
+ CLTensorComponent &cl_component(TensorComponentType component_type);
// Inherited method overridden
TensorStorageVariable &storage(TensorStorageType x) override;
@@ -78,7 +80,7 @@ public:
private:
std::string create_storage_name(TensorStorageType x) const;
- bool _return_dims_by_value{ false };
+ bool _return_dims_by_value{false};
std::vector<TensorStorageVariable> _storages_used{};
std::vector<std::unique_ptr<CLTensorComponent>> _components_used{};
};
diff --git a/compute_kernel_writer/src/cl/CLTensorComponent.cpp b/compute_kernel_writer/src/cl/CLTensorComponent.cpp
index c29b307748..dbe2036768 100644
--- a/compute_kernel_writer/src/cl/CLTensorComponent.cpp
+++ b/compute_kernel_writer/src/cl/CLTensorComponent.cpp
@@ -23,8 +23,10 @@
*/
#include "src/cl/CLTensorComponent.h"
+
#include "ckw/Error.h"
#include "ckw/types/TensorComponentType.h"
+
#include "src/cl/CLTensorArgument.h"
#include "src/cl/CLTile.h"
@@ -38,7 +40,7 @@ std::string create_component_name(const std::string &name, TensorComponentType x
{
std::string var_name(name);
- switch(x)
+ switch (x)
{
case TensorComponentType::OffsetFirstElement:
var_name += "_offset_first_element";
@@ -93,12 +95,13 @@ std::string create_component_name(const std::string &name, TensorComponentType x
} // namespace
CLTensorComponent::CLTensorComponent(const CLTensorArgument &tensor, TensorComponentType component_type)
- : CLTile(create_component_name(tensor.name(), component_type), TileInfo(DataType::Int32)), _component_type(component_type)
+ : CLTile(create_component_name(tensor.name(), component_type), TileInfo(DataType::Int32)),
+ _component_type(component_type)
{
}
CLTensorComponent::CLTensorComponent(const CLTensorArgument &tensor, TensorComponentType component_type, int32_t value)
- : CLTile({ { std::to_string(value) } }, DataType::Int32), _component_type(component_type)
+ : CLTile({{std::to_string(value)}}, DataType::Int32), _component_type(component_type)
{
CKW_UNUSED(tensor);
}
diff --git a/compute_kernel_writer/src/cl/CLTensorComponent.h b/compute_kernel_writer/src/cl/CLTensorComponent.h
index 42a42666dc..731597ebbf 100644
--- a/compute_kernel_writer/src/cl/CLTensorComponent.h
+++ b/compute_kernel_writer/src/cl/CLTensorComponent.h
@@ -26,8 +26,9 @@
#define CKW_SRC_CL_CLTENSORCOMPONENT_H
#include "ckw/types/TensorComponentType.h"
-#include "src/ITensorComponent.h"
+
#include "src/cl/CLTile.h"
+#include "src/ITensorComponent.h"
namespace ckw
{
@@ -72,7 +73,7 @@ public:
TensorComponentType component_type() const override;
private:
- TensorComponentType _component_type{ TensorComponentType::Unknown };
+ TensorComponentType _component_type{TensorComponentType::Unknown};
};
} // namespace ckw
diff --git a/compute_kernel_writer/src/cl/CLTile.cpp b/compute_kernel_writer/src/cl/CLTile.cpp
index 0cce69a9e1..f6e271e813 100644
--- a/compute_kernel_writer/src/cl/CLTile.cpp
+++ b/compute_kernel_writer/src/cl/CLTile.cpp
@@ -21,20 +21,20 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+#include "src/cl/CLTile.h"
+
#include "ckw/Error.h"
#include "ckw/TileInfo.h"
-#include "src/Helpers.h"
#include "src/cl/CLHelpers.h"
-#include "src/cl/CLTile.h"
+#include "src/Helpers.h"
#include <algorithm>
#include <vector>
namespace ckw
{
-CLTile::CLTile(const std::string &name, const TileInfo &info)
- : _is_constant(false)
+CLTile::CLTile(const std::string &name, const TileInfo &info) : _is_constant(false)
{
validate_tile_info(info);
@@ -42,8 +42,7 @@ CLTile::CLTile(const std::string &name, const TileInfo &info)
_info = info;
}
-CLTile::CLTile(const TileContainer &vals, DataType dt)
- : _is_constant(true)
+CLTile::CLTile(const TileContainer &vals, DataType dt) : _is_constant(true)
{
const int32_t w = vals[0].size();
const int32_t h = vals.size();
@@ -56,9 +55,9 @@ CLTile::CLTile(const TileContainer &vals, DataType dt)
_vals = TileContainer(h, std::vector<std::string>(w));
- for(int32_t y = 0; y < h; ++y)
+ for (int32_t y = 0; y < h; ++y)
{
- for(int32_t x = 0; x < w; ++x)
+ for (int32_t x = 0; x < w; ++x)
{
_vals[y][x] = vals[y][x];
}
@@ -81,7 +80,7 @@ TileVariable CLTile::scalar(int32_t row, int32_t col) const
col = clamp(col, static_cast<int32_t>(0), _info.width() - 1);
row = clamp(row, static_cast<int32_t>(0), _info.height() - 1);
- if(_is_constant)
+ if (_is_constant)
{
// We can use the vector method to retrieve the scalar variable stored in the constant tile
return vector(row, col, 1);
@@ -94,7 +93,7 @@ TileVariable CLTile::scalar(int32_t row, int32_t col) const
t.desc.len = 1;
// This check is required because if the width has only one element, we cannot use .s0
- if(_info.width() != 1)
+ if (_info.width() != 1)
{
// Automatic broadcasting
t.str += ".s" + dec_to_hex_as_string(col);
@@ -109,7 +108,7 @@ TileVariable CLTile::vector(int32_t row) const
// Clamp to nearest valid edge
row = clamp(row, static_cast<int32_t>(0), _info.height() - 1);
- if(_is_constant)
+ if (_is_constant)
{
return vector(row, 0, _info.width());
}
@@ -138,14 +137,14 @@ TileVariable CLTile::vector(int32_t row, int32_t col_start, int32_t width) const
t.desc.dt = _info.data_type();
t.desc.len = width;
- if(_is_constant)
+ if (_is_constant)
{
// The vector has the following form: ((data_typeN)(val0, val1,..., ValN-1))
t.str = "((" + cl_get_variable_datatype_as_string(t.desc.dt, width) + ")";
t.str += "(";
int32_t col = col_start;
- for(; col < width - 1; ++col)
+ for (; col < width - 1; ++col)
{
t.str += _vals[row][col];
t.str += ", ";
@@ -157,10 +156,10 @@ TileVariable CLTile::vector(int32_t row, int32_t col_start, int32_t width) const
{
t.str = create_var_name(row);
- if(_info.width() != 1 && _info.width() != width)
+ if (_info.width() != 1 && _info.width() != width)
{
t.str += ".s";
- for(int i = 0; i < width; ++i)
+ for (int i = 0; i < width; ++i)
{
t.str += dec_to_hex_as_string(col_start + i);
}
@@ -174,11 +173,11 @@ std::vector<TileVariable> CLTile::all() const
{
std::vector<TileVariable> vars;
- if(_is_constant)
+ if (_is_constant)
{
- for(int32_t y = 0; y < _info.height(); ++y)
+ for (int32_t y = 0; y < _info.height(); ++y)
{
- for(int32_t x = 0; x < _info.width(); ++x)
+ for (int32_t x = 0; x < _info.width(); ++x)
{
// We can use the vector method to retrieve all the scalar variables stored in the constant tile
TileVariable t = vector(y, x, 1);
@@ -188,7 +187,7 @@ std::vector<TileVariable> CLTile::all() const
}
else
{
- for(int32_t y = 0; y < _info.height(); ++y)
+ for (int32_t y = 0; y < _info.height(); ++y)
{
TileVariable t;
t.str = create_var_name(y);
@@ -211,7 +210,7 @@ std::string CLTile::create_var_name(int32_t row) const
std::string var_name = _basename;
// If a scalar variable, we do not append the row index
- if(_info.height() > 1)
+ if (_info.height() > 1)
{
var_name += "__";
var_name += std::to_string(row);
@@ -222,7 +221,7 @@ std::string CLTile::create_var_name(int32_t row) const
std::vector<int32_t> CLTile::supported_vector_lengths() const
{
- return std::vector<int32_t>{ 1, 2, 3, 4, 8, 16 };
+ return std::vector<int32_t>{1, 2, 3, 4, 8, 16};
}
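
Sub-vector access stitches hexadecimal component selectors onto the row variable, one digit per column: row 1, columns 2..5 of an 8-wide tile become t__1.s2345. A sketch, with dec_to_hex_as_string mimicked by a local helper:

    #include <iostream>
    #include <string>

    // Single-digit stand-in for dec_to_hex_as_string.
    char dec_to_hex(int v)
    {
        return v < 10 ? static_cast<char>('0' + v) : static_cast<char>('a' + v - 10);
    }

    int main()
    {
        std::string str = "t__1.s"; // row variable plus component-selector prefix
        for (int i = 0; i < 4; ++i) // width == 4, col_start == 2
            str += dec_to_hex(2 + i);
        std::cout << str << '\n';   // t__1.s2345
    }
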
void CLTile::validate_tile_info(const TileInfo &info) const
diff --git a/compute_kernel_writer/src/cl/CLTile.h b/compute_kernel_writer/src/cl/CLTile.h
index 1fb0fc9dbe..498cf51034 100644
--- a/compute_kernel_writer/src/cl/CLTile.h
+++ b/compute_kernel_writer/src/cl/CLTile.h
@@ -25,6 +25,7 @@
#define COMPUTE_KERNEL_WRITER_SRC_CL_CLTILE_H
#include "src/ITile.h"
+
#include <string>
namespace ckw
@@ -75,9 +76,9 @@ private:
std::string create_var_name(int32_t row) const;
- TileInfo _info{ DataType::Unknown };
- std::string _basename{ "" };
- bool _is_constant{ false };
+ TileInfo _info{DataType::Unknown};
+ std::string _basename{""};
+ bool _is_constant{false};
TileContainer _vals{};
};
} // namespace ckw
diff --git a/compute_kernel_writer/src/cl/helpers/CLMemoryOpBufferHelper.cpp b/compute_kernel_writer/src/cl/helpers/CLMemoryOpBufferHelper.cpp
index f906bcd4b1..a98ebed8fa 100644
--- a/compute_kernel_writer/src/cl/helpers/CLMemoryOpBufferHelper.cpp
+++ b/compute_kernel_writer/src/cl/helpers/CLMemoryOpBufferHelper.cpp
@@ -28,20 +28,25 @@
#include "ckw/types/MemoryOperation.h"
#include "ckw/types/TensorStorageType.h"
-#include "src/ITensor.h"
-#include "src/Tensor3dMapper.h"
#include "src/cl/CLHelpers.h"
#include "src/cl/CLKernelWriter.h"
#include "src/cl/CLTensorArgument.h"
#include "src/cl/CLTile.h"
+#include "src/ITensor.h"
+#include "src/Tensor3dMapper.h"
namespace ckw
{
-bool CLMemoryOpBufferHelper::validate(const CLKernelWriter *writer, const ITensor *tensor, const TensorSampler *sampler, const Tensor3dMapper *mapper, MemoryOperation op, const CLTile *dst)
+bool CLMemoryOpBufferHelper::validate(const CLKernelWriter *writer,
+ const ITensor *tensor,
+ const TensorSampler *sampler,
+ const Tensor3dMapper *mapper,
+ MemoryOperation op,
+ const CLTile *dst)
{
CKW_UNUSED(writer, tensor, mapper, op, dst);
- if(sampler->storage() != TensorStorageType::BufferUint8Ptr)
+ if (sampler->storage() != TensorStorageType::BufferUint8Ptr)
{
return false;
}
@@ -97,15 +102,15 @@ bool CLMemoryOpBufferHelper::validate(const CLKernelWriter *writer, const ITenso
*/
void CLMemoryOpBufferHelper::initialize(const CLTile *dst, const CLTile *x, const CLTile *z, const CLTile *b)
{
- _dst = dst;
+ _dst = dst;
CKW_ASSERT(validate(_writer, _tensor, _sampler, _mapper.get(), _op, _dst));
_ls_width_full = dst->info().width();
- _coord_x = x->scalar(0, 0).str;
- _coord_z = z->scalar(0, 0).str;
- _coord_b = b->scalar(0, 0).str;
- _coord_orig_z = _coord_z;
+ _coord_x = x->scalar(0, 0).str;
+ _coord_z = z->scalar(0, 0).str;
+ _coord_b = b->scalar(0, 0).str;
+ _coord_orig_z = _coord_z;
out_of_bound_initialize_x(_coord_x);
out_of_bound_initialize_z(_coord_z);
@@ -126,10 +131,10 @@ void CLMemoryOpBufferHelper::write_row(int32_t row_id, const std::string &coord_
out_of_bound_finalize_y(dst);
    // The leftover load/store will be written in the finalize stage
- if(_ls_width_part.size() != 0)
+ if (_ls_width_part.size() != 0)
{
int32_t col_start = 0;
- for(int32_t partial_width : _ls_width_part)
+ for (int32_t partial_width : _ls_width_part)
{
const std::string dst = _dst->vector(row_id, col_start, partial_width).str;
const std::string coord_x = _coord_x + " + " + std::to_string(col_start);
@@ -150,13 +155,13 @@ void CLMemoryOpBufferHelper::finalize()
void CLMemoryOpBufferHelper::out_of_bound_initialize_x(const std::string &coord)
{
- if(_sampler->address_mode_x() == TensorSamplerAddressModeX::OverlappingMin)
+ if (_sampler->address_mode_x() == TensorSamplerAddressModeX::OverlappingMin)
{
- TensorInfo tensor_info = _tensor->info();
- TensorShape shape = tensor_info.shape();
+ TensorInfo tensor_info = _tensor->info();
+ TensorShape shape = tensor_info.shape();
_ls_width_part = cl_decompose_vector_width(shape[0] % _ls_width_full);
- if(_ls_width_part.size() != 0)
+ if (_ls_width_part.size() != 0)
{
_writer->op_write_raw_code("if(" + coord + " > 0)\n{\n");
}
@@ -165,14 +170,14 @@ void CLMemoryOpBufferHelper::out_of_bound_initialize_x(const std::string &coord)
void CLMemoryOpBufferHelper::out_of_bound_finalize_x()
{
- if(_sampler->address_mode_x() == TensorSamplerAddressModeX::OverlappingMin)
+ if (_sampler->address_mode_x() == TensorSamplerAddressModeX::OverlappingMin)
{
- if(_ls_width_part.size() != 0)
+ if (_ls_width_part.size() != 0)
{
_writer->op_write_raw_code("}\nelse\n{\n");
out_of_bound_initialize_z(_coord_orig_z);
- for(LeftoverDescriptor leftover_desc : _leftovers_x)
+ for (LeftoverDescriptor leftover_desc : _leftovers_x)
{
out_of_bound_initialize_y(leftover_desc.coord);
_writer->op_write_raw_code(leftover_desc.statement);
@@ -191,7 +196,7 @@ void CLMemoryOpBufferHelper::out_of_bound_initialize_y(const std::string &coord)
const TensorSamplerAddressModeY address_mode_y = _sampler->address_mode_y();
- switch(address_mode_y)
+ switch (address_mode_y)
{
case TensorSamplerAddressModeY::ClampToBorderMaxOnly:
            // Not to be moved outside the case because it marks the relevant tensor component as used even if we don't use the variable
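PLACEHOLDER-UNUSED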
@@ -212,7 +217,7 @@ void CLMemoryOpBufferHelper::out_of_bound_finalize_y(const std::string &dst)
{
const TensorSamplerAddressModeY address_mode_y = _sampler->address_mode_y();
- switch(address_mode_y)
+ switch (address_mode_y)
{
case TensorSamplerAddressModeY::ClampToBorderMaxOnly:
_writer->op_write_raw_code("}\nelse\n{\n");
@@ -234,7 +239,7 @@ void CLMemoryOpBufferHelper::out_of_bound_initialize_z(const std::string &coord)
CKW_UNUSED(coord);
const TensorSamplerAddressModeZ address_mode_z = _sampler->address_mode_z();
- switch(address_mode_z)
+ switch (address_mode_z)
{
case TensorSamplerAddressModeZ::None:
break;
@@ -247,7 +252,7 @@ void CLMemoryOpBufferHelper::out_of_bound_finalize_z()
{
const TensorSamplerAddressModeZ address_mode_z = _sampler->address_mode_z();
- switch(address_mode_z)
+ switch (address_mode_z)
{
case TensorSamplerAddressModeZ::None:
break;
@@ -256,13 +261,15 @@ void CLMemoryOpBufferHelper::out_of_bound_finalize_z()
}
}
-std::string CLMemoryOpBufferHelper::to_statement(MemoryOperation op, int32_t vector_width, const std::string &data,
- const std::string &address) const
+std::string CLMemoryOpBufferHelper::to_statement(MemoryOperation op,
+ int32_t vector_width,
+ const std::string &data,
+ const std::string &address) const
{
- switch(op)
+ switch (op)
{
case MemoryOperation::Load:
- if(vector_width != 1)
+ if (vector_width != 1)
{
return data + " = vload" + std::to_string(vector_width) + "(0, " + address + ")";
}
@@ -272,7 +279,7 @@ std::string CLMemoryOpBufferHelper::to_statement(MemoryOperation op, int32_t vec
}
break;
case MemoryOperation::Store:
- if(vector_width != 1)
+ if (vector_width != 1)
{
return "vstore" + std::to_string(vector_width) + "(" + data + ", 0, " + address + ")";
}
@@ -288,26 +295,28 @@ std::string CLMemoryOpBufferHelper::to_statement(MemoryOperation op, int32_t vec
return "";
}
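
Both branches above produce plain vload/vstore expressions; width 1 presumably degenerates to a direct pointer dereference in the hidden else branches. For vector width 4:

    #include <iostream>
    #include <string>

    int main()
    {
        const std::string data = "t__0", address = "(__global float*)(p)";
        std::cout << data << " = vload4(0, " << address << ")\n";      // Load
        std::cout << "vstore4(" << data << ", 0, " << address << ")\n"; // Store
    }
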
-std::string CLMemoryOpBufferHelper::to_buffer_address(const std::string &x, const std::string &y, const std::string &z,
- const std::string &b) const
+std::string CLMemoryOpBufferHelper::to_buffer_address(const std::string &x,
+ const std::string &y,
+ const std::string &z,
+ const std::string &b) const
{
TensorStorageType tensor_storage = _sampler->storage();
CKW_ASSERT(tensor_storage == TensorStorageType::BufferUint8Ptr);
- const std::string ptr_buf = _tensor->storage(tensor_storage).val;
- const std::string dst_type = cl_data_type_rounded_up_to_valid_vector_width(_dst->info().data_type(), 1);
+ const std::string ptr_buf = _tensor->storage(tensor_storage).val;
+ const std::string dst_type = cl_data_type_rounded_up_to_valid_vector_width(_dst->info().data_type(), 1);
std::string address;
address += "(__global ";
address += dst_type;
address += "*)(";
address += ptr_buf;
- if(x != "0" && (_mapper->dim_x().str != "1"))
+ if (x != "0" && (_mapper->dim_x().str != "1"))
{
address += " + (";
address += x + ") * sizeof(" + dst_type + ")";
}
- if(y != "0")
+ if (y != "0")
{
const std::string stride_y = _mapper->stride_y().str;
address += " + (";
@@ -315,7 +324,7 @@ std::string CLMemoryOpBufferHelper::to_buffer_address(const std::string &x, cons
address += " * ";
address += stride_y;
}
- if(z != "0" && (_mapper->dim_z().str != "1"))
+ if (z != "0" && (_mapper->dim_z().str != "1"))
{
const std::string stride_z = _mapper->stride_z().str;
address += " + (";
@@ -323,7 +332,7 @@ std::string CLMemoryOpBufferHelper::to_buffer_address(const std::string &x, cons
address += " * ";
address += stride_z;
}
- if(b != "0" && (_mapper->dim_batch().str != "1"))
+ if (b != "0" && (_mapper->dim_batch().str != "1"))
{
const std::string stride_b = _mapper->stride_batch().str;
address += " + (";
diff --git a/compute_kernel_writer/src/cl/helpers/CLMemoryOpBufferHelper.h b/compute_kernel_writer/src/cl/helpers/CLMemoryOpBufferHelper.h
index 9bcd571a81..4e1a842fe1 100644
--- a/compute_kernel_writer/src/cl/helpers/CLMemoryOpBufferHelper.h
+++ b/compute_kernel_writer/src/cl/helpers/CLMemoryOpBufferHelper.h
@@ -27,9 +27,9 @@
#include "src/cl/helpers/ICLMemoryOpHelper.h"
+#include <cstdint>
#include <string>
#include <vector>
-#include <cstdint>
namespace ckw
{
@@ -65,20 +65,25 @@ private:
struct LeftoverDescriptor
{
LeftoverDescriptor(const std::string &dst, const std::string &coord, const std::string &statement)
- : dst(dst), coord(coord), statement(statement)
+ : dst(dst), coord(coord), statement(statement)
{
}
- std::string dst{}; // Describes the destination tile or part of it
- std::string coord{}; // Describes the coordinate to be used in boundary checks
- std::string statement{}; // Describes the memory operation statement
+ std::string dst{}; // Describes the destination tile or part of it
+ std::string coord{}; // Describes the coordinate to be used in boundary checks
+ std::string statement{}; // Describes the memory operation statement
};
std::vector<int32_t> _ls_width_part{};
std::vector<LeftoverDescriptor> _leftovers_x{};
std::string _coord_orig_z{};
- static bool validate(const CLKernelWriter *writer, const ITensor *tensor, const TensorSampler *sampler, const Tensor3dMapper *mapper, MemoryOperation op, const CLTile *dst);
+ static bool validate(const CLKernelWriter *writer,
+ const ITensor *tensor,
+ const TensorSampler *sampler,
+ const Tensor3dMapper *mapper,
+ MemoryOperation op,
+ const CLTile *dst);
void out_of_bound_initialize_x(const std::string &coord);
void out_of_bound_finalize_x();
@@ -87,8 +92,10 @@ private:
void out_of_bound_initialize_z(const std::string &coord);
void out_of_bound_finalize_z();
- std::string to_statement(MemoryOperation op, int32_t vector_width, const std::string &data, const std::string &address) const;
- std::string to_buffer_address(const std::string &x, const std::string &y, const std::string &z, const std::string &b) const;
+ std::string
+ to_statement(MemoryOperation op, int32_t vector_width, const std::string &data, const std::string &address) const;
+ std::string
+ to_buffer_address(const std::string &x, const std::string &y, const std::string &z, const std::string &b) const;
};
} // namespace ckw
diff --git a/compute_kernel_writer/src/cl/helpers/CLMemoryOpImage2dHelper.cpp b/compute_kernel_writer/src/cl/helpers/CLMemoryOpImage2dHelper.cpp
index 55f88f4136..b7d146bdee 100644
--- a/compute_kernel_writer/src/cl/helpers/CLMemoryOpImage2dHelper.cpp
+++ b/compute_kernel_writer/src/cl/helpers/CLMemoryOpImage2dHelper.cpp
@@ -28,11 +28,11 @@
#include "ckw/types/MemoryOperation.h"
#include "ckw/types/TensorStorageType.h"
-#include "src/ITensor.h"
-#include "src/Tensor3dMapper.h"
#include "src/cl/CLKernelWriter.h"
#include "src/cl/CLTensorArgument.h"
#include "src/cl/CLTile.h"
+#include "src/ITensor.h"
+#include "src/Tensor3dMapper.h"
namespace ckw
{
@@ -66,31 +66,36 @@ void CLMemoryOpImage2dHelper::finalize()
{
}
-bool CLMemoryOpImage2dHelper::validate(const CLKernelWriter *writer, const ITensor *tensor, const TensorSampler *sampler, const Tensor3dMapper *mapper, MemoryOperation op, const CLTile *dst)
+bool CLMemoryOpImage2dHelper::validate(const CLKernelWriter *writer,
+ const ITensor *tensor,
+ const TensorSampler *sampler,
+ const Tensor3dMapper *mapper,
+ MemoryOperation op,
+ const CLTile *dst)
{
CKW_UNUSED(writer, tensor, mapper);
- if(dst->info().width() != 4)
+ if (dst->info().width() != 4)
{
return false;
}
- if(sampler->address_mode_x() != TensorSamplerAddressModeX::None)
+ if (sampler->address_mode_x() != TensorSamplerAddressModeX::None)
{
return false;
}
- if(sampler->address_mode_z() != TensorSamplerAddressModeZ::None)
+ if (sampler->address_mode_z() != TensorSamplerAddressModeZ::None)
{
return false;
}
- if(sampler->storage() != TensorStorageType::Texture2dReadOnly && op == MemoryOperation::Load)
+ if (sampler->storage() != TensorStorageType::Texture2dReadOnly && op == MemoryOperation::Load)
{
return false;
}
- if(sampler->storage() != TensorStorageType::Texture2dWriteOnly && op == MemoryOperation::Store)
+ if (sampler->storage() != TensorStorageType::Texture2dWriteOnly && op == MemoryOperation::Store)
{
return false;
}
- if((dst->info().data_type() != DataType::Fp32) && (dst->info().data_type() != DataType::Fp16))
+ if ((dst->info().data_type() != DataType::Fp32) && (dst->info().data_type() != DataType::Fp16))
{
return false;
}
@@ -102,7 +107,7 @@ void CLMemoryOpImage2dHelper::out_of_bound_initialize_y(const std::string &coord
CKW_UNUSED(coord);
const TensorSamplerAddressModeY address_mode_y = _sampler->address_mode_y();
- switch(address_mode_y)
+ switch (address_mode_y)
{
case TensorSamplerAddressModeY::SkipLessThanZero:
_writer->op_write_raw_code("if(" + coord + " >= 0)\n{\n");
@@ -118,7 +123,7 @@ void CLMemoryOpImage2dHelper::out_of_bound_initialize_y(const std::string &coord
void CLMemoryOpImage2dHelper::out_of_bound_finalize_y()
{
const TensorSamplerAddressModeY address_mode_y = _sampler->address_mode_y();
- switch(address_mode_y)
+ switch (address_mode_y)
{
case TensorSamplerAddressModeY::SkipLessThanZero:
_writer->op_write_raw_code("}\n");
@@ -131,15 +136,19 @@ void CLMemoryOpImage2dHelper::out_of_bound_finalize_y()
}
}
-std::string CLMemoryOpImage2dHelper::to_ls_image2d(MemoryOperation op, int32_t vector_width, const std::string &data, const std::string &sampler, const std::string &address) const
+std::string CLMemoryOpImage2dHelper::to_ls_image2d(MemoryOperation op,
+ int32_t vector_width,
+ const std::string &data,
+ const std::string &sampler,
+ const std::string &address) const
{
CKW_UNUSED(vector_width);
const TensorStorageType tensor_storage = _sampler->storage();
- const std::string image2d_obj = _tensor->storage(tensor_storage).val;
- const std::string post_fix = _dst->info().data_type() == DataType::Fp32 ? "f" : "h";
+ const std::string image2d_obj = _tensor->storage(tensor_storage).val;
+ const std::string post_fix = _dst->info().data_type() == DataType::Fp32 ? "f" : "h";
- switch(op)
+ switch (op)
{
case MemoryOperation::Load:
return data + " = read_image" + post_fix + "(" + image2d_obj + ", " + sampler + ", " + address + ")";
@@ -155,7 +164,7 @@ std::string CLMemoryOpImage2dHelper::to_ls_image2d_sampler() const
{
const auto address_mode_y = _sampler->address_mode_y();
- switch(address_mode_y)
+ switch (address_mode_y)
{
case TensorSamplerAddressModeY::None:
return "CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST";
@@ -167,17 +176,19 @@ std::string CLMemoryOpImage2dHelper::to_ls_image2d_sampler() const
}
}
-std::string CLMemoryOpImage2dHelper::to_ls_image2d_address(const std::string &x, const std::string &y, const std::string &z,
+std::string CLMemoryOpImage2dHelper::to_ls_image2d_address(const std::string &x,
+ const std::string &y,
+ const std::string &z,
const std::string &b) const
{
std::string coord_x = "(" + x + ") >> 2";
std::string coord_y = "(";
- if(y != "0")
+ if (y != "0")
{
coord_y += y;
}
- if(z != "0" && (_mapper->dim_z().str != "1"))
+ if (z != "0" && (_mapper->dim_z().str != "1"))
{
const std::string dim = _mapper->dim_y().str;
coord_y += " + (";
@@ -185,7 +196,7 @@ std::string CLMemoryOpImage2dHelper::to_ls_image2d_address(const std::string &x,
coord_y += " * ";
coord_y += dim;
}
- if(b != "0" && (_mapper->dim_batch().str != "1"))
+ if (b != "0" && (_mapper->dim_batch().str != "1"))
{
const std::string dim0 = _mapper->dim_y().str;
const std::string dim1 = _mapper->dim_z().str;
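
to_ls_image2d_address() collapses a 4D (x, y, z, batch) coordinate onto the 2D image grid: x is shifted right by two because each texel packs four elements, and the z and batch dimensions are folded into y using the tensor's y and z extents. A numeric sketch under that assumption (the closing lines of the batch term fall outside this hunk, so its exact form is inferred):

    #include <cstdint>
    #include <string>

    // dim_y/dim_z stand in for the extents the Tensor3dMapper reports.
    std::string image2d_coord(int32_t x, int32_t y, int32_t z, int32_t b, int32_t dim_y, int32_t dim_z)
    {
        const int32_t coord_x = x >> 2;                            // four elements per texel
        const int32_t coord_y = y + z * dim_y + b * dim_y * dim_z; // fold z, then batch, into y
        return "(int2)(" + std::to_string(coord_x) + ", " + std::to_string(coord_y) + ")";
    }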
diff --git a/compute_kernel_writer/src/cl/helpers/CLMemoryOpImage2dHelper.h b/compute_kernel_writer/src/cl/helpers/CLMemoryOpImage2dHelper.h
index 73bede7789..fd9b097a24 100644
--- a/compute_kernel_writer/src/cl/helpers/CLMemoryOpImage2dHelper.h
+++ b/compute_kernel_writer/src/cl/helpers/CLMemoryOpImage2dHelper.h
@@ -59,14 +59,24 @@ public:
void finalize() override;
private:
- static bool validate(const CLKernelWriter *writer, const ITensor *tensor, const TensorSampler *sampler, const Tensor3dMapper *mapper, MemoryOperation op, const CLTile *dst);
+ static bool validate(const CLKernelWriter *writer,
+ const ITensor *tensor,
+ const TensorSampler *sampler,
+ const Tensor3dMapper *mapper,
+ MemoryOperation op,
+ const CLTile *dst);
void out_of_bound_initialize_y(const std::string &coord);
void out_of_bound_finalize_y();
- std::string to_ls_image2d(MemoryOperation op, int32_t vector_width, const std::string &data, const std::string &sampler, const std::string &address) const;
+ std::string to_ls_image2d(MemoryOperation op,
+ int32_t vector_width,
+ const std::string &data,
+ const std::string &sampler,
+ const std::string &address) const;
std::string to_ls_image2d_sampler() const;
- std::string to_ls_image2d_address(const std::string &x, const std::string &y, const std::string &z, const std::string &b) const;
+ std::string
+ to_ls_image2d_address(const std::string &x, const std::string &y, const std::string &z, const std::string &b) const;
};
} // namespace ckw
diff --git a/compute_kernel_writer/src/cl/helpers/ICLMemoryOpHelper.h b/compute_kernel_writer/src/cl/helpers/ICLMemoryOpHelper.h
index 7f363431e8..f46fee9750 100644
--- a/compute_kernel_writer/src/cl/helpers/ICLMemoryOpHelper.h
+++ b/compute_kernel_writer/src/cl/helpers/ICLMemoryOpHelper.h
@@ -26,6 +26,7 @@
#define CKW_SRC_CL_HELPERS_ICLMEMORYOPHELPER_H
#include "ckw/TensorSampler.h"
+
#include "src/Tensor3dMapper.h"
#include <cstdint>
@@ -98,16 +99,16 @@ public:
virtual void finalize() = 0;
protected:
- CLKernelWriter *_writer{ nullptr };
- ITensor *_tensor{ nullptr };
- TensorSampler *_sampler{ nullptr };
- MemoryOperation _op;
- std::unique_ptr<Tensor3dMapper> _mapper{ nullptr };
- const CLTile *_dst{ nullptr };
- int32_t _ls_width_full{ 0 };
- std::string _coord_x{};
- std::string _coord_z{};
- std::string _coord_b{};
+ CLKernelWriter *_writer{nullptr};
+ ITensor *_tensor{nullptr};
+ TensorSampler *_sampler{nullptr};
+ MemoryOperation _op;
+ std::unique_ptr<Tensor3dMapper> _mapper{nullptr};
+ const CLTile *_dst{nullptr};
+ int32_t _ls_width_full{0};
+ std::string _coord_x{};
+ std::string _coord_z{};
+ std::string _coord_b{};
};
} // namespace ckw
diff --git a/compute_kernel_writer/src/types/ConstantData.cpp b/compute_kernel_writer/src/types/ConstantData.cpp
index d2155cf55a..67b1103860 100644
--- a/compute_kernel_writer/src/types/ConstantData.cpp
+++ b/compute_kernel_writer/src/types/ConstantData.cpp
@@ -30,52 +30,51 @@ namespace ckw
{
namespace
{
- template<typename T>
- inline typename std::enable_if<std::is_same<T, float>::value, std::string>::type to_str(T value)
- {
- std::stringstream ss;
- ss << std::scientific << std::setprecision(std::numeric_limits<T>::max_digits10) << value;
- return ss.str();
- }
+template <typename T>
+inline typename std::enable_if<std::is_same<T, float>::value, std::string>::type to_str(T value)
+{
+ std::stringstream ss;
+ ss << std::scientific << std::setprecision(std::numeric_limits<T>::max_digits10) << value;
+ return ss.str();
+}
- template<typename T>
- inline typename std::enable_if<!std::is_same<T, float>::value && !std::is_same<T, bool>::value, std::string>::type to_str(T value)
- {
- return std::to_string(value);
- }
+template <typename T>
+inline typename std::enable_if<!std::is_same<T, float>::value && !std::is_same<T, bool>::value, std::string>::type
+to_str(T value)
+{
+ return std::to_string(value);
+}
- template<typename T>
- inline typename std::enable_if<std::is_same<T, bool>::value, std::string>::type to_str(T value)
- {
- return std::to_string((int) value);
- }
+template <typename T>
+inline typename std::enable_if<std::is_same<T, bool>::value, std::string>::type to_str(T value)
+{
+ return std::to_string((int)value);
}
+} // namespace
-template<typename T>
+template <typename T>
ConstantData::ConstantData(std::initializer_list<std::initializer_list<T>> values, DataType data_type)
: _data_type(data_type)
{
CKW_ASSERT(validate<T>(data_type));
CKW_ASSERT(values.size() > 0);
- for(auto value_arr: values)
+ for (auto value_arr : values)
{
// Each row must have the same number of elements
CKW_ASSERT(value_arr.size() == (*values.begin()).size());
StringVector vec;
- std::transform(value_arr.begin(), value_arr.end(),
- std::back_inserter(vec),
- [](T val) { return to_str(val); });
+ std::transform(value_arr.begin(), value_arr.end(), std::back_inserter(vec), [](T val) { return to_str(val); });
_values.push_back(std::move(vec));
}
}
-template<typename T>
+template <typename T>
bool ConstantData::validate(DataType data_type)
{
- switch(data_type)
+ switch (data_type)
{
case DataType::Fp32:
case DataType::Fp16:
@@ -107,7 +106,7 @@ template bool ConstantData::validate<uint32_t>(DataType);
template bool ConstantData::validate<bool>(DataType);
template bool ConstantData::validate<float>(DataType);
-const std::vector<std::vector<std::string>>& ConstantData::values() const
+const std::vector<std::vector<std::string>> &ConstantData::values() const
{
return _values;
}
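
The three enable_if overloads above pick a string form per element type: float is printed in scientific notation at max_digits10 so the emitted literal round-trips exactly, bool collapses to 0/1, and the remaining arithmetic types go through std::to_string. A compilable distillation, renamed as_literal to avoid clashing with the real to_str:

    #include <iomanip>
    #include <iostream>
    #include <limits>
    #include <sstream>
    #include <string>
    #include <type_traits>

    template <typename T>
    typename std::enable_if<std::is_same<T, float>::value, std::string>::type as_literal(T v)
    {
        std::stringstream ss; // max_digits10 digits guarantee exact float round-tripping
        ss << std::scientific << std::setprecision(std::numeric_limits<T>::max_digits10) << v;
        return ss.str();
    }

    template <typename T>
    typename std::enable_if<std::is_same<T, bool>::value, std::string>::type as_literal(T v)
    {
        return std::to_string((int)v); // "0" or "1"
    }

    template <typename T>
    typename std::enable_if<!std::is_same<T, float>::value && !std::is_same<T, bool>::value, std::string>::type
    as_literal(T v)
    {
        return std::to_string(v);
    }

    int main()
    {
        std::cout << as_literal(0.1f) << '\n'; // 1.000000015e-01
        std::cout << as_literal(true) << '\n'; // 1
        std::cout << as_literal(-7) << '\n';   // -7
    }

The ConstantData constructor then simply runs every element of each row through this conversion, so ConstantData({{0.5f}}, DataType::Fp32) ends up holding the scientific-form string "5.000000000e-01".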