1 files changed, 23 insertions, 21 deletions
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp
index 6ecf2bac44..e8e5087633 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp
@@ -24,16 +24,18 @@
 #include "GpuCkwCast.h"
 
 #include "arm_compute/core/Error.h"
-#include "arm_compute/core/Validate.h"
 #include "arm_compute/core/utils/helpers/AdjustVecSize.h"
+#include "arm_compute/core/Validate.h"
 #include "ckw/TensorTileSampler.h"
+
 #include "src/core/helpers/WindowHelpers.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h"
 #include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
 #include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
 #include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
+
 #include <string>
 
 using namespace ckw;
@@ -84,30 +86,29 @@ inline TensorTileSampler create_sampler(GpuCkwScopedKernelWriter &writer, int32_
 }
 } // namespace
 
-GpuCkwCast::GpuCkwCast(ComponentId                      id,
-                       const ArgumentPack<ITensorInfo> &tensors,
-                       const Attributes                &attributes)
-    : IGpuCkwComponentDriver{ id, tensors },
-      _src{},
-      _dst{},
-      _attributes{ attributes }
+GpuCkwCast::GpuCkwCast(ComponentId id, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes)
+    : IGpuCkwComponentDriver{id, tensors}, _src{}, _dst{}, _attributes{attributes}
 {
     _src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
     _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0);
     ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _dst);
 }
 
-void GpuCkwCast::write_component_code(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, GpuCkwScopedKernelWriter writer) const
+void GpuCkwCast::write_component_code(const ComponentGroup    &comp_group,
+                                      GpuCkwVariableTable     &vtable,
+                                      GpuCkwScopedKernelWriter writer) const
 {
     const auto         root_window = comp_group.get_root_component()->ckw_component_driver()->get_window();
     const unsigned int n0          = root_window.x().step();
     const unsigned int m0          = root_window.y().step();
 
-    GpuCkwComponentArgument *src = vtable.declare_variable(comp_group, writer, _src, TensorStorageType::ClBufferUint8Ptr, "src");
-    GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst");
+    GpuCkwComponentArgument *src =
+        vtable.declare_variable(comp_group, writer, _src, TensorStorageType::ClBufferUint8Ptr, "src");
+    GpuCkwComponentArgument *dst =
+        vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst");
 
     // Load the source tile and prepare the sampler.
-    if(!src->has_tile())
+    if (!src->has_tile())
     {
         const auto sampler = create_sampler(writer, m0, n0);
         writer->op_load_once(src, sampler);
@@ -122,7 +123,7 @@ void GpuCkwCast::write_component_code(const ComponentGroup &comp_group, GpuCkwVa
     const auto &sampler  = src->tile_sampler();
 
     // Prepare the output tile.
-    if(!dst->has_tile())
+    if (!dst->has_tile())
     {
         // Get Target datatype and convert it to ckw::DataType.
         ckw::DataType target_dt = dynamic_fusion::to_ckw(_attributes.data_type());
@@ -143,7 +144,7 @@ void GpuCkwCast::write_component_code(const ComponentGroup &comp_group, GpuCkwVa
     const size_t dst_size  = data_size_from_type(_dst->data_type());
     const bool   cast_down = (src_size >= dst_size);
 
-    if(cast_down && is_data_type_quantized(_src->data_type()))
+    if (cast_down && is_data_type_quantized(_src->data_type()))
     {
         const auto &constant_x80 = writer->declare_tile("0x80", 0x80);
         writer->op_binary_expression(src_tile, src_tile, BinaryOp::BitwiseXOR, constant_x80);
@@ -151,7 +152,7 @@ void GpuCkwCast::write_component_code(const ComponentGroup &comp_group, GpuCkwVa
 
     ckw::ConvertPolicy convert_policy = ckw::ConvertPolicy::None;
 
-    if(cast_down && (is_data_type_float(_src->data_type()) || _attributes.convert_policy() == ConvertPolicy::SATURATE))
+    if (cast_down && (is_data_type_float(_src->data_type()) || _attributes.convert_policy() == ConvertPolicy::SATURATE))
     {
         convert_policy = ckw::ConvertPolicy::Saturate;
     }
@@ -167,9 +168,10 @@ Window GpuCkwCast::get_window() const
     // Collapse Dim 1 (W) and Dim 2 (H) together, leave Dim 0 (C) unchanged
     // This is in line with the collapsing convention used by operators like Conv2d
     output_shape.collapse(2U, 1U);
-    constexpr unsigned int vector_size_byte_opencl           = 16;
-    const unsigned int     num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0));
-    Window                 win                               = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration));
+    constexpr unsigned int vector_size_byte_opencl = 16;
+    const unsigned int     num_elems_processed_per_iteration =
+        adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0));
+    Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration));
 
     return win;
 }