From afd38f0c617d6f89b2b4532c6c44f116617e2b6f Mon Sep 17 00:00:00 2001
From: Felix Thomasmathibalan
Date: Wed, 27 Sep 2023 17:46:17 +0100
Subject: Apply clang-format on repository

Code is formatted as per a revised clang-format configuration file (not part of this delivery). Version 14.0.6 is used.

Exclusion List:
- files with .cl extension
- files that are not strictly C/C++ (e.g. Android.bp, Sconscript ...)

And the following directories:
- compute_kernel_writer/validation/
- tests/
- include/
- src/core/NEON/kernels/convolution/
- src/core/NEON/kernels/arm_gemm/
- src/core/NEON/kernels/arm_conv/
- data/

There will be a follow-up for formatting of .cl files and the files under tests/ and compute_kernel_writer/validation/.

Signed-off-by: Felix Thomasmathibalan
Change-Id: Ib7eb1fcf4e7537b9feaefcfc15098a804a3fde0a
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10391
Benchmark: Arm Jenkins
Tested-by: Arm Jenkins
Reviewed-by: Gunes Bayir
---
 compute_kernel_writer/include/ckw/Error.h | 24 +-
 compute_kernel_writer/include/ckw/Kernel.h | 1 +
 compute_kernel_writer/include/ckw/KernelArgument.h | 3 +-
 compute_kernel_writer/include/ckw/KernelWriter.h | 85 ++-
 compute_kernel_writer/include/ckw/TensorInfo.h | 9 +-
 compute_kernel_writer/include/ckw/TensorSampler.h | 23 +-
 compute_kernel_writer/include/ckw/TileInfo.h | 2 +-
 .../include/ckw/types/ConstantData.h | 10 +-
 .../include/ckw/types/MemoryOperation.h | 10 +-
 .../include/ckw/types/TensorSamplerTypes.h | 4 +-
 .../prototype/examples/add_exp_store.cpp | 39 +-
 .../examples/common/ExampleComponentArgument.cpp | 8 +-
 .../examples/common/ExampleComponentArgument.h | 4 +-
 .../examples/common/ExampleKernelWriter.cpp | 13 +-
 .../examples/common/ExampleScopedKernelWriter.cpp | 1 +
 .../prototype/examples/writer_helper.cpp | 31 +-
 .../prototype/include/ckw/Error.h | 7 +-
 .../prototype/include/ckw/KernelArgument.h | 3 +-
 .../prototype/include/ckw/KernelWriter.h | 32 +-
 .../prototype/include/ckw/KernelWriterHelper.h | 122 ++--
 .../prototype/include/ckw/OperandBase.h | 1 +
 .../prototype/include/ckw/ScalarValue.h | 8 +-
 .../prototype/include/ckw/TensorInfo.h | 8 +-
 .../prototype/include/ckw/TensorOperand.h | 26 +-
 .../prototype/include/ckw/TensorTileSampler.h | 56 +-
 .../prototype/include/ckw/TileInfo.h | 2 +-
 .../prototype/include/ckw/types/Functions.h | 20 +-
 .../prototype/include/ckw/types/Operators.h | 4 +-
 .../include/ckw/types/TensorSamplerTypes.h | 40 +-
 compute_kernel_writer/prototype/src/Kernel.cpp | 19 +-
 .../prototype/src/KernelArgument.cpp | 4 +-
 .../prototype/src/KernelWriter.cpp | 82 +--
 .../prototype/src/OperandBase.cpp | 3 +-
 compute_kernel_writer/prototype/src/Prototype.h | 690 +++++++++++----------
 .../prototype/src/TensorOperand.cpp | 14 +-
 .../prototype/src/TensorTileSampler.cpp | 60 +-
 compute_kernel_writer/prototype/src/TileInfo.cpp | 9 +-
 .../prototype/src/TileOperand.cpp | 28 +-
 compute_kernel_writer/src/Error.cpp | 6 +-
 compute_kernel_writer/src/Helpers.cpp | 6 +-
 compute_kernel_writer/src/ITensorArgument.h | 9 +-
 compute_kernel_writer/src/ITensorComponent.h | 1 +
 compute_kernel_writer/src/ITile.h | 8 +-
 compute_kernel_writer/src/Kernel.cpp | 1 +
 compute_kernel_writer/src/KernelArgument.cpp | 1 +
 compute_kernel_writer/src/KernelWriter.cpp | 8 +-
 compute_kernel_writer/src/Tensor3dMapper.cpp | 26 +-
 compute_kernel_writer/src/Tensor3dMapper.h | 4 +-
 compute_kernel_writer/src/TensorOperand.cpp | 6 +-
 compute_kernel_writer/src/TensorSampler.cpp | 6 +-
 compute_kernel_writer/src/TensorUtils.cpp | 13 +-
 compute_kernel_writer/src/TileInfo.cpp | 9 +-
 compute_kernel_writer/src/TileOperand.cpp | 8 +-
 compute_kernel_writer/src/TileView.h | 10 +-
 compute_kernel_writer/src/cl/CLHelpers.cpp | 77 +--
 compute_kernel_writer/src/cl/CLKernelWriter.cpp | 282 +++++----
 compute_kernel_writer/src/cl/CLKernelWriter.h | 104 +++-
 compute_kernel_writer/src/cl/CLTensorArgument.cpp | 50 +-
 compute_kernel_writer/src/cl/CLTensorArgument.h | 6 +-
 compute_kernel_writer/src/cl/CLTensorComponent.cpp | 9 +-
 compute_kernel_writer/src/cl/CLTensorComponent.h | 5 +-
 compute_kernel_writer/src/cl/CLTile.cpp | 41 +-
 compute_kernel_writer/src/cl/CLTile.h | 7 +-
 .../src/cl/helpers/CLMemoryOpBufferHelper.cpp | 79 +--
 .../src/cl/helpers/CLMemoryOpBufferHelper.h | 23 +-
 .../src/cl/helpers/CLMemoryOpImage2dHelper.cpp | 51 +-
 .../src/cl/helpers/CLMemoryOpImage2dHelper.h | 16 +-
 .../src/cl/helpers/ICLMemoryOpHelper.h | 21 +-
 compute_kernel_writer/src/types/ConstantData.cpp | 49 +-
 69 files changed, 1390 insertions(+), 1057 deletions(-)

diff --git a/compute_kernel_writer/include/ckw/Error.h b/compute_kernel_writer/include/ckw/Error.h
index 7da9544b9e..6b80778957 100644
--- a/compute_kernel_writer/include/ckw/Error.h
+++ b/compute_kernel_writer/include/ckw/Error.h
@@ -53,7 +53,7 @@ create_error_msg(const std::string &file, const std::string &func, const std::st const std::string arg2(std::to_string(__LINE__)); \ const std::string arg3(msg); \ std::runtime_error(create_error_msg(arg0, arg1, arg2, arg3)); \ - } while(false) + } while (false) /** Mark the variables as unused. * @@ -74,16 +74,16 @@ inline void ignore_unused(T &&...) * * @param[in] msg The error message. */ -#define CKW_THROW_MSG(msg) \ - do \ - { \ - const std::string file(__FILE__); \ - const std::string func(__func__); \ - const std::string line(std::to_string(__LINE__)); \ - const std::string message(msg); \ - \ +#define CKW_THROW_MSG(msg) \ + do \ + { \ + const std::string file(__FILE__); \ + const std::string func(__func__); \ + const std::string line(std::to_string(__LINE__)); \ + const std::string message(msg); \ + \ throw std::runtime_error(ckw::create_error_msg(file, func, line, message)); \ - } while(false) + } while (false) #ifdef COMPUTE_KERNEL_WRITER_ASSERTS_ENABLED @@ -95,11 +95,11 @@ 
#define CKW_ASSERT_MSG(cond, msg) \ do \ { \ - if(!(cond)) \ + if (!(cond)) \ { \ CKW_THROW_MSG(msg); \ } \ - } while(false) + } while (false) #else // COMPUTE_KERNEL_WRITER_ASSERTS_ENABLED diff --git a/compute_kernel_writer/include/ckw/Kernel.h b/compute_kernel_writer/include/ckw/Kernel.h index dc0cad5503..f9b7bbb82e 100644 --- a/compute_kernel_writer/include/ckw/Kernel.h +++ b/compute_kernel_writer/include/ckw/Kernel.h @@ -26,6 +26,7 @@ #define CKW_INCLUDE_CKW_KERNEL_H #include "ckw/KernelArgument.h" + #include #include diff --git a/compute_kernel_writer/include/ckw/KernelArgument.h b/compute_kernel_writer/include/ckw/KernelArgument.h index 530e2920eb..7e9bcbf1ee 100644 --- a/compute_kernel_writer/include/ckw/KernelArgument.h +++ b/compute_kernel_writer/include/ckw/KernelArgument.h @@ -27,6 +27,7 @@ #include "ckw/types/TensorComponentType.h" #include "ckw/types/TensorStorageType.h" + #include namespace ckw @@ -90,7 +91,7 @@ private: TensorComponentType tensor_component_type; }; - SubId _sub_id{ 0 }; + SubId _sub_id{0}; }; } // namespace ckw diff --git a/compute_kernel_writer/include/ckw/KernelWriter.h b/compute_kernel_writer/include/ckw/KernelWriter.h index 15c99fe652..0d739e859a 100644 --- a/compute_kernel_writer/include/ckw/KernelWriter.h +++ b/compute_kernel_writer/include/ckw/KernelWriter.h @@ -115,7 +115,8 @@ public: * @param[in] first The first source tile. * @param[in] second The second source tile. */ - virtual void op_binary(const TileOperand &dst, BinaryOp op, const TileOperand &first, const TileOperand &second) = 0; + virtual void + op_binary(const TileOperand &dst, BinaryOp op, const TileOperand &first, const TileOperand &second) = 0; /** Write ternary expression statement: ` = (, , );`. * @@ -125,7 +126,11 @@ public: * @param[in] second The second source tile. * @param[in] third The third source tile. */ - virtual void op_ternary(const TileOperand &dst, TernaryOp op, const TileOperand &first, const TileOperand &second, const TileOperand &third) = 0; + virtual void op_ternary(const TileOperand &dst, + TernaryOp op, + const TileOperand &first, + const TileOperand &second, + const TileOperand &third) = 0; // ============================================================================================= // Flow control @@ -138,7 +143,8 @@ public: * @param[in] rhs The RHS tile of the condition. * @param[in] body The function that writes the body of the if block. */ - virtual void op_if(const TileOperand &lhs, BinaryOp op, const TileOperand &rhs, const std::function &body) = 0; + virtual void + op_if(const TileOperand &lhs, BinaryOp op, const TileOperand &rhs, const std::function &body) = 0; /** Write else-if block: `else if( ) { }`. * @@ -147,7 +153,8 @@ public: * @param[in] rhs The RHS tile of the condition. * @param[in] body The function that writes the body of the else-if block. */ - virtual void op_else_if(const TileOperand &lhs, BinaryOp op, const TileOperand &rhs, const std::function &body) = 0; + virtual void + op_else_if(const TileOperand &lhs, BinaryOp op, const TileOperand &rhs, const std::function &body) = 0; /** Write an else block: `else { }`. * @@ -165,10 +172,13 @@ public: * @param[in] update_value The value which is updated at every iteration. * @param[in] body The function that writes the body of the for-loop block. 
*/ - virtual void op_for_loop( - const TileOperand &var, BinaryOp cond_op, const TileOperand &cond_value, - const TileOperand &update_var, AssignmentOp update_op, const TileOperand &update_value, - const std::function &body) = 0; + virtual void op_for_loop(const TileOperand &var, + BinaryOp cond_op, + const TileOperand &cond_value, + const TileOperand &update_var, + AssignmentOp update_op, + const TileOperand &update_value, + const std::function &body) = 0; /** Write the return statement. */ virtual void op_return() = 0; @@ -271,9 +281,13 @@ public: * @param[in] z z-coordinate * @param[in] batch batch */ - virtual void op_load( - const TileOperand &tile_op, const TensorOperand &tensor_op, TensorSampler &sampler, - const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch) = 0; + virtual void op_load(const TileOperand &tile_op, + const TensorOperand &tensor_op, + TensorSampler &sampler, + const TileOperand &x, + const TileOperand &y, + const TileOperand &z, + const TileOperand &batch) = 0; /** Load the data from the tensor memory to the tile in a dilated way using the sampling information. * @@ -282,27 +296,41 @@ public: * @param[in] dilation_x Dilation while reading in x-dimension * @param[in] dilation_y Dilation while reading in y-dimension */ - virtual void op_load_dilated( - const TileOperand &tile_op, const TensorOperand &tensor_op, TensorSampler &sampler, - const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch, - const TileOperand &dilation_x, const TileOperand &dilation_y) = 0; + virtual void op_load_dilated(const TileOperand &tile_op, + const TensorOperand &tensor_op, + TensorSampler &sampler, + const TileOperand &x, + const TileOperand &y, + const TileOperand &z, + const TileOperand &batch, + const TileOperand &dilation_x, + const TileOperand &dilation_y) = 0; /** Store the data to the tensor memory from the tile using the sampling information. * * Similar to @ref KernelWriter::op_load() */ - virtual void op_store( - const TensorOperand &tensor_op, const TileOperand &tile_op, TensorSampler &sampler, - const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch) = 0; + virtual void op_store(const TensorOperand &tensor_op, + const TileOperand &tile_op, + TensorSampler &sampler, + const TileOperand &x, + const TileOperand &y, + const TileOperand &z, + const TileOperand &batch) = 0; /** Store the data to the tensor memory from the tile in a dilated way using the sampling information. * * Similar to @ref KernelWriter::op_load_dilated() */ - virtual void op_store_dilated( - const TensorOperand &tensor_op, const TileOperand &tile_op, TensorSampler &sampler, - const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch, - const TileOperand &dilation_x, const TileOperand &dilation_y) = 0; + virtual void op_store_dilated(const TensorOperand &tensor_op, + const TileOperand &tile_op, + TensorSampler &sampler, + const TileOperand &x, + const TileOperand &y, + const TileOperand &z, + const TileOperand &batch, + const TileOperand &dilation_x, + const TileOperand &dilation_y) = 0; /** Load the data from the tensor memory to the tile using the indirect buffer approach and respecting the sampling information. 
* @@ -314,8 +342,13 @@ public: * @param[in] z z-coordinate * @param[in] batch batch */ - virtual void op_load_indirect(const TileOperand &tile_op, const TensorOperand &tensor_op, TensorSampler &sampler, - const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch_op) = 0; + virtual void op_load_indirect(const TileOperand &tile_op, + const TensorOperand &tensor_op, + TensorSampler &sampler, + const TileOperand &x, + const TileOperand &y, + const TileOperand &z, + const TileOperand &batch_op) = 0; protected: // ============================================================================================= @@ -373,8 +406,8 @@ protected: static DataType get_data_type(const ConstantData &data); private: - int32_t _id_space{ 0 }; - int32_t _last_created_id_space{ 0 }; + int32_t _id_space{0}; + int32_t _last_created_id_space{0}; }; } // namespace ckw diff --git a/compute_kernel_writer/include/ckw/TensorInfo.h b/compute_kernel_writer/include/ckw/TensorInfo.h index 87cf7c1426..5c87cb5b12 100644 --- a/compute_kernel_writer/include/ckw/TensorInfo.h +++ b/compute_kernel_writer/include/ckw/TensorInfo.h @@ -27,6 +27,7 @@ #include "ckw/types/DataType.h" #include "ckw/types/TensorDataLayout.h" + #include #include @@ -85,10 +86,10 @@ public: int32_t id() const; private: - TensorShape _shape{ { 0 } }; - DataType _dt{ DataType::Unknown }; - TensorDataLayout _dl{ TensorDataLayout::Unknown }; - int32_t _id{ -1 }; + TensorShape _shape{{0}}; + DataType _dt{DataType::Unknown}; + TensorDataLayout _dl{TensorDataLayout::Unknown}; + int32_t _id{-1}; }; } // namespace ckw diff --git a/compute_kernel_writer/include/ckw/TensorSampler.h b/compute_kernel_writer/include/ckw/TensorSampler.h index 1b51636edb..117e8de2cf 100644 --- a/compute_kernel_writer/include/ckw/TensorSampler.h +++ b/compute_kernel_writer/include/ckw/TensorSampler.h @@ -25,8 +25,8 @@ #ifndef CKW_INCLUDE_CKW_TENSORSAMPLER_H #define CKW_INCLUDE_CKW_TENSORSAMPLER_H -#include "ckw/types/TensorStorageType.h" #include "ckw/types/TensorSamplerTypes.h" +#include "ckw/types/TensorStorageType.h" namespace ckw { @@ -53,12 +53,11 @@ public: * @param[in] address_mode_y The address mode of the y dimension. * @param[in] address_mode_z The address mode of the z dimension. 
*/ - TensorSampler( - TensorStorageType storage, - TensorSamplerFormat format, - TensorSamplerAddressModeX address_mode_x, - TensorSamplerAddressModeY address_mode_y, - TensorSamplerAddressModeZ address_mode_z); + TensorSampler(TensorStorageType storage, + TensorSamplerFormat format, + TensorSamplerAddressModeX address_mode_x, + TensorSamplerAddressModeY address_mode_y, + TensorSamplerAddressModeZ address_mode_z); /** Get the storage for the tensor */ TensorStorageType storage() const; @@ -91,11 +90,11 @@ public: TensorSampler &address_mode_z(TensorSamplerAddressModeZ address_mode_z); private: - TensorStorageType _storage { TensorStorageType::BufferUint8Ptr }; - TensorSamplerFormat _format { TensorSamplerFormat::Unknown }; - TensorSamplerAddressModeX _address_mode_x { TensorSamplerAddressModeX::Unknown }; - TensorSamplerAddressModeY _address_mode_y { TensorSamplerAddressModeY::Unknown }; - TensorSamplerAddressModeZ _address_mode_z { TensorSamplerAddressModeZ::Unknown }; + TensorStorageType _storage{TensorStorageType::BufferUint8Ptr}; + TensorSamplerFormat _format{TensorSamplerFormat::Unknown}; + TensorSamplerAddressModeX _address_mode_x{TensorSamplerAddressModeX::Unknown}; + TensorSamplerAddressModeY _address_mode_y{TensorSamplerAddressModeY::Unknown}; + TensorSamplerAddressModeZ _address_mode_z{TensorSamplerAddressModeZ::Unknown}; }; } // namespace ckw diff --git a/compute_kernel_writer/include/ckw/TileInfo.h b/compute_kernel_writer/include/ckw/TileInfo.h index b8094f79bf..678bb7aaf6 100644 --- a/compute_kernel_writer/include/ckw/TileInfo.h +++ b/compute_kernel_writer/include/ckw/TileInfo.h @@ -83,7 +83,7 @@ public: DataType data_type() const; private: - DataType _dt{ DataType::Unknown }; + DataType _dt{DataType::Unknown}; TileShape _shape{}; }; diff --git a/compute_kernel_writer/include/ckw/types/ConstantData.h b/compute_kernel_writer/include/ckw/types/ConstantData.h index 95425b2c65..7708818ca8 100644 --- a/compute_kernel_writer/include/ckw/types/ConstantData.h +++ b/compute_kernel_writer/include/ckw/types/ConstantData.h @@ -45,12 +45,12 @@ class KernelWriter; class ConstantData { - using String = std::string; + using String = std::string; using StringVector = std::vector; public: /** Templated constructor */ - template + template ConstantData(std::initializer_list> values, DataType data_type); private: @@ -60,14 +60,14 @@ private: * * @return true if user provided data type and the template type are conformant */ - template + template bool validate(DataType data_type); /** Get the constant data as a 2d vector of string values * * @return a 2d vector of strings that has the string-converted values */ - const std::vector& values() const; + const std::vector &values() const; /** Get the underlying data type of the constant values * @@ -81,7 +81,7 @@ private: private: // Data members std::vector _values{}; - DataType _data_type{}; + DataType _data_type{}; }; } // namespace ckw diff --git a/compute_kernel_writer/include/ckw/types/MemoryOperation.h b/compute_kernel_writer/include/ckw/types/MemoryOperation.h index 0466b82df2..f93f60c51a 100644 --- a/compute_kernel_writer/include/ckw/types/MemoryOperation.h +++ b/compute_kernel_writer/include/ckw/types/MemoryOperation.h @@ -27,11 +27,11 @@ namespace ckw { - enum class MemoryOperation - { - Load = 1, - Store = 2 - }; +enum class MemoryOperation +{ + Load = 1, + Store = 2 +}; } // namespace ckw #endif /* CKW_INCLUDE_CKW_TYPES_MEMORYOPERATION */ diff --git a/compute_kernel_writer/include/ckw/types/TensorSamplerTypes.h 
b/compute_kernel_writer/include/ckw/types/TensorSamplerTypes.h index 43dce1d4e4..512d0b4501 100644 --- a/compute_kernel_writer/include/ckw/types/TensorSamplerTypes.h +++ b/compute_kernel_writer/include/ckw/types/TensorSamplerTypes.h @@ -75,8 +75,8 @@ enum class TensorSamplerAddressModeY : int32_t */ enum class TensorSamplerAddressModeZ : int32_t { - Unknown = 0, - None = 1, + Unknown = 0, + None = 1, }; } // namespace ckw diff --git a/compute_kernel_writer/prototype/examples/add_exp_store.cpp b/compute_kernel_writer/prototype/examples/add_exp_store.cpp index 6a9884543c..2b640ca01b 100644 --- a/compute_kernel_writer/prototype/examples/add_exp_store.cpp +++ b/compute_kernel_writer/prototype/examples/add_exp_store.cpp @@ -32,7 +32,6 @@ #include "common/ExampleComponentArgument.h" #include "common/ExampleKernelWriter.h" #include "common/ExampleScopedKernelWriter.h" - #include #include @@ -78,14 +77,14 @@ void op_binary_elementwise(ExampleScopedKernelWriter writer, std::vectorhas_tile() && !rhs->has_tile()) + if (!lhs->has_tile() && !rhs->has_tile()) { const auto sampler = create_simple_sampler(writer); writer->op_load_once(lhs, sampler); writer->op_load_once(rhs, sampler); } - else if(lhs->has_tile()) + else if (lhs->has_tile()) { const auto &sampler = lhs->tile_sampler(); writer->op_load_once(rhs, sampler); @@ -101,7 +100,7 @@ void op_binary_elementwise(ExampleScopedKernelWriter writer, std::vectortile_sampler(); // Prepare the output tile. - if(!dst->has_tile()) + if (!dst->has_tile()) { auto &tile = writer->declare_tile("dst_tile", lhs_tile.tile_info()); dst->init_virtual_tensor(tile, sampler); @@ -119,7 +118,7 @@ void op_exp(ExampleScopedKernelWriter writer, std::vectorhas_tile()) + if (!src->has_tile()) { const auto sampler = create_simple_sampler(writer); writer->op_load_once(src, sampler); @@ -129,7 +128,7 @@ void op_exp(ExampleScopedKernelWriter writer, std::vectortile_sampler(); // Prepare the output tile. 
- if(!dst->has_tile()) + if (!dst->has_tile()) { auto &tile = writer->declare_tile("dst_tile", src_tile.tile_info()); dst->init_virtual_tensor(tile, sampler); @@ -160,34 +159,38 @@ int main() ExampleScopedKernelWriter writer(&root_writer); - const TensorInfo src0_info(DataType::Fp32, TensorShape({ 3, 10, 20, 1, 1 }), TensorDataLayout::Nhwc, 0); - const TensorInfo src1_info(DataType::Fp32, TensorShape({ 3, 10, 20, 1, 1 }), TensorDataLayout::Nhwc, 1); - const TensorInfo dst_info(DataType::Fp32, TensorShape({ 3, 10, 20, 1, 1 }), TensorDataLayout::Nhwc, 2); + const TensorInfo src0_info(DataType::Fp32, TensorShape({3, 10, 20, 1, 1}), TensorDataLayout::Nhwc, 0); + const TensorInfo src1_info(DataType::Fp32, TensorShape({3, 10, 20, 1, 1}), TensorDataLayout::Nhwc, 1); + const TensorInfo dst_info(DataType::Fp32, TensorShape({3, 10, 20, 1, 1}), TensorDataLayout::Nhwc, 2); - ExampleComponentArgument src0(writer->declare_tensor_argument("src0", src0_info, TensorStorageType::BufferUint8Ptr)); - ExampleComponentArgument src1(writer->declare_tensor_argument("src1", src1_info, TensorStorageType::BufferUint8Ptr)); + ExampleComponentArgument src0( + writer->declare_tensor_argument("src0", src0_info, TensorStorageType::BufferUint8Ptr)); + ExampleComponentArgument src1( + writer->declare_tensor_argument("src1", src1_info, TensorStorageType::BufferUint8Ptr)); ExampleComponentArgument dst(writer->declare_tensor_argument("dst", dst_info, TensorStorageType::BufferUint8Ptr)); ExampleComponentArgument ans; - op_binary_elementwise(writer, { &src0, &src1, &ans }); - op_exp(writer, { &ans, &ans }); - op_store(writer, { &ans, &dst }); + op_binary_elementwise(writer, {&src0, &src1, &ans}); + op_exp(writer, {&ans, &ans}); + op_store(writer, {&ans, &dst}); const auto arguments = kernel.arguments(); std::cout << "\n====================\nArguments:\n====================\n"; - for(auto &arg : arguments) + for (auto &arg : arguments) { - switch(arg.type()) + switch (arg.type()) { case ckw::KernelArgument::Type::TensorStorage: - std::cout << "* Tensor storage: ID = " << arg.id() << ", type = " << std::hex << "0x" << static_cast(arg.tensor_storage_type()) << std::dec << "\n"; + std::cout << "* Tensor storage: ID = " << arg.id() << ", type = " << std::hex << "0x" + << static_cast(arg.tensor_storage_type()) << std::dec << "\n"; break; case ckw::KernelArgument::Type::TensorComponent: - std::cout << "* Tensor component: ID = " << arg.id() << ", type = " << std::hex << "0x" << static_cast(arg.tensor_component_type()) << std::dec << "\n"; + std::cout << "* Tensor component: ID = " << arg.id() << ", type = " << std::hex << "0x" + << static_cast(arg.tensor_component_type()) << std::dec << "\n"; break; default: diff --git a/compute_kernel_writer/prototype/examples/common/ExampleComponentArgument.cpp b/compute_kernel_writer/prototype/examples/common/ExampleComponentArgument.cpp index 5a2ec526cc..55223dae0e 100644 --- a/compute_kernel_writer/prototype/examples/common/ExampleComponentArgument.cpp +++ b/compute_kernel_writer/prototype/examples/common/ExampleComponentArgument.cpp @@ -23,19 +23,19 @@ */ #include "ExampleComponentArgument.h" + #include "ckw/Error.h" ExampleComponentArgument::ExampleComponentArgument() { } -ExampleComponentArgument::ExampleComponentArgument(ckw::TensorOperand &tensor) - : _tensor(&tensor) +ExampleComponentArgument::ExampleComponentArgument(ckw::TensorOperand &tensor) : _tensor(&tensor) { } -ExampleComponentArgument & -ExampleComponentArgument::init_virtual_tensor(ckw::TileOperand &tile, const 
ckw::TensorTileSampler &tile_sampler) +ExampleComponentArgument &ExampleComponentArgument::init_virtual_tensor(ckw::TileOperand &tile, + const ckw::TensorTileSampler &tile_sampler) { CKW_ASSERT(_tile == nullptr); diff --git a/compute_kernel_writer/prototype/examples/common/ExampleComponentArgument.h b/compute_kernel_writer/prototype/examples/common/ExampleComponentArgument.h index 9fdc50ba08..0e029b1157 100644 --- a/compute_kernel_writer/prototype/examples/common/ExampleComponentArgument.h +++ b/compute_kernel_writer/prototype/examples/common/ExampleComponentArgument.h @@ -104,8 +104,8 @@ public: const ckw::TensorTileSampler &tile_sampler() const; private: - ckw::TensorOperand *_tensor{ nullptr }; - ckw::TileOperand *_tile{ nullptr }; + ckw::TensorOperand *_tensor{nullptr}; + ckw::TileOperand *_tile{nullptr}; ckw::TensorTileSampler _tile_sampler{}; }; diff --git a/compute_kernel_writer/prototype/examples/common/ExampleKernelWriter.cpp b/compute_kernel_writer/prototype/examples/common/ExampleKernelWriter.cpp index 6b9f244735..1734ce8823 100644 --- a/compute_kernel_writer/prototype/examples/common/ExampleKernelWriter.cpp +++ b/compute_kernel_writer/prototype/examples/common/ExampleKernelWriter.cpp @@ -23,26 +23,27 @@ */ #include "ExampleKernelWriter.h" -#include "ExampleComponentArgument.h" + #include "ckw/Error.h" #include "ckw/TileInfo.h" -ExampleKernelWriter::ExampleKernelWriter(ckw::Kernel &kernel) - : KernelWriter(kernel) +#include "ExampleComponentArgument.h" + +ExampleKernelWriter::ExampleKernelWriter(ckw::Kernel &kernel) : KernelWriter(kernel) { } void ExampleKernelWriter::op_load_once(ExampleComponentArgument *tensor_or_tile, const ckw::TensorTileSampler &sampler) { - if(!tensor_or_tile->has_tile()) + if (!tensor_or_tile->has_tile()) { CKW_ASSERT(tensor_or_tile->has_tensor()); auto &tensor = tensor_or_tile->tensor(); const auto tile_name = tensor.name() + "_tile"; - auto &tile = declare_tile(tile_name.c_str(), - ckw::TileInfo(tensor.data_type(), sampler.height(), sampler.width())); + auto &tile = + declare_tile(tile_name.c_str(), ckw::TileInfo(tensor.data_type(), sampler.height(), sampler.width())); op_load(tile, tensor, sampler); diff --git a/compute_kernel_writer/prototype/examples/common/ExampleScopedKernelWriter.cpp b/compute_kernel_writer/prototype/examples/common/ExampleScopedKernelWriter.cpp index 7c44fa8749..784d5ffb96 100644 --- a/compute_kernel_writer/prototype/examples/common/ExampleScopedKernelWriter.cpp +++ b/compute_kernel_writer/prototype/examples/common/ExampleScopedKernelWriter.cpp @@ -23,6 +23,7 @@ */ #include "ExampleScopedKernelWriter.h" + #include "ExampleKernelWriter.h" ExampleScopedKernelWriter::ExampleScopedKernelWriter(ExampleKernelWriter *writer) diff --git a/compute_kernel_writer/prototype/examples/writer_helper.cpp b/compute_kernel_writer/prototype/examples/writer_helper.cpp index ccef92dcdf..8623afbf50 100644 --- a/compute_kernel_writer/prototype/examples/writer_helper.cpp +++ b/compute_kernel_writer/prototype/examples/writer_helper.cpp @@ -23,14 +23,14 @@ */ #include "ckw/KernelWriter.h" -#include "../include/ckw/KernelWriterHelper.h" #include "ckw/TensorTileSampler.h" +#include "../include/ckw/KernelWriterHelper.h" #include using namespace ckw; -TensorTileSampler create_simple_sampler(KernelWriter& writer) +TensorTileSampler create_simple_sampler(KernelWriter &writer) { TensorTileSampler sampler; @@ -65,11 +65,11 @@ TensorTileSampler create_simple_sampler(KernelWriter& writer) int main() { - Kernel kernel("test", GpuTargetLanguage::OpenCL); + Kernel 
kernel("test", GpuTargetLanguage::OpenCL); KernelWriterHelper writer(kernel); - const TensorInfo src_info(DataType::Fp32, TensorShape({ 1, 1, 1, 1, 1 }), TensorDataLayout::Nhwc, 0); - const TensorInfo dst_info(DataType::Fp32, TensorShape({ 1, 1, 1, 1, 1 }), TensorDataLayout::Nhwc, 1); + const TensorInfo src_info(DataType::Fp32, TensorShape({1, 1, 1, 1, 1}), TensorDataLayout::Nhwc, 0); + const TensorInfo dst_info(DataType::Fp32, TensorShape({1, 1, 1, 1, 1}), TensorDataLayout::Nhwc, 1); auto &src_tensor = writer.declare_tensor_argument("src", src_info); auto &dst_tensor = writer.declare_tensor_argument("dst", dst_info); @@ -77,27 +77,24 @@ int main() const auto sampler = create_simple_sampler(writer); auto &src = writer.declare_tile("src_tile", TileInfo(src_tensor.data_type(), sampler.height(), sampler.width())); - auto &other = writer.declare_tile("other_tile", TileInfo(src_tensor.data_type(), sampler.height(), sampler.width())); + auto &other = + writer.declare_tile("other_tile", TileInfo(src_tensor.data_type(), sampler.height(), sampler.width())); auto &dst = writer.declare_tile("dst_tile", TileInfo(src_tensor.data_type(), sampler.height(), sampler.width())); writer.op_load(src, src_tensor, sampler); writer.op_load(other, src_tensor, sampler); writer.op_load(dst, dst_tensor, sampler); - auto test = dst ^ src ^ other; + auto test = dst ^ src ^ other; auto other_test = logical_and(dst, src, other); writer.op_assign(dst, logical_and(dst, src, other)); writer.op_assign(dst, test); writer.op_assign(dst, other_test); writer.op_assign(dst, operator^(operator^(dst, src), other)); - writer.op_if(exp(src) == dst, [&]{ - writer.op_binary_expression(dst, src, BinaryOp::Add, src); - }).op_else_if(exp(src) > dst, [&]{ - writer.op_binary_expression(dst, src, BinaryOp::Add, src); - }).op_else([&] { - writer.op_assign(dst, src); - }); + writer.op_if(exp(src) == dst, [&] { writer.op_binary_expression(dst, src, BinaryOp::Add, src); }) + .op_else_if(exp(src) > dst, [&] { writer.op_binary_expression(dst, src, BinaryOp::Add, src); }) + .op_else([&] { writer.op_assign(dst, src); }); writer.op_assign(dst, src + src * src); writer.op_assign(dst, src * max(src, dst) + src); @@ -106,13 +103,11 @@ int main() writer.op_assign(dst, src ^ dst); writer.op_assign(dst, ~src); - writer.op_for_loop(dst < src, dst += src, [&]{ - writer.op_assign(dst, src + dst); - }); + writer.op_for_loop(dst < src, dst += src, [&] { writer.op_assign(dst, src + dst); }); writer.op_assign(dst += src); writer.op_assign(dst += exp(src)); std::cout << "======== KERNEL ========" << std::endl; std::cout << writer.generate_code() << std::endl; -} \ No newline at end of file +} diff --git a/compute_kernel_writer/prototype/include/ckw/Error.h b/compute_kernel_writer/prototype/include/ckw/Error.h index b18944eac5..aab713c817 100644 --- a/compute_kernel_writer/prototype/include/ckw/Error.h +++ b/compute_kernel_writer/prototype/include/ckw/Error.h @@ -39,11 +39,11 @@ namespace ckw #define CKW_ASSERT_MSG(cond, msg) \ do \ { \ - if(!(cond)) \ + if (!(cond)) \ { \ throw ::std::runtime_error(msg); \ } \ - } while(false) + } while (false) /** If the condition is not met, throw an std::runtime_error. * @@ -56,8 +56,7 @@ namespace ckw * @param[in] precond The condition if is met requires the consequence must also be met. * @param[in] cond The condition that is expected to be true if the precondition is true. 
*/ -#define CKW_ASSERT_IF(precond, cond) \ - CKW_ASSERT_MSG(!(precond) || ((precond) && (cond)), #precond " |-> " #cond) +#define CKW_ASSERT_IF(precond, cond) CKW_ASSERT_MSG(!(precond) || ((precond) && (cond)), #precond " |-> " #cond) /** Mark the variables as unused. * diff --git a/compute_kernel_writer/prototype/include/ckw/KernelArgument.h b/compute_kernel_writer/prototype/include/ckw/KernelArgument.h index af8bcde634..3384a20aef 100644 --- a/compute_kernel_writer/prototype/include/ckw/KernelArgument.h +++ b/compute_kernel_writer/prototype/include/ckw/KernelArgument.h @@ -26,6 +26,7 @@ #define CKW_PROTOTYPE_INCLUDE_CKW_KERNELARGUMENT_H #include "ckw/TensorInfo.h" + #include namespace ckw @@ -98,7 +99,7 @@ private: TensorComponentType tensor_component_type; }; - SubId _sub_id{ 0 }; + SubId _sub_id{0}; }; } // namespace ckw diff --git a/compute_kernel_writer/prototype/include/ckw/KernelWriter.h b/compute_kernel_writer/prototype/include/ckw/KernelWriter.h index fdb5fedc59..f9e0066f91 100644 --- a/compute_kernel_writer/prototype/include/ckw/KernelWriter.h +++ b/compute_kernel_writer/prototype/include/ckw/KernelWriter.h @@ -94,7 +94,9 @@ public: * * @return The @ref TensorOperand object. */ - TensorOperand &declare_tensor_argument(const std::string &name, const TensorInfo &info, TensorStorageType storage_type = TensorStorageType::BufferUint8Ptr); + TensorOperand &declare_tensor_argument(const std::string &name, + const TensorInfo &info, + TensorStorageType storage_type = TensorStorageType::BufferUint8Ptr); /** Declare a compile-time constant scalar argument. * @@ -134,7 +136,10 @@ public: * @param[in] sampler The tensor sampling information. * @param[in] dilation_y Dilation in the Y dimension. */ - void op_load(TileOperand &tile, const TensorOperand &tensor, const TensorTileSampler &sampler, const TileOperand &dilation_y = TileOperand("dil_y", 1)); + void op_load(TileOperand &tile, + const TensorOperand &tensor, + const TensorTileSampler &sampler, + const TileOperand &dilation_y = TileOperand("dil_y", 1)); /** Load the data from the tensor memory to the tile using the indirect buffer approach and respective of the sampling information. * @@ -221,7 +226,10 @@ public: * @param[in] first The first argument tile. * @param[in] second The second argument tile. */ - void op_binary_elementwise_function(const TileOperand &dst, BinaryFunction func, const TileOperand &first, const TileOperand &second); + void op_binary_elementwise_function(const TileOperand &dst, + BinaryFunction func, + const TileOperand &first, + const TileOperand &second); /** Write function applied to scalar value: ` = (, , );`. * @@ -231,7 +239,11 @@ public: * @param[in] second The second argument tile. * @param[in] third The third argument tile. */ - void op_ternary_elementwise_function(const TileOperand &dst, TernaryFunction func, const TileOperand &first, const TileOperand &second, const TileOperand &third); + void op_ternary_elementwise_function(const TileOperand &dst, + TernaryFunction func, + const TileOperand &first, + const TileOperand &second, + const TileOperand &third); /** Write if-statement: `if( ) { }`. * @@ -267,7 +279,13 @@ public: * @param[in, out] update_value The value which is updated at every iteration. * @param[in] body The body of the for-loop. 
*/ - void op_for_loop(const TileOperand &var_name, BinaryOp cond_op, const TileOperand &cond_value_name, const TileOperand &update_var_name, AssignmentOp update_op, const TileOperand &update_value_name, const std::function &body); + void op_for_loop(const TileOperand &var_name, + BinaryOp cond_op, + const TileOperand &cond_value_name, + const TileOperand &update_var_name, + AssignmentOp update_op, + const TileOperand &update_value_name, + const std::function &body); /** Write the return statement: `return;` */ @@ -311,8 +329,8 @@ private: ::std::unique_ptr _impl_attr; ::std::unique_ptr _impl; - int32_t _id_space{ 0 }; - int32_t _max_id_space{ 0 }; + int32_t _id_space{0}; + int32_t _max_id_space{0}; }; } // namespace ckw diff --git a/compute_kernel_writer/prototype/include/ckw/KernelWriterHelper.h b/compute_kernel_writer/prototype/include/ckw/KernelWriterHelper.h index a8be859680..3ba079bbc2 100644 --- a/compute_kernel_writer/prototype/include/ckw/KernelWriterHelper.h +++ b/compute_kernel_writer/prototype/include/ckw/KernelWriterHelper.h @@ -32,8 +32,6 @@ #include #include -#include - /* * By including this header file you will be able to supplement the default * Compute Kernel Writer API with additional syntax to help ease the use of CKW. @@ -154,7 +152,9 @@ struct can_be_assigned : ::std::true_type * @tparam TLeft The type of the destination of the assignment. * @tparam TRight The type of the source assigned to the destination. */ -template ::value && can_be_assigned::value>> +template ::value && can_be_assigned::value>> struct Assignment { TLeft lhs; @@ -173,7 +173,7 @@ struct Assignment template inline Assignment operator+=(TLeft &&lhs, TRight &&rhs) { - return Assignment{ std::forward(lhs), std::forward(rhs), AssignmentOp::Increment }; + return Assignment{std::forward(lhs), std::forward(rhs), AssignmentOp::Increment}; } /** Represents the expression: `\p lhs -= \p rhs`. @@ -187,7 +187,7 @@ inline Assignment operator+=(TLeft &&lhs, TRight &&rhs) template inline Assignment operator-=(TLeft &&lhs, TRight &&rhs) { - return Assignment{ std::forward(lhs), std::forward(rhs), AssignmentOp::Decrement }; + return Assignment{std::forward(lhs), std::forward(rhs), AssignmentOp::Decrement}; } // ================================================== @@ -221,7 +221,7 @@ struct can_be_operand> : ::std::true_type template inline UnaryExpression operator!(TSrc &&src) { - return UnaryExpression{ std::forward(src), UnaryOp::LogicalNot }; + return UnaryExpression{std::forward(src), UnaryOp::LogicalNot}; } /** Represents the expression: `~\p src`. @@ -233,7 +233,7 @@ inline UnaryExpression operator!(TSrc &&src) template inline UnaryExpression operator~(TSrc &&src) { - return UnaryExpression{ std::forward(src), UnaryOp::BitwiseNot }; + return UnaryExpression{std::forward(src), UnaryOp::BitwiseNot}; } // ================================================== @@ -247,7 +247,9 @@ inline UnaryExpression operator~(TSrc &&src) * @tparam TLeft The type of the left argument of the expression. * @tparam TRight The type of the right argument of the expression. 
*/ -template ::value && can_be_operand::value>> +template ::value && can_be_operand::value>> struct BinaryExpression { TLeft lhs; @@ -271,7 +273,7 @@ struct can_be_operand> : ::std::true_type template inline BinaryExpression operator+(TLeft &&lhs, TRight &&rhs) { - return BinaryExpression{ std::forward(lhs), std::forward(rhs), BinaryOp::Add }; + return BinaryExpression{std::forward(lhs), std::forward(rhs), BinaryOp::Add}; } /** Represents the expression: `\p lhs - \p rhs`. @@ -285,7 +287,7 @@ inline BinaryExpression operator+(TLeft &&lhs, TRight &&rhs) template inline BinaryExpression operator-(TLeft &&lhs, TRight &&rhs) { - return BinaryExpression{ std::forward(lhs), std::forward(rhs), BinaryOp::Sub }; + return BinaryExpression{std::forward(lhs), std::forward(rhs), BinaryOp::Sub}; } /** Represents the expression: `\p lhs * \p rhs`. @@ -299,7 +301,7 @@ inline BinaryExpression operator-(TLeft &&lhs, TRight &&rhs) template inline BinaryExpression operator*(TLeft &&lhs, TRight &&rhs) { - return BinaryExpression{ std::forward(lhs), std::forward(rhs), BinaryOp::Mul }; + return BinaryExpression{std::forward(lhs), std::forward(rhs), BinaryOp::Mul}; } /** Represents the expression: `\p lhs / \p rhs`. @@ -313,7 +315,7 @@ inline BinaryExpression operator*(TLeft &&lhs, TRight &&rhs) template inline BinaryExpression operator/(TLeft &&lhs, TRight &&rhs) { - return BinaryExpression{ std::forward(lhs), std::forward(rhs), BinaryOp::Div }; + return BinaryExpression{std::forward(lhs), std::forward(rhs), BinaryOp::Div}; } /** Represents the expression: `\p lhs % \p rhs`. @@ -327,7 +329,7 @@ inline BinaryExpression operator/(TLeft &&lhs, TRight &&rhs) template inline BinaryExpression operator%(TLeft &&lhs, TRight &&rhs) { - return BinaryExpression{ std::forward(lhs), std::forward(rhs), BinaryOp::Mod }; + return BinaryExpression{std::forward(lhs), std::forward(rhs), BinaryOp::Mod}; } /** Represents the expression: `\p lhs == \p rhs`. @@ -341,7 +343,7 @@ inline BinaryExpression operator%(TLeft &&lhs, TRight &&rhs) template inline BinaryExpression operator==(TLeft &&lhs, TRight &&rhs) { - return BinaryExpression{ std::forward(lhs), std::forward(rhs), BinaryOp::Equal }; + return BinaryExpression{std::forward(lhs), std::forward(rhs), BinaryOp::Equal}; } /** Represents the expression: `\p lhs < \p rhs`. @@ -355,7 +357,7 @@ inline BinaryExpression operator==(TLeft &&lhs, TRight &&rhs) template inline BinaryExpression operator<(TLeft &&lhs, TRight &&rhs) { - return BinaryExpression{ std::forward(lhs), std::forward(rhs), BinaryOp::Less }; + return BinaryExpression{std::forward(lhs), std::forward(rhs), BinaryOp::Less}; } /** Represents the expression: `\p lhs <= \p rhs`. @@ -369,7 +371,7 @@ inline BinaryExpression operator<(TLeft &&lhs, TRight &&rhs) template inline BinaryExpression operator<=(TLeft &&lhs, TRight &&rhs) { - return BinaryExpression{ std::forward(lhs), std::forward(rhs), BinaryOp::LessEqual }; + return BinaryExpression{std::forward(lhs), std::forward(rhs), BinaryOp::LessEqual}; } /** Represents the expression: `\p lhs > \p rhs`. @@ -383,7 +385,7 @@ inline BinaryExpression operator<=(TLeft &&lhs, TRight &&rhs) template inline BinaryExpression operator>(TLeft &&lhs, TRight &&rhs) { - return BinaryExpression{ std::forward(lhs), std::forward(rhs), BinaryOp::Greater }; + return BinaryExpression{std::forward(lhs), std::forward(rhs), BinaryOp::Greater}; } /** Represents the expression: `\p lhs >= \p rhs`. 
@@ -397,7 +399,7 @@ inline BinaryExpression operator>(TLeft &&lhs, TRight &&rhs) template inline BinaryExpression operator>=(TLeft &&lhs, TRight &&rhs) { - return BinaryExpression{ std::forward(lhs), std::forward(rhs), BinaryOp::GreaterEqual }; + return BinaryExpression{std::forward(lhs), std::forward(rhs), BinaryOp::GreaterEqual}; } /** Represents the expression: `\p lhs ^ \p rhs`. @@ -411,7 +413,7 @@ inline BinaryExpression operator>=(TLeft &&lhs, TRight &&rhs) template inline BinaryExpression operator^(TLeft &&lhs, TRight &&rhs) { - return BinaryExpression{ std::forward(lhs), std::forward(rhs), BinaryOp::BitwiseXOR }; + return BinaryExpression{std::forward(lhs), std::forward(rhs), BinaryOp::BitwiseXOR}; } /** Represents the expression: `\p lhs && \p rhs`. @@ -425,7 +427,7 @@ inline BinaryExpression operator^(TLeft &&lhs, TRight &&rhs) template inline BinaryExpression logical_and(TLeft &&lhs, TRight &&rhs) { - return BinaryExpression{ std::forward(lhs), std::forward(rhs), BinaryOp::LogicalAnd }; + return BinaryExpression{std::forward(lhs), std::forward(rhs), BinaryOp::LogicalAnd}; } /** Represents the expression: `\p lhs && \p rhs`. @@ -440,7 +442,7 @@ template inline BinaryExpression, TOps...> logical_and(TLeft &&lhs, TRight &&rhs, TOps &&...ops) { return logical_and( - BinaryExpression{ std::forward(lhs), std::forward(rhs), BinaryOp::LogicalAnd }, + BinaryExpression{std::forward(lhs), std::forward(rhs), BinaryOp::LogicalAnd}, std::forward(ops)...); } @@ -455,7 +457,7 @@ inline BinaryExpression, TOps...> logical_and(TL template inline BinaryExpression logical_or(TLeft &&lhs, TRight &&rhs) { - return BinaryExpression{ std::forward(lhs), std::forward(rhs), BinaryOp::LogicalOr }; + return BinaryExpression{std::forward(lhs), std::forward(rhs), BinaryOp::LogicalOr}; } /** Represents the expression: `\p lhs || \p rhs`. @@ -470,7 +472,7 @@ template inline BinaryExpression, TOps...> logical_or(TLeft &&lhs, TRight &&rhs, TOps &&...ops) { return logical_or( - BinaryExpression{ std::forward(lhs), std::forward(rhs), BinaryOp::LogicalOr }, + BinaryExpression{std::forward(lhs), std::forward(rhs), BinaryOp::LogicalOr}, std::forward(ops)...); } @@ -505,7 +507,7 @@ struct can_be_operand> : ::std::true_type template UnaryElementwiseFunction exp(TSrc &&src) { - return UnaryElementwiseFunction{ std::forward(src), UnaryFunction::Exp }; + return UnaryElementwiseFunction{std::forward(src), UnaryFunction::Exp}; } /** Represents the expression: `tanh(\p src)`. @@ -517,7 +519,7 @@ UnaryElementwiseFunction exp(TSrc &&src) template UnaryElementwiseFunction tanh(TSrc &&src) { - return UnaryElementwiseFunction{ std::forward(src), UnaryFunction::Tanh }; + return UnaryElementwiseFunction{std::forward(src), UnaryFunction::Tanh}; } /** Represents the expression: `sqrt(\p src)`. @@ -529,7 +531,7 @@ UnaryElementwiseFunction tanh(TSrc &&src) template UnaryElementwiseFunction sqrt(TSrc &&src) { - return UnaryElementwiseFunction{ std::forward(src), UnaryFunction::Sqrt }; + return UnaryElementwiseFunction{std::forward(src), UnaryFunction::Sqrt}; } /** Represents the expression: `erf(\p src)`. @@ -541,7 +543,7 @@ UnaryElementwiseFunction sqrt(TSrc &&src) template UnaryElementwiseFunction erf(TSrc &&src) { - return UnaryElementwiseFunction{ std::forward(src), UnaryFunction::Erf }; + return UnaryElementwiseFunction{std::forward(src), UnaryFunction::Erf}; } /** Represents the expression: `fabs(\p src)`. 
@@ -553,7 +555,7 @@ UnaryElementwiseFunction erf(TSrc &&src) template UnaryElementwiseFunction fabs(TSrc &&src) { - return UnaryElementwiseFunction{ std::forward(src), UnaryFunction::Fabs }; + return UnaryElementwiseFunction{std::forward(src), UnaryFunction::Fabs}; } /** Represents the expression: `log(\p src)`. @@ -565,7 +567,7 @@ UnaryElementwiseFunction fabs(TSrc &&src) template UnaryElementwiseFunction log(TSrc &&src) { - return UnaryElementwiseFunction{ std::forward(src), UnaryFunction::Log }; + return UnaryElementwiseFunction{std::forward(src), UnaryFunction::Log}; } /** Represents the expression: `round(\p src)`. @@ -577,7 +579,7 @@ UnaryElementwiseFunction log(TSrc &&src) template UnaryElementwiseFunction round(TSrc &&src) { - return UnaryElementwiseFunction{ std::forward(src), UnaryFunction::Round }; + return UnaryElementwiseFunction{std::forward(src), UnaryFunction::Round}; } /** Represents the expression: `sizeof(\p src)`. @@ -589,7 +591,7 @@ UnaryElementwiseFunction round(TSrc &&src) template UnaryElementwiseFunction sizeOf(TSrc &&src) { - return UnaryElementwiseFunction{ std::forward(src), UnaryFunction::SizeOf }; + return UnaryElementwiseFunction{std::forward(src), UnaryFunction::SizeOf}; } // ================================================== @@ -603,7 +605,9 @@ UnaryElementwiseFunction sizeOf(TSrc &&src) * @tparam TFirst The type of the left argument of the function. * @tparam TSecond The type of the right argument of the function. */ -template ::value && can_be_operand::value>> +template ::value && can_be_operand::value>> struct BinaryElementwiseFunction { TFirst first; @@ -627,7 +631,8 @@ struct can_be_operand> : ::std::true_ template BinaryElementwiseFunction max(TFirst &&first, TSecond &&second) { - return BinaryElementwiseFunction{ std::forward(first), std::forward(second), BinaryFunction::Max }; + return BinaryElementwiseFunction{std::forward(first), std::forward(second), + BinaryFunction::Max}; } /** Represents the function call: `min(\p first, \p second)`. @@ -641,7 +646,8 @@ BinaryElementwiseFunction max(TFirst &&first, TSecond &&second) template BinaryElementwiseFunction min(TFirst &&first, TSecond &&second) { - return BinaryElementwiseFunction{ std::forward(first), std::forward(second), BinaryFunction::Min }; + return BinaryElementwiseFunction{std::forward(first), std::forward(second), + BinaryFunction::Min}; } // ================================================== @@ -656,7 +662,11 @@ BinaryElementwiseFunction min(TFirst &&first, TSecond &&second) * @tparam TSecond The type of the second argument to the function. * @tparam TThird The type of the third argument to the function. */ -template ::value && can_be_operand::value && can_be_operand::value>> +template ::value && can_be_operand::value && + can_be_operand::value>> struct TernaryElementwiseFunction { TFirst first; @@ -683,7 +693,9 @@ struct can_be_operand> : ::s template TernaryElementwiseFunction select(TFirst &&first, TSecond &&second, TThird &&third) { - return TernaryElementwiseFunction{ std::forward(first), std::forward(second), std::forward(third), TernaryFunction::Select }; + return TernaryElementwiseFunction{std::forward(first), + std::forward(second), + std::forward(third), TernaryFunction::Select}; } /** Helper class used to extend a KernelWriter with additional functionality @@ -715,7 +727,8 @@ public: * @param[in] cond The BinaryExpression representing the condition. * @param[in] body The body of the if-statement. 
*/ - KernelWriterHelper &op_if(const BinaryExpression &cond, const std::function &body) + KernelWriterHelper &op_if(const BinaryExpression &cond, + const std::function &body) { TWriter::op_if(cond.lhs, cond.opcode, cond.rhs, body); return *this; @@ -730,7 +743,8 @@ public: * @param[in] body The body of the if-statement. */ template - KernelWriterHelper &op_if(const BinaryExpression &cond, const std::function &body) + KernelWriterHelper &op_if(const BinaryExpression &cond, + const std::function &body) { auto &tmp1 = declare_temp_tile(cond.lhs.tile_info()); op_assign(tmp1, cond.rhs); @@ -747,7 +761,8 @@ public: * @param[in] body The body of the if-statement. */ template - KernelWriterHelper &op_if(const BinaryExpression &cond, const std::function &body) + KernelWriterHelper &op_if(const BinaryExpression &cond, + const std::function &body) { auto &tmp1 = declare_temp_tile(cond.rhs.tile_info()); op_assign(tmp1, cond.lhs); @@ -766,7 +781,8 @@ public: * @param[in] cond The BinaryExpression representing the condition. * @param[in] body The body of the else-if-statement. */ - KernelWriterHelper &op_else_if(const BinaryExpression &cond, const std::function &body) + KernelWriterHelper &op_else_if(const BinaryExpression &cond, + const std::function &body) { TWriter::op_else_if(cond.lhs, cond.opcode, cond.rhs, body); return *this; @@ -781,7 +797,8 @@ public: * @param[in] body The body of the else-if-statement. */ template - KernelWriterHelper &op_else_if(const BinaryExpression &cond, const std::function &body) + KernelWriterHelper &op_else_if(const BinaryExpression &cond, + const std::function &body) { auto &tmp1 = declare_temp_tile(cond.lhs.tile_info()); op_assign(tmp1, cond.rhs); @@ -798,7 +815,8 @@ public: * @param[in] body The body of the else-if-statement. */ template - KernelWriterHelper &op_else_if(const BinaryExpression &cond, const std::function &body) + KernelWriterHelper &op_else_if(const BinaryExpression &cond, + const std::function &body) { auto &tmp1 = declare_temp_tile(cond.rhs.tile_info()); op_assign(tmp1, cond.lhs); @@ -823,7 +841,9 @@ public: * @param[in] updater The Assignment representing the updater. * @param[in] body The body of the for-loop. */ - void op_for_loop(const BinaryExpression &cond, const Assignment &updater, const std::function &body) + void op_for_loop(const BinaryExpression &cond, + const Assignment &updater, + const std::function &body) { TWriter::op_for_loop(cond.lhs, cond.opcode, cond.rhs, updater.lhs, updater.opcode, updater.rhs, body); } @@ -1029,7 +1049,8 @@ public: * @param[in] dst The tile which is assigned to. * @param[in] exp The TernaryElementwiseFunction representing the expression to be evaluated and assigned. 
*/ - void op_assign(const TileOperand &dst, const TernaryElementwiseFunction &exp) + void op_assign(const TileOperand &dst, + const TernaryElementwiseFunction &exp) { TWriter::op_ternary_elementwise_function(dst, exp.opcode, exp.first, exp.second, exp.third); } @@ -1169,11 +1190,11 @@ public: */ void op_assign(const Assignment &exp) { - if(exp.opcode == AssignmentOp::Increment) + if (exp.opcode == AssignmentOp::Increment) { TWriter::op_binary_expression(exp.lhs, exp.lhs, BinaryOp::Add, exp.rhs); } - else if(exp.opcode == AssignmentOp::Decrement) + else if (exp.opcode == AssignmentOp::Decrement) { TWriter::op_binary_expression(exp.lhs, exp.lhs, BinaryOp::Sub, exp.rhs); } @@ -1192,7 +1213,7 @@ public: { auto &tmp1 = declare_temp_tile(exp.lhs.tile_info()); op_assign(tmp1, exp.rhs); - op_assign(Assignment{ exp.lhs, tmp1, exp.opcode }); + op_assign(Assignment{exp.lhs, tmp1, exp.opcode}); } private: @@ -1241,11 +1262,8 @@ private: template ::value>> TileInfo get_largest_size(const TileInfo &first, const TileInfo &second, const TOps &...ops) { - TileInfo largest = { - first.data_type(), - std::max(first.width(), second.width()), - std::max(first.height(), second.height()) - }; + TileInfo largest = {first.data_type(), std::max(first.width(), second.width()), + std::max(first.height(), second.height())}; return get_largest_size(largest, ops...); } diff --git a/compute_kernel_writer/prototype/include/ckw/OperandBase.h b/compute_kernel_writer/prototype/include/ckw/OperandBase.h index 06d9f82756..9842127339 100644 --- a/compute_kernel_writer/prototype/include/ckw/OperandBase.h +++ b/compute_kernel_writer/prototype/include/ckw/OperandBase.h @@ -26,6 +26,7 @@ #define CKW_PROTOTYPE_INCLUDE_CKW_OPERANDBASE_H #include "ckw/types/DataType.h" + #include namespace ckw diff --git a/compute_kernel_writer/prototype/include/ckw/ScalarValue.h b/compute_kernel_writer/prototype/include/ckw/ScalarValue.h index 16c3f6d441..2a9c42acc8 100644 --- a/compute_kernel_writer/prototype/include/ckw/ScalarValue.h +++ b/compute_kernel_writer/prototype/include/ckw/ScalarValue.h @@ -59,9 +59,9 @@ public: _size = sizeof(T); - if(::std::is_integral::value) + if (::std::is_integral::value) { - if(::std::is_signed::value) + if (::std::is_signed::value) { _type = Type::INT; _value.i64 = value; @@ -90,9 +90,9 @@ public: CKW_ASSERT(::std::is_integral::value || ::std::is_floating_point::value); CKW_ASSERT(sizeof(T) >= _size); - if(::std::is_integral::value) + if (::std::is_integral::value) { - if(::std::is_signed::value) + if (::std::is_signed::value) { CKW_ASSERT(_type == Type::INT || _type == Type::UINT); CKW_ASSERT_IF(_type == Type::UINT, sizeof(T) > _size); diff --git a/compute_kernel_writer/prototype/include/ckw/TensorInfo.h b/compute_kernel_writer/prototype/include/ckw/TensorInfo.h index 55f8101a53..24da7dc8ab 100644 --- a/compute_kernel_writer/prototype/include/ckw/TensorInfo.h +++ b/compute_kernel_writer/prototype/include/ckw/TensorInfo.h @@ -143,10 +143,10 @@ public: int32_t id() const; private: - TensorShape _shape{ { 0 } }; - DataType _dt{ DataType::Unknown }; - TensorDataLayout _dl{ TensorDataLayout::Unknown }; - int32_t _id{ -1 }; + TensorShape _shape{{0}}; + DataType _dt{DataType::Unknown}; + TensorDataLayout _dl{TensorDataLayout::Unknown}; + int32_t _id{-1}; }; } // namespace ckw diff --git a/compute_kernel_writer/prototype/include/ckw/TensorOperand.h b/compute_kernel_writer/prototype/include/ckw/TensorOperand.h index 6d88932c66..c221b449fa 100644 --- a/compute_kernel_writer/prototype/include/ckw/TensorOperand.h +++ 
b/compute_kernel_writer/prototype/include/ckw/TensorOperand.h @@ -139,21 +139,21 @@ private: TensorInfo _info; TensorStorageType _storage_type; - TileOperand *_tile{ nullptr }; + TileOperand *_tile{nullptr}; TensorTileSampler _tile_sampler{}; - ::std::unique_ptr _stride1{ nullptr }; - ::std::unique_ptr _stride2{ nullptr }; - ::std::unique_ptr _stride3{ nullptr }; - ::std::unique_ptr _stride4{ nullptr }; - ::std::unique_ptr _dim0{ nullptr }; - ::std::unique_ptr _dim1{ nullptr }; - ::std::unique_ptr _dim2{ nullptr }; - ::std::unique_ptr _dim3{ nullptr }; - ::std::unique_ptr _dim4{ nullptr }; - ::std::unique_ptr _dim1_dim2{ nullptr }; - ::std::unique_ptr _dim1_dim2_dim3{ nullptr }; - ::std::unique_ptr _offset_first_element_in_bytes{ nullptr }; + ::std::unique_ptr _stride1{nullptr}; + ::std::unique_ptr _stride2{nullptr}; + ::std::unique_ptr _stride3{nullptr}; + ::std::unique_ptr _stride4{nullptr}; + ::std::unique_ptr _dim0{nullptr}; + ::std::unique_ptr _dim1{nullptr}; + ::std::unique_ptr _dim2{nullptr}; + ::std::unique_ptr _dim3{nullptr}; + ::std::unique_ptr _dim4{nullptr}; + ::std::unique_ptr _dim1_dim2{nullptr}; + ::std::unique_ptr _dim1_dim2_dim3{nullptr}; + ::std::unique_ptr _offset_first_element_in_bytes{nullptr}; }; // ================================================================================================= diff --git a/compute_kernel_writer/prototype/include/ckw/TensorTileSampler.h b/compute_kernel_writer/prototype/include/ckw/TensorTileSampler.h index e1bf0c52b8..606dec3535 100644 --- a/compute_kernel_writer/prototype/include/ckw/TensorTileSampler.h +++ b/compute_kernel_writer/prototype/include/ckw/TensorTileSampler.h @@ -26,6 +26,7 @@ #define CKW_PROTOTYPE_INCLUDE_CKW_TENSORTILESAMPLER_H #include "ckw/types/TensorSamplerTypes.h" + #include namespace ckw @@ -55,12 +56,14 @@ public: * @param[in] address_mode_y The address mode of the y dimension. * @param[in] address_mode_z The address mode of the z dimension. */ - TensorTileSampler( - TileOperand &x, TileOperand &y, TileOperand &z, TileOperand &b, - TensorSamplerFormat format, - TensorSamplerAddressModeX address_mode_x, - TensorSamplerAddressModeY address_mode_y, - TensorSamplerAddressModeZ address_mode_z); + TensorTileSampler(TileOperand &x, + TileOperand &y, + TileOperand &z, + TileOperand &b, + TensorSamplerFormat format, + TensorSamplerAddressModeX address_mode_x, + TensorSamplerAddressModeY address_mode_y, + TensorSamplerAddressModeZ address_mode_z); /** Initialize a new instance of @ref TensorSampler class. * @@ -75,13 +78,16 @@ public: * @param[in] address_mode_y The address mode of the y dimension. * @param[in] address_mode_z The address mode of the z dimension. */ - TensorTileSampler( - TileOperand &x, TileOperand &y, TileOperand &z, TileOperand &b, - int32_t height, int32_t width, - TensorSamplerFormat format, - TensorSamplerAddressModeX address_mode_x, - TensorSamplerAddressModeY address_mode_y, - TensorSamplerAddressModeZ address_mode_z); + TensorTileSampler(TileOperand &x, + TileOperand &y, + TileOperand &z, + TileOperand &b, + int32_t height, + int32_t width, + TensorSamplerFormat format, + TensorSamplerAddressModeX address_mode_x, + TensorSamplerAddressModeY address_mode_y, + TensorSamplerAddressModeZ address_mode_z); /** Get the coordinate in the x dimension. 
*/ const TileOperand &x() const; @@ -144,18 +150,18 @@ public: TensorTileSampler &address_mode_z(TensorSamplerAddressModeZ address_mode_z); private: - TileOperand *_x{ nullptr }; - TileOperand *_y{ nullptr }; - TileOperand *_z{ nullptr }; - TileOperand *_b{ nullptr }; - - int32_t _height{ 0 }; - int32_t _width{ 0 }; - - TensorSamplerFormat _format{ TensorSamplerFormat::Unknown }; - TensorSamplerAddressModeX _address_mode_x{ TensorSamplerAddressModeX::Unknown }; - TensorSamplerAddressModeY _address_mode_y{ TensorSamplerAddressModeY::Unknown }; - TensorSamplerAddressModeZ _address_mode_z{ TensorSamplerAddressModeZ::Unknown }; + TileOperand *_x{nullptr}; + TileOperand *_y{nullptr}; + TileOperand *_z{nullptr}; + TileOperand *_b{nullptr}; + + int32_t _height{0}; + int32_t _width{0}; + + TensorSamplerFormat _format{TensorSamplerFormat::Unknown}; + TensorSamplerAddressModeX _address_mode_x{TensorSamplerAddressModeX::Unknown}; + TensorSamplerAddressModeY _address_mode_y{TensorSamplerAddressModeY::Unknown}; + TensorSamplerAddressModeZ _address_mode_z{TensorSamplerAddressModeZ::Unknown}; }; } // namespace ckw diff --git a/compute_kernel_writer/prototype/include/ckw/TileInfo.h b/compute_kernel_writer/prototype/include/ckw/TileInfo.h index de9e47af2b..e0d064169e 100644 --- a/compute_kernel_writer/prototype/include/ckw/TileInfo.h +++ b/compute_kernel_writer/prototype/include/ckw/TileInfo.h @@ -83,7 +83,7 @@ public: DataType data_type() const; private: - DataType _dt{ DataType::Unknown }; + DataType _dt{DataType::Unknown}; TileShape _shape{}; }; diff --git a/compute_kernel_writer/prototype/include/ckw/types/Functions.h b/compute_kernel_writer/prototype/include/ckw/types/Functions.h index bc1f85c188..c6afaa0ac8 100644 --- a/compute_kernel_writer/prototype/include/ckw/types/Functions.h +++ b/compute_kernel_writer/prototype/include/ckw/types/Functions.h @@ -32,14 +32,14 @@ namespace ckw enum class UnaryFunction : int32_t { - Exp = 0x0000, - Tanh = 0x0001, - Sqrt = 0x0002, - Erf = 0x0003, - Fabs = 0x0004, - Log = 0x0006, - Round = 0x0007, - Floor = 0x0008, + Exp = 0x0000, + Tanh = 0x0001, + Sqrt = 0x0002, + Erf = 0x0003, + Fabs = 0x0004, + Log = 0x0006, + Round = 0x0007, + Floor = 0x0008, // Misc SizeOf = 0x0009, @@ -47,8 +47,8 @@ enum class UnaryFunction : int32_t enum class BinaryFunction : int32_t { - Min = 0x0000, - Max = 0x0001, + Min = 0x0000, + Max = 0x0001, }; enum class TernaryFunction : int32_t diff --git a/compute_kernel_writer/prototype/include/ckw/types/Operators.h b/compute_kernel_writer/prototype/include/ckw/types/Operators.h index 43241170a5..b560996837 100644 --- a/compute_kernel_writer/prototype/include/ckw/types/Operators.h +++ b/compute_kernel_writer/prototype/include/ckw/types/Operators.h @@ -69,8 +69,8 @@ enum class BinaryOp : int32_t enum class AssignmentOp : int32_t { // Unary - Increment = 0x0000, // += - Decrement = 0x0001, // -= + Increment = 0x0000, // += + Decrement = 0x0001, // -= }; } // namespace ckw diff --git a/compute_kernel_writer/prototype/include/ckw/types/TensorSamplerTypes.h b/compute_kernel_writer/prototype/include/ckw/types/TensorSamplerTypes.h index 836bd13c95..63405a0764 100644 --- a/compute_kernel_writer/prototype/include/ckw/types/TensorSamplerTypes.h +++ b/compute_kernel_writer/prototype/include/ckw/types/TensorSamplerTypes.h @@ -39,34 +39,38 @@ enum class TensorSamplerFormat : int32_t enum class TensorSamplerAddressModeX : int32_t { - Unknown = 0, - None = 1, // The user guarantees that the X coordinate is always in-bound - OverlappingMin = 2 // (FIXED shapes 
only) Reduce the load/store length when x == 0 (MIN). The load length will be width % original length
- // Leftover elements can be handled using overlapping. This involves processing some of the elements in the array twice.
+ Unknown = 0,
+ None = 1, // The user guarantees that the X coordinate is always in-bound
+ OverlappingMin =
+ 2 // (FIXED shapes only) Reduce the load/store length when x == 0 (MIN). The load length will be width % original length
+ // Leftover elements can be handled using overlapping. This involves processing some of the elements in the array twice.
 };

 enum class TensorSamplerAddressModeY : int32_t
 {
- Unknown = 0,
- None = 1, // The user guarantees that the Y coordinate is always in-bound
- OverlappingMin = 2, // (FIXED shapes only) Reduce the load/store length when y == 0 (MIN). The load length will be width % original length
- Skip = 3, // Skip the read/write
- SkipMinEdgeOnly = 4, // Skip greater than or equal to max only. The user guarantees that the Y coordinate is always >= 0
- SkipMaxEdgeOnly = 5, // Skip less than 0 only
- ClampToNearest = 6, // Clamp the coordinate to nearest edge (0 or max value allowed on Y)
- ClampToMinEdgeOnly = 7, // Clamp the negative coordinate to 0 only. Therefore, we expect Y to be always < MAX
- ClampToMaxEdgeOnly = 8, // Clamp the coordinate to the max value allowed on Y only. We expect Y to be always >= 0
- ClampToBorder = 9, // Clamp to border which always has 0 value
+ Unknown = 0,
+ None = 1, // The user guarantees that the Y coordinate is always in-bound
+ OverlappingMin =
+ 2, // (FIXED shapes only) Reduce the load/store length when y == 0 (MIN). The load length will be width % original length
+ Skip = 3, // Skip the read/write
+ SkipMinEdgeOnly =
+ 4, // Skip greater than or equal to max only. The user guarantees that the Y coordinate is always >= 0
+ SkipMaxEdgeOnly = 5, // Skip less than 0 only
+ ClampToNearest = 6, // Clamp the coordinate to nearest edge (0 or max value allowed on Y)
+ ClampToMinEdgeOnly = 7, // Clamp the negative coordinate to 0 only. Therefore, we expect Y to be always < MAX
+ ClampToMaxEdgeOnly = 8, // Clamp the coordinate to the max value allowed on Y only. We expect Y to be always >= 0
+ ClampToBorder = 9, // Clamp to border which always has 0 value
 ClampToBorderMinEdgeOnly = 10,
 ClampToBorderMaxEdgeOnly = 11
 };

 enum class TensorSamplerAddressModeZ : int32_t
 {
- Unknown = 0,
- None = 1, // The user guarantees that the Z coordinate is always in-bound
- Skip = 3, // Skip the read/write
- SkipMinEdgeOnly = 4, // Skip greater than or equal to max only. The user guarantees that the Z coordinate is always >= 0
+ Unknown = 0,
+ None = 1, // The user guarantees that the Z coordinate is always in-bound
+ Skip = 3, // Skip the read/write
+ SkipMinEdgeOnly =
+ 4, // Skip greater than or equal to max only. The user guarantees that the Z coordinate is always >= 0
 SkipMaxEdgeOnly = 5, // Skip less than 0 only
 ClampToNearest = 6, // Clamp the coordinate to nearest edge (0 or max value allowed on Z)
 ClampToMinEdgeOnly = 7, // Clamp the negative coordinate to 0 only.
Therefore, we expect Y to be always < MAX diff --git a/compute_kernel_writer/prototype/src/Kernel.cpp b/compute_kernel_writer/prototype/src/Kernel.cpp index 095ac879f1..6228ed17d0 100644 --- a/compute_kernel_writer/prototype/src/Kernel.cpp +++ b/compute_kernel_writer/prototype/src/Kernel.cpp @@ -23,24 +23,27 @@ */ #include "ckw/Kernel.h" + #include "ckw/TensorOperand.h" #include "ckw/types/GpuTargetLanguage.h" + #include "src/Prototype.h" namespace ckw { -Kernel::Kernel(GpuTargetLanguage language) - : Kernel{"unnamed", language} +Kernel::Kernel(GpuTargetLanguage language) : Kernel{"unnamed", language} { } Kernel::Kernel(const char *name, GpuTargetLanguage language) - : _name(name), _kernel(std::make_unique(language)), _operands{}, _tensor_id_operands{} + : _name(name), + _kernel(std::make_unique(language)), + _operands{}, + _tensor_id_operands{} { } - Kernel::~Kernel() { } @@ -50,7 +53,7 @@ const std::string &Kernel::name() const return _name; } -void Kernel::name(const std::string& name) +void Kernel::name(const std::string &name) { _name = name; } @@ -60,14 +63,14 @@ std::vector Kernel::arguments() const const auto impl_args = _kernel->arguments.tensor_argument_declarations(); - for(auto tensor_arg : impl_args) + for (auto tensor_arg : impl_args) { auto tensor = _tensor_id_operands.at(tensor_arg->format().id); arguments.push_back(*tensor); - for(auto component_arg : tensor_arg->component_declarations()) + for (auto component_arg : tensor_arg->component_declarations()) { - switch(component_arg) + switch (component_arg) { case TensorComponentType::OffsetFirstElement: arguments.push_back(tensor->offset_first_element_in_bytes()); diff --git a/compute_kernel_writer/prototype/src/KernelArgument.cpp b/compute_kernel_writer/prototype/src/KernelArgument.cpp index 2b4d7c8cee..24ace28eb3 100644 --- a/compute_kernel_writer/prototype/src/KernelArgument.cpp +++ b/compute_kernel_writer/prototype/src/KernelArgument.cpp @@ -23,14 +23,14 @@ */ #include "ckw/KernelArgument.h" + #include "ckw/Error.h" #include "ckw/TensorOperand.h" namespace ckw { -KernelArgument::KernelArgument(TensorOperand &tensor) - : _type(Type::TensorStorage), _id(tensor.info().id()) +KernelArgument::KernelArgument(TensorOperand &tensor) : _type(Type::TensorStorage), _id(tensor.info().id()) { _sub_id.tensor_storage_type = tensor.storage_type(); } diff --git a/compute_kernel_writer/prototype/src/KernelWriter.cpp b/compute_kernel_writer/prototype/src/KernelWriter.cpp index 5c9a16ee33..9f58d9fefa 100644 --- a/compute_kernel_writer/prototype/src/KernelWriter.cpp +++ b/compute_kernel_writer/prototype/src/KernelWriter.cpp @@ -23,9 +23,11 @@ */ #include "ckw/KernelWriter.h" + #include "ckw/Error.h" #include "ckw/TensorInfo.h" #include "ckw/TensorOperand.h" + #include "src/Prototype.h" #include @@ -38,7 +40,7 @@ namespace inline prototype::TensorInfo create_impl_tensor_info(const TensorInfo &info) { - return prototype::TensorInfo{ info.shape(), info.data_type(), info.data_layout(), info.id() }; + return prototype::TensorInfo{info.shape(), info.data_type(), info.data_layout(), info.id()}; } } // namespace @@ -86,7 +88,8 @@ int32_t KernelWriter::next_id_space() // Tensor and tile declaration // ================================================================================================= -TensorOperand &KernelWriter::declare_tensor_argument(const std::string &name, const TensorInfo &info, TensorStorageType storage_type) +TensorOperand & +KernelWriter::declare_tensor_argument(const std::string &name, const TensorInfo &info, TensorStorageType 
storage_type) { const auto var_name = generate_variable_name(name); @@ -120,13 +123,11 @@ TileOperand &KernelWriter::declare_tile_operand(std::unique_ptr ope auto &operand = _kernel->register_operand(std::move(operand_ptr)); const auto &name = operand.name(); - if(!operand.is_constant()) + if (!operand.is_constant()) { const auto &info = operand.tile_info(); - _impl->declare_tile( - name, - prototype::TileInfo(info.data_type(), info.width(), info.height())); + _impl->declare_tile(name, prototype::TileInfo(info.data_type(), info.width(), info.height())); } else { @@ -140,16 +141,15 @@ TileOperand &KernelWriter::declare_tile_operand(std::unique_ptr ope // Load and store // ================================================================================================= -void KernelWriter::op_load(TileOperand &tile, const TensorOperand &tensor, const TensorTileSampler &sampler, const TileOperand &dilation_y) +void KernelWriter::op_load(TileOperand &tile, + const TensorOperand &tensor, + const TensorTileSampler &sampler, + const TileOperand &dilation_y) { prototype::TensorOperand impl_tensor( tensor.name(), - prototype::GpuSampler{ - sampler.format(), - prototype::to_gpu_tensor_storage(tensor.storage_type()), - sampler.address_mode_x(), - sampler.address_mode_y(), - sampler.address_mode_z() }); + prototype::GpuSampler{sampler.format(), prototype::to_gpu_tensor_storage(tensor.storage_type()), + sampler.address_mode_x(), sampler.address_mode_y(), sampler.address_mode_z()}); auto impl_x = sampler.x().create_impl_operand(_impl.get()); auto impl_y = sampler.y().create_impl_operand(_impl.get()); @@ -167,12 +167,8 @@ void KernelWriter::op_load_indirect(TileOperand &tile, const TensorOperand &tens { prototype::TensorOperand impl_tensor( tensor.name(), - prototype::GpuSampler{ - sampler.format(), - prototype::to_gpu_tensor_storage(tensor.storage_type()), - sampler.address_mode_x(), - sampler.address_mode_y(), - sampler.address_mode_z() }); + prototype::GpuSampler{sampler.format(), prototype::to_gpu_tensor_storage(tensor.storage_type()), + sampler.address_mode_x(), sampler.address_mode_y(), sampler.address_mode_z()}); auto impl_x = sampler.x().create_impl_operand(_impl.get()); auto impl_y = sampler.y().create_impl_operand(_impl.get()); @@ -194,12 +190,8 @@ void KernelWriter::util_get_indirect_buffer(TileOperand &tile, { prototype::TensorOperand impl_tensor( tensor.name(), - prototype::GpuSampler{ - sampler.format(), - prototype::to_gpu_tensor_storage(tensor.storage_type()), - sampler.address_mode_x(), - sampler.address_mode_y(), - sampler.address_mode_z() }); + prototype::GpuSampler{sampler.format(), prototype::to_gpu_tensor_storage(tensor.storage_type()), + sampler.address_mode_x(), sampler.address_mode_y(), sampler.address_mode_z()}); auto impl_x = x.create_impl_operand(_impl.get()); auto impl_y = y.create_impl_operand(_impl.get()); @@ -215,12 +207,8 @@ void KernelWriter::op_store(TensorOperand &tensor, const TileOperand &tile, cons { prototype::TensorOperand impl_tensor( tensor.name(), - prototype::GpuSampler{ - sampler.format(), - prototype::to_gpu_tensor_storage(tensor.storage_type()), - sampler.address_mode_x(), - sampler.address_mode_y(), - sampler.address_mode_z() }); + prototype::GpuSampler{sampler.format(), prototype::to_gpu_tensor_storage(tensor.storage_type()), + sampler.address_mode_x(), sampler.address_mode_y(), sampler.address_mode_z()}); auto impl_src = tile.create_impl_operand(_impl.get()); auto impl_x = sampler.x().create_impl_operand(_impl.get()); auto impl_y = 
sampler.y().create_impl_operand(_impl.get()); @@ -250,7 +238,10 @@ void KernelWriter::op_cast_expression(const TileOperand &dst, const TileOperand _impl->op_cast_expression(impl_dst, impl_src, policy); } -void KernelWriter::op_binary_expression(const TileOperand &dst, const TileOperand &lhs, BinaryOp op, const TileOperand &rhs) +void KernelWriter::op_binary_expression(const TileOperand &dst, + const TileOperand &lhs, + BinaryOp op, + const TileOperand &rhs) { auto impl_lhs = lhs.create_impl_operand(_impl.get()); auto impl_rhs = rhs.create_impl_operand(_impl.get()); @@ -275,7 +266,10 @@ void KernelWriter::op_unary_elementwise_function(const TileOperand &dst, UnaryFu _impl->op_unary_elementwise_function(impl_dst, opcode, impl_src); } -void KernelWriter::op_binary_elementwise_function(const TileOperand &dst, BinaryFunction opcode, const TileOperand &first, const TileOperand &second) +void KernelWriter::op_binary_elementwise_function(const TileOperand &dst, + BinaryFunction opcode, + const TileOperand &first, + const TileOperand &second) { auto impl_dst = dst.create_impl_operand(_impl.get()); auto impl_first = first.create_impl_operand(_impl.get()); @@ -284,7 +278,11 @@ void KernelWriter::op_binary_elementwise_function(const TileOperand &dst, Binary _impl->op_binary_elementwise_function(impl_dst, opcode, impl_first, impl_second); } -void KernelWriter::op_ternary_elementwise_function(const TileOperand &dst, TernaryFunction opcode, const TileOperand &first, const TileOperand &second, const TileOperand &third) +void KernelWriter::op_ternary_elementwise_function(const TileOperand &dst, + TernaryFunction opcode, + const TileOperand &first, + const TileOperand &second, + const TileOperand &third) { auto impl_dst = dst.create_impl_operand(_impl.get()); auto impl_first = first.create_impl_operand(_impl.get()); @@ -305,7 +303,10 @@ void KernelWriter::op_if(const TileOperand &lhs, BinaryOp op, const TileOperand _impl->compound_statement_end(); } -void KernelWriter::op_else_if(const TileOperand &lhs, BinaryOp op, const TileOperand &rhs, const std::function &body) +void KernelWriter::op_else_if(const TileOperand &lhs, + BinaryOp op, + const TileOperand &rhs, + const std::function &body) { auto impl_lhs = lhs.create_impl_operand(_impl.get()); auto impl_rhs = rhs.create_impl_operand(_impl.get()); @@ -324,14 +325,21 @@ void KernelWriter::op_else(const std::function &body) _impl->compound_statement_end(); } -void KernelWriter::op_for_loop(const TileOperand &var_name, BinaryOp cond_op, const TileOperand &cond_value_name, const TileOperand &update_var_name, AssignmentOp update_op, const TileOperand &update_value_name, const std::function &body) +void KernelWriter::op_for_loop(const TileOperand &var_name, + BinaryOp cond_op, + const TileOperand &cond_value_name, + const TileOperand &update_var_name, + AssignmentOp update_op, + const TileOperand &update_value_name, + const std::function &body) { auto impl_var_name = var_name.create_impl_operand(_impl.get()); auto impl_cond_value_name = cond_value_name.create_impl_operand(_impl.get()); auto impl_update_var_name = update_var_name.create_impl_operand(_impl.get()); auto impl_update_value_name = update_value_name.create_impl_operand(_impl.get()); - _impl->op_for_loop_header(impl_var_name, cond_op, impl_cond_value_name, impl_update_var_name, update_op, impl_update_value_name); + _impl->op_for_loop_header(impl_var_name, cond_op, impl_cond_value_name, impl_update_var_name, update_op, + impl_update_value_name); _impl->compound_statement_begin(); body(); 
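For context, the reformatted op_for_loop above simply forwards its operands to op_for_loop_header and then brackets whatever the body callback writes between compound_statement_begin() and compound_statement_end(). A minimal usage sketch, assuming hypothetical tiles i, n, step and acc already declared through the writer, and assuming the comparison member BinaryOp::Less from Operators.h; this example is not part of the patch:

    // Emits roughly: for (; i < n; i += step) { acc = acc + i; }
    writer.op_for_loop(i, BinaryOp::Less, n,             // condition: i < n
                       i, AssignmentOp::Increment, step, // update: i += step
                       [&]()
                       {
                           // Statements written here land inside the loop's compound statement.
                           writer.op_binary_expression(acc, acc, BinaryOp::Add, i);
                       });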
_impl->compound_statement_end(); diff --git a/compute_kernel_writer/prototype/src/OperandBase.cpp b/compute_kernel_writer/prototype/src/OperandBase.cpp index 59cf846cc7..e0617fdc06 100644 --- a/compute_kernel_writer/prototype/src/OperandBase.cpp +++ b/compute_kernel_writer/prototype/src/OperandBase.cpp @@ -27,8 +27,7 @@ namespace ckw { -OperandBase::OperandBase(const std::string &name) - : _name(name) +OperandBase::OperandBase(const std::string &name) : _name(name) { } diff --git a/compute_kernel_writer/prototype/src/Prototype.h b/compute_kernel_writer/prototype/src/Prototype.h index eb9d7198a9..433eef9e7b 100644 --- a/compute_kernel_writer/prototype/src/Prototype.h +++ b/compute_kernel_writer/prototype/src/Prototype.h @@ -25,12 +25,21 @@ #ifndef CKW_PROTOTYPE_SRC_PROTOTYPE_H #define CKW_PROTOTYPE_SRC_PROTOTYPE_H +#include "ckw/Error.h" +#include "ckw/TensorInfo.h" +#include "ckw/types/ConvertPolicy.h" +#include "ckw/types/DataType.h" +#include "ckw/types/Functions.h" +#include "ckw/types/GpuTargetLanguage.h" +#include "ckw/types/Operators.h" +#include "ckw/types/TensorSamplerTypes.h" + #include #include #include // assert (to be removed) #include #include -#include // int32_t +#include // int32_t #include #include // cout (to be removed) #include @@ -40,15 +49,6 @@ #include #include -#include "ckw/Error.h" -#include "ckw/TensorInfo.h" -#include "ckw/types/ConvertPolicy.h" -#include "ckw/types/DataType.h" -#include "ckw/types/Functions.h" -#include "ckw/types/GpuTargetLanguage.h" -#include "ckw/types/Operators.h" -#include "ckw/types/TensorSamplerTypes.h" - namespace ckw { namespace prototype @@ -83,21 +83,21 @@ enum class GpuExtensions struct TensorInfo { - TensorShape shape{ { 0 } }; - DataType data_type{ DataType::Unknown }; - TensorDataLayout data_layout{ TensorDataLayout::Nhwc }; - int32_t id{ -1 }; + TensorShape shape{{0}}; + DataType data_type{DataType::Unknown}; + TensorDataLayout data_layout{TensorDataLayout::Nhwc}; + int32_t id{-1}; }; struct ComponentAttribute { - GpuCompilationSpeed compilation_speed{ GpuCompilationSpeed::Fast }; - bool overwrite_tile{ true }; + GpuCompilationSpeed compilation_speed{GpuCompilationSpeed::Fast}; + bool overwrite_tile{true}; }; inline std::string data_type_to_cl_type(DataType dt) { - switch(dt) + switch (dt) { case DataType::Fp32: return "float"; @@ -125,7 +125,7 @@ inline std::string data_type_to_cl_type(DataType dt) inline int32_t width_to_cl_vector_size(int32_t width) { - switch(width) + switch (width) { case 1: return 1; @@ -160,7 +160,7 @@ inline std::string get_cl_data_type(DataType dt, int32_t width) std::string data_type; int32_t w = width_to_cl_vector_size(width); data_type += data_type_to_cl_type(dt); - if(w != 1) + if (w != 1) { data_type += std::to_string(w); } @@ -169,7 +169,7 @@ inline std::string get_cl_data_type(DataType dt, int32_t width) inline std::string to_opencl_store(int32_t vector_length) { - if(vector_length != 1) + if (vector_length != 1) { return "vstore" + std::to_string(vector_length) + "("; } @@ -185,24 +185,21 @@ struct TileInfo { } - TileInfo(DataType dt) - : dt(dt), w(1), h(1) + TileInfo(DataType dt) : dt(dt), w(1), h(1) { } - TileInfo(DataType dt, int32_t width) - : dt(dt), w(width), h(1) + TileInfo(DataType dt, int32_t width) : dt(dt), w(width), h(1) { } - TileInfo(DataType dt, int32_t width, int32_t height) - : dt(dt), w(width), h(height) + TileInfo(DataType dt, int32_t width, int32_t height) : dt(dt), w(width), h(height) { } - DataType dt{ DataType::Unknown }; // Data type of the tile - int32_t w{ 0 }; // Width 
(i.e. c0 - portion of the channels) - int32_t h{ 0 }; // Height (i.e. s0 - portion of the spatial dimensions) + DataType dt{DataType::Unknown}; // Data type of the tile + int32_t w{0}; // Width (i.e. c0 - portion of the channels) + int32_t h{0}; // Height (i.e. s0 - portion of the spatial dimensions) }; inline std::ostream &operator<<(std::ostream &o, const TileInfo &a) @@ -213,14 +210,14 @@ inline std::ostream &operator<<(std::ostream &o, const TileInfo &a) struct DataTypeAsString { - std::string str{ "" }; - DataType dt{ DataType::Unknown }; - int32_t size{ 1 }; + std::string str{""}; + DataType dt{DataType::Unknown}; + int32_t size{1}; }; struct ValueAsString { - std::string str{ "" }; + std::string str{""}; DataTypeAsString type{}; }; @@ -276,8 +273,8 @@ public: virtual bool need_declaration() const = 0; protected: - TileInfo _format{}; // Tile format - std::string _basename{ "" }; // Tile name + TileInfo _format{}; // Tile format + std::string _basename{""}; // Tile name }; // A tile is a collection of variables used to express a 2D data. The variables are vectors in the GPU context. @@ -329,7 +326,7 @@ public: t.type.size = 1; // Check required because if the width has only one element, we cannot use .s0 - if(_format.w != 1) + if (_format.w != 1) { // Automatic broadcasting t.str += ".s" + std::to_string(x); @@ -360,10 +357,10 @@ public: t.type.dt = _format.dt; t.type.size = width; - if(_format.w != 1) + if (_format.w != 1) { t.str += ".s"; - for(int i = 0; i < width; ++i) + for (int i = 0; i < width; ++i) { t.str += to_scalar_hex(x_start + i); } @@ -374,7 +371,7 @@ public: std::vector underlying_source_variables() const override { std::vector vars; - for(int32_t y = 0; y < _format.h; ++y) + for (int32_t y = 0; y < _format.h; ++y) { ValueAsString t; t.str = build_variable_name(y); @@ -401,7 +398,7 @@ private: { std::string var_name = _basename; - if(_format.h == 1) + if (_format.h == 1) { return var_name; } @@ -416,7 +413,7 @@ private: std::string to_scalar_hex(int32_t x) const { - switch(x) + switch (x) { case 0: case 1: @@ -461,9 +458,9 @@ public: _data = std::vector>(_format.h, std::vector(_format.w)); - for(int32_t y = 0; y < _format.h; ++y) + for (int32_t y = 0; y < _format.h; ++y) { - for(int32_t x = 0; x < _format.w; ++x) + for (int32_t x = 0; x < _format.w; ++x) { _data[y][x] = in[y][x]; } @@ -501,20 +498,20 @@ public: t.type.dt = _format.dt; t.type.size = width; - if(width > 1) + if (width > 1) { t.str += "((" + get_cl_data_type(_format.dt, width) + ")("; } int32_t x = x_start; - for(; x < width - 1; ++x) + for (; x < width - 1; ++x) { t.str += scalar(x, y).str; t.str += ", "; } t.str += scalar(x, y).str; - if(width > 1) + if (width > 1) { t.str += "))"; } @@ -526,9 +523,9 @@ public: { std::vector vars; - for(int32_t y = 0; y < _format.h; ++y) + for (int32_t y = 0; y < _format.h; ++y) { - for(int32_t x = 0; x < _format.w; ++x) + for (int32_t x = 0; x < _format.w; ++x) { ValueAsString t; t.str = _data[y][x]; @@ -572,7 +569,7 @@ enum class TensorComponentGroup : int32_t inline std::string to_string(TensorComponentType x) { - switch(x) + switch (x) { case TensorComponentType::Unknown: return "Unknown"; @@ -672,7 +669,7 @@ enum class GpuTensorStorage : int32_t inline GpuTensorStorage to_gpu_tensor_storage(TensorStorageType s) { - switch(s) + switch (s) { case TensorStorageType::Unknown: return GpuTensorStorage::Unknown; @@ -694,7 +691,7 @@ inline GpuTensorStorage to_gpu_tensor_storage(TensorStorageType s) inline TensorStorageType to_tensor_storage(GpuTensorStorage s) { - 
switch(s) + switch (s) { case GpuTensorStorage::Unknown: return TensorStorageType::Unknown; @@ -755,23 +752,23 @@ public: // Methods to override std::string component(TensorComponentType x) override { - if((static_cast(x) & static_cast(TensorComponentGroup::Constant))) + if ((static_cast(x) & static_cast(TensorComponentGroup::Constant))) { int32_t idx = static_cast(x) & static_cast(TensorComponentIndex::IndexMask); return std::to_string(idx - 1); } - if(_return_by_value_when_possible) + if (_return_by_value_when_possible) { - if((static_cast(x) & static_cast(TensorComponentGroup::Dimension))) + if ((static_cast(x) & static_cast(TensorComponentGroup::Dimension))) { int32_t idx = static_cast(x) & static_cast(TensorComponentIndex::IndexMask); return std::to_string(_format.shape[idx]); } - if((static_cast(x) & static_cast(TensorComponentGroup::FoldedDimension))) + if ((static_cast(x) & static_cast(TensorComponentGroup::FoldedDimension))) { - switch(x) + switch (x) { case TensorComponentType::Dim1xDim2: return std::to_string(_format.shape[1] * _format.shape[2]); @@ -784,7 +781,7 @@ public: } } - if(std::find(_components_required.begin(), _components_required.end(), x) == _components_required.end()) + if (std::find(_components_required.begin(), _components_required.end(), x) == _components_required.end()) { _components_required.push_back(x); } @@ -804,7 +801,7 @@ public: std::string storage(GpuTensorStorage x) override { - if(std::find(_storage_required.begin(), _storage_required.end(), x) == _storage_required.end()) + if (std::find(_storage_required.begin(), _storage_required.end(), x) == _storage_required.end()) { _storage_required.push_back(x); } @@ -814,7 +811,7 @@ public: std::string storage_type_declaration(GpuTensorStorage x) const override { - switch(x) + switch (x) { case GpuTensorStorage::BufferUint8Ptr: return "__global uchar*"; @@ -848,7 +845,7 @@ private: { std::string var_name = _basename; - switch(x) + switch (x) { case GpuTensorStorage::BufferUint8Ptr: return var_name + "_ptr"; @@ -870,7 +867,7 @@ private: { std::string var_name = _basename; - switch(x) + switch (x) { case TensorComponentType::OffsetFirstElement: return var_name + "_offset_first_element"; @@ -900,9 +897,9 @@ private: return var_name; } - bool _return_by_value_when_possible{ false }; - std::vector _storage_required{}; - std::vector _components_required{}; + bool _return_by_value_when_possible{false}; + std::vector _storage_required{}; + std::vector _components_required{}; }; /** @@ -930,16 +927,16 @@ public: struct RegistryTileTableEntry { - RegistryLevel registry_level{ 0 }; - std::unique_ptr tile_object{ nullptr }; + RegistryLevel registry_level{0}; + std::unique_ptr tile_object{nullptr}; }; struct RegistryTileTypeTableEntry { - RegistryTileType tile_type{ RegistryTileType::Tile }; + RegistryTileType tile_type{RegistryTileType::Tile}; RegistryTileName tile_name{}; - RegistryIdSpace registry_idspace{ 0 }; - RegistryLevel registry_level{ 0 }; + RegistryIdSpace registry_idspace{0}; + RegistryLevel registry_level{0}; }; using RegistryTileTable = std::map>; @@ -1002,7 +999,7 @@ public: auto it = _frags.begin(); - while(it != _frags.end()) + while (it != _frags.end()) { x.push_back(it->first); @@ -1026,7 +1023,7 @@ public: // First check whether a tile with the same name exists IVectorTile *result = (*this)[key_var_name]; assert(result == nullptr); - if(result == nullptr) + if (result == nullptr) { std::unique_ptr tile = std::make_unique(var_name, format); @@ -1058,7 +1055,7 @@ public: // First check whether a tile 
with the same name exists IVectorTile *result = (*this)[key_var_name]; assert(result == nullptr); - if(result == nullptr) + if (result == nullptr) { std::unique_ptr tile = std::make_unique(var_name, format); _frags[key_IdSpace][key_var_name].tile_object = std::move(tile); @@ -1090,7 +1087,7 @@ public: // First check whether a tile with the same name exists IVectorTile *result = (*this)[key_var_name]; assert(result == nullptr); - if(result == nullptr) + if (result == nullptr) { std::unique_ptr tile = std::make_unique(in, dt); _frags[key_IdSpace][key_var_name].tile_object = std::move(tile); @@ -1123,7 +1120,7 @@ public: // First check whether a tile with the same name exists IVectorTile *result = (*this)[key_var_name]; assert(result == nullptr); - if(result == nullptr) + if (result == nullptr) { std::unique_ptr tile = std::make_unique(in, dt); _frags[key_IdSpace][key_var_name].tile_object = std::move(tile); @@ -1153,10 +1150,10 @@ public: IVectorTile *result = nullptr; auto search_IdSpace = _frags.find(key_IdSpace); - if(search_IdSpace != _frags.end()) + if (search_IdSpace != _frags.end()) { auto search_tile = _frags[key_IdSpace].find(key_var_name); - if(search_tile != _frags[key_IdSpace].end()) + if (search_tile != _frags[key_IdSpace].end()) { result = search_tile->second.tile_object.get(); assert(result != nullptr); @@ -1224,7 +1221,7 @@ public: std::map::iterator it = _frag_types[IdSpace].begin(); - while(it != _frag_types[IdSpace].end()) + while (it != _frag_types[IdSpace].end()) { // The following line should be enabled. However, we cannot at this stage // because it used to retrieve the output tile produced by each component. @@ -1259,9 +1256,9 @@ public: // Remove all variables in the local scope std::map::iterator it = _frags[_IdSpace].begin(); - while(it != _frags[_IdSpace].end()) + while (it != _frags[_IdSpace].end()) { - if(it->second.registry_level == _registry_level) + if (it->second.registry_level == _registry_level) { it = _frags[_IdSpace].erase(it); } @@ -1273,9 +1270,9 @@ public: std::map::iterator it_type = _frag_types[_IdSpace].begin(); - while(it_type != _frag_types[_IdSpace].end()) + while (it_type != _frag_types[_IdSpace].end()) { - if(it_type->second.registry_level == _registry_level) + if (it_type->second.registry_level == _registry_level) { it_type = _frag_types[_IdSpace].erase(it_type); } @@ -1302,7 +1299,7 @@ private: std::string generate_tile_name(const std::string &name) { assert(_IdSpace >= 0); - if(_registry_level == 0) + if (_registry_level == 0) { return "_G" + std::to_string(_IdSpace) + "_" + name; } @@ -1314,10 +1311,10 @@ private: RegistryTileTable _frags{}; RegistryTileTypeTable _frag_types{}; - RegistryLevel _registry_level{ 0 }; - RegistryIdSpace _IdSpace{ -1 }; - int32_t _anonymous_frag_count{ 0 }; // Counter used to create the anonymous tiles - GpuTargetLanguage _language{ GpuTargetLanguage::Unknown }; // Gpu programming language + RegistryLevel _registry_level{0}; + RegistryIdSpace _IdSpace{-1}; + int32_t _anonymous_frag_count{0}; // Counter used to create the anonymous tiles + GpuTargetLanguage _language{GpuTargetLanguage::Unknown}; // Gpu programming language }; using TensorEntry = std::unique_ptr; @@ -1388,7 +1385,7 @@ public: auto it = _refs.begin(); - while(it != _refs.end()) + while (it != _refs.end()) { x.push_back(it->first); @@ -1420,12 +1417,12 @@ public: // Check whether a tensor with that tensorID exists auto result = _tensor_arguments.find(tensor_id); - if(result == _tensor_arguments.end()) + if (result == _tensor_arguments.end()) { // 
It means that we haven't added a tensor with that tensor_id yet. Create a IGpuTensorArgument before creating the reference - std::unique_ptr arg = std::make_unique(var_name, x, - return_by_value_when_possible); - _tensor_arguments[tensor_id] = std::move(arg); + std::unique_ptr arg = + std::make_unique(var_name, x, return_by_value_when_possible); + _tensor_arguments[tensor_id] = std::move(arg); } _refs[key_IdSpace][key_var_name] = tensor_id; @@ -1445,15 +1442,15 @@ public: IGpuTensorArgument *result = nullptr; auto search_IdSpace = _refs.find(key_IdSpace); - if(search_IdSpace != _refs.end()) + if (search_IdSpace != _refs.end()) { auto search_tensor_id = _refs[key_IdSpace].find(key_var_name); - if(search_tensor_id != _refs[key_IdSpace].end()) + if (search_tensor_id != _refs[key_IdSpace].end()) { const int32_t tensor_id = search_tensor_id->second; auto search_tensor_argument = _tensor_arguments.find(tensor_id); - if(search_tensor_argument != _tensor_arguments.end()) + if (search_tensor_argument != _tensor_arguments.end()) { result = search_tensor_argument->second.get(); } @@ -1475,7 +1472,7 @@ public: auto it = _tensor_arguments.begin(); - while(it != _tensor_arguments.end()) + while (it != _tensor_arguments.end()) { args.push_back(it->second.get()); it++; @@ -1499,7 +1496,7 @@ public: auto search_IdSpace = _refs.find(key_IdSpace); - if(search_IdSpace != _refs.end()) + if (search_IdSpace != _refs.end()) { auto search_tensor_id = _refs[key_IdSpace].find(key_var_name); @@ -1527,7 +1524,7 @@ public: auto search_IdSpace = _refs.find(key_IdSpace); - if(search_IdSpace != _refs.end()) + if (search_IdSpace != _refs.end()) { auto search_tensor_id = _refs[key_IdSpace].find(key_var_name); @@ -1550,8 +1547,8 @@ private: std::map _tensor_arguments{}; std::map> _refs{}; - int32_t _IdSpace{ -1 }; - GpuTargetLanguage _language{ GpuTargetLanguage::Unknown }; // Gpu programming language + int32_t _IdSpace{-1}; + GpuTargetLanguage _language{GpuTargetLanguage::Unknown}; // Gpu programming language }; enum class OpType : int32_t @@ -1563,7 +1560,7 @@ enum class OpType : int32_t inline std::string to_string(AssignmentOp op) { - switch(op) + switch (op) { case AssignmentOp::Decrement: return "-="; @@ -1577,7 +1574,7 @@ inline std::string to_string(AssignmentOp op) inline std::string to_string(UnaryOp op) { - switch(op) + switch (op) { case UnaryOp::LogicalNot: return "!"; @@ -1593,7 +1590,7 @@ inline std::string to_string(UnaryOp op) inline std::string to_string(BinaryOp op) { - switch(op) + switch (op) { case BinaryOp::Add: return "+"; @@ -1629,7 +1626,7 @@ inline std::string to_string(BinaryOp op) inline std::string binary_op_string(BinaryOp op) { - switch(op) + switch (op) { case BinaryOp::Add: return "add"; @@ -1698,13 +1695,12 @@ struct ScalarTileCoord { } - ScalarTileCoord(int32_t x0, int32_t y0) - : x(x0), y(y0) + ScalarTileCoord(int32_t x0, int32_t y0) : x(x0), y(y0) { } - int32_t x{ -1 }; - int32_t y{ -1 }; + int32_t x{-1}; + int32_t y{-1}; }; /** @@ -1768,7 +1764,7 @@ public: private: std::string _str{}; - OperandType _type{ OperandType::Unknown }; + OperandType _type{OperandType::Unknown}; ScalarTileCoord _coord{}; }; @@ -1778,16 +1774,15 @@ struct GpuSampler { GpuSampler() = default; - TensorSamplerFormat format{ TensorSamplerFormat::Unknown }; - GpuSamplerTensorStorage storage{ GpuSamplerTensorStorage::Unknown }; - TensorSamplerAddressModeX address_mode_x{ TensorSamplerAddressModeX::Unknown }; - TensorSamplerAddressModeY address_mode_y{ TensorSamplerAddressModeY::Unknown }; - TensorSamplerAddressModeZ 
address_mode_z{ TensorSamplerAddressModeZ::Unknown }; + TensorSamplerFormat format{TensorSamplerFormat::Unknown}; + GpuSamplerTensorStorage storage{GpuSamplerTensorStorage::Unknown}; + TensorSamplerAddressModeX address_mode_x{TensorSamplerAddressModeX::Unknown}; + TensorSamplerAddressModeY address_mode_y{TensorSamplerAddressModeY::Unknown}; + TensorSamplerAddressModeZ address_mode_z{TensorSamplerAddressModeZ::Unknown}; }; -inline GpuSampler -create_simple_sampler(const TensorInfo *tensor_info_id, GpuSampler sampler, int32_t step_x, int32_t step_y, - int32_t step_z) +inline GpuSampler create_simple_sampler( + const TensorInfo *tensor_info_id, GpuSampler sampler, int32_t step_x, int32_t step_y, int32_t step_z) { CKW_UNUSED(step_x, step_y, step_z); @@ -1804,7 +1799,7 @@ create_simple_sampler(const TensorInfo *tensor_info_id, GpuSampler sampler, int3 int32_t dim_y = 0; int32_t dim_z = 0; - switch(sampler.format) + switch (sampler.format) { case TensorSamplerFormat::C_W_H: dim_x = tensor[0]; @@ -1822,19 +1817,19 @@ create_simple_sampler(const TensorInfo *tensor_info_id, GpuSampler sampler, int3 break; } - if(dim_x == 1) + if (dim_x == 1) { assert(step_x == 1); dst_sampler.address_mode_x = TensorSamplerAddressModeX::None; } - if(dim_y == 1) + if (dim_y == 1) { assert(step_y == 1); dst_sampler.address_mode_y = TensorSamplerAddressModeY::None; } - if(dim_z == 1) + if (dim_z == 1) { assert(step_z == 1); dst_sampler.address_mode_z = TensorSamplerAddressModeZ::None; @@ -1858,8 +1853,12 @@ public: * @param[in] step_y Increment step in the Y direction. Not necessarily it is the same of m0 of tile! * @param[in] step_z Increment step in the Z direction. Not necessarily it is the same of d0 of tile! */ - void initialize(const TensorInfo *tensor_info_id, GpuSamplerTensorStorage tensor_storage, - TensorSamplerFormat tensor_format, int32_t step_x, int32_t step_y, int32_t step_z) + void initialize(const TensorInfo *tensor_info_id, + GpuSamplerTensorStorage tensor_storage, + TensorSamplerFormat tensor_format, + int32_t step_x, + int32_t step_y, + int32_t step_z) { assert(_is_initialized == false); @@ -1908,13 +1907,13 @@ private: sampler.address_mode_z = TensorSamplerAddressModeZ::None; // In the case of texture, we do not need any special checks at the border - if(tensor_storage == GpuSamplerTensorStorage::BufferUint8Ptr) + if (tensor_storage == GpuSamplerTensorStorage::BufferUint8Ptr) { int32_t dim_x = 0; int32_t dim_y = 0; int32_t dim_z = 0; - switch(tensor_format) + switch (tensor_format) { case TensorSamplerFormat::C_W_H: dim_x = tensor[0]; @@ -1932,17 +1931,17 @@ private: break; } - if((dim_x % _step_x) != 0 && dim_x != 1) + if ((dim_x % _step_x) != 0 && dim_x != 1) { sampler.address_mode_x = TensorSamplerAddressModeX::OverlappingMin; } - if((dim_y % _step_y) != 0 && dim_y != 1) + if ((dim_y % _step_y) != 0 && dim_y != 1) { sampler.address_mode_y = TensorSamplerAddressModeY::ClampToMaxEdgeOnly; } - if((dim_z % _step_z) != 0 && dim_z != 1) + if ((dim_z % _step_z) != 0 && dim_z != 1) { sampler.address_mode_z = TensorSamplerAddressModeZ::ClampToMaxEdgeOnly; } @@ -1952,11 +1951,11 @@ private: } GpuSampler _sampler{}; // GpuSampler - int32_t _step_x{ 1 }; - int32_t _step_y{ 1 }; - int32_t _step_z{ 1 }; - const TensorInfo *_tensor_info_id{ nullptr }; - bool _is_initialized{ false }; + int32_t _step_x{1}; + int32_t _step_y{1}; + int32_t _step_z{1}; + const TensorInfo *_tensor_info_id{nullptr}; + bool _is_initialized{false}; }; /** @@ -1965,8 +1964,7 @@ private: class TensorOperand { public: - 
TensorOperand(const std::string &val, GpuSampler sampler) - : _str(val), _sampler(sampler) + TensorOperand(const std::string &val, GpuSampler sampler) : _str(val), _sampler(sampler) { } @@ -2050,9 +2048,9 @@ private: struct LWS { - int32_t x{ 1 }; - int32_t y{ 1 }; - int32_t z{ 1 }; + int32_t x{1}; + int32_t y{1}; + int32_t z{1}; }; /** @@ -2062,8 +2060,7 @@ struct LWS class OperandUnpacker { public: - OperandUnpacker(GpuTileRegistry &tiles, GpuTensorArgumentRegistry &arguments) - : _tiles(tiles), _arguments(arguments) + OperandUnpacker(GpuTileRegistry &tiles, GpuTensorArgumentRegistry &arguments) : _tiles(tiles), _arguments(arguments) { // Increase the level of the stack to allocate possible temporary tiles _tiles.increment_registry_level(); @@ -2078,26 +2075,26 @@ public: IVectorTile *unpack(const Operand &src) { // Get the tile - if(src.type() == OperandType::Tile) + if (src.type() == OperandType::Tile) { assert(_tiles.has_tile(src.value())); return _tiles[src.value()]; } // Create an anonymous tile with a constant - else if(static_cast(src.type()) & 0x00001000) + else if (static_cast(src.type()) & 0x00001000) { - if(src.type() == OperandType::ScalarTile) + if (src.type() == OperandType::ScalarTile) { ScalarTileCoord coord = src.scalar_tile_coordinate(); assert(_tiles.has_tile(src.value())); assert(coord.x >= 0); assert(coord.y >= 0); auto val = _tiles[src.value()]->scalar(coord.x, coord.y); - return _tiles.insert({ { { val.str } } }, val.type.dt); + return _tiles.insert({{{val.str}}}, val.type.dt); } else { - return _tiles.insert({ { { src.value() } } }, to_tile_data_type(src.type())); + return _tiles.insert({{{src.value()}}}, to_tile_data_type(src.type())); } } // Create an anonymous tile with the tensor component @@ -2107,7 +2104,7 @@ public: auto x = _arguments[src.value()]; const std::string val = x->component(to_tensor_component(src.type())); const DataType dt = x->component_data_type(); - return _tiles.insert({ { { val } } }, dt); + return _tiles.insert({{{val}}}, dt); } } @@ -2119,7 +2116,7 @@ private: TensorComponentType to_tensor_component(OperandType x) { - switch(x) + switch (x) { case OperandType::TensorDim0: return TensorComponentType::Dim0; @@ -2163,8 +2160,7 @@ private: class TensorOperandUnpacker { public: - TensorOperandUnpacker(GpuTensorArgumentRegistry &arguments) - : _arguments(arguments){}; + TensorOperandUnpacker(GpuTensorArgumentRegistry &arguments) : _arguments(arguments){}; IGpuTensorArgument *unpack(const TensorOperand &src) { @@ -2191,9 +2187,11 @@ struct GpuKernel std::string config_id{}; // Unique id, required for the tuning stage std::vector list_lws{}; // LWS to test, required for the tuning stage // Dispatch stage - GpuOutputSampler output_sampler{}; // GpuOutputSampler, required for the dispatch stage - std::vector> list_tensor_storages; // List of tensor storages, required for the dispatch stage - std::vector> list_tensor_components; // List of tensor components (width, stride,..), required for the dispatch stage) + GpuOutputSampler output_sampler{}; // GpuOutputSampler, required for the dispatch stage + std::vector> + list_tensor_storages; // List of tensor storages, required for the dispatch stage + std::vector> + list_tensor_components; // List of tensor components (width, stride,..), required for the dispatch stage) }; // Generate all extension pragmas (hardcoded for now) @@ -2234,13 +2232,13 @@ inline std::string generate_code(GpuKernelWriterDataHolder &in, const std::strin auto tensor_args = in.arguments.tensor_argument_declarations(); - for(auto 
&i : tensor_args) + for (auto &i : tensor_args) { // For each tensor used, get the storage and tensor components auto storages = i->storage_declarations(); auto components = i->component_declarations(); - for(auto &y : storages) + for (auto &y : storages) { std::string str; str += i->storage_type_declaration(y); @@ -2249,7 +2247,7 @@ inline std::string generate_code(GpuKernelWriterDataHolder &in, const std::strin arg_str.push_back(str); } - for(auto &y : components) + for (auto &y : components) { std::string str; str += i->component_type_declaration(); @@ -2259,10 +2257,10 @@ inline std::string generate_code(GpuKernelWriterDataHolder &in, const std::strin } } - for(size_t i = 0; i < arg_str.size(); ++i) + for (size_t i = 0; i < arg_str.size(); ++i) { code += arg_str[i]; - if(i + 1 < arg_str.size()) + if (i + 1 < arg_str.size()) { code += ",\n"; } @@ -2284,13 +2282,12 @@ inline std::string generate_code(GpuKernelWriterDataHolder &in, const std::strin class GpuTensor3dMapper { public: - GpuTensor3dMapper(IGpuTensorArgument *tensor, GpuSampler sampler) - : _sampler(sampler), _tensor(tensor){}; + GpuTensor3dMapper(IGpuTensorArgument *tensor, GpuSampler sampler) : _sampler(sampler), _tensor(tensor){}; std::string tensor_component_x() const { const auto format = _sampler.format; - switch(format) + switch (format) { case TensorSamplerFormat::C_WH_1: case TensorSamplerFormat::C_W_H: @@ -2305,7 +2302,7 @@ public: std::string tensor_component_y() const { const auto format = _sampler.format; - switch(format) + switch (format) { case TensorSamplerFormat::C_WH_1: return _tensor->component(TensorComponentType::Dim1xDim2); @@ -2321,7 +2318,7 @@ public: std::string tensor_component_z() const { const auto format = _sampler.format; - switch(format) + switch (format) { case TensorSamplerFormat::C_WH_1: return "1"; @@ -2337,7 +2334,7 @@ public: std::string tensor_component_stride_y() const { const auto format = _sampler.format; - switch(format) + switch (format) { case TensorSamplerFormat::C_WH_1: case TensorSamplerFormat::C_W_H: @@ -2352,7 +2349,7 @@ public: std::string tensor_component_stride_z() const { const auto format = _sampler.format; - switch(format) + switch (format) { case TensorSamplerFormat::C_WH_1: return "0"; @@ -2368,7 +2365,7 @@ public: std::string tensor_component_stride_batch() const { const auto format = _sampler.format; - switch(format) + switch (format) { case TensorSamplerFormat::C_WH_1: case TensorSamplerFormat::C_W_H: @@ -2384,7 +2381,7 @@ public: { auto t = _tensor->format(); const auto format = _sampler.format; - switch(format) + switch (format) { case TensorSamplerFormat::C_WH_1: case TensorSamplerFormat::C_W_H: @@ -2400,7 +2397,7 @@ public: { auto t = _tensor->format(); const auto format = _sampler.format; - switch(format) + switch (format) { case TensorSamplerFormat::C_WH_1: return (t.shape[1] * t.shape[2]) == 1; @@ -2417,7 +2414,7 @@ public: { auto t = _tensor->format(); const auto format = _sampler.format; - switch(format) + switch (format) { case TensorSamplerFormat::C_WH_1: return true; @@ -2434,7 +2431,7 @@ public: { auto t = _tensor->format(); const auto format = _sampler.format; - switch(format) + switch (format) { case TensorSamplerFormat::C_WH_1: case TensorSamplerFormat::C_W_H: @@ -2463,7 +2460,7 @@ private: struct GpuKernelWriterAttribute { - bool return_tensor_component_by_value{ false }; + bool return_tensor_component_by_value{false}; }; enum class RoundingMode @@ -2489,7 +2486,8 @@ public: virtual void declare_tile(const std::string &name, const TileInfo &info) = 0; 
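For context, declare_tile above is implemented further down in this file (ClKernelWriter) by walking the tile's underlying_source_variables() and emitting one vector declaration per row, typed via get_cl_data_type. A minimal sketch of what one call produces; the exact row suffix comes from build_variable_name, so the generated names below are approximate and not part of the patch:

    // Declare a 4x4 fp32 tile: four rows, each held in a float4 vector.
    writer.declare_tile("acc", TileInfo(DataType::Fp32, 4, 4));
    // Generated OpenCL C, one declaration per row, along the lines of:
    //   float4 acc_0;
    //   float4 acc_1;
    //   float4 acc_2;
    //   float4 acc_3;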
- virtual void declare_const_tile(const std::string &name, const std::vector> &in, DataType dt) = 0; + virtual void + declare_const_tile(const std::string &name, const std::vector> &in, DataType dt) = 0; virtual void write_text(const std::string &x) = 0; @@ -2498,48 +2496,82 @@ public: virtual void compound_statement_end() = 0; // Operations - virtual void op_get_global_id(const Operand &dst_var, int32_t dim) = 0; + virtual void op_get_global_id(const Operand &dst_var, int32_t dim) = 0; - virtual void op_get_global_coord(const Operand &dst, const Operand &step, const TensorOperand &tensor, int32_t dim) = 0; + virtual void + op_get_global_coord(const Operand &dst, const Operand &step, const TensorOperand &tensor, int32_t dim) = 0; - virtual void op_get_global_batch(const Operand &dst, const TensorOperand &tensor) = 0; + virtual void op_get_global_batch(const Operand &dst, const TensorOperand &tensor) = 0; - virtual void op_get_global_size(const Operand &dst_var, int32_t dim) = 0; + virtual void op_get_global_size(const Operand &dst_var, int32_t dim) = 0; - virtual void op_unary_expression(const Operand &dst, UnaryOp op, const Operand &src) = 0; + virtual void op_unary_expression(const Operand &dst, UnaryOp op, const Operand &src) = 0; - virtual void op_binary_expression(const Operand &dst, const Operand &lhs, BinaryOp op, const Operand &rhs) = 0; + virtual void op_binary_expression(const Operand &dst, const Operand &lhs, BinaryOp op, const Operand &rhs) = 0; - virtual void op_assign(const Operand &dst_name, const Operand &src_name) = 0; + virtual void op_assign(const Operand &dst_name, const Operand &src_name) = 0; - virtual void op_unary_elementwise_function(const Operand &dst_name, UnaryFunction func, const Operand &src_name) = 0; + virtual void + op_unary_elementwise_function(const Operand &dst_name, UnaryFunction func, const Operand &src_name) = 0; - virtual void op_binary_elementwise_function(const Operand &dst_name, BinaryFunction func, const Operand &first_name, const Operand &second_name) = 0; + virtual void op_binary_elementwise_function(const Operand &dst_name, + BinaryFunction func, + const Operand &first_name, + const Operand &second_name) = 0; - virtual void op_ternary_elementwise_function(const Operand &dst_name, TernaryFunction func, const Operand &first_name, const Operand &second_name, const Operand &third_name) = 0; + virtual void op_ternary_elementwise_function(const Operand &dst_name, + TernaryFunction func, + const Operand &first_name, + const Operand &second_name, + const Operand &third_name) = 0; - virtual void op_if_header(const Operand &lhs, BinaryOp op, const Operand &rhs) = 0; + virtual void op_if_header(const Operand &lhs, BinaryOp op, const Operand &rhs) = 0; - virtual void op_else_if_header(const Operand &lhs, BinaryOp op, const Operand &rhs) = 0; + virtual void op_else_if_header(const Operand &lhs, BinaryOp op, const Operand &rhs) = 0; - virtual void op_else_header() = 0; + virtual void op_else_header() = 0; - virtual void op_for_loop_header(const Operand &var_name, BinaryOp cond_op, const Operand &cond_value, const Operand &update_var, AssignmentOp update_op, const Operand &update_value) = 0; + virtual void op_for_loop_header(const Operand &var_name, + BinaryOp cond_op, + const Operand &cond_value, + const Operand &update_var, + AssignmentOp update_op, + const Operand &update_value) = 0; - virtual void op_load_indirect(const TensorOperand &tensor, const Operand &dst, const Operand &x, const Operand &y_indirect, const Operand &z, const Operand &b = 
Operand("0", OperandType::ScalarInt32)) = 0; + virtual void op_load_indirect(const TensorOperand &tensor, + const Operand &dst, + const Operand &x, + const Operand &y_indirect, + const Operand &z, + const Operand &b = Operand("0", OperandType::ScalarInt32)) = 0; - virtual void op_load_immediate(const TensorOperand &tensor, const Operand &dst, const Operand &x, const Operand &y, const Operand &z, const Operand &b = Operand("0", OperandType::ScalarInt32), const Operand &dilation_y = Operand("1", OperandType::ScalarInt32)) = 0; + virtual void op_load_immediate(const TensorOperand &tensor, + const Operand &dst, + const Operand &x, + const Operand &y, + const Operand &z, + const Operand &b = Operand("0", OperandType::ScalarInt32), + const Operand &dilation_y = Operand("1", OperandType::ScalarInt32)) = 0; - virtual void op_store_immediate(const TensorOperand &tensor, const Operand &src, const Operand &x, const Operand &y, const Operand &z, const Operand &b = Operand("0", OperandType::ScalarInt32)) = 0; + virtual void op_store_immediate(const TensorOperand &tensor, + const Operand &src, + const Operand &x, + const Operand &y, + const Operand &z, + const Operand &b = Operand("0", OperandType::ScalarInt32)) = 0; - virtual void op_cast_expression(const Operand &dst, const Operand &src, ConvertPolicy policy) = 0; + virtual void op_cast_expression(const Operand &dst, const Operand &src, ConvertPolicy policy) = 0; - virtual void op_return() = 0; + virtual void op_return() = 0; // Utils // It is the process of converting - virtual void util_get_indirect_buffer(const Operand &dst, const TensorOperand &tensor, const Operand &x, - const Operand &y, const Operand &x_off, const Operand &y_off) = 0; + virtual void util_get_indirect_buffer(const Operand &dst, + const TensorOperand &tensor, + const Operand &x, + const Operand &y, + const Operand &x_off, + const Operand &y_off) = 0; }; enum class GpuLoadStoreType @@ -2586,12 +2618,11 @@ public: ClLoadStoreBufferHelperWriter &operator=(const ClLoadStoreBufferHelperWriter &) = default; - static bool - validate(IGpuKernelWriter *x, GpuTensor3dMapper mapper, GpuLoadStoreType type, IVectorTile *dst) + static bool validate(IGpuKernelWriter *x, GpuTensor3dMapper mapper, GpuLoadStoreType type, IVectorTile *dst) { CKW_UNUSED(x, type, dst); - if(mapper.gpu_sampler().storage != GpuSamplerTensorStorage::BufferUint8Ptr) + if (mapper.gpu_sampler().storage != GpuSamplerTensorStorage::BufferUint8Ptr) { return false; } @@ -2675,10 +2706,10 @@ public: out_of_bound_finalize_y(dst); // The left over load/store will be written in the finalize stage - if(_ls_width_part.size() != 0) + if (_ls_width_part.size() != 0) { int32_t w = 0; - for(auto &p : _ls_width_part) + for (auto &p : _ls_width_part) { const std::string dst0 = _dst->vector(w, p, idx_y).str; const std::string coord_x = _coord_x + " + " + std::to_string(w); @@ -2698,8 +2729,8 @@ public: } private: - IVectorTile *_dst{ nullptr }; - int32_t _ls_width_full{ 0 }; + IVectorTile *_dst{nullptr}; + int32_t _ls_width_full{0}; std::vector _ls_width_part{}; std::vector, std::string>> _leftovers_x{}; std::string _coord_x{}; @@ -2709,13 +2740,13 @@ private: void out_of_bound_initialize_x(std::string &coord) { - if(_mapper.gpu_sampler().address_mode_x == TensorSamplerAddressModeX::OverlappingMin) + if (_mapper.gpu_sampler().address_mode_x == TensorSamplerAddressModeX::OverlappingMin) { auto tensor_format = _mapper.tensor_argument()->format(); auto shape = tensor_format.shape; _ls_width_part = decompose_leftover_ls_vector_width(shape[0] % 
_ls_width_full); - if(_ls_width_part.size() != 0) + if (_ls_width_part.size() != 0) { _writer->write_text("if(" + coord + " > 0)\n"); _writer->compound_statement_begin(); @@ -2725,16 +2756,16 @@ private: void out_of_bound_finalize_x() { - if(_mapper.gpu_sampler().address_mode_x == TensorSamplerAddressModeX::OverlappingMin) + if (_mapper.gpu_sampler().address_mode_x == TensorSamplerAddressModeX::OverlappingMin) { - if(_ls_width_part.size() != 0) + if (_ls_width_part.size() != 0) { _writer->compound_statement_end(); _writer->write_text("else\n"); _writer->compound_statement_begin(); out_of_bound_initialize_z(_coord_orig_z); - for(auto &i : _leftovers_x) + for (auto &i : _leftovers_x) { out_of_bound_initialize_y(i.first.second); _writer->write_text(i.second); @@ -2753,7 +2784,7 @@ private: const auto address_mode_y = _mapper.gpu_sampler().address_mode_y; - switch(address_mode_y) + switch (address_mode_y) { case TensorSamplerAddressModeY::Skip: case TensorSamplerAddressModeY::ClampToBorder: @@ -2799,7 +2830,7 @@ private: { const auto address_mode_y = _mapper.gpu_sampler().address_mode_y; - switch(address_mode_y) + switch (address_mode_y) { case TensorSamplerAddressModeY::ClampToBorder: case TensorSamplerAddressModeY::ClampToBorderMaxEdgeOnly: @@ -2816,7 +2847,7 @@ private: assert(false); } - switch(address_mode_y) + switch (address_mode_y) { case TensorSamplerAddressModeY::ClampToBorder: case TensorSamplerAddressModeY::ClampToBorderMinEdgeOnly: @@ -2841,7 +2872,7 @@ private: const auto address_mode_z = _mapper.gpu_sampler().address_mode_z; - switch(address_mode_z) + switch (address_mode_z) { case TensorSamplerAddressModeZ::Skip: max = _mapper.tensor_component_z(); @@ -2880,7 +2911,7 @@ private: { const auto address_mode_z = _mapper.gpu_sampler().address_mode_z; - switch(address_mode_z) + switch (address_mode_z) { case TensorSamplerAddressModeZ::Skip: case TensorSamplerAddressModeZ::SkipMinEdgeOnly: @@ -2899,7 +2930,7 @@ private: { std::vector x; - switch(ls_leftover_vector_width) + switch (ls_leftover_vector_width) { case 0: break; @@ -2961,13 +2992,13 @@ private: return x; } - std::string to_ls_buffer(GpuLoadStoreType type, int32_t vector_width, const std::string &data, - const std::string &address) + std::string + to_ls_buffer(GpuLoadStoreType type, int32_t vector_width, const std::string &data, const std::string &address) { - switch(type) + switch (type) { case GpuLoadStoreType::Load: - if(vector_width != 1) + if (vector_width != 1) { return data + " = vload" + std::to_string(vector_width) + "(0, " + address + ")"; } @@ -2977,7 +3008,7 @@ private: } break; case GpuLoadStoreType::Store: - if(vector_width != 1) + if (vector_width != 1) { return "vstore" + std::to_string(vector_width) + "(" + data + ", 0, " + address + ")"; } @@ -2993,25 +3024,25 @@ private: } } - std::string to_ls_buffer_address(const std::string &x, const std::string &y, const std::string &z, - const std::string &b) const + std::string + to_ls_buffer_address(const std::string &x, const std::string &y, const std::string &z, const std::string &b) const { - auto tensor_storage = static_cast(_mapper.gpu_sampler().storage); + auto tensor_storage = static_cast(_mapper.gpu_sampler().storage); assert(tensor_storage == GpuTensorStorage::BufferUint8Ptr); - const std::string ptr_buf = _mapper.tensor_argument()->storage(tensor_storage); - const std::string dst_type = get_cl_data_type(_dst->format().dt, 1); + const std::string ptr_buf = _mapper.tensor_argument()->storage(tensor_storage); + const std::string dst_type = 
get_cl_data_type(_dst->format().dt, 1); std::string address; address += "(__global "; address += dst_type; address += "*)("; address += ptr_buf; - if(x != "0" && (_mapper.is_one_component_x() != true)) + if (x != "0" && (_mapper.is_one_component_x() != true)) { address += " + ("; address += x + ") * sizeof(" + dst_type + ")"; } - if(y != "0") + if (y != "0") { const std::string stride_y = _mapper.tensor_component_stride_y(); address += " + ("; @@ -3019,7 +3050,7 @@ private: address += " * "; address += stride_y; } - if(z != "0" && (_mapper.is_one_component_z() != true)) + if (z != "0" && (_mapper.is_one_component_z() != true)) { const std::string stride_z = _mapper.tensor_component_stride_z(); address += " + ("; @@ -3027,7 +3058,7 @@ private: address += " * "; address += stride_z; } - if(b != "0" && (_mapper.is_one_component_batch() != true)) + if (b != "0" && (_mapper.is_one_component_batch() != true)) { const std::string stride_b = _mapper.tensor_component_stride_batch(); address += " + ("; @@ -3043,32 +3074,32 @@ private: class ClLoadStoreImage2dHelperWriter : public IGpuLoadStoreHelperWriter { public: - static bool - validate(IGpuKernelWriter *x, const GpuTensor3dMapper &mapper, GpuLoadStoreType type, IVectorTile *dst) + static bool validate(IGpuKernelWriter *x, const GpuTensor3dMapper &mapper, GpuLoadStoreType type, IVectorTile *dst) { CKW_UNUSED(x); - if(dst->format().w != 4) + if (dst->format().w != 4) { return false; } - if(mapper.gpu_sampler().address_mode_x != TensorSamplerAddressModeX::None) + if (mapper.gpu_sampler().address_mode_x != TensorSamplerAddressModeX::None) { return false; } - if(mapper.gpu_sampler().address_mode_z != TensorSamplerAddressModeZ::None) + if (mapper.gpu_sampler().address_mode_z != TensorSamplerAddressModeZ::None) { return false; } - if(mapper.gpu_sampler().storage != GpuSamplerTensorStorage::Image2dReadOnly && type == GpuLoadStoreType::Load) + if (mapper.gpu_sampler().storage != GpuSamplerTensorStorage::Image2dReadOnly && type == GpuLoadStoreType::Load) { return false; } - if(mapper.gpu_sampler().storage != GpuSamplerTensorStorage::Image2dWriteOnly && type == GpuLoadStoreType::Store) + if (mapper.gpu_sampler().storage != GpuSamplerTensorStorage::Image2dWriteOnly && + type == GpuLoadStoreType::Store) { return false; } - if((dst->format().dt != DataType::Fp32) && (dst->format().dt != DataType::Fp16)) + if ((dst->format().dt != DataType::Fp32) && (dst->format().dt != DataType::Fp16)) { return false; } @@ -3134,8 +3165,8 @@ public: } private: - IVectorTile *_dst{ nullptr }; - int32_t _ls_width_full{ 0 }; + IVectorTile *_dst{nullptr}; + int32_t _ls_width_full{0}; std::string _coord_x{}; std::string _coord_z{}; std::string _coord_b{}; @@ -3146,7 +3177,7 @@ private: const auto address_mode_y = _mapper.gpu_sampler().address_mode_y; - switch(address_mode_y) + switch (address_mode_y) { case TensorSamplerAddressModeY::Skip: max = _mapper.tensor_component_y(); @@ -3182,7 +3213,7 @@ private: const auto address_mode_y = _mapper.gpu_sampler().address_mode_y; - switch(address_mode_y) + switch (address_mode_y) { case TensorSamplerAddressModeY::Skip: case TensorSamplerAddressModeY::SkipMinEdgeOnly: @@ -3195,16 +3226,19 @@ private: } }; - std::string to_ls_image2d(GpuLoadStoreType type, int32_t vector_width, const std::string &data, - const std::string &sampler, const std::string &coord) + std::string to_ls_image2d(GpuLoadStoreType type, + int32_t vector_width, + const std::string &data, + const std::string &sampler, + const std::string &coord) { CKW_UNUSED(vector_width); 
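For context, to_ls_image2d assembles the read_image/write_image call from the image object, the sampler string (built from the Y address mode), and the packed coordinate from to_ls_image2d_coord: x is shifted right by 2 because each texel holds four elements, and y folds in z and batch via the tensor's dimension components. A rough sketch of the statement it yields for a 4-wide fp32 row load with address mode None; the tensor, tile and dimension names are illustrative and not part of the patch:

    // What the Load branch returns for data = "dst_0", with the None sampler,
    // where src_dim1/src_dim2 stand in for the tensor_component_y()/tensor_component_z() strings:
    const std::string emitted = "dst_0 = read_imagef(src_img2d, "
                                "CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST, "
                                "(int2)((x) >> 2, (y + (z) * src_dim1 + (b) * src_dim1 * src_dim2)))";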
auto tensor_storage = static_cast(_mapper.gpu_sampler().storage); const std::string image2d_obj = _mapper.tensor_argument()->storage(tensor_storage); - const std::string post_fix = _dst->format().dt == DataType::Fp32 ? "f" : "h"; + const std::string post_fix = _dst->format().dt == DataType::Fp32 ? "f" : "h"; - switch(type) + switch (type) { case GpuLoadStoreType::Load: return data + " = read_image" + post_fix + "(" + image2d_obj + ", " + sampler + ", " + coord + ")"; @@ -3223,7 +3257,7 @@ private: { const auto address_mode_y = _mapper.gpu_sampler().address_mode_y; - switch(address_mode_y) + switch (address_mode_y) { case TensorSamplerAddressModeY::None: return "CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST"; @@ -3245,17 +3279,17 @@ private: } } - std::string to_ls_image2d_coord(const std::string &x, const std::string &y, const std::string &z, - const std::string &b) const + std::string + to_ls_image2d_coord(const std::string &x, const std::string &y, const std::string &z, const std::string &b) const { std::string coord_x = "(" + x + ") >> 2"; std::string coord_y = "("; - if(y != "0") + if (y != "0") { coord_y += y; } - if(z != "0" && (_mapper.is_one_component_z() != true)) + if (z != "0" && (_mapper.is_one_component_z() != true)) { const std::string dim = _mapper.tensor_component_y(); coord_y += " + ("; @@ -3263,7 +3297,7 @@ private: coord_y += " * "; coord_y += dim; } - if(b != "0" && (_mapper.is_one_component_batch() != true)) + if (b != "0" && (_mapper.is_one_component_batch() != true)) { const std::string dim0 = _mapper.tensor_component_y(); const std::string dim1 = _mapper.tensor_component_z(); @@ -3292,7 +3326,7 @@ public: create(IGpuKernelWriter *x, const GpuTensor3dMapper &mapper, GpuLoadStoreType type) { const auto tensor_storage = mapper.gpu_sampler().storage; - switch(tensor_storage) + switch (tensor_storage) { case GpuSamplerTensorStorage::BufferUint8Ptr: return std::make_unique(x, mapper, type); @@ -3352,14 +3386,14 @@ public: IVectorTile *x = _data->tiles[name]; - for(auto &t : x->underlying_source_variables()) + for (auto &t : x->underlying_source_variables()) { _data->code += t.type.str + " " + t.str + ";\n"; } } - void declare_const_tile(const std::string &name, const std::vector> &in, - DataType dt) override + void + declare_const_tile(const std::string &name, const std::vector> &in, DataType dt) override { assert(_data->tiles[name] == nullptr); _data->tiles.insert(name, in, dt); @@ -3387,7 +3421,8 @@ public: { assert(dst_var.type() == OperandType::Tile); assert(_data->tiles.has_tile(dst_var.value())); - assert(_data->tiles[dst_var.value()]->format().w == 1 && _data->tiles[dst_var.value()]->format().h == 1); // It must be a scalar variable + assert(_data->tiles[dst_var.value()]->format().w == 1 && + _data->tiles[dst_var.value()]->format().h == 1); // It must be a scalar variable auto var = _data->tiles[dst_var.value()]; @@ -3397,8 +3432,10 @@ public: _data->code += ");\n"; }; - void op_get_global_coord(const Operand &o_dst, const Operand &o_step, const TensorOperand &o_tensor, - int32_t dim) override + void op_get_global_coord(const Operand &o_dst, + const Operand &o_step, + const TensorOperand &o_tensor, + int32_t dim) override { OperandUnpacker operands(_data->tiles, _data->arguments); auto dst = operands.unpack(o_dst); @@ -3412,17 +3449,17 @@ public: GpuTensor3dMapper mapper(tensor, gpu_sampler); - switch(dim) + switch (dim) { case 0: - if(mapper.is_one_component_x()) + if (mapper.is_one_component_x()) { _data->code += dst->scalar(0, 0).str; 
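// When the x dimension collapses to a single component the coordinate is pinned to
// zero: this statement and the next one emit "<dst> = 0;". The general case below is
// expected to scale the work-item id by the step, e.g. something like
// "<dst> = get_global_id(0) * <step>;", but the exact emitted string is elided by the
// surrounding hunk context, so treat that expansion as an assumption.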
_data->code += " = 0;\n"; } else { - if(mapper.gpu_sampler().address_mode_x == TensorSamplerAddressModeX::OverlappingMin) + if (mapper.gpu_sampler().address_mode_x == TensorSamplerAddressModeX::OverlappingMin) { // Validation: Check: fixed tensor shape // TO BE CHANGED @@ -3441,14 +3478,14 @@ public: } break; case 1: - if(mapper.is_one_component_y()) + if (mapper.is_one_component_y()) { _data->code += dst->scalar(0, 0).str; _data->code += " = 0;\n"; } else { - if(mapper.gpu_sampler().address_mode_y == TensorSamplerAddressModeY::OverlappingMin) + if (mapper.gpu_sampler().address_mode_y == TensorSamplerAddressModeY::OverlappingMin) { } else @@ -3461,7 +3498,7 @@ public: } break; case 2: - if(mapper.is_one_component_z()) + if (mapper.is_one_component_z()) { _data->code += dst->scalar(0, 0).str; _data->code += " = 0;\n"; @@ -3490,7 +3527,7 @@ public: GpuTensor3dMapper mapper(tensor, gpu_sampler); - if(mapper.is_one_component_batch()) + if (mapper.is_one_component_batch()) { _data->code += dst->scalar(0, 0).str; _data->code += " = 0;\n"; @@ -3506,7 +3543,8 @@ public: { assert(dst_var.type() == OperandType::Tile); assert(_data->tiles.has_tile(dst_var.value())); - assert(_data->tiles[dst_var.value()]->format().w == 1 && _data->tiles[dst_var.value()]->format().h == 1); // It must be a scalar variable + assert(_data->tiles[dst_var.value()]->format().w == 1 && + _data->tiles[dst_var.value()]->format().h == 1); // It must be a scalar variable auto var = _data->tiles[dst_var.value()]; @@ -3532,7 +3570,7 @@ public: const std::string src_prefix = broadcast_src_x ? "(" + dt + ")" : ""; // Broadcasting on Y is automatic - for(int32_t y = 0; y < dst_h; ++y) + for (int32_t y = 0; y < dst_h; ++y) { _data->code += dst->vector(y).str; _data->code += " = "; @@ -3542,7 +3580,9 @@ public: } } - void op_binary_expression(const Operand &dst_name, const Operand &lhs_name, BinaryOp op, + void op_binary_expression(const Operand &dst_name, + const Operand &lhs_name, + BinaryOp op, const Operand &rhs_name) override { OperandUnpacker operands(_data->tiles, _data->arguments); @@ -3556,14 +3596,14 @@ public: const int32_t lhs_w = lhs->format().w; const int32_t rhs_w = rhs->format().w; - if(op == BinaryOp::MatMul_Nt_T) + if (op == BinaryOp::MatMul_Nt_T) { assert((dst->format().dt == DataType::Fp32) || (dst->format().dt == DataType::Fp16)); - for(int32_t y = 0; y < dst_h; ++y) + for (int32_t y = 0; y < dst_h; ++y) { - for(int32_t x = 0; x < dst_w; ++x) + for (int32_t x = 0; x < dst_w; ++x) { - for(int32_t k = 0; k < lhs_w; ++k) + for (int32_t k = 0; k < lhs_w; ++k) { _data->code += dst->scalar(x, y).str; _data->code += " = fma("; @@ -3583,12 +3623,14 @@ public: const bool broadcast_lhs_x = dst_w != 1 && lhs_w == 1; const bool broadcast_rhs_x = dst_w != 1 && rhs_w == 1; - const std::string lhs_prefix = broadcast_lhs_x ? "(" + dst->underlying_source_variables()[0].type.str + ")" : ""; - const std::string rhs_prefix = broadcast_rhs_x ? "(" + dst->underlying_source_variables()[0].type.str + ")" : ""; - const std::string op_str = to_string(op); + const std::string lhs_prefix = + broadcast_lhs_x ? "(" + dst->underlying_source_variables()[0].type.str + ")" : ""; + const std::string rhs_prefix = + broadcast_rhs_x ? 
"(" + dst->underlying_source_variables()[0].type.str + ")" : ""; + const std::string op_str = to_string(op); // Broadcasting on Y is automatic - for(int32_t y = 0; y < dst_h; ++y) + for (int32_t y = 0; y < dst_h; ++y) { _data->code += dst->vector(y).str; _data->code += " = "; @@ -3607,13 +3649,13 @@ public: const IVectorTile *src = operands.unpack(o_src); const IVectorTile *dst = operands.unpack(o_dst); // const int32_t dst_w = dst->format().w; - const int32_t dst_h = dst->format().h; - const std::string dt = dst->underlying_source_variables()[0].type.str; - const bool is_float = (dst->format().dt == DataType::Fp32) || (dst->format().dt == DataType::Fp16); - const std::string sat = ((policy == ConvertPolicy::Saturate && !is_float) ? "_sat" : ""); + const int32_t dst_h = dst->format().h; + const std::string dt = dst->underlying_source_variables()[0].type.str; + const bool is_float = (dst->format().dt == DataType::Fp32) || (dst->format().dt == DataType::Fp16); + const std::string sat = ((policy == ConvertPolicy::Saturate && !is_float) ? "_sat" : ""); // Broadcasting on Y is automatic - for(int32_t y = 0; y < dst_h; ++y) + for (int32_t y = 0; y < dst_h; ++y) { _data->code += dst->vector(y).str; _data->code += " = convert_" + dt + sat + "("; @@ -3638,7 +3680,7 @@ public: const std::string src_prefix = broadcast_src_x ? "(" + dt + ")" : ""; // Broadcasting on Y is automatic - for(int32_t y = 0; y < dst_h; ++y) + for (int32_t y = 0; y < dst_h; ++y) { _data->code += dst->vector(y).str; _data->code += " = "; @@ -3647,8 +3689,7 @@ public: } } - void - op_unary_elementwise_function(const Operand &dst_name, UnaryFunction func, const Operand &src_name) override + void op_unary_elementwise_function(const Operand &dst_name, UnaryFunction func, const Operand &src_name) override { OperandUnpacker operands(_data->tiles, _data->arguments); const IVectorTile *src = operands.unpack(src_name); @@ -3665,12 +3706,12 @@ public: const std::string src_prefix = "(" + dt + ")"; // Broadcasting on Y is automatic - for(int32_t y = 0; y < dst_h; ++y) + for (int32_t y = 0; y < dst_h; ++y) { _data->code += dst->vector(y).str; _data->code += " = "; - switch(func) + switch (func) { case UnaryFunction::Exp: _data->code += "exp("; @@ -3708,7 +3749,10 @@ public: } } - void op_binary_elementwise_function(const Operand &dst_name, BinaryFunction func, const Operand &first_name, const Operand &second_name) override + void op_binary_elementwise_function(const Operand &dst_name, + BinaryFunction func, + const Operand &first_name, + const Operand &second_name) override { OperandUnpacker operands(_data->tiles, _data->arguments); const IVectorTile *first = operands.unpack(first_name); @@ -3726,12 +3770,12 @@ public: const bool is_float = (datatype.dt == DataType::Fp32 || datatype.dt == DataType::Fp16); // Broadcasting on Y is automatic - for(int32_t y = 0; y < dst_h; ++y) + for (int32_t y = 0; y < dst_h; ++y) { _data->code += dst->vector(y).str; _data->code += " = "; - switch(func) + switch (func) { case BinaryFunction::Min: _data->code += is_float ? 
"fmin(" : "min("; @@ -3750,7 +3794,11 @@ public: } } - void op_ternary_elementwise_function(const Operand &dst_name, TernaryFunction func, const Operand &first_name, const Operand &second_name, const Operand &third_name) override + void op_ternary_elementwise_function(const Operand &dst_name, + TernaryFunction func, + const Operand &first_name, + const Operand &second_name, + const Operand &third_name) override { OperandUnpacker operands(_data->tiles, _data->arguments); const IVectorTile *first = operands.unpack(first_name); @@ -3758,8 +3806,8 @@ public: const IVectorTile *third = operands.unpack(third_name); const IVectorTile *dst = operands.unpack(dst_name); - const int32_t dst_h = dst->format().h; - const std::string dt = dst->underlying_source_variables()[0].type.str; + const int32_t dst_h = dst->format().h; + const std::string dt = dst->underlying_source_variables()[0].type.str; // Always perform an explicit cast. See similar comments in op_unary_elementwise_function const std::string first_prefix = "(" + dt + ")"; @@ -3767,12 +3815,12 @@ public: const std::string third_prefix = "(" + dt + ")"; // Broadcasting on Y is automatic - for(int32_t y = 0; y < dst_h; ++y) + for (int32_t y = 0; y < dst_h; ++y) { _data->code += dst->vector(y).str; _data->code += " = "; - switch(func) + switch (func) { case TernaryFunction::Select: _data->code += "select("; @@ -3822,7 +3870,12 @@ public: _data->code += "else\n"; } - void op_for_loop_header(const Operand& var_name, BinaryOp cond_op, const Operand& cond_value_name, const Operand &update_var_name, AssignmentOp update_op, const Operand& update_value_name) override + void op_for_loop_header(const Operand &var_name, + BinaryOp cond_op, + const Operand &cond_value_name, + const Operand &update_var_name, + AssignmentOp update_op, + const Operand &update_value_name) override { OperandUnpacker operands(_data->tiles, _data->arguments); const IVectorTile *var = operands.unpack(var_name); @@ -3850,9 +3903,13 @@ public: _data->code += "\n"; } - void op_load_immediate(const TensorOperand &o_tensor, const Operand &o_dst, const Operand &o_x, - const Operand &o_y, const Operand &o_z, const Operand &o_batch_idx, - const Operand &dilation_y) override + void op_load_immediate(const TensorOperand &o_tensor, + const Operand &o_dst, + const Operand &o_x, + const Operand &o_y, + const Operand &o_z, + const Operand &o_batch_idx, + const Operand &dilation_y) override { OperandUnpacker operands(_data->tiles, _data->arguments); @@ -3875,10 +3932,10 @@ public: // Initialize the constant part load_writer->initialize(dst, x, z, b); - for(int i = 0; i < dst->format().h; ++i) + for (int i = 0; i < dst->format().h; ++i) { std::string coord_y = y->scalar(0, 0).str + " + " + std::to_string(i); - if(dil_y->scalar(0, 0).str != "1") + if (dil_y->scalar(0, 0).str != "1") { coord_y += " * " + dil_y->scalar(0, 0).str; } @@ -3888,9 +3945,12 @@ public: load_writer->finalize(); } - void op_load_indirect(const TensorOperand &o_tensor, const Operand &o_dst, const Operand &o_x, - const Operand &o_indirect_h, const Operand &o_z, - const Operand &o_batch_idx) override + void op_load_indirect(const TensorOperand &o_tensor, + const Operand &o_dst, + const Operand &o_x, + const Operand &o_indirect_h, + const Operand &o_z, + const Operand &o_batch_idx) override { OperandUnpacker operands(_data->tiles, _data->arguments); @@ -3912,7 +3972,7 @@ public: // Initialize the constant part load_writer->initialize(dst, x, z, b); - for(int i = 0; i < dst->format().h; ++i) + for (int i = 0; i < dst->format().h; 
++i) { load_writer->write(std::make_pair(i, y_ind->scalar(0, i).str)); } @@ -3920,9 +3980,12 @@ public: load_writer->finalize(); } - void op_store_immediate(const TensorOperand &tensor_name, const Operand &src_name, const Operand &x_name, - const Operand &y_name, const Operand &z_name, - const Operand &batch_index_name) override + void op_store_immediate(const TensorOperand &tensor_name, + const Operand &src_name, + const Operand &x_name, + const Operand &y_name, + const Operand &z_name, + const Operand &batch_index_name) override { OperandUnpacker operands(_data->tiles, _data->arguments); @@ -3946,7 +4009,7 @@ public: int32_t tile_h = src->format().h; - for(int m0 = tile_h - 1; m0 >= 0; m0--) + for (int m0 = tile_h - 1; m0 >= 0; m0--) { store_writer->write(std::make_pair(m0, y->scalar(0, 0).str + " + " + std::to_string(m0))); } @@ -3959,8 +4022,12 @@ public: _data->code += "return;\n"; } - void util_get_indirect_buffer(const Operand &o_dst, const TensorOperand &o_tensor, const Operand &o_x, - const Operand &o_y, const Operand &o_x_off, const Operand &o_y_off) override + void util_get_indirect_buffer(const Operand &o_dst, + const TensorOperand &o_tensor, + const Operand &o_x, + const Operand &o_y, + const Operand &o_x_off, + const Operand &o_y_off) override { OperandUnpacker operands(_data->tiles, _data->arguments); const IVectorTile *dst = operands.unpack(o_dst); @@ -4002,7 +4069,7 @@ public: declare_tile("_y_s", TileInfo(DataType::Int32)); auto x_s = operands.unpack(Operand("_x_s")); auto y_s = operands.unpack(Operand("_y_s")); - for(int i = 0; i < dst->format().h; ++i) + for (int i = 0; i < dst->format().h; ++i) { // x_s = (xi_0 + x_k); // y_s = (yi_0 + y_k); @@ -4060,8 +4127,8 @@ public: } private: - GpuKernelWriterDataHolder *_data{ nullptr }; - GpuKernelWriterAttribute *_attr{ nullptr }; + GpuKernelWriterDataHolder *_data{nullptr}; + GpuKernelWriterAttribute *_attr{nullptr}; }; /** IGpuKernelWriter factory class */ @@ -4074,10 +4141,9 @@ public: * * @return IGpuKernelWriter */ - static std::unique_ptr - create(GpuKernelWriterAttribute *attr, GpuKernelWriterDataHolder *x) + static std::unique_ptr create(GpuKernelWriterAttribute *attr, GpuKernelWriterDataHolder *x) { - switch(x->programming_language()) + switch (x->programming_language()) { case GpuTargetLanguage::OpenCL: return std::make_unique(attr, x); @@ -4094,9 +4160,9 @@ adjust_step(TensorSamplerFormat tensor_format, int32_t step, const TensorInfo *t { auto tensor = tensor_info_id->shape; - int32_t dim[3] = { 0 }; + int32_t dim[3] = {0}; - switch(tensor_format) + switch (tensor_format) { case TensorSamplerFormat::C_W_H: dim[0] = tensor[0]; diff --git a/compute_kernel_writer/prototype/src/TensorOperand.cpp b/compute_kernel_writer/prototype/src/TensorOperand.cpp index c6725d3b26..d1aefbbb71 100644 --- a/compute_kernel_writer/prototype/src/TensorOperand.cpp +++ b/compute_kernel_writer/prototype/src/TensorOperand.cpp @@ -23,10 +23,12 @@ */ #include "ckw/TensorOperand.h" + #include "ckw/Error.h" #include "ckw/Kernel.h" #include "ckw/TensorInfo.h" #include "ckw/TileOperand.h" + #include "src/Prototype.h" namespace ckw @@ -35,9 +37,11 @@ namespace ckw namespace { -TensorComponentOperand &get_or_create_component(TensorOperand &tensor, std::unique_ptr &ptr, TensorComponentType component) +TensorComponentOperand &get_or_create_component(TensorOperand &tensor, + std::unique_ptr &ptr, + TensorComponentType component) { - if(ptr == nullptr) + if (ptr == nullptr) { ptr = std::make_unique(tensor, component); } @@ -59,7 +63,7 @@ 
TensorOperand::TensorOperand(const std::string &name, const TensorInfo &info, Te prototype::Operand TensorOperand::create_impl_operand(prototype::IGpuKernelWriter *writer) const { CKW_UNUSED(writer); - return { name() }; + return {name()}; } const TensorInfo &TensorOperand::info() const @@ -206,9 +210,9 @@ TensorComponentType TensorComponentOperand::component_type() const prototype::Operand TensorComponentOperand::create_impl_operand(prototype::IGpuKernelWriter *writer) const { CKW_UNUSED(writer); - prototype::OperandType type{ prototype::OperandType::Unknown }; + prototype::OperandType type{prototype::OperandType::Unknown}; - switch(_component) + switch (_component) { case TensorComponentType::OffsetFirstElement: type = prototype::OperandType::TensorDataOffset; diff --git a/compute_kernel_writer/prototype/src/TensorTileSampler.cpp b/compute_kernel_writer/prototype/src/TensorTileSampler.cpp index 28e54df3a5..bf9f946ce8 100644 --- a/compute_kernel_writer/prototype/src/TensorTileSampler.cpp +++ b/compute_kernel_writer/prototype/src/TensorTileSampler.cpp @@ -23,6 +23,7 @@ */ #include "ckw/TensorTileSampler.h" + #include "ckw/TileOperand.h" #include "ckw/types/TensorSamplerTypes.h" @@ -33,24 +34,47 @@ TensorTileSampler::TensorTileSampler() { } -TensorTileSampler::TensorTileSampler( - TileOperand &x, TileOperand &y, TileOperand &z, TileOperand &b, - TensorSamplerFormat format, - TensorSamplerAddressModeX address_mode_x, - TensorSamplerAddressModeY address_mode_y, - TensorSamplerAddressModeZ address_mode_z) - : _x(&x), _y(&y), _z(&z), _b(&b), _height(0), _width(0), _format(format), _address_mode_x(address_mode_x), _address_mode_y(address_mode_y), _address_mode_z(address_mode_z) -{ -} - -TensorTileSampler::TensorTileSampler( - TileOperand &x, TileOperand &y, TileOperand &z, TileOperand &b, - int32_t height, int32_t width, - TensorSamplerFormat format, - TensorSamplerAddressModeX address_mode_x, - TensorSamplerAddressModeY address_mode_y, - TensorSamplerAddressModeZ address_mode_z) - : _x(&x), _y(&y), _z(&z), _b(&b), _height(height), _width(width), _format(format), _address_mode_x(address_mode_x), _address_mode_y(address_mode_y), _address_mode_z(address_mode_z) +TensorTileSampler::TensorTileSampler(TileOperand &x, + TileOperand &y, + TileOperand &z, + TileOperand &b, + TensorSamplerFormat format, + TensorSamplerAddressModeX address_mode_x, + TensorSamplerAddressModeY address_mode_y, + TensorSamplerAddressModeZ address_mode_z) + : _x(&x), + _y(&y), + _z(&z), + _b(&b), + _height(0), + _width(0), + _format(format), + _address_mode_x(address_mode_x), + _address_mode_y(address_mode_y), + _address_mode_z(address_mode_z) +{ +} + +TensorTileSampler::TensorTileSampler(TileOperand &x, + TileOperand &y, + TileOperand &z, + TileOperand &b, + int32_t height, + int32_t width, + TensorSamplerFormat format, + TensorSamplerAddressModeX address_mode_x, + TensorSamplerAddressModeY address_mode_y, + TensorSamplerAddressModeZ address_mode_z) + : _x(&x), + _y(&y), + _z(&z), + _b(&b), + _height(height), + _width(width), + _format(format), + _address_mode_x(address_mode_x), + _address_mode_y(address_mode_y), + _address_mode_z(address_mode_z) { } diff --git a/compute_kernel_writer/prototype/src/TileInfo.cpp b/compute_kernel_writer/prototype/src/TileInfo.cpp index 66d8cb1620..273266eedc 100644 --- a/compute_kernel_writer/prototype/src/TileInfo.cpp +++ b/compute_kernel_writer/prototype/src/TileInfo.cpp @@ -26,18 +26,15 @@ namespace ckw { -TileInfo::TileInfo(DataType dt) - : _dt(dt), _shape({ { 1, 1 } }) 
+TileInfo::TileInfo(DataType dt) : _dt(dt), _shape({{1, 1}}) { } -TileInfo::TileInfo(DataType dt, int32_t w) - : _dt(dt), _shape({ { w, 1 } }) +TileInfo::TileInfo(DataType dt, int32_t w) : _dt(dt), _shape({{w, 1}}) { } -TileInfo::TileInfo(DataType dt, int32_t h, int32_t w) - : _dt(dt), _shape({ { w, h } }) +TileInfo::TileInfo(DataType dt, int32_t h, int32_t w) : _dt(dt), _shape({{w, h}}) { } diff --git a/compute_kernel_writer/prototype/src/TileOperand.cpp b/compute_kernel_writer/prototype/src/TileOperand.cpp index 0eb2ca6a64..e09c833d96 100644 --- a/compute_kernel_writer/prototype/src/TileOperand.cpp +++ b/compute_kernel_writer/prototype/src/TileOperand.cpp @@ -23,47 +23,43 @@ */ #include "ckw/TileOperand.h" + #include "ckw/Error.h" + #include "src/Prototype.h" namespace ckw { TileOperand::TileOperand(const std::string &name, const TileInfo &info) - : OperandBase(name), - _info(info), - _value{ std::vector<std::string>{ "0" } }, - _constant(false) + : OperandBase(name), _info(info), _value{std::vector<std::string>{"0"}}, _constant(false) { } TileOperand::TileOperand(const std::string &name, DataType data_type) - : OperandBase(name), - _info(TileInfo{ data_type }), - _value{ std::vector<std::string>{ "0" } }, - _constant(false) + : OperandBase(name), _info(TileInfo{data_type}), _value{std::vector<std::string>{"0"}}, _constant(false) { } TileOperand::TileOperand(const std::string &name, int32_t value) : OperandBase(name), - _info(TileInfo{ DataType::Int32 }), - _value{ std::vector<std::string>{ std::to_string(value) } }, + _info(TileInfo{DataType::Int32}), + _value{std::vector<std::string>{std::to_string(value)}}, _constant(true) { } TileOperand::TileOperand(const std::string &name, float value) : OperandBase(name), - _info(TileInfo{ DataType::Fp32 }), - _value{ std::vector<std::string>{ std::to_string(value) } }, + _info(TileInfo{DataType::Fp32}), + _value{std::vector<std::string>{std::to_string(value)}}, _constant(true) { } TileOperand::TileOperand(const std::string &name, const TileContainer &vals, DataType dt) : OperandBase(name), - _info(TileInfo{ dt, static_cast<int32_t>(vals.size()), static_cast<int32_t>(vals[0].size()) }), + _info(TileInfo{dt, static_cast<int32_t>(vals.size()), static_cast<int32_t>(vals[0].size())}), _value(vals), _constant(true) { @@ -73,11 +69,11 @@ prototype::Operand TileOperand::create_impl_operand(prototype::IGpuKernelWriter { CKW_UNUSED(writer); - if(_constant) + if (_constant) { - if(is_scalar()) + if (is_scalar()) { - switch(_info.data_type()) + switch (_info.data_type()) { case DataType::Int32: return prototype::Operand(_value[0][0], prototype::OperandType::ScalarInt32); diff --git a/compute_kernel_writer/src/Error.cpp b/compute_kernel_writer/src/Error.cpp index c5dae2eb75..e1e4bffcec 100644 --- a/compute_kernel_writer/src/Error.cpp +++ b/compute_kernel_writer/src/Error.cpp @@ -28,8 +28,8 @@ namespace ckw { -std::string create_error_msg(const std::string &file, const std::string &func, const std::string &line, - const std::string &msg) +std::string +create_error_msg(const std::string &file, const std::string &func, const std::string &line, const std::string &msg) { std::string err; err += "[COMPUTE_KERNEL_WRITER][ERROR]:"; err += " " + msg; @@ -38,4 +38,4 @@ std::string create_error_msg(const std::string &file, const std::string &func, c err += " " + msg; return err; } -} // namespace ckw \ No newline at end of file +} // namespace ckw diff --git a/compute_kernel_writer/src/Helpers.cpp b/compute_kernel_writer/src/Helpers.cpp index 799f79a187..82d4c4e917 100644 --- a/compute_kernel_writer/src/Helpers.cpp +++ b/compute_kernel_writer/src/Helpers.cpp @@ -22,15 +22,15 @@ * SOFTWARE.
*/ -#include "ckw/Error.h" - #include "src/Helpers.h" +#include "ckw/Error.h" + namespace ckw { std::string dec_to_hex_as_string(int32_t dec) { - switch(dec) + switch (dec) { case 0: case 1: diff --git a/compute_kernel_writer/src/ITensorArgument.h b/compute_kernel_writer/src/ITensorArgument.h index 838bd40f85..ece45a4dc4 100644 --- a/compute_kernel_writer/src/ITensorArgument.h +++ b/compute_kernel_writer/src/ITensorArgument.h @@ -28,6 +28,7 @@ #include "ckw/TensorInfo.h" #include "ckw/types/TensorComponentType.h" #include "ckw/types/TensorStorageType.h" + #include "src/ITile.h" #include @@ -41,8 +42,8 @@ class ITensorComponent; /** Tensor storage variable */ struct TensorStorageVariable { - std::string val{ "" }; /** Tensor storage as a string */ - TensorStorageType type{ TensorStorageType::Unknown }; /** Tensor storage type */ + std::string val{""}; /** Tensor storage as a string */ + TensorStorageType type{TensorStorageType::Unknown}; /** Tensor storage type */ }; /** Tensor argument base class. @@ -83,8 +84,8 @@ public: } protected: - TensorInfo _info{}; // Tensor info - std::string _basename{ "" }; // Tensor name + TensorInfo _info{}; // Tensor info + std::string _basename{""}; // Tensor name }; /** Tensor component argument base class */ diff --git a/compute_kernel_writer/src/ITensorComponent.h b/compute_kernel_writer/src/ITensorComponent.h index e2775b62b0..f9c9d8fd81 100644 --- a/compute_kernel_writer/src/ITensorComponent.h +++ b/compute_kernel_writer/src/ITensorComponent.h @@ -26,6 +26,7 @@ #define CKW_SRC_ITENSORCOMPONENT_H #include "ckw/types/TensorComponentType.h" + #include "src/ITile.h" namespace ckw diff --git a/compute_kernel_writer/src/ITile.h b/compute_kernel_writer/src/ITile.h index 73b7315fb5..8eaac5ac12 100644 --- a/compute_kernel_writer/src/ITile.h +++ b/compute_kernel_writer/src/ITile.h @@ -37,15 +37,15 @@ using TileContainer = std::vector>; /** Tile descriptor which reports the underlying datatype and vector length */ struct TileVariableDescriptor { - DataType dt{ DataType::Unknown }; /** Data type */ - int32_t len{ 1 }; /** Number of elements in a single variable. For example, 1 for scalar */ + DataType dt{DataType::Unknown}; /** Data type */ + int32_t len{1}; /** Number of elements in a single variable. For example, 1 for scalar */ }; /** Tile variable */ struct TileVariable { - std::string str{ "" }; /** Tile variable as a string */ - TileVariableDescriptor desc{}; /** Tile value descriptor which reports the datatype and vector length */ + std::string str{""}; /** Tile variable as a string */ + TileVariableDescriptor desc{}; /** Tile value descriptor which reports the datatype and vector length */ }; /** Interface to provide support for scalar access for a Tile. 
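The public entry points reformatted in the surrounding hunks compose as follows; a minimal, uncompiled sketch of driving the writer (the architecture enum value and the declare_tile/emit_kernel signatures are assumed from context elsewhere in this patch):

    #include "ckw/KernelWriter.h"
    #include "ckw/TileInfo.h"
    #include "ckw/types/DataType.h"
    #include "ckw/types/Operators.h"
    #include "ckw/types/TargetArchitecture.h"
    #include "ckw/types/TargetLanguage.h"

    void emit_add_kernel()
    {
        // The architecture argument is currently unused by create_instance
        // (see CKW_UNUSED(architecture) in KernelWriter.cpp below); the enum
        // value here is an assumption.
        auto writer = ckw::KernelWriter::create_instance(ckw::TargetArchitecture::GpuArmMaliValhall,
                                                         ckw::TargetLanguage::OpenCL);

        // Three 1x4 fp32 tiles; TileInfo(dt, h, w) as defined in TileInfo.cpp.
        const ckw::TileInfo info(ckw::DataType::Fp32, 1, 4);
        auto lhs = writer->declare_tile("lhs", info);
        auto rhs = writer->declare_tile("rhs", info);
        auto dst = writer->declare_tile("dst", info);

        // Emits one OpenCL vector statement per tile row: "dst = lhs + rhs;".
        writer->op_binary(dst, ckw::BinaryOp::Add, lhs, rhs);

        auto kernel = writer->emit_kernel("ckw_add_kernel");
    }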
diff --git a/compute_kernel_writer/src/Kernel.cpp b/compute_kernel_writer/src/Kernel.cpp index bfb0f46300..12389b3816 100644 --- a/compute_kernel_writer/src/Kernel.cpp +++ b/compute_kernel_writer/src/Kernel.cpp @@ -23,6 +23,7 @@ */ #include "ckw/Kernel.h" + #include "ckw/types/TargetLanguage.h" namespace ckw diff --git a/compute_kernel_writer/src/KernelArgument.cpp b/compute_kernel_writer/src/KernelArgument.cpp index a31ca1757b..a640d36507 100644 --- a/compute_kernel_writer/src/KernelArgument.cpp +++ b/compute_kernel_writer/src/KernelArgument.cpp @@ -23,6 +23,7 @@ */ #include "ckw/KernelArgument.h" + #include "ckw/Error.h" namespace ckw diff --git a/compute_kernel_writer/src/KernelWriter.cpp b/compute_kernel_writer/src/KernelWriter.cpp index 0bea1200b7..a478231c09 100644 --- a/compute_kernel_writer/src/KernelWriter.cpp +++ b/compute_kernel_writer/src/KernelWriter.cpp @@ -23,14 +23,16 @@ */ #include "ckw/KernelWriter.h" + #include "ckw/Error.h" #include "ckw/TileOperand.h" #include "ckw/types/TargetArchitecture.h" #include "ckw/types/TargetLanguage.h" -#include "src/TileView.h" + #include "src/cl/CLKernelWriter.h" #include "src/cl/CLTensorArgument.h" #include "src/cl/CLTile.h" +#include "src/TileView.h" #include <memory> @@ -42,7 +44,7 @@ KernelWriter::~KernelWriter() = default; std::unique_ptr<KernelWriter> KernelWriter::create_instance(TargetArchitecture architecture, TargetLanguage language) { CKW_UNUSED(architecture); - switch(language) + switch (language) { case TargetLanguage::OpenCL: // Currently this is the oldest and the only supported GPU architecture. @@ -95,7 +97,7 @@ TileOperand KernelWriter::create_tile_operand(ITile &tile) std::tuple<ITile &, TileArea> KernelWriter::get_tile(const TileOperand &operand) { - return { *operand._tile, { operand._row_start, operand._row_end, operand._col_start, operand._col_end } }; + return {*operand._tile, {operand._row_start, operand._row_end, operand._col_start, operand._col_end}}; } TensorOperand KernelWriter::create_tensor_operand(ITensor &tensor) diff --git a/compute_kernel_writer/src/Tensor3dMapper.cpp b/compute_kernel_writer/src/Tensor3dMapper.cpp index 7384b924da..acef6412a4 100644 --- a/compute_kernel_writer/src/Tensor3dMapper.cpp +++ b/compute_kernel_writer/src/Tensor3dMapper.cpp @@ -26,19 +26,19 @@ #include "ckw/Error.h" #include "ckw/types/TensorSamplerTypes.h" + #include "src/ITensor.h" #include "src/ITile.h" namespace ckw { -Tensor3dMapper::Tensor3dMapper(ITensor *tensor, TensorSamplerFormat format) - : _tensor(tensor), _format(format) +Tensor3dMapper::Tensor3dMapper(ITensor *tensor, TensorSamplerFormat format) : _tensor(tensor), _format(format) { } TileVariable Tensor3dMapper::dim_x() const { - switch(_format) + switch (_format) { case TensorSamplerFormat::Dim0_Dim1xDim2_1: case TensorSamplerFormat::Dim0_Dim1_Dim2: @@ -51,7 +51,7 @@ TileVariable Tensor3dMapper::dim_x() const TileVariable Tensor3dMapper::dim_y() const { - switch(_format) + switch (_format) { case TensorSamplerFormat::Dim0_Dim1xDim2_1: return _tensor->component(TensorComponentType::Dim1xDim2).scalar(0, 0); @@ -67,10 +67,10 @@ TileVariable Tensor3dMapper::dim_z() const { TileVariable dim_one; - switch(_format) + switch (_format) { case TensorSamplerFormat::Dim0_Dim1xDim2_1: - dim_one = _tensor->component(TensorComponentType::Dim3).scalar(0, 0); + dim_one = _tensor->component(TensorComponentType::Dim3).scalar(0, 0); dim_one.str = "1"; return dim_one; case TensorSamplerFormat::Dim0_Dim1_Dim2: @@ -85,7 +85,7 @@ TileVariable Tensor3dMapper::dim_batch() const { TileVariable dim_one; - switch(_format) + switch
(_format) { case TensorSamplerFormat::Dim0_Dim1xDim2_1: case TensorSamplerFormat::Dim0_Dim1_Dim2: @@ -98,7 +98,7 @@ TileVariable Tensor3dMapper::dim_batch() const TileVariable Tensor3dMapper::stride_x() const { - switch(_format) + switch (_format) { case TensorSamplerFormat::Dim0_Dim1xDim2_1: case TensorSamplerFormat::Dim0_Dim1_Dim2: @@ -111,7 +111,7 @@ TileVariable Tensor3dMapper::stride_x() const TileVariable Tensor3dMapper::stride_y() const { - switch(_format) + switch (_format) { case TensorSamplerFormat::Dim0_Dim1xDim2_1: case TensorSamplerFormat::Dim0_Dim1_Dim2: @@ -126,10 +126,10 @@ TileVariable Tensor3dMapper::stride_z() const { TileVariable stride_zero; - switch(_format) + switch (_format) { case TensorSamplerFormat::Dim0_Dim1xDim2_1: - stride_zero = _tensor->component(TensorComponentType::Stride3).scalar(0, 0); + stride_zero = _tensor->component(TensorComponentType::Stride3).scalar(0, 0); stride_zero.str = "0"; return stride_zero; case TensorSamplerFormat::Dim0_Dim1_Dim2: @@ -142,7 +142,7 @@ TileVariable Tensor3dMapper::stride_z() const TileVariable Tensor3dMapper::stride_batch() const { - switch(_format) + switch (_format) { case TensorSamplerFormat::Dim0_Dim1xDim2_1: case TensorSamplerFormat::Dim0_Dim1_Dim2: @@ -152,4 +152,4 @@ TileVariable Tensor3dMapper::stride_batch() const return _tensor->component(TensorComponentType::Unknown).scalar(0, 0); } } -} // namespace ckw \ No newline at end of file +} // namespace ckw diff --git a/compute_kernel_writer/src/Tensor3dMapper.h b/compute_kernel_writer/src/Tensor3dMapper.h index fa68ac2d15..e94b595193 100644 --- a/compute_kernel_writer/src/Tensor3dMapper.h +++ b/compute_kernel_writer/src/Tensor3dMapper.h @@ -74,8 +74,8 @@ public: TileVariable stride_batch() const; private: - ITensor *_tensor; - TensorSamplerFormat _format; + ITensor *_tensor; + TensorSamplerFormat _format; }; } // namespace ckw diff --git a/compute_kernel_writer/src/TensorOperand.cpp b/compute_kernel_writer/src/TensorOperand.cpp index 5ad24c6276..bf11d0d332 100644 --- a/compute_kernel_writer/src/TensorOperand.cpp +++ b/compute_kernel_writer/src/TensorOperand.cpp @@ -23,13 +23,13 @@ */ #include "ckw/TensorOperand.h" + #include "src/ITensor.h" namespace ckw { -TensorOperand::TensorOperand(ITensor &tensor) - : _tensor(tensor) +TensorOperand::TensorOperand(ITensor &tensor) : _tensor(tensor) { } @@ -108,4 +108,4 @@ TileOperand TensorOperand::offset_first_element_in_bytes() return TileOperand(_tensor.component(TensorComponentType::OffsetFirstElement)); } -} // namespace ckw \ No newline at end of file +} // namespace ckw diff --git a/compute_kernel_writer/src/TensorSampler.cpp b/compute_kernel_writer/src/TensorSampler.cpp index 2ee8df4bca..91d5af2fd0 100644 --- a/compute_kernel_writer/src/TensorSampler.cpp +++ b/compute_kernel_writer/src/TensorSampler.cpp @@ -32,7 +32,11 @@ TensorSampler::TensorSampler(TensorStorageType storage, TensorSamplerAddressModeX address_mode_x, TensorSamplerAddressModeY address_mode_y, TensorSamplerAddressModeZ address_mode_z) - : _storage(storage), _format(format), _address_mode_x(address_mode_x), _address_mode_y(address_mode_y), _address_mode_z(address_mode_z) + : _storage(storage), + _format(format), + _address_mode_x(address_mode_x), + _address_mode_y(address_mode_y), + _address_mode_z(address_mode_z) { } diff --git a/compute_kernel_writer/src/TensorUtils.cpp b/compute_kernel_writer/src/TensorUtils.cpp index 24836092d4..17fc9547ae 100644 --- a/compute_kernel_writer/src/TensorUtils.cpp +++ b/compute_kernel_writer/src/TensorUtils.cpp @@ -23,6 
+23,7 @@ */ #include "src/TensorUtils.h" + #include "ckw/Error.h" #include "ckw/TensorInfo.h" #include "ckw/types/TensorComponentType.h" @@ -31,10 +32,10 @@ namespace ckw { TensorComponentType get_tensor_dimension(TensorDataLayout layout, TensorDataLayoutComponent component) { - switch(layout) + switch (layout) { case TensorDataLayout::Nhwc: - switch(component) + switch (component) { case TensorDataLayoutComponent::C: return TensorComponentType::Dim0; @@ -49,7 +50,7 @@ TensorComponentType get_tensor_dimension(TensorDataLayout layout, TensorDataLayo return TensorComponentType::Unknown; } case TensorDataLayout::Ndhwc: - switch(component) + switch (component) { case TensorDataLayoutComponent::C: return TensorComponentType::Dim0; @@ -73,10 +74,10 @@ TensorComponentType get_tensor_dimension(TensorDataLayout layout, TensorDataLayo TensorComponentType get_tensor_stride(TensorDataLayout layout, TensorDataLayoutComponent component) { - switch(layout) + switch (layout) { case TensorDataLayout::Nhwc: - switch(component) + switch (component) { case TensorDataLayoutComponent::C: return TensorComponentType::Stride0; @@ -91,7 +92,7 @@ TensorComponentType get_tensor_stride(TensorDataLayout layout, TensorDataLayoutC return TensorComponentType::Unknown; } case TensorDataLayout::Ndhwc: - switch(component) + switch (component) { case TensorDataLayoutComponent::C: return TensorComponentType::Stride0; diff --git a/compute_kernel_writer/src/TileInfo.cpp b/compute_kernel_writer/src/TileInfo.cpp index 66d8cb1620..273266eedc 100644 --- a/compute_kernel_writer/src/TileInfo.cpp +++ b/compute_kernel_writer/src/TileInfo.cpp @@ -26,18 +26,15 @@ namespace ckw { -TileInfo::TileInfo(DataType dt) - : _dt(dt), _shape({ { 1, 1 } }) +TileInfo::TileInfo(DataType dt) : _dt(dt), _shape({{1, 1}}) { } -TileInfo::TileInfo(DataType dt, int32_t w) - : _dt(dt), _shape({ { w, 1 } }) +TileInfo::TileInfo(DataType dt, int32_t w) : _dt(dt), _shape({{w, 1}}) { } -TileInfo::TileInfo(DataType dt, int32_t h, int32_t w) - : _dt(dt), _shape({ { w, h } }) +TileInfo::TileInfo(DataType dt, int32_t h, int32_t w) : _dt(dt), _shape({{w, h}}) { } diff --git a/compute_kernel_writer/src/TileOperand.cpp b/compute_kernel_writer/src/TileOperand.cpp index 3dfa2b8b2b..865ef85a13 100644 --- a/compute_kernel_writer/src/TileOperand.cpp +++ b/compute_kernel_writer/src/TileOperand.cpp @@ -23,7 +23,9 @@ */ #include "ckw/TileOperand.h" + #include "ckw/Error.h" + #include "src/ITile.h" namespace ckw @@ -34,7 +36,8 @@ TileOperand::TileOperand(ITile &tile) { } -TileOperand::TileOperand(const TileOperand &operand, int32_t row_start, int32_t row_end, int32_t col_start, int32_t col_end) +TileOperand::TileOperand( + const TileOperand &operand, int32_t row_start, int32_t row_end, int32_t col_start, int32_t col_end) : _tile(operand._tile), _row_start(row_start), _row_end(row_end), _col_start(col_start), _col_end(col_end) { CKW_ASSERT(row_start >= 0 && row_start < _tile->info().height()); @@ -50,7 +53,8 @@ TileOperand TileOperand::tile(int32_t row_start, int32_t row_end, int32_t col_st CKW_ASSERT(col_start >= 0 && _col_start + col_start < _col_end); CKW_ASSERT(col_end > col_start && _col_start + col_end <= _col_end); - return TileOperand(*this, _row_start + row_start, _row_start + row_end, _col_start + col_start, _col_start + col_end); + return TileOperand(*this, _row_start + row_start, _row_start + row_end, _col_start + col_start, + _col_start + col_end); } TileOperand TileOperand::row(int32_t row) const diff --git a/compute_kernel_writer/src/TileView.h 
b/compute_kernel_writer/src/TileView.h index e0d034fa8d..50ae66b389 100644 --- a/compute_kernel_writer/src/TileView.h +++ b/compute_kernel_writer/src/TileView.h @@ -27,6 +27,7 @@ #include "ckw/Error.h" #include "ckw/types/DataType.h" + #include "src/ITile.h" #include @@ -81,8 +82,7 @@ public: * * @param[in] tile The tile object. */ - TileView(const T &tile) - : _tile(&tile), _area(0, tile.info().height(), 0, tile.info().width()) + TileView(const T &tile) : _tile(&tile), _area(0, tile.info().height(), 0, tile.info().width()) { } @@ -91,8 +91,7 @@ public: * @param[in] tile The tile object. * @param[in] area The rectangular active area. */ - TileView(const T &tile, const TileArea &area) - : _tile(&tile), _area(area) + TileView(const T &tile, const TileArea &area) : _tile(&tile), _area(area) { } @@ -176,7 +175,8 @@ public: /** Get whether the tile view refers to the whole tile. */ bool is_full_tile() const { - return row_start() == 0 && row_end() == _tile->info().height() && col_start() == 0 && col_end() == _tile->info().width(); + return row_start() == 0 && row_end() == _tile->info().height() && col_start() == 0 && + col_end() == _tile->info().width(); } private: diff --git a/compute_kernel_writer/src/cl/CLHelpers.cpp b/compute_kernel_writer/src/cl/CLHelpers.cpp index ff4408b1a3..8e4a932764 100644 --- a/compute_kernel_writer/src/cl/CLHelpers.cpp +++ b/compute_kernel_writer/src/cl/CLHelpers.cpp @@ -28,6 +28,7 @@ #include "ckw/types/DataType.h" #include "ckw/types/Operators.h" #include "ckw/types/TensorStorageType.h" + #include "src/types/DataTypeHelpers.h" namespace ckw @@ -35,7 +36,7 @@ namespace ckw bool cl_validate_vector_length(int32_t len) { bool valid_vector_length = true; - if(len < 1 || len > 16 || (len > 4 && len < 8) || (len > 8 && len < 16)) + if (len < 1 || len > 16 || (len > 4 && len < 8) || (len > 8 && len < 16)) { valid_vector_length = false; } @@ -44,14 +45,14 @@ bool cl_validate_vector_length(int32_t len) std::string cl_get_variable_datatype_as_string(DataType dt, int32_t len) { - if(cl_validate_vector_length(len) == false) + if (cl_validate_vector_length(len) == false) { CKW_THROW_MSG("Unsupported vector length"); return ""; } std::string res; - switch(dt) + switch (dt) { case DataType::Fp32: res += "float"; @@ -85,7 +86,7 @@ std::string cl_get_variable_datatype_as_string(DataType dt, int32_t len) return ""; } - if(len > 1) + if (len > 1) { res += std::to_string(len); } @@ -95,7 +96,7 @@ std::string cl_get_variable_datatype_as_string(DataType dt, int32_t len) int32_t cl_round_up_to_nearest_valid_vector_width(int32_t width) { - switch(width) + switch (width) { case 1: return 1; @@ -128,7 +129,7 @@ int32_t cl_round_up_to_nearest_valid_vector_width(int32_t width) std::string cl_get_variable_storagetype_as_string(TensorStorageType storage) { std::string res; - switch(storage) + switch (storage) { case TensorStorageType::BufferUint8Ptr: res += "__global uchar*"; @@ -148,7 +149,7 @@ std::string cl_get_variable_storagetype_as_string(TensorStorageType storage) std::string cl_get_assignment_op_as_string(AssignmentOp op) { - switch(op) + switch (op) { case AssignmentOp::Increment: return "+="; @@ -163,34 +164,34 @@ std::string cl_get_assignment_op_as_string(AssignmentOp op) std::tuple cl_get_unary_op(UnaryOp op) { - switch(op) + switch (op) { case UnaryOp::LogicalNot: - return { false, "!" 
}; + return {false, "!"}; case UnaryOp::BitwiseNot: - return { false, "~" }; + return {false, "~"}; case UnaryOp::Exp: - return { true, "exp" }; + return {true, "exp"}; case UnaryOp::Tanh: - return { true, "tanh" }; + return {true, "tanh"}; case UnaryOp::Sqrt: - return { true, "sqrt" }; + return {true, "sqrt"}; case UnaryOp::Erf: - return { true, "erf" }; + return {true, "erf"}; case UnaryOp::Fabs: - return { true, "fabs" }; + return {true, "fabs"}; case UnaryOp::Log: - return { true, "log" }; + return {true, "log"}; case UnaryOp::Round: - return { true, "round" }; + return {true, "round"}; default: CKW_THROW_MSG("Unsupported unary operation!"); @@ -201,52 +202,52 @@ std::tuple<bool, std::string> cl_get_binary_op(BinaryOp op, DataType data_type) { const auto is_float = is_data_type_float(data_type); - switch(op) + switch (op) { case BinaryOp::Add: - return { false, "+" }; + return {false, "+"}; case BinaryOp::Sub: - return { false, "-" }; + return {false, "-"}; case BinaryOp::Mul: - return { false, "*" }; + return {false, "*"}; case BinaryOp::Div: - return { false, "/" }; + return {false, "/"}; case BinaryOp::Mod: - return { false, "%" }; + return {false, "%"}; case BinaryOp::Equal: - return { false, "==" }; + return {false, "=="}; case BinaryOp::Less: - return { false, "<" }; + return {false, "<"}; case BinaryOp::LessEqual: - return { false, "<=" }; + return {false, "<="}; case BinaryOp::Greater: - return { false, ">" }; + return {false, ">"}; case BinaryOp::GreaterEqual: - return { false, ">=" }; + return {false, ">="}; case BinaryOp::LogicalAnd: - return { false, "&&" }; + return {false, "&&"}; case BinaryOp::LogicalOr: - return { false, "||" }; + return {false, "||"}; case BinaryOp::BitwiseXOR: - return { false, "^" }; + return {false, "^"}; case BinaryOp::Min: - return { true, is_float ? "fmin" : "min" }; + return {true, is_float ? "fmin" : "min"}; case BinaryOp::Max: - return { true, is_float ?
"fmax" : "max"}; default: CKW_THROW_MSG("Unsupported binary operator/function!"); @@ -255,13 +256,13 @@ std::tuple cl_get_binary_op(BinaryOp op, DataType data_type) std::tuple cl_get_ternary_op(TernaryOp op) { - switch(op) + switch (op) { case TernaryOp::Select: - return { true, "select" }; + return {true, "select"}; case TernaryOp::Clamp: - return { true, "clamp" }; + return {true, "clamp"}; default: CKW_THROW_MSG("Unsupported ternary function!"); @@ -273,7 +274,7 @@ std::string cl_data_type_rounded_up_to_valid_vector_width(DataType dt, int32_t w std::string data_type; const int32_t w = cl_round_up_to_nearest_valid_vector_width(width); data_type += cl_get_variable_datatype_as_string(dt, 1); - if(w != 1) + if (w != 1) { data_type += std::to_string(w); } @@ -284,7 +285,7 @@ std::vector cl_decompose_vector_width(int32_t vector_width) { std::vector x; - switch(vector_width) + switch (vector_width) { case 0: break; diff --git a/compute_kernel_writer/src/cl/CLKernelWriter.cpp b/compute_kernel_writer/src/cl/CLKernelWriter.cpp index 2db9c139b7..62e6853a7a 100644 --- a/compute_kernel_writer/src/cl/CLKernelWriter.cpp +++ b/compute_kernel_writer/src/cl/CLKernelWriter.cpp @@ -31,14 +31,15 @@ #include "ckw/types/DataType.h" #include "ckw/types/MemoryOperation.h" #include "ckw/types/TargetLanguage.h" -#include "src/ITensorComponent.h" -#include "src/TileView.h" + #include "src/cl/CLHelpers.h" #include "src/cl/CLTensorArgument.h" #include "src/cl/CLTile.h" #include "src/cl/helpers/CLMemoryOpBufferHelper.h" #include "src/cl/helpers/CLMemoryOpImage2dHelper.h" #include "src/cl/helpers/ICLMemoryOpHelper.h" +#include "src/ITensorComponent.h" +#include "src/TileView.h" #include "src/types/DataTypeHelpers.h" #include @@ -63,14 +64,14 @@ std::unique_ptr CLKernelWriter::emit_kernel(const std::string &name) // Create the list of arguments. std::vector arguments; - for(const auto &tensor : _tensors) + for (const auto &tensor : _tensors) { const auto tensor_id = tensor->info().id(); const auto storages = tensor->storages(); const auto components = tensor->components(); - for(const auto &storage : storages) + for (const auto &storage : storages) { code += cl_get_variable_storagetype_as_string(storage.type); code += " "; @@ -80,7 +81,7 @@ std::unique_ptr CLKernelWriter::emit_kernel(const std::string &name) arguments.emplace_back(tensor_id, storage.type); } - for(const auto &component : components) + for (const auto &component : components) { const auto &tile = component->tile(); const auto &tile_info = tile.info(); @@ -96,7 +97,7 @@ std::unique_ptr CLKernelWriter::emit_kernel(const std::string &name) } } - if(code.size() >= 2 && code[code.size() - 2] == ',' && code[code.size() - 1] == '\n') + if (code.size() >= 2 && code[code.size() - 2] == ',' && code[code.size() - 1] == '\n') { // Remove the last comma in the argument list. code.pop_back(); @@ -127,11 +128,12 @@ void CLKernelWriter::op_assign(const TileOperand &dst, const TileOperand &src) const std::string src_prefix = broadcast_src_x ? 
"(" + data_type_str + ")" : ""; CKW_ASSERT_MSG(src_view.data_type() == dst_view.data_type(), "Source and destination type must match."); - CKW_ASSERT_MSG(src_view.height() == dst_h || src_view.height() == 1, "Tile height must match or source is broadcasting in y dimension."); + CKW_ASSERT_MSG(src_view.height() == dst_h || src_view.height() == 1, + "Tile height must match or source is broadcasting in y dimension."); CKW_ASSERT_MSG(src_w == dst_w || src_w == 1, "Tile width must match or source is broadcasting in x dimension."); // Broadcasting on y dimension is automatic (see CLTile::vector). - for(int32_t y = 0; y < dst_h; ++y) + for (int32_t y = 0; y < dst_h; ++y) { append_code(dst_view.vector(y).str, " = ", src_prefix, src_view.vector(y).str, ";\n"); } @@ -158,13 +160,15 @@ void CLKernelWriter::op_cast(const TileOperand &dst, const TileOperand &src, Con const std::string prefix = broadcast_x ? "(" + dst_type_str + ")" : ""; CKW_ASSERT_MSG(src_view.data_type() != dst_view.data_type(), "Source and destination type must be different."); - CKW_ASSERT_MSG(src_view.height() == dst_h || src_view.height() == 1, "Tile height must match or source is broadcasting in y dimension."); + CKW_ASSERT_MSG(src_view.height() == dst_h || src_view.height() == 1, + "Tile height must match or source is broadcasting in y dimension."); CKW_ASSERT_MSG(src_w == dst_w || src_w == 1, "Tile width must match or source is broadcasting in x dimension."); // Broadcasting on y dimension is automatic (see CLTile::vector). - for(int32_t y = 0; y < dst_h; ++y) + for (int32_t y = 0; y < dst_h; ++y) { - append_code(dst_view.vector(y).str, " = ", prefix, "convert_", convert_type_str, sat, "(", src_view.vector(y).str, ");\n"); + append_code(dst_view.vector(y).str, " = ", prefix, "convert_", convert_type_str, sat, "(", + src_view.vector(y).str, ");\n"); } } @@ -189,11 +193,12 @@ void CLKernelWriter::op_unary(const TileOperand &dst, UnaryOp op, const TileOper const auto op_suffix = op_is_func ? ")" : ""; CKW_ASSERT_MSG(src_view.data_type() == dst_view.data_type(), "Source and destination type must match."); - CKW_ASSERT_MSG(src_view.height() == dst_h || src_view.height() == 1, "Tile height must match or source is broadcasting in y dimension."); + CKW_ASSERT_MSG(src_view.height() == dst_h || src_view.height() == 1, + "Tile height must match or source is broadcasting in y dimension."); CKW_ASSERT_MSG(src_w == dst_w || src_w == 1, "Tile width must match or source is broadcasting in x dimension."); // Broadcasting on y dimension is automatic (see CLTile::vector). 
- for(int32_t y = 0; y < dst_h; ++y) + for (int32_t y = 0; y < dst_h; ++y) { append_code(dst_view.vector(y).str, " = ", src_prefix, op_prefix, src_view.vector(y).str, op_suffix, ";\n"); } @@ -214,27 +219,28 @@ void CLKernelWriter::op_binary(const TileOperand &dst, BinaryOp op, const TileOp CKW_ASSERT_MSG(lhs_view.data_type() == rhs_view.data_type(), "LHS and RHS type must match."); - CKW_ASSERT_MSG(lhs_view.height() == dst_h || lhs_view.height() == 1, "LHS tile height must match or source is broadcasting in y dimension."); - CKW_ASSERT_MSG(rhs_view.height() == dst_h || rhs_view.height() == 1, "RHS tile height must match or source is broadcasting in y dimension."); + CKW_ASSERT_MSG(lhs_view.height() == dst_h || lhs_view.height() == 1, + "LHS tile height must match or source is broadcasting in y dimension."); + CKW_ASSERT_MSG(rhs_view.height() == dst_h || rhs_view.height() == 1, + "RHS tile height must match or source is broadcasting in y dimension."); - CKW_ASSERT_MSG(lhs_w == dst_w || lhs_w == 1, "LHS tile width must match destination or LHS is broadcasting in x dimension."); - CKW_ASSERT_MSG(rhs_w == dst_w || rhs_w == 1, "RHS tile width must match destination or RHS is broadcasting in x dimension."); + CKW_ASSERT_MSG(lhs_w == dst_w || lhs_w == 1, + "LHS tile width must match destination or LHS is broadcasting in x dimension."); + CKW_ASSERT_MSG(rhs_w == dst_w || rhs_w == 1, + "RHS tile width must match destination or RHS is broadcasting in x dimension."); - if(op == BinaryOp::MatMul_Nt_T) + if (op == BinaryOp::MatMul_Nt_T) { CKW_ASSERT(is_data_type_float(data_type)); - for(int32_t y = 0; y < dst_h; ++y) + for (int32_t y = 0; y < dst_h; ++y) { - for(int32_t x = 0; x < dst_w; ++x) + for (int32_t x = 0; x < dst_w; ++x) { - for(int32_t k = 0; k < lhs_w; ++k) + for (int32_t k = 0; k < lhs_w; ++k) { - append_code( - dst_view.scalar(x, y).str, " = fma(", - lhs_view.scalar(k, y).str, ", ", - rhs_view.scalar(k, x).str, ", ", - dst_view.scalar(x, y).str, ");\n"); + append_code(dst_view.scalar(x, y).str, " = fma(", lhs_view.scalar(k, y).str, ", ", + rhs_view.scalar(k, x).str, ", ", dst_view.scalar(x, y).str, ");\n"); } } } @@ -258,14 +264,16 @@ void CLKernelWriter::op_binary(const TileOperand &dst, BinaryOp op, const TileOp const std::string op_suffix = op_is_func ? ");\n" : ";\n"; // Broadcasting on y dimension is automatic (see CLTile::vector). 
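// Two concrete shapes of the emitted code (illustrative variable names):
//
//   - MatMul_Nt_T (handled above) fully unrolls into scalar fused multiply-adds,
//     one per (x, y, k) triple:  dst_s00 = fma(lhs_s00, rhs_s00, dst_s00);
//
//   - the elementwise path below emits one vector statement per destination row,
//     with the cast prefixes carrying the x-broadcast, e.g. for Add on a 2x4 fp32
//     destination and a 1x1 rhs:
//       dst__0 = lhs__0 + (float4)rhs__0;
//       dst__1 = lhs__1 + (float4)rhs__0;   // y-broadcast via vector(y)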
- for(int32_t y = 0; y < dst_h; ++y) + for (int32_t y = 0; y < dst_h; ++y) { - append_code(dst_view.vector(y).str, op_prefix, lhs_prefix, lhs_view.vector(y).str, op_separator, rhs_prefix, rhs_view.vector(y).str, op_suffix); + append_code(dst_view.vector(y).str, op_prefix, lhs_prefix, lhs_view.vector(y).str, op_separator, rhs_prefix, + rhs_view.vector(y).str, op_suffix); } } } -void CLKernelWriter::op_ternary(const TileOperand &dst, TernaryOp op, const TileOperand &first, const TileOperand &second, const TileOperand &third) +void CLKernelWriter::op_ternary( + const TileOperand &dst, TernaryOp op, const TileOperand &first, const TileOperand &second, const TileOperand &third) { const auto dst_view = to_cl_tile_view(dst); const auto first_view = to_cl_tile_view(first); @@ -297,37 +305,42 @@ void CLKernelWriter::op_ternary(const TileOperand &dst, TernaryOp op, const Tile CKW_ASSERT_MSG(second_view.data_type() == dst_view.data_type(), "2nd source and destination type must match."); CKW_ASSERT_MSG(third_view.data_type() == dst_view.data_type(), "3rd source and destination type must match."); - CKW_ASSERT_MSG(first_view.height() == dst_h || first_view.height() == 1, "1st tile height must match or source is broadcasting in y dimension."); - CKW_ASSERT_MSG(second_view.height() == dst_h || second_view.height() == 1, "2nd tile height must match or source is broadcasting in y dimension."); - CKW_ASSERT_MSG(third_view.height() == dst_h || third_view.height() == 1, "3rd tile height must match or source is broadcasting in y dimension."); + CKW_ASSERT_MSG(first_view.height() == dst_h || first_view.height() == 1, + "1st tile height must match or source is broadcasting in y dimension."); + CKW_ASSERT_MSG(second_view.height() == dst_h || second_view.height() == 1, + "2nd tile height must match or source is broadcasting in y dimension."); + CKW_ASSERT_MSG(third_view.height() == dst_h || third_view.height() == 1, + "3rd tile height must match or source is broadcasting in y dimension."); - CKW_ASSERT_MSG(first_w == dst_w || first_w == 1, "1st tile width must match or source is broadcasting in x dimension."); - CKW_ASSERT_MSG(second_w == dst_w || second_w == 1, "2nd tile width must match or source is broadcasting in x dimension."); - CKW_ASSERT_MSG(third_w == dst_w || third_w == 1, "3rd tile width must match or source is broadcasting in x dimension."); + CKW_ASSERT_MSG(first_w == dst_w || first_w == 1, + "1st tile width must match or source is broadcasting in x dimension."); + CKW_ASSERT_MSG(second_w == dst_w || second_w == 1, + "2nd tile width must match or source is broadcasting in x dimension."); + CKW_ASSERT_MSG(third_w == dst_w || third_w == 1, + "3rd tile width must match or source is broadcasting in x dimension."); // Broadcasting on y dimension is automatic (see CLTile::vector). 
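// E.g. TernaryOp::Select over 1x4 fp16 tiles comes out as (names illustrative):
//
//   dst__0 = select((half4)first__0, (half4)second__0, (half4)third__0);
//
// with each cast prefix present only where a prefix was built for that source above.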
- for(int32_t y = 0; y < dst_h; ++y) + for (int32_t y = 0; y < dst_h; ++y) { - append_code( dst_view.vector(y).str, " = ", op_name, "(", - first_prefix, first_view.vector(y).str, ", ", - second_prefix, second_view.vector(y).str, ", ", - third_prefix, third_view.vector(y).str, ");\n"); + append_code(dst_view.vector(y).str, " = ", op_name, "(", first_prefix, first_view.vector(y).str, ", ", + second_prefix, second_view.vector(y).str, ", ", third_prefix, third_view.vector(y).str, ");\n"); } } -void CLKernelWriter::op_if_generic(const TileOperand &lhs, BinaryOp op, const TileOperand &rhs, const std::function<void()> &body, bool is_else_if) +void CLKernelWriter::op_if_generic( + const TileOperand &lhs, BinaryOp op, const TileOperand &rhs, const std::function<void()> &body, bool is_else_if) { const auto lhs_view = to_cl_tile_view(lhs); const auto rhs_view = to_cl_tile_view(rhs); const auto op_name = std::get<1>(cl_get_binary_op(op, lhs_view.data_type())); - CKW_ASSERT(op == BinaryOp::Less || op == BinaryOp::LessEqual || op == BinaryOp::Equal || op == BinaryOp::GreaterEqual || op == BinaryOp::Greater); + CKW_ASSERT(op == BinaryOp::Less || op == BinaryOp::LessEqual || op == BinaryOp::Equal || + op == BinaryOp::GreaterEqual || op == BinaryOp::Greater); CKW_ASSERT(lhs_view.is_scalar()); CKW_ASSERT(rhs_view.is_scalar()); - if(is_else_if) + if (is_else_if) { append_code("else "); } @@ -337,12 +350,18 @@ void CLKernelWriter::op_if_generic(const TileOperand &lhs, BinaryOp op, const Ti append_code("}\n"); } -void CLKernelWriter::op_if(const TileOperand &lhs, BinaryOp op, const TileOperand &rhs, const std::function<void()> &body) +void CLKernelWriter::op_if(const TileOperand &lhs, + BinaryOp op, + const TileOperand &rhs, + const std::function<void()> &body) { op_if_generic(lhs, op, rhs, body, false /* is_else_if */); } -void CLKernelWriter::op_else_if(const TileOperand &lhs, BinaryOp op, const TileOperand &rhs, const std::function<void()> &body) +void CLKernelWriter::op_else_if(const TileOperand &lhs, + BinaryOp op, + const TileOperand &rhs, + const std::function<void()> &body) { op_if_generic(lhs, op, rhs, body, true /* is_else_if */); } @@ -354,10 +373,13 @@ void CLKernelWriter::op_else(const std::function<void()> &body) append_code("}\n"); } -void CLKernelWriter::op_for_loop( - const TileOperand &var, BinaryOp cond_op, const TileOperand &cond_value, - const TileOperand &update_var, AssignmentOp update_op, const TileOperand &update_value, - const std::function<void()> &body) +void CLKernelWriter::op_for_loop(const TileOperand &var, + BinaryOp cond_op, + const TileOperand &cond_value, + const TileOperand &update_var, + AssignmentOp update_op, + const TileOperand &update_value, + const std::function<void()> &body) { const auto var_view = to_cl_tile_view(var); const auto cond_value_view = to_cl_tile_view(cond_value); @@ -373,11 +395,12 @@ void CLKernelWriter::op_for_loop( CKW_ASSERT(update_var_view.data_type() == update_value_view.data_type()); const auto cond_op_name = std::get<1>(cl_get_binary_op(cond_op, var_view.data_type())); - CKW_ASSERT(cond_op == BinaryOp::Less || cond_op == BinaryOp::LessEqual || cond_op == BinaryOp::Equal || cond_op == BinaryOp::GreaterEqual || cond_op == BinaryOp::Greater); + CKW_ASSERT(cond_op == BinaryOp::Less || cond_op == BinaryOp::LessEqual || cond_op == BinaryOp::Equal || + cond_op == BinaryOp::GreaterEqual || cond_op == BinaryOp::Greater); - append_code( - "for (; ", var_view.scalar(0, 0).str, " ", cond_op_name, " ", cond_value_view.scalar(0, 0).str, "; ", - update_var_view.scalar(0, 0).str, " ", cl_get_assignment_op_as_string(update_op), "
", update_value_view.scalar(0, 0).str, ")\n{\n"); + append_code("for (; ", var_view.scalar(0, 0).str, " ", cond_op_name, " ", cond_value_view.scalar(0, 0).str, "; ", + update_var_view.scalar(0, 0).str, " ", cl_get_assignment_op_as_string(update_op), " ", + update_value_view.scalar(0, 0).str, ")\n{\n"); write_body(body); append_code("}\n"); } @@ -404,7 +427,7 @@ void CLKernelWriter::op_print(const std::string &prefix, const std::vector 1) + if (width > 1) { row_format += "v" + std::to_string(width); } - switch(data_type) + switch (data_type) { case DataType::Fp32: row_format += "hlg"; @@ -452,7 +475,7 @@ void CLKernelWriter::op_print(const std::string &prefix, const std::vector 1) + if (width > 1) { row_format = "[" + row_format + "]"; } @@ -460,14 +483,14 @@ void CLKernelWriter::op_print(const std::string &prefix, const std::vector &e) - { - return e->name() == fullname; - }) - == _tiles.end(), - "There is already a tile with name: " + fullname); + CKW_ASSERT_MSG(std::find_if(_tiles.begin(), _tiles.end(), + [=](const std::unique_ptr &e) + { return e->name() == fullname; }) == _tiles.end(), + "There is already a tile with name: " + fullname); auto tile = std::make_unique(fullname, tile_info); - for(int32_t row = 0; row < height; ++row) + for (int32_t row = 0; row < height; ++row) { const std::string cl_type = cl_get_variable_datatype_as_string(data_type, width); append_code(cl_type, " ", tile->vector(row).str, ";\n"); @@ -578,40 +596,40 @@ TileView CLKernelWriter::to_cl_tile_view(const TileOperand &operand) con { bool found = false; - for(const auto &t : _tiles) + for (const auto &t : _tiles) { - if(&tile == t.get()) + if (&tile == t.get()) { found = true; break; } } - for(const auto &t : _constant_tiles) + for (const auto &t : _constant_tiles) { - if(&tile == t.get()) + if (&tile == t.get()) { found = true; break; } } - if(!found) + if (!found) { - for(const auto &t : _tensors) + for (const auto &t : _tensors) { const auto components = t->components(); - for(const auto component : components) + for (const auto component : components) { - if(&tile == &component->tile()) + if (&tile == &component->tile()) { found = true; break; } } - if(found) + if (found) { break; } @@ -622,66 +640,106 @@ TileView CLKernelWriter::to_cl_tile_view(const TileOperand &operand) con } #endif // COMPUTE_KERNEL_WRITER_ASSERTS_ENABLED - return { static_cast(tile), area }; + return {static_cast(tile), area}; } -void CLKernelWriter::op_load(const TileOperand &tile_op, const TensorOperand &tensor_op, TensorSampler &sampler, - const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch) +void CLKernelWriter::op_load(const TileOperand &tile_op, + const TensorOperand &tensor_op, + TensorSampler &sampler, + const TileOperand &x, + const TileOperand &y, + const TileOperand &z, + const TileOperand &batch) { - const CLTile dilation_x({ { "1" } }, DataType::Int32); - const CLTile dilation_y({ { "1" } }, DataType::Int32); + const CLTile dilation_x({{"1"}}, DataType::Int32); + const CLTile dilation_y({{"1"}}, DataType::Int32); - op_load_store(MemoryOperation::Load, tile_op, tensor_op, sampler, x, y, z, batch, dilation_x, dilation_y, false /* indirect buffer */); + op_load_store(MemoryOperation::Load, tile_op, tensor_op, sampler, x, y, z, batch, dilation_x, dilation_y, + false /* indirect buffer */); } -void CLKernelWriter::op_load_dilated(const TileOperand &tile_op, const TensorOperand &tensor_op, TensorSampler &sampler, - const TileOperand &x, const TileOperand &y, const TileOperand &z, const 
-                                     const TileOperand &dilation_x, const TileOperand &dilation_y)
+void CLKernelWriter::op_load_dilated(const TileOperand &tile_op,
+                                     const TensorOperand &tensor_op,
+                                     TensorSampler &sampler,
+                                     const TileOperand &x,
+                                     const TileOperand &y,
+                                     const TileOperand &z,
+                                     const TileOperand &batch,
+                                     const TileOperand &dilation_x,
+                                     const TileOperand &dilation_y)
 {
     const auto dil_x_view = to_cl_tile_view(dilation_x);
     const auto dil_y_view = to_cl_tile_view(dilation_y);
 
-    op_load_store(MemoryOperation::Load, tile_op, tensor_op, sampler, x, y, z, batch, dil_x_view, dil_y_view, false /* indirect buffer */);
+    op_load_store(MemoryOperation::Load, tile_op, tensor_op, sampler, x, y, z, batch, dil_x_view, dil_y_view,
+                  false /* indirect buffer */);
 }
 
-void CLKernelWriter::op_store(const TensorOperand &tensor_op, const TileOperand &tile_op, TensorSampler &sampler,
-                              const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch)
+void CLKernelWriter::op_store(const TensorOperand &tensor_op,
+                              const TileOperand &tile_op,
+                              TensorSampler &sampler,
+                              const TileOperand &x,
+                              const TileOperand &y,
+                              const TileOperand &z,
+                              const TileOperand &batch)
 {
-    const CLTile dilation_x({ { "1" } }, DataType::Int32);
-    const CLTile dilation_y({ { "1" } }, DataType::Int32);
+    const CLTile dilation_x({{"1"}}, DataType::Int32);
+    const CLTile dilation_y({{"1"}}, DataType::Int32);
 
-    op_load_store(MemoryOperation::Store, tile_op, tensor_op, sampler, x, y, z, batch, dilation_x, dilation_y, false /* indirect buffer */);
+    op_load_store(MemoryOperation::Store, tile_op, tensor_op, sampler, x, y, z, batch, dilation_x, dilation_y,
+                  false /* indirect buffer */);
 }
 
-void CLKernelWriter::op_store_dilated(const TensorOperand &tensor_op, const TileOperand &tile_op, TensorSampler &sampler,
-                                      const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch,
-                                      const TileOperand &dilation_x, const TileOperand &dilation_y)
+void CLKernelWriter::op_store_dilated(const TensorOperand &tensor_op,
+                                      const TileOperand &tile_op,
+                                      TensorSampler &sampler,
+                                      const TileOperand &x,
+                                      const TileOperand &y,
+                                      const TileOperand &z,
+                                      const TileOperand &batch,
+                                      const TileOperand &dilation_x,
+                                      const TileOperand &dilation_y)
 {
     const auto dil_x_view = to_cl_tile_view(dilation_x);
     const auto dil_y_view = to_cl_tile_view(dilation_y);
 
-    op_load_store(MemoryOperation::Store, tile_op, tensor_op, sampler, x, y, z, batch, dil_x_view, dil_y_view, false /* indirect buffer */);
+    op_load_store(MemoryOperation::Store, tile_op, tensor_op, sampler, x, y, z, batch, dil_x_view, dil_y_view,
+                  false /* indirect buffer */);
 }
 
-void CLKernelWriter::op_load_indirect(const TileOperand &tile_op, const TensorOperand &tensor_op, TensorSampler &sampler,
-                                      const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch)
+void CLKernelWriter::op_load_indirect(const TileOperand &tile_op,
+                                      const TensorOperand &tensor_op,
+                                      TensorSampler &sampler,
+                                      const TileOperand &x,
+                                      const TileOperand &y,
+                                      const TileOperand &z,
+                                      const TileOperand &batch)
 {
-    const CLTile dilation_x({ { "1" } }, DataType::Int32);
-    const CLTile dilation_y({ { "1" } }, DataType::Int32);
+    const CLTile dilation_x({{"1"}}, DataType::Int32);
+    const CLTile dilation_y({{"1"}}, DataType::Int32);
 
-    op_load_store(MemoryOperation::Load, tile_op, tensor_op, sampler, x, y, z, batch, dilation_x, dilation_y, true /* indirect buffer */);
+    op_load_store(MemoryOperation::Load, tile_op, tensor_op, sampler, x, y, z, batch, dilation_x, dilation_y,
+                  true /* indirect buffer */);
 }
 
-void CLKernelWriter::op_load_store(MemoryOperation op, const TileOperand &tile_op, const TensorOperand &tensor_op, TensorSampler &sampler,
-                                   const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch,
-                                   const TileView<CLTile> &dilation_x, const TileView<CLTile> &dilation_y, bool indirect_buffer)
+void CLKernelWriter::op_load_store(MemoryOperation op,
+                                   const TileOperand &tile_op,
+                                   const TensorOperand &tensor_op,
+                                   TensorSampler &sampler,
+                                   const TileOperand &x,
+                                   const TileOperand &y,
+                                   const TileOperand &z,
+                                   const TileOperand &batch,
+                                   const TileView<CLTile> &dilation_x,
+                                   const TileView<CLTile> &dilation_y,
+                                   bool indirect_buffer)
 {
     CKW_UNUSED(dilation_x);
     CKW_ASSERT(dilation_x.is_scalar());
     CKW_ASSERT(dilation_y.is_scalar());
     CKW_ASSERT(dilation_x.scalar(0, 0).str == "((int)(1))"); // Dilation in x dimension is not implemented yet
 
-    if(indirect_buffer)
+    if (indirect_buffer)
     {
         CKW_ASSERT(dilation_y.scalar(0, 0).str == "((int)(1))" && dilation_x.scalar(0, 0).str == "((int)(1))");
     }
@@ -689,7 +747,7 @@ void CLKernelWriter::op_load_store(MemoryOperation op, const TileOperand &tile_o
     ITensor &tensor = get_tensor(tensor_op);
 
     std::unique_ptr<ICLMemoryOpHelper> helper;
-    switch(sampler.storage())
+    switch (sampler.storage())
     {
         case TensorStorageType::BufferUint8Ptr:
             helper = std::make_unique<CLMemoryOpBufferHelper>(this, &tensor, &sampler, op);
@@ -717,13 +775,13 @@ void CLKernelWriter::op_load_store(MemoryOperation op, const TileOperand &tile_o
 
     helper->initialize(&tile, &x_tile, &z_tile, &batch_tile);
 
-    for(int row = 0; row < tile.info().height(); ++row)
+    for (int row = 0; row < tile.info().height(); ++row)
     {
-        if(!indirect_buffer)
+        if (!indirect_buffer)
         {
             std::string coord_y = y_tile.scalar(0, 0).str + " + " + std::to_string(row);
 
-            if(dilation_y.scalar(0, 0).str != "((int)(1))")
+            if (dilation_y.scalar(0, 0).str != "((int)(1))")
             {
                 coord_y += " * " + dilation_y.scalar(0, 0).str;
             }
diff --git a/compute_kernel_writer/src/cl/CLKernelWriter.h b/compute_kernel_writer/src/cl/CLKernelWriter.h
index d7cf24d5e6..6485bae512 100644
--- a/compute_kernel_writer/src/cl/CLKernelWriter.h
+++ b/compute_kernel_writer/src/cl/CLKernelWriter.h
@@ -26,6 +26,7 @@
 #define CKW_SRC_CL_CLKERNELWRITER_H
 
 #include "ckw/KernelWriter.h"
+
 #include "src/TileView.h"
 
 #include
@@ -73,7 +74,11 @@ public:
 
     void op_binary(const TileOperand &dst, BinaryOp op, const TileOperand &first, const TileOperand &second) override;
 
-    void op_ternary(const TileOperand &dst, TernaryOp op, const TileOperand &first, const TileOperand &second, const TileOperand &third) override;
+    void op_ternary(const TileOperand &dst,
+                    TernaryOp op,
+                    const TileOperand &first,
+                    const TileOperand &second,
+                    const TileOperand &third) override;
 
     // =============================================================================================
     // Flow control
@@ -81,14 +86,18 @@ public:
 
     void op_if(const TileOperand &lhs, BinaryOp op, const TileOperand &rhs, const std::function<void()> &body) override;
 
-    void op_else_if(const TileOperand &lhs, BinaryOp op, const TileOperand &rhs, const std::function<void()> &body) override;
+    void
+    op_else_if(const TileOperand &lhs, BinaryOp op, const TileOperand &rhs, const std::function<void()> &body) override;
 
     void op_else(const std::function<void()> &body) override;
 
-    void op_for_loop(
-        const TileOperand &var, BinaryOp cond_op, const TileOperand &cond_value,
-        const TileOperand &update_var, AssignmentOp update_op, const TileOperand &update_value,
-        const std::function<void()> &body) override;
+    void op_for_loop(const TileOperand &var,
+                     BinaryOp cond_op,
+                     const TileOperand &cond_value,
+                     const TileOperand &update_var,
+                     AssignmentOp update_op,
+                     const TileOperand &update_value,
+                     const std::function<void()> &body) override;
 
     void op_return() override;
 
@@ -132,26 +141,49 @@ public:
     // Memory Operations
     // =============================================================================================
 
-    void op_load(
-        const TileOperand &tile_op, const TensorOperand &tensor_op, TensorSampler &sampler,
-        const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch) override;
-
-    void op_load_dilated(
-        const TileOperand &tile_op, const TensorOperand &tensor_op, TensorSampler &sampler,
-        const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch,
-        const TileOperand &dilation_x, const TileOperand &dilation_y) override;
-
-    void op_store(
-        const TensorOperand &tensor_op, const TileOperand &tile_op, TensorSampler &sampler,
-        const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch) override;
-
-    void op_store_dilated(
-        const TensorOperand &tensor_op, const TileOperand &tile_op, TensorSampler &sampler,
-        const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch,
-        const TileOperand &dilation_x, const TileOperand &dilation_y) override;
-
-    void op_load_indirect(const TileOperand &tile_op, const TensorOperand &tensor_op, TensorSampler &sampler,
-                          const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch) override;
+    void op_load(const TileOperand &tile_op,
+                 const TensorOperand &tensor_op,
+                 TensorSampler &sampler,
+                 const TileOperand &x,
+                 const TileOperand &y,
+                 const TileOperand &z,
+                 const TileOperand &batch) override;
+
+    void op_load_dilated(const TileOperand &tile_op,
+                         const TensorOperand &tensor_op,
+                         TensorSampler &sampler,
+                         const TileOperand &x,
+                         const TileOperand &y,
+                         const TileOperand &z,
+                         const TileOperand &batch,
+                         const TileOperand &dilation_x,
+                         const TileOperand &dilation_y) override;
+
+    void op_store(const TensorOperand &tensor_op,
+                  const TileOperand &tile_op,
+                  TensorSampler &sampler,
+                  const TileOperand &x,
+                  const TileOperand &y,
+                  const TileOperand &z,
+                  const TileOperand &batch) override;
+
+    void op_store_dilated(const TensorOperand &tensor_op,
+                          const TileOperand &tile_op,
+                          TensorSampler &sampler,
+                          const TileOperand &x,
+                          const TileOperand &y,
+                          const TileOperand &z,
+                          const TileOperand &batch,
+                          const TileOperand &dilation_x,
+                          const TileOperand &dilation_y) override;
+
+    void op_load_indirect(const TileOperand &tile_op,
+                          const TensorOperand &tensor_op,
+                          TensorSampler &sampler,
+                          const TileOperand &x,
+                          const TileOperand &y,
+                          const TileOperand &z,
+                          const TileOperand &batch) override;
 
 protected:
     /** Return a tile view containing a reference to @ref CLTile object and the active area.
@@ -181,9 +213,17 @@ protected:
     // For helper functions
 private:
     /** Helper method to consolidate all load/store logic in this class */
-    void op_load_store(MemoryOperation op, const TileOperand &tile_op, const TensorOperand &tensor_op, TensorSampler &sampler,
-                       const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch,
-                       const TileView<CLTile> &dilation_x, const TileView<CLTile> &dilation_y, bool indirect_buffer);
+    void op_load_store(MemoryOperation op,
+                       const TileOperand &tile_op,
+                       const TensorOperand &tensor_op,
+                       TensorSampler &sampler,
+                       const TileOperand &x,
+                       const TileOperand &y,
+                       const TileOperand &z,
+                       const TileOperand &batch,
+                       const TileView<CLTile> &dilation_x,
+                       const TileView<CLTile> &dilation_y,
+                       bool indirect_buffer);
 
     /** This function is the generic function to write both `if` and `else if` blocks.
      *
@@ -195,7 +235,11 @@ private:
      * @param[in] body       The function that writes the body of the else-if block.
      * @param[in] is_else_if True if this is an `else if` block, otherwise this is an `if` block.
      */
-    void op_if_generic(const TileOperand &lhs, BinaryOp op, const TileOperand &rhs, const std::function<void()> &body, bool is_else_if);
+    void op_if_generic(const TileOperand &lhs,
+                       BinaryOp op,
+                       const TileOperand &rhs,
+                       const std::function<void()> &body,
+                       bool is_else_if);
 
     // For attributes
 private:
diff --git a/compute_kernel_writer/src/cl/CLTensorArgument.cpp b/compute_kernel_writer/src/cl/CLTensorArgument.cpp
index 7d4dc958df..e53de2830d 100644
--- a/compute_kernel_writer/src/cl/CLTensorArgument.cpp
+++ b/compute_kernel_writer/src/cl/CLTensorArgument.cpp
@@ -23,11 +23,13 @@
  */
 #include "src/cl/CLTensorArgument.h"
+
 #include "ckw/Error.h"
-#include "src/ITensorArgument.h"
-#include "src/ITensorComponent.h"
+
 #include "src/cl/CLHelpers.h"
 #include "src/cl/CLTensorComponent.h"
+#include "src/ITensorArgument.h"
+#include "src/ITensorComponent.h"
 #include "src/types/TensorComponentType.h"
 
 #include <algorithm>
@@ -48,25 +50,23 @@ CLTensorComponent &CLTensorArgument::cl_component(TensorComponentType x)
 {
     // Return the component if it has already been created.
     {
-        const auto it = std::find_if(
-            _components_used.begin(), _components_used.end(),
-            [=](const std::unique_ptr<CLTensorComponent> &item)
-            {
-                return item->component_type() == x;
-            });
+        const auto it =
+            std::find_if(_components_used.begin(), _components_used.end(),
+                         [=](const std::unique_ptr<CLTensorComponent> &item) { return item->component_type() == x; });
 
-        if(it != _components_used.end())
+        if (it != _components_used.end())
         {
             return **it;
         }
     }
 
-    if(_return_dims_by_value)
+    if (_return_dims_by_value)
     {
         uint32_t component_type = static_cast<uint32_t>(x);
 
-        const bool is_dimension = (component_type & static_cast<uint32_t>(TensorComponentBitmask::Dimension)) != 0;
-        const bool is_folded_dimensions = (component_type & static_cast<uint32_t>(TensorComponentBitmask::FoldedDimensions)) != 0;
+        const bool is_dimension = (component_type & static_cast<uint32_t>(TensorComponentBitmask::Dimension)) != 0;
+        const bool is_folded_dimensions =
+            (component_type & static_cast<uint32_t>(TensorComponentBitmask::FoldedDimensions)) != 0;
 
         constexpr auto bitmask_all     = static_cast<uint32_t>(TensorComponentIndexBitmask::All);
         constexpr auto bitmask_index_0 = static_cast<uint32_t>(TensorComponentIndexBitmask::Index0);
@@ -83,16 +83,16 @@ CLTensorComponent &CLTensorArgument::cl_component(TensorComponentType x)
         CKW_ASSERT(bitmask_index_2 == bitmask_index_3 >> 4);
 
         // If we have a dimension or folded dimensions, we can return the corresponding value if it is not dynamic (not equal to -1)
-        if(is_dimension == true || is_folded_dimensions == true)
+        if (is_dimension == true || is_folded_dimensions == true)
        {
             component_type = component_type & bitmask_all;
 
             int32_t idx = 1;
-            for(int32_t i = 0; i < tensor_component_index_max_count; ++i)
+            for (int32_t i = 0; i < tensor_component_index_max_count; ++i)
             {
                 uint32_t dim_idx = component_type & bitmask_index_0;
 
-                if(dim_idx == 0)
+                if (dim_idx == 0)
                 {
                     // Stop at the first nibble containing 0
                     break;
@@ -104,7 +104,7 @@ CLTensorComponent &CLTensorArgument::cl_component(TensorComponentType x)
                 // Get the dimension value
                 const int32_t dim_val = _info.shape()[dim_idx];
 
-                if(dim_val == kDynamicTensorDimensionValue)
+                if (dim_val == kDynamicTensorDimensionValue)
                 {
                     // We cannot return the dimension by value if it is dynamic.
                     // Therefore, force the idx variable to kDynamicTensorDimensionValue and break the loop.
@@ -118,7 +118,7 @@ CLTensorComponent &CLTensorArgument::cl_component(TensorComponentType x)
                 component_type >>= 4;
             }
 
-            if(idx != kDynamicTensorDimensionValue)
+            if (idx != kDynamicTensorDimensionValue)
             {
                 _components_used.emplace_back(std::make_unique<CLTensorComponent>(*this, x, idx));
 
@@ -141,14 +141,10 @@ TensorStorageVariable &CLTensorArgument::storage(TensorStorageType x)
 {
     // Return the storage if it has already been created.
     {
-        const auto it = std::find_if(
-            _storages_used.begin(), _storages_used.end(),
-            [=](const TensorStorageVariable &item)
-            {
-                return item.type == x;
-            });
+        const auto it = std::find_if(_storages_used.begin(), _storages_used.end(),
+                                     [=](const TensorStorageVariable &item) { return item.type == x; });
 
-        if(it != _storages_used.end())
+        if (it != _storages_used.end())
         {
             return *it;
         }
@@ -167,7 +163,7 @@ std::string CLTensorArgument::create_storage_name(TensorStorageType x) const
 {
     std::string var_name = _basename;
 
-    switch(x)
+    switch (x)
     {
         case TensorStorageType::BufferUint8Ptr:
             var_name += "_ptr";
@@ -198,9 +194,9 @@ std::vector<ITensorComponent *> CLTensorArgument::components() const
 {
     std::vector<ITensorComponent *> components;
 
-    for(const auto &component : _components_used)
+    for (const auto &component : _components_used)
     {
-        if(component->is_assignable())
+        if (component->is_assignable())
         {
             components.push_back(component.get());
         }
diff --git a/compute_kernel_writer/src/cl/CLTensorArgument.h b/compute_kernel_writer/src/cl/CLTensorArgument.h
index 4cbbee21ee..35df51422e 100644
--- a/compute_kernel_writer/src/cl/CLTensorArgument.h
+++ b/compute_kernel_writer/src/cl/CLTensorArgument.h
@@ -26,7 +26,9 @@
 
 #include "ckw/types/TensorComponentType.h"
 #include "ckw/types/TensorStorageType.h"
+
 #include "src/ITensor.h"
+
 #include <memory>
 #include <string>
 #include <vector>
@@ -67,7 +69,7 @@ public:
      * unlike @ref CLTensorComponent::component which is for the public API and only returns
      * a reference to a generic @ref ITile object.
      */
-    CLTensorComponent& cl_component(TensorComponentType component_type);
+    CLTensorComponent &cl_component(TensorComponentType component_type);
 
     // Inherited method overridden
     TensorStorageVariable &storage(TensorStorageType x) override;
@@ -78,7 +80,7 @@ public:
 private:
     std::string create_storage_name(TensorStorageType x) const;
 
-    bool _return_dims_by_value{ false };
+    bool _return_dims_by_value{false};
     std::vector<TensorStorageVariable> _storages_used{};
     std::vector<std::unique_ptr<CLTensorComponent>> _components_used{};
 };
diff --git a/compute_kernel_writer/src/cl/CLTensorComponent.cpp b/compute_kernel_writer/src/cl/CLTensorComponent.cpp
index c29b307748..dbe2036768 100644
--- a/compute_kernel_writer/src/cl/CLTensorComponent.cpp
+++ b/compute_kernel_writer/src/cl/CLTensorComponent.cpp
@@ -23,8 +23,10 @@
  */
 #include "src/cl/CLTensorComponent.h"
+
 #include "ckw/Error.h"
 #include "ckw/types/TensorComponentType.h"
+
 #include "src/cl/CLTensorArgument.h"
 #include "src/cl/CLTile.h"
 
@@ -38,7 +40,7 @@ std::string create_component_name(const std::string &name, TensorComponentType x
 {
     std::string var_name(name);
 
-    switch(x)
+    switch (x)
     {
         case TensorComponentType::OffsetFirstElement:
             var_name += "_offset_first_element";
@@ -93,12 +95,13 @@ std::string create_component_name(const std::string &name, TensorComponentType x
 } // namespace
 
 CLTensorComponent::CLTensorComponent(const CLTensorArgument &tensor, TensorComponentType component_type)
-    : CLTile(create_component_name(tensor.name(), component_type), TileInfo(DataType::Int32)), _component_type(component_type)
+    : CLTile(create_component_name(tensor.name(), component_type), TileInfo(DataType::Int32)),
+      _component_type(component_type)
 {
 }
 
 CLTensorComponent::CLTensorComponent(const CLTensorArgument &tensor, TensorComponentType component_type, int32_t value)
-    : CLTile({ { std::to_string(value) } }, DataType::Int32), _component_type(component_type)
+    : CLTile({{std::to_string(value)}}, DataType::Int32), _component_type(component_type)
 {
     CKW_UNUSED(tensor);
 }
diff --git a/compute_kernel_writer/src/cl/CLTensorComponent.h b/compute_kernel_writer/src/cl/CLTensorComponent.h
index 42a42666dc..731597ebbf 100644
--- a/compute_kernel_writer/src/cl/CLTensorComponent.h
+++ b/compute_kernel_writer/src/cl/CLTensorComponent.h
@@ -26,8 +26,9 @@
 #define CKW_SRC_CL_CLTENSORCOMPONENT_H
 
 #include "ckw/types/TensorComponentType.h"
-#include "src/ITensorComponent.h"
+
 #include "src/cl/CLTile.h"
+#include "src/ITensorComponent.h"
 
 namespace ckw
 {
@@ -72,7 +73,7 @@ public:
     TensorComponentType component_type() const override;
 
 private:
-    TensorComponentType _component_type{ TensorComponentType::Unknown };
+    TensorComponentType _component_type{TensorComponentType::Unknown};
 };
 
 } // namespace ckw
diff --git a/compute_kernel_writer/src/cl/CLTile.cpp b/compute_kernel_writer/src/cl/CLTile.cpp
index 0cce69a9e1..f6e271e813 100644
--- a/compute_kernel_writer/src/cl/CLTile.cpp
+++ b/compute_kernel_writer/src/cl/CLTile.cpp
@@ -21,20 +21,20 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
+#include "src/cl/CLTile.h"
+
 #include "ckw/Error.h"
 #include "ckw/TileInfo.h"
 
-#include "src/Helpers.h"
 #include "src/cl/CLHelpers.h"
-#include "src/cl/CLTile.h"
+#include "src/Helpers.h"
 
 #include
 #include
 
 namespace ckw
 {
-CLTile::CLTile(const std::string &name, const TileInfo &info)
-    : _is_constant(false)
+CLTile::CLTile(const std::string &name, const TileInfo &info) : _is_constant(false)
 {
     validate_tile_info(info);
 
@@ -42,8 +42,7 @@ CLTile::CLTile(const std::string &name, const TileInfo &info)
     _info = info;
 }
 
-CLTile::CLTile(const TileContainer &vals, DataType dt)
-    : _is_constant(true)
+CLTile::CLTile(const TileContainer &vals, DataType dt) : _is_constant(true)
 {
     const int32_t w = vals[0].size();
     const int32_t h = vals.size();
@@ -56,9 +55,9 @@ CLTile::CLTile(const TileContainer &vals, DataType dt)
 
     _vals = TileContainer(h, std::vector<std::string>(w));
 
-    for(int32_t y = 0; y < h; ++y)
+    for (int32_t y = 0; y < h; ++y)
     {
-        for(int32_t x = 0; x < w; ++x)
+        for (int32_t x = 0; x < w; ++x)
         {
             _vals[y][x] = vals[y][x];
         }
@@ -81,7 +80,7 @@ TileVariable CLTile::scalar(int32_t row, int32_t col) const
     col = clamp(col, static_cast<int32_t>(0), _info.width() - 1);
     row = clamp(row, static_cast<int32_t>(0), _info.height() - 1);
 
-    if(_is_constant)
+    if (_is_constant)
     {
         // We can use the vector method to retrieve the scalar variable stored in the constant tile
         return vector(row, col, 1);
@@ -94,7 +93,7 @@ TileVariable CLTile::scalar(int32_t row, int32_t col) const
     t.desc.len = 1;
 
     // This check is required because if the width has only one element, we cannot use .s0
-    if(_info.width() != 1)
+    if (_info.width() != 1)
     {
         // Automatic broadcasting
         t.str += ".s" + dec_to_hex_as_string(col);
@@ -109,7 +108,7 @@ TileVariable CLTile::vector(int32_t row) const
     // Clamp to nearest valid edge
     row = clamp(row, static_cast<int32_t>(0), _info.height() - 1);
 
-    if(_is_constant)
+    if (_is_constant)
     {
         return vector(row, 0, _info.width());
     }
@@ -138,14 +137,14 @@ TileVariable CLTile::vector(int32_t row, int32_t col_start, int32_t width) const
     t.desc.dt = _info.data_type();
     t.desc.len = width;
 
-    if(_is_constant)
+    if (_is_constant)
     {
         // The vector has the following form: ((data_typeN)(val0, val1,..., ValN-1))
         t.str = "((" + cl_get_variable_datatype_as_string(t.desc.dt, width) + ")";
         t.str += "(";
 
         int32_t col = col_start;
-        for(; col < width - 1; ++col)
+        for (; col < width - 1; ++col)
         {
             t.str += _vals[row][col];
             t.str += ", ";
@@ -157,10 +156,10 @@ TileVariable CLTile::vector(int32_t row, int32_t col_start, int32_t width) const
     {
         t.str = create_var_name(row);
 
-        if(_info.width() != 1 && _info.width() != width)
+        if (_info.width() != 1 && _info.width() != width)
         {
             t.str += ".s";
-            for(int i = 0; i < width; ++i)
+            for (int i = 0; i < width; ++i)
             {
                 t.str += dec_to_hex_as_string(col_start + i);
             }
@@ -174,11 +173,11 @@ std::vector<TileVariable> CLTile::all() const
 {
     std::vector<TileVariable> vars;
 
-    if(_is_constant)
+    if (_is_constant)
     {
-        for(int32_t y = 0; y < _info.height(); ++y)
+        for (int32_t y = 0; y < _info.height(); ++y)
         {
-            for(int32_t x = 0; x < _info.width(); ++x)
+            for (int32_t x = 0; x < _info.width(); ++x)
             {
                 // We can use the vector method to retrieve all the scalar variables stored in the constant tile
                 TileVariable t = vector(y, x, 1);
@@ -188,7 +187,7 @@ std::vector<TileVariable> CLTile::all() const
     }
     else
     {
-        for(int32_t y = 0; y < _info.height(); ++y)
+        for (int32_t y = 0; y < _info.height(); ++y)
         {
             TileVariable t;
             t.str = create_var_name(y);
@@ -211,7 +210,7 @@ std::string CLTile::create_var_name(int32_t row) const
     std::string var_name = _basename;
 
     // If a scalar variable, we do not append the row index
-    if(_info.height() > 1)
+    if (_info.height() > 1)
     {
         var_name += "__";
         var_name += std::to_string(row);
@@ -222,7 +221,7 @@ std::string CLTile::create_var_name(int32_t row) const
 
 std::vector<int32_t> CLTile::supported_vector_lengths() const
 {
-    return std::vector<int32_t>{ 1, 2, 3, 4, 8, 16 };
+    return std::vector<int32_t>{1, 2, 3, 4, 8, 16};
 }
 
 void CLTile::validate_tile_info(const TileInfo &info) const
diff --git a/compute_kernel_writer/src/cl/CLTile.h b/compute_kernel_writer/src/cl/CLTile.h
index 1fb0fc9dbe..498cf51034 100644
--- a/compute_kernel_writer/src/cl/CLTile.h
+++ b/compute_kernel_writer/src/cl/CLTile.h
@@ -25,6 +25,7 @@
 #define COMPUTE_KERNEL_WRITER_SRC_CL_CLTILE_H
 
 #include "src/ITile.h"
+
 #include <string>
 
 namespace ckw
{
@@ -75,9 +76,9 @@ private:
 
     std::string create_var_name(int32_t row) const;
 
-    TileInfo _info{ DataType::Unknown };
-    std::string _basename{ "" };
-    bool _is_constant{ false };
+    TileInfo _info{DataType::Unknown};
+    std::string _basename{""};
+    bool _is_constant{false};
 
     TileContainer _vals{};
 };
} // namespace ckw
diff --git a/compute_kernel_writer/src/cl/helpers/CLMemoryOpBufferHelper.cpp b/compute_kernel_writer/src/cl/helpers/CLMemoryOpBufferHelper.cpp
index f906bcd4b1..a98ebed8fa 100644
--- a/compute_kernel_writer/src/cl/helpers/CLMemoryOpBufferHelper.cpp
+++ b/compute_kernel_writer/src/cl/helpers/CLMemoryOpBufferHelper.cpp
@@ -28,20 +28,25 @@
 
 #include "ckw/types/MemoryOperation.h"
 #include "ckw/types/TensorStorageType.h"
 
-#include "src/ITensor.h"
-#include "src/Tensor3dMapper.h"
 #include "src/cl/CLHelpers.h"
 #include "src/cl/CLKernelWriter.h"
 #include "src/cl/CLTensorArgument.h"
 #include "src/cl/CLTile.h"
+#include "src/ITensor.h"
+#include "src/Tensor3dMapper.h"
 
 namespace ckw
 {
-bool CLMemoryOpBufferHelper::validate(const CLKernelWriter *writer, const ITensor *tensor, const TensorSampler *sampler, const Tensor3dMapper *mapper, MemoryOperation op, const CLTile *dst)
+bool CLMemoryOpBufferHelper::validate(const CLKernelWriter *writer,
+                                      const ITensor *tensor,
+                                      const TensorSampler *sampler,
+                                      const Tensor3dMapper *mapper,
+                                      MemoryOperation op,
+                                      const CLTile *dst)
 {
     CKW_UNUSED(writer, tensor, mapper, op, dst);
 
-    if(sampler->storage() != TensorStorageType::BufferUint8Ptr)
+    if (sampler->storage() != TensorStorageType::BufferUint8Ptr)
     {
         return false;
     }
@@ -97,15 +102,15 @@ bool CLMemoryOpBufferHelper::validate(const CLKernelWriter *writer, const ITenso
 */
 void CLMemoryOpBufferHelper::initialize(const CLTile *dst, const CLTile *x, const CLTile *z, const CLTile *b)
 {
-    _dst = dst;
+    _dst           = dst;
 
     CKW_ASSERT(validate(_writer, _tensor, _sampler, _mapper.get(), _op, _dst));
 
     _ls_width_full = dst->info().width();
-    _coord_x = x->scalar(0, 0).str;
-    _coord_z = z->scalar(0, 0).str;
-    _coord_b = b->scalar(0, 0).str;
-    _coord_orig_z = _coord_z;
+    _coord_x       = x->scalar(0, 0).str;
+    _coord_z       = z->scalar(0, 0).str;
+    _coord_b       = b->scalar(0, 0).str;
+    _coord_orig_z  = _coord_z;
 
     out_of_bound_initialize_x(_coord_x);
     out_of_bound_initialize_z(_coord_z);
@@ -126,10 +131,10 @@ void CLMemoryOpBufferHelper::write_row(int32_t row_id, const std::string &coord_
     out_of_bound_finalize_y(dst);
 
     // The left over load/store will be written in the finalize stage
-    if(_ls_width_part.size() != 0)
+    if (_ls_width_part.size() != 0)
     {
         int32_t col_start = 0;
-        for(int32_t partial_width : _ls_width_part)
+        for (int32_t partial_width : _ls_width_part)
         {
             const std::string dst = _dst->vector(row_id, col_start, partial_width).str;
             const std::string coord_x = _coord_x + " + " + std::to_string(col_start);
@@ -150,13 +155,13 @@ void CLMemoryOpBufferHelper::finalize()
 
 void CLMemoryOpBufferHelper::out_of_bound_initialize_x(const std::string &coord)
 {
-    if(_sampler->address_mode_x() == TensorSamplerAddressModeX::OverlappingMin)
+    if (_sampler->address_mode_x() == TensorSamplerAddressModeX::OverlappingMin)
     {
-        TensorInfo tensor_info = _tensor->info();
-        TensorShape shape = tensor_info.shape();
+        TensorInfo  tensor_info = _tensor->info();
+        TensorShape shape       = tensor_info.shape();
 
         _ls_width_part = cl_decompose_vector_width(shape[0] % _ls_width_full);
-        if(_ls_width_part.size() != 0)
+        if (_ls_width_part.size() != 0)
        {
             _writer->op_write_raw_code("if(" + coord + " > 0)\n{\n");
         }
@@ -165,14 +170,14 @@ void CLMemoryOpBufferHelper::out_of_bound_initialize_x(const std::string &coord)
 
 void CLMemoryOpBufferHelper::out_of_bound_finalize_x()
 {
-    if(_sampler->address_mode_x() == TensorSamplerAddressModeX::OverlappingMin)
+    if (_sampler->address_mode_x() == TensorSamplerAddressModeX::OverlappingMin)
     {
-        if(_ls_width_part.size() != 0)
+        if (_ls_width_part.size() != 0)
         {
             _writer->op_write_raw_code("}\nelse\n{\n");
 
             out_of_bound_initialize_z(_coord_orig_z);
-            for(LeftoverDescriptor leftover_desc : _leftovers_x)
+            for (LeftoverDescriptor leftover_desc : _leftovers_x)
             {
                 out_of_bound_initialize_y(leftover_desc.coord);
                 _writer->op_write_raw_code(leftover_desc.statement);
@@ -191,7 +196,7 @@ void CLMemoryOpBufferHelper::out_of_bound_initialize_y(const std::string &coord)
 
     const TensorSamplerAddressModeY address_mode_y = _sampler->address_mode_y();
 
-    switch(address_mode_y)
+    switch (address_mode_y)
     {
         case TensorSamplerAddressModeY::ClampToBorderMaxOnly:
            // Not to be moved outside the case because it marks the relevant tensor component as used even if we don't use the variable
@@ -212,7 +217,7 @@ void CLMemoryOpBufferHelper::out_of_bound_finalize_y(const std::string &dst)
 {
     const TensorSamplerAddressModeY address_mode_y = _sampler->address_mode_y();
 
-    switch(address_mode_y)
+    switch (address_mode_y)
     {
         case TensorSamplerAddressModeY::ClampToBorderMaxOnly:
             _writer->op_write_raw_code("}\nelse\n{\n");
@@ -234,7 +239,7 @@ void CLMemoryOpBufferHelper::out_of_bound_initialize_z(const std::string &coord)
     CKW_UNUSED(coord);
 
     const TensorSamplerAddressModeZ address_mode_z = _sampler->address_mode_z();
-    switch(address_mode_z)
+    switch (address_mode_z)
     {
         case TensorSamplerAddressModeZ::None:
             break;
@@ -247,7 +252,7 @@ void CLMemoryOpBufferHelper::out_of_bound_finalize_z()
 {
     const TensorSamplerAddressModeZ address_mode_z = _sampler->address_mode_z();
 
-    switch(address_mode_z)
+    switch (address_mode_z)
     {
         case TensorSamplerAddressModeZ::None:
             break;
@@ -256,13 +261,15 @@ void CLMemoryOpBufferHelper::out_of_bound_finalize_z()
     }
 }
 
-std::string CLMemoryOpBufferHelper::to_statement(MemoryOperation op, int32_t vector_width, const std::string &data,
-                                                 const std::string &address) const
+std::string CLMemoryOpBufferHelper::to_statement(MemoryOperation op,
+                                                 int32_t vector_width,
+                                                 const std::string &data,
+                                                 const std::string &address) const
 {
-    switch(op)
+    switch (op)
     {
         case MemoryOperation::Load:
-            if(vector_width != 1)
+            if (vector_width != 1)
             {
                 return data + " = vload" + std::to_string(vector_width) + "(0, " + address + ")";
             }
@@ -272,7 +279,7 @@ std::string CLMemoryOpBufferHelper::to_statement(MemoryOperation op, int32_t vec
             }
             break;
         case MemoryOperation::Store:
-            if(vector_width != 1)
+            if (vector_width != 1)
             {
                 return "vstore" + std::to_string(vector_width) + "(" + data + ", 0, " + address + ")";
             }
@@ -288,26 +295,28 @@ std::string CLMemoryOpBufferHelper::to_statement(MemoryOperation op, int32_t vec
     return "";
 }
 
-std::string CLMemoryOpBufferHelper::to_buffer_address(const std::string &x, const std::string &y, const std::string &z,
-                                                      const std::string &b) const
+std::string CLMemoryOpBufferHelper::to_buffer_address(const std::string &x,
+                                                      const std::string &y,
+                                                      const std::string &z,
+                                                      const std::string &b) const
 {
     TensorStorageType tensor_storage = _sampler->storage();
     CKW_ASSERT(tensor_storage == TensorStorageType::BufferUint8Ptr);
 
-    const std::string ptr_buf = _tensor->storage(tensor_storage).val;
-    const std::string dst_type = cl_data_type_rounded_up_to_valid_vector_width(_dst->info().data_type(), 1);
+    const std::string ptr_buf  = _tensor->storage(tensor_storage).val;
+    const std::string dst_type = cl_data_type_rounded_up_to_valid_vector_width(_dst->info().data_type(), 1);
 
     std::string address;
     address += "(__global ";
     address += dst_type;
     address += "*)(";
     address += ptr_buf;
-    if(x != "0" && (_mapper->dim_x().str != "1"))
+    if (x != "0" && (_mapper->dim_x().str != "1"))
     {
         address += " + (";
         address += x + ") * sizeof(" + dst_type + ")";
     }
-    if(y != "0")
+    if (y != "0")
     {
         const std::string stride_y = _mapper->stride_y().str;
         address += " + (";
@@ -315,7 +324,7 @@ std::string CLMemoryOpBufferHelper::to_buffer_address(const std::string &x, cons
         address += " * ";
         address += stride_y;
     }
-    if(z != "0" && (_mapper->dim_z().str != "1"))
+    if (z != "0" && (_mapper->dim_z().str != "1"))
     {
         const std::string stride_z = _mapper->stride_z().str;
         address += " + (";
@@ -323,7 +332,7 @@ std::string CLMemoryOpBufferHelper::to_buffer_address(const std::string &x, cons
         address += " * ";
         address += stride_z;
     }
-    if(b != "0" && (_mapper->dim_batch().str != "1"))
+    if (b != "0" && (_mapper->dim_batch().str != "1"))
     {
         const std::string stride_b = _mapper->stride_batch().str;
         address += " + (";
diff --git a/compute_kernel_writer/src/cl/helpers/CLMemoryOpBufferHelper.h b/compute_kernel_writer/src/cl/helpers/CLMemoryOpBufferHelper.h
index 9bcd571a81..4e1a842fe1 100644
--- a/compute_kernel_writer/src/cl/helpers/CLMemoryOpBufferHelper.h
+++ b/compute_kernel_writer/src/cl/helpers/CLMemoryOpBufferHelper.h
@@ -27,9 +27,9 @@
 
 #include "src/cl/helpers/ICLMemoryOpHelper.h"
 
+#include <cstdint>
 #include <string>
 #include <vector>
-#include <cstdint>
 
 namespace ckw
 {
@@ -65,20 +65,25 @@ private:
     struct LeftoverDescriptor
     {
         LeftoverDescriptor(const std::string &dst, const std::string &coord, const std::string &statement)
-            : dst(dst), coord(coord), statement(statement)
+            : dst(dst), coord(coord), statement(statement)
         {
         }
 
-        std::string dst{};    // Describes the destination tile or part of it
-        std::string coord{};  // Describes the coordinate to be used in boundary checks
-        std::string statement{};  // Describes the memory operation statement
+        std::string dst{};       // Describes the destination tile or part of it
+        std::string coord{};     // Describes the coordinate to be used in boundary checks
+        std::string statement{}; // Describes the memory operation statement
     };
 
     std::vector<int32_t> _ls_width_part{};
     std::vector<LeftoverDescriptor> _leftovers_x{};
     std::string _coord_orig_z{};
 
-    static bool validate(const CLKernelWriter *writer, const ITensor *tensor, const TensorSampler *sampler, const Tensor3dMapper *mapper, MemoryOperation op, const CLTile *dst);
+    static bool validate(const CLKernelWriter *writer,
+                         const ITensor *tensor,
+                         const TensorSampler *sampler,
+                         const Tensor3dMapper *mapper,
+                         MemoryOperation op,
+                         const CLTile *dst);
 
     void out_of_bound_initialize_x(const std::string &coord);
     void out_of_bound_finalize_x();
@@ -87,8 +92,10 @@ private:
     void out_of_bound_initialize_z(const std::string &coord);
     void out_of_bound_finalize_z();
 
-    std::string to_statement(MemoryOperation op, int32_t vector_width, const std::string &data, const std::string &address) const;
-    std::string to_buffer_address(const std::string &x, const std::string &y, const std::string &z, const std::string &b) const;
+    std::string
+    to_statement(MemoryOperation op, int32_t vector_width, const std::string &data, const std::string &address) const;
+    std::string
+    to_buffer_address(const std::string &x, const std::string &y, const std::string &z, const std::string &b) const;
 };
 
 } // namespace ckw
diff --git a/compute_kernel_writer/src/cl/helpers/CLMemoryOpImage2dHelper.cpp b/compute_kernel_writer/src/cl/helpers/CLMemoryOpImage2dHelper.cpp
index 55f88f4136..b7d146bdee 100644
--- a/compute_kernel_writer/src/cl/helpers/CLMemoryOpImage2dHelper.cpp
+++ b/compute_kernel_writer/src/cl/helpers/CLMemoryOpImage2dHelper.cpp
@@ -28,11 +28,11 @@
 
 #include "ckw/types/MemoryOperation.h"
 #include "ckw/types/TensorStorageType.h"
 
-#include "src/ITensor.h"
-#include "src/Tensor3dMapper.h"
 #include "src/cl/CLKernelWriter.h"
 #include "src/cl/CLTensorArgument.h"
 #include "src/cl/CLTile.h"
+#include "src/ITensor.h"
+#include "src/Tensor3dMapper.h"
 
 namespace ckw
 {
@@ -66,31 +66,36 @@ void CLMemoryOpImage2dHelper::finalize()
 {
 }
 
-bool CLMemoryOpImage2dHelper::validate(const CLKernelWriter *writer, const ITensor *tensor, const TensorSampler *sampler, const Tensor3dMapper *mapper, MemoryOperation op, const CLTile *dst)
+bool CLMemoryOpImage2dHelper::validate(const CLKernelWriter *writer,
+                                       const ITensor *tensor,
+                                       const TensorSampler *sampler,
+                                       const Tensor3dMapper *mapper,
+                                       MemoryOperation op,
+                                       const CLTile *dst)
 {
     CKW_UNUSED(writer, tensor, mapper);
 
-    if(dst->info().width() != 4)
+    if (dst->info().width() != 4)
     {
         return false;
     }
-    if(sampler->address_mode_x() != TensorSamplerAddressModeX::None)
+    if (sampler->address_mode_x() != TensorSamplerAddressModeX::None)
     {
         return false;
     }
-    if(sampler->address_mode_z() != TensorSamplerAddressModeZ::None)
+    if (sampler->address_mode_z() != TensorSamplerAddressModeZ::None)
     {
         return false;
     }
-    if(sampler->storage() != TensorStorageType::Texture2dReadOnly && op == MemoryOperation::Load)
+    if (sampler->storage() != TensorStorageType::Texture2dReadOnly && op == MemoryOperation::Load)
     {
         return false;
     }
-    if(sampler->storage() != TensorStorageType::Texture2dWriteOnly && op == MemoryOperation::Store)
+    if (sampler->storage() != TensorStorageType::Texture2dWriteOnly && op == MemoryOperation::Store)
     {
         return false;
     }
-    if((dst->info().data_type() != DataType::Fp32) && (dst->info().data_type() != DataType::Fp16))
+    if ((dst->info().data_type() != DataType::Fp32) && (dst->info().data_type() != DataType::Fp16))
     {
         return false;
     }
@@ -102,7 +107,7 @@ void CLMemoryOpImage2dHelper::out_of_bound_initialize_y(const std::string &coord
     CKW_UNUSED(coord);
 
     const TensorSamplerAddressModeY address_mode_y = _sampler->address_mode_y();
-    switch(address_mode_y)
+    switch (address_mode_y)
     {
         case TensorSamplerAddressModeY::SkipLessThanZero:
             _writer->op_write_raw_code("if(" + coord + " >= 0)\n{\n");
@@ -118,7 +123,7 @@ void CLMemoryOpImage2dHelper::out_of_bound_initialize_y(const std::string &coord
 void CLMemoryOpImage2dHelper::out_of_bound_finalize_y()
 {
     const TensorSamplerAddressModeY address_mode_y = _sampler->address_mode_y();
-    switch(address_mode_y)
+    switch (address_mode_y)
     {
         case TensorSamplerAddressModeY::SkipLessThanZero:
             _writer->op_write_raw_code("}\n");
@@ -131,15 +136,19 @@ void CLMemoryOpImage2dHelper::out_of_bound_finalize_y()
     }
 }
 
-std::string CLMemoryOpImage2dHelper::to_ls_image2d(MemoryOperation op, int32_t vector_width, const std::string &data, const std::string &sampler, const std::string &address) const
+std::string CLMemoryOpImage2dHelper::to_ls_image2d(MemoryOperation op,
+                                                   int32_t vector_width,
+                                                   const std::string &data,
+                                                   const std::string &sampler,
+                                                   const std::string &address) const
 {
     CKW_UNUSED(vector_width);
 
     const TensorStorageType tensor_storage = _sampler->storage();
-    const std::string image2d_obj = _tensor->storage(tensor_storage).val;
-    const std::string post_fix = _dst->info().data_type() == DataType::Fp32 ? "f" : "h";
"f" : "h"; - switch(op) + switch (op) { case MemoryOperation::Load: return data + " = read_image" + post_fix + "(" + image2d_obj + ", " + sampler + ", " + address + ")"; @@ -155,7 +164,7 @@ std::string CLMemoryOpImage2dHelper::to_ls_image2d_sampler() const { const auto address_mode_y = _sampler->address_mode_y(); - switch(address_mode_y) + switch (address_mode_y) { case TensorSamplerAddressModeY::None: return "CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST"; @@ -167,17 +176,19 @@ std::string CLMemoryOpImage2dHelper::to_ls_image2d_sampler() const } } -std::string CLMemoryOpImage2dHelper::to_ls_image2d_address(const std::string &x, const std::string &y, const std::string &z, +std::string CLMemoryOpImage2dHelper::to_ls_image2d_address(const std::string &x, + const std::string &y, + const std::string &z, const std::string &b) const { std::string coord_x = "(" + x + ") >> 2"; std::string coord_y = "("; - if(y != "0") + if (y != "0") { coord_y += y; } - if(z != "0" && (_mapper->dim_z().str != "1")) + if (z != "0" && (_mapper->dim_z().str != "1")) { const std::string dim = _mapper->dim_y().str; coord_y += " + ("; @@ -185,7 +196,7 @@ std::string CLMemoryOpImage2dHelper::to_ls_image2d_address(const std::string &x, coord_y += " * "; coord_y += dim; } - if(b != "0" && (_mapper->dim_batch().str != "1")) + if (b != "0" && (_mapper->dim_batch().str != "1")) { const std::string dim0 = _mapper->dim_y().str; const std::string dim1 = _mapper->dim_z().str; diff --git a/compute_kernel_writer/src/cl/helpers/CLMemoryOpImage2dHelper.h b/compute_kernel_writer/src/cl/helpers/CLMemoryOpImage2dHelper.h index 73bede7789..fd9b097a24 100644 --- a/compute_kernel_writer/src/cl/helpers/CLMemoryOpImage2dHelper.h +++ b/compute_kernel_writer/src/cl/helpers/CLMemoryOpImage2dHelper.h @@ -59,14 +59,24 @@ public: void finalize() override; private: - static bool validate(const CLKernelWriter *writer, const ITensor *tensor, const TensorSampler *sampler, const Tensor3dMapper *mapper, MemoryOperation op, const CLTile *dst); + static bool validate(const CLKernelWriter *writer, + const ITensor *tensor, + const TensorSampler *sampler, + const Tensor3dMapper *mapper, + MemoryOperation op, + const CLTile *dst); void out_of_bound_initialize_y(const std::string &coord); void out_of_bound_finalize_y(); - std::string to_ls_image2d(MemoryOperation op, int32_t vector_width, const std::string &data, const std::string &sampler, const std::string &address) const; + std::string to_ls_image2d(MemoryOperation op, + int32_t vector_width, + const std::string &data, + const std::string &sampler, + const std::string &address) const; std::string to_ls_image2d_sampler() const; - std::string to_ls_image2d_address(const std::string &x, const std::string &y, const std::string &z, const std::string &b) const; + std::string + to_ls_image2d_address(const std::string &x, const std::string &y, const std::string &z, const std::string &b) const; }; } // namespace ckw diff --git a/compute_kernel_writer/src/cl/helpers/ICLMemoryOpHelper.h b/compute_kernel_writer/src/cl/helpers/ICLMemoryOpHelper.h index 7f363431e8..f46fee9750 100644 --- a/compute_kernel_writer/src/cl/helpers/ICLMemoryOpHelper.h +++ b/compute_kernel_writer/src/cl/helpers/ICLMemoryOpHelper.h @@ -26,6 +26,7 @@ #define CKW_SRC_CL_HELPERS_ICLMEMORYOPHELPER_H #include "ckw/TensorSampler.h" + #include "src/Tensor3dMapper.h" #include @@ -98,16 +99,16 @@ public: virtual void finalize() = 0; protected: - CLKernelWriter *_writer{ nullptr }; - ITensor *_tensor{ nullptr }; - TensorSampler 
-    MemoryOperation _op;
-    std::unique_ptr<Tensor3dMapper> _mapper{ nullptr };
-    const CLTile *_dst{ nullptr };
-    int32_t _ls_width_full{ 0 };
-    std::string _coord_x{};
-    std::string _coord_z{};
-    std::string _coord_b{};
+    CLKernelWriter                  *_writer{nullptr};
+    ITensor                         *_tensor{nullptr};
+    TensorSampler                   *_sampler{nullptr};
+    MemoryOperation                  _op;
+    std::unique_ptr<Tensor3dMapper>  _mapper{nullptr};
+    const CLTile                    *_dst{nullptr};
+    int32_t                          _ls_width_full{0};
+    std::string                      _coord_x{};
+    std::string                      _coord_z{};
+    std::string                      _coord_b{};
 };
 
 } // namespace ckw
diff --git a/compute_kernel_writer/src/types/ConstantData.cpp b/compute_kernel_writer/src/types/ConstantData.cpp
index d2155cf55a..67b1103860 100644
--- a/compute_kernel_writer/src/types/ConstantData.cpp
+++ b/compute_kernel_writer/src/types/ConstantData.cpp
@@ -30,52 +30,51 @@
 namespace ckw
 {
 namespace
 {
-    template <typename T>
-    inline typename std::enable_if<std::is_floating_point<T>::value, std::string>::type to_str(T value)
-    {
-        std::stringstream ss;
-        ss << std::scientific << std::setprecision(std::numeric_limits<T>::max_digits10) << value;
-        return ss.str();
-    }
+template <typename T>
+inline typename std::enable_if<std::is_floating_point<T>::value, std::string>::type to_str(T value)
+{
+    std::stringstream ss;
+    ss << std::scientific << std::setprecision(std::numeric_limits<T>::max_digits10) << value;
+    return ss.str();
+}
 
-    template <typename T>
-    inline typename std::enable_if<std::is_integral<T>::value && !std::is_same<T, bool>::value, std::string>::type to_str(T value)
-    {
-        return std::to_string(value);
-    }
+template <typename T>
+inline typename std::enable_if<std::is_integral<T>::value && !std::is_same<T, bool>::value, std::string>::type
+to_str(T value)
+{
+    return std::to_string(value);
+}
 
-    template <typename T>
-    inline typename std::enable_if<std::is_same<T, bool>::value, std::string>::type to_str(T value)
-    {
-        return std::to_string((int) value);
-    }
-}
+template <typename T>
+inline typename std::enable_if<std::is_same<T, bool>::value, std::string>::type to_str(T value)
+{
+    return std::to_string((int)value);
+}
+} // namespace
 
-template<typename T>
+template <typename T>
 ConstantData::ConstantData(std::initializer_list<std::initializer_list<T>> values, DataType data_type)
     : _data_type(data_type)
 {
     CKW_ASSERT(validate<T>(data_type));
     CKW_ASSERT(values.size() > 0);
 
-    for(auto value_arr: values)
+    for (auto value_arr : values)
     {
         // Each row must have the same number of elements
         CKW_ASSERT(value_arr.size() == (*values.begin()).size());
 
         StringVector vec;
-        std::transform(value_arr.begin(), value_arr.end(),
-                       std::back_inserter(vec),
-                       [](T val) { return to_str(val); });
+        std::transform(value_arr.begin(), value_arr.end(), std::back_inserter(vec), [](T val) { return to_str(val); });
 
         _values.push_back(std::move(vec));
     }
 }
 
-template<typename T>
+template <typename T>
 bool ConstantData::validate(DataType data_type)
 {
-    switch(data_type)
+    switch (data_type)
     {
         case DataType::Fp32:
         case DataType::Fp16:
@@ -107,7 +106,7 @@ template bool ConstantData::validate(DataType);
 template bool ConstantData::validate(DataType);
 template bool ConstantData::validate(DataType);
 
-const std::vector<std::vector<std::string>>& ConstantData::values() const
+const std::vector<std::vector<std::string>> &ConstantData::values() const
 {
     return _values;
 }
-- 
cgit v1.2.1
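
Editor's note: the standalone snippet below is not part of the patch above. It is a minimal sketch, with purely illustrative names not taken from the repository, of the main conventions the revised clang-format configuration enforces throughout these hunks: a space after control-flow keywords (`if (`, `for (`, `switch (`, `while (false)`), brace initializers without inner padding (`{nullptr}`, `{0}`), and long signatures broken to one parameter per line.

    #include <cstdint>
    #include <string>

    // Illustrative only: loosely mirrors the string-building style of the
    // buffer memory-op helper while demonstrating the formatting rules.
    class FormatExample
    {
    public:
        // Once a signature exceeds the column limit, parameters go one per line,
        // aligned on the opening parenthesis.
        std::string to_statement(const std::string &data,
                                 const std::string &address,
                                 int32_t vector_width,
                                 bool is_store) const
        {
            // Space after `if`; no space after the function name.
            if (vector_width != 1)
            {
                const std::string n = std::to_string(vector_width);
                return is_store ? "vstore" + n + "(" + data + ", 0, " + address + ")"
                                : data + " = vload" + n + "(0, " + address + ")";
            }
            // Scalar fallback (hypothetical here; the real helper handles this case
            // in code outside the quoted hunks).
            return is_store ? "*(" + address + ") = " + data : data + " = *(" + address + ")";
        }

    private:
        // Brace initializers are written without inner spaces.
        int32_t _ls_width_full{0};
        bool _is_constant{false};
    };

Viewed next to the old-style lines removed above (`if(width > 1)`, `{ nullptr }`), this shows at a glance what the bulk of the mechanical churn in this commit consists of.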