From afd38f0c617d6f89b2b4532c6c44f116617e2b6f Mon Sep 17 00:00:00 2001
From: Felix Thomasmathibalan
Date: Wed, 27 Sep 2023 17:46:17 +0100
Subject: Apply clang-format on repository

Code is formatted as per a revised clang-format configuration file
(not part of this delivery). Version 14.0.6 is used.

Exclusion List:
- files with .cl extension
- files that are not strictly C/C++ (e.g. Android.bp, Sconscript ...)

And the following directories:
- compute_kernel_writer/validation/
- tests/
- include/
- src/core/NEON/kernels/convolution/
- src/core/NEON/kernels/arm_gemm/
- src/core/NEON/kernels/arm_conv/
- data/

There will be a follow-up for formatting of .cl files and the files
under tests/ and compute_kernel_writer/validation/.

Signed-off-by: Felix Thomasmathibalan
Change-Id: Ib7eb1fcf4e7537b9feaefcfc15098a804a3fde0a
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10391
Benchmark: Arm Jenkins
Tested-by: Arm Jenkins
Reviewed-by: Gunes Bayir
---
 .../prototype/examples/add_exp_store.cpp | 39 +-
 .../examples/common/ExampleComponentArgument.cpp | 8 +-
 .../examples/common/ExampleComponentArgument.h | 4 +-
 .../examples/common/ExampleKernelWriter.cpp | 13 +-
 .../examples/common/ExampleScopedKernelWriter.cpp | 1 +
 .../prototype/examples/writer_helper.cpp | 31 +-
 .../prototype/include/ckw/Error.h | 7 +-
 .../prototype/include/ckw/KernelArgument.h | 3 +-
 .../prototype/include/ckw/KernelWriter.h | 32 +-
 .../prototype/include/ckw/KernelWriterHelper.h | 122 ++--
 .../prototype/include/ckw/OperandBase.h | 1 +
 .../prototype/include/ckw/ScalarValue.h | 8 +-
 .../prototype/include/ckw/TensorInfo.h | 8 +-
 .../prototype/include/ckw/TensorOperand.h | 26 +-
 .../prototype/include/ckw/TensorTileSampler.h | 56 +-
 .../prototype/include/ckw/TileInfo.h | 2 +-
 .../prototype/include/ckw/types/Functions.h | 20 +-
 .../prototype/include/ckw/types/Operators.h | 4 +-
 .../include/ckw/types/TensorSamplerTypes.h | 40 +-
 compute_kernel_writer/prototype/src/Kernel.cpp | 19 +-
 .../prototype/src/KernelArgument.cpp | 4 +-
 .../prototype/src/KernelWriter.cpp | 82 +--
 .../prototype/src/OperandBase.cpp | 3 +-
 compute_kernel_writer/prototype/src/Prototype.h | 690 +++++++++++----------
 .../prototype/src/TensorOperand.cpp | 14 +-
 .../prototype/src/TensorTileSampler.cpp | 60 +-
 compute_kernel_writer/prototype/src/TileInfo.cpp | 9 +-
 .../prototype/src/TileOperand.cpp | 28 +-
 28 files changed, 739 insertions(+), 595 deletions(-)
(limited to 'compute_kernel_writer/prototype')

diff --git a/compute_kernel_writer/prototype/examples/add_exp_store.cpp b/compute_kernel_writer/prototype/examples/add_exp_store.cpp
index 6a9884543c..2b640ca01b 100644
--- a/compute_kernel_writer/prototype/examples/add_exp_store.cpp
+++ b/compute_kernel_writer/prototype/examples/add_exp_store.cpp
@@ -32,7 +32,6 @@
 #include "common/ExampleComponentArgument.h"
 #include "common/ExampleKernelWriter.h"
 #include "common/ExampleScopedKernelWriter.h"
-
 #include <iostream>
 #include <vector>
@@ -78,14 +77,14 @@ void op_binary_elementwise(ExampleScopedKernelWriter writer, std::vector
-    if(!lhs->has_tile() && !rhs->has_tile())
+    if (!lhs->has_tile() && !rhs->has_tile())
     {
         const auto sampler = create_simple_sampler(writer);
         writer->op_load_once(lhs, sampler);
         writer->op_load_once(rhs, sampler);
     }
-    else if(lhs->has_tile())
+    else if (lhs->has_tile())
     {
         const auto &sampler = lhs->tile_sampler();
         writer->op_load_once(rhs, sampler);
@@ -101,7 +100,7 @@ void op_binary_elementwise(ExampleScopedKernelWriter writer, std::vector
tile_sampler();
     // Prepare the output tile.
- if(!dst->has_tile()) + if (!dst->has_tile()) { auto &tile = writer->declare_tile("dst_tile", lhs_tile.tile_info()); dst->init_virtual_tensor(tile, sampler); @@ -119,7 +118,7 @@ void op_exp(ExampleScopedKernelWriter writer, std::vectorhas_tile()) + if (!src->has_tile()) { const auto sampler = create_simple_sampler(writer); writer->op_load_once(src, sampler); @@ -129,7 +128,7 @@ void op_exp(ExampleScopedKernelWriter writer, std::vectortile_sampler(); // Prepare the output tile. - if(!dst->has_tile()) + if (!dst->has_tile()) { auto &tile = writer->declare_tile("dst_tile", src_tile.tile_info()); dst->init_virtual_tensor(tile, sampler); @@ -160,34 +159,38 @@ int main() ExampleScopedKernelWriter writer(&root_writer); - const TensorInfo src0_info(DataType::Fp32, TensorShape({ 3, 10, 20, 1, 1 }), TensorDataLayout::Nhwc, 0); - const TensorInfo src1_info(DataType::Fp32, TensorShape({ 3, 10, 20, 1, 1 }), TensorDataLayout::Nhwc, 1); - const TensorInfo dst_info(DataType::Fp32, TensorShape({ 3, 10, 20, 1, 1 }), TensorDataLayout::Nhwc, 2); + const TensorInfo src0_info(DataType::Fp32, TensorShape({3, 10, 20, 1, 1}), TensorDataLayout::Nhwc, 0); + const TensorInfo src1_info(DataType::Fp32, TensorShape({3, 10, 20, 1, 1}), TensorDataLayout::Nhwc, 1); + const TensorInfo dst_info(DataType::Fp32, TensorShape({3, 10, 20, 1, 1}), TensorDataLayout::Nhwc, 2); - ExampleComponentArgument src0(writer->declare_tensor_argument("src0", src0_info, TensorStorageType::BufferUint8Ptr)); - ExampleComponentArgument src1(writer->declare_tensor_argument("src1", src1_info, TensorStorageType::BufferUint8Ptr)); + ExampleComponentArgument src0( + writer->declare_tensor_argument("src0", src0_info, TensorStorageType::BufferUint8Ptr)); + ExampleComponentArgument src1( + writer->declare_tensor_argument("src1", src1_info, TensorStorageType::BufferUint8Ptr)); ExampleComponentArgument dst(writer->declare_tensor_argument("dst", dst_info, TensorStorageType::BufferUint8Ptr)); ExampleComponentArgument ans; - op_binary_elementwise(writer, { &src0, &src1, &ans }); - op_exp(writer, { &ans, &ans }); - op_store(writer, { &ans, &dst }); + op_binary_elementwise(writer, {&src0, &src1, &ans}); + op_exp(writer, {&ans, &ans}); + op_store(writer, {&ans, &dst}); const auto arguments = kernel.arguments(); std::cout << "\n====================\nArguments:\n====================\n"; - for(auto &arg : arguments) + for (auto &arg : arguments) { - switch(arg.type()) + switch (arg.type()) { case ckw::KernelArgument::Type::TensorStorage: - std::cout << "* Tensor storage: ID = " << arg.id() << ", type = " << std::hex << "0x" << static_cast(arg.tensor_storage_type()) << std::dec << "\n"; + std::cout << "* Tensor storage: ID = " << arg.id() << ", type = " << std::hex << "0x" + << static_cast(arg.tensor_storage_type()) << std::dec << "\n"; break; case ckw::KernelArgument::Type::TensorComponent: - std::cout << "* Tensor component: ID = " << arg.id() << ", type = " << std::hex << "0x" << static_cast(arg.tensor_component_type()) << std::dec << "\n"; + std::cout << "* Tensor component: ID = " << arg.id() << ", type = " << std::hex << "0x" + << static_cast(arg.tensor_component_type()) << std::dec << "\n"; break; default: diff --git a/compute_kernel_writer/prototype/examples/common/ExampleComponentArgument.cpp b/compute_kernel_writer/prototype/examples/common/ExampleComponentArgument.cpp index 5a2ec526cc..55223dae0e 100644 --- a/compute_kernel_writer/prototype/examples/common/ExampleComponentArgument.cpp +++ 
b/compute_kernel_writer/prototype/examples/common/ExampleComponentArgument.cpp @@ -23,19 +23,19 @@ */ #include "ExampleComponentArgument.h" + #include "ckw/Error.h" ExampleComponentArgument::ExampleComponentArgument() { } -ExampleComponentArgument::ExampleComponentArgument(ckw::TensorOperand &tensor) - : _tensor(&tensor) +ExampleComponentArgument::ExampleComponentArgument(ckw::TensorOperand &tensor) : _tensor(&tensor) { } -ExampleComponentArgument & -ExampleComponentArgument::init_virtual_tensor(ckw::TileOperand &tile, const ckw::TensorTileSampler &tile_sampler) +ExampleComponentArgument &ExampleComponentArgument::init_virtual_tensor(ckw::TileOperand &tile, + const ckw::TensorTileSampler &tile_sampler) { CKW_ASSERT(_tile == nullptr); diff --git a/compute_kernel_writer/prototype/examples/common/ExampleComponentArgument.h b/compute_kernel_writer/prototype/examples/common/ExampleComponentArgument.h index 9fdc50ba08..0e029b1157 100644 --- a/compute_kernel_writer/prototype/examples/common/ExampleComponentArgument.h +++ b/compute_kernel_writer/prototype/examples/common/ExampleComponentArgument.h @@ -104,8 +104,8 @@ public: const ckw::TensorTileSampler &tile_sampler() const; private: - ckw::TensorOperand *_tensor{ nullptr }; - ckw::TileOperand *_tile{ nullptr }; + ckw::TensorOperand *_tensor{nullptr}; + ckw::TileOperand *_tile{nullptr}; ckw::TensorTileSampler _tile_sampler{}; }; diff --git a/compute_kernel_writer/prototype/examples/common/ExampleKernelWriter.cpp b/compute_kernel_writer/prototype/examples/common/ExampleKernelWriter.cpp index 6b9f244735..1734ce8823 100644 --- a/compute_kernel_writer/prototype/examples/common/ExampleKernelWriter.cpp +++ b/compute_kernel_writer/prototype/examples/common/ExampleKernelWriter.cpp @@ -23,26 +23,27 @@ */ #include "ExampleKernelWriter.h" -#include "ExampleComponentArgument.h" + #include "ckw/Error.h" #include "ckw/TileInfo.h" -ExampleKernelWriter::ExampleKernelWriter(ckw::Kernel &kernel) - : KernelWriter(kernel) +#include "ExampleComponentArgument.h" + +ExampleKernelWriter::ExampleKernelWriter(ckw::Kernel &kernel) : KernelWriter(kernel) { } void ExampleKernelWriter::op_load_once(ExampleComponentArgument *tensor_or_tile, const ckw::TensorTileSampler &sampler) { - if(!tensor_or_tile->has_tile()) + if (!tensor_or_tile->has_tile()) { CKW_ASSERT(tensor_or_tile->has_tensor()); auto &tensor = tensor_or_tile->tensor(); const auto tile_name = tensor.name() + "_tile"; - auto &tile = declare_tile(tile_name.c_str(), - ckw::TileInfo(tensor.data_type(), sampler.height(), sampler.width())); + auto &tile = + declare_tile(tile_name.c_str(), ckw::TileInfo(tensor.data_type(), sampler.height(), sampler.width())); op_load(tile, tensor, sampler); diff --git a/compute_kernel_writer/prototype/examples/common/ExampleScopedKernelWriter.cpp b/compute_kernel_writer/prototype/examples/common/ExampleScopedKernelWriter.cpp index 7c44fa8749..784d5ffb96 100644 --- a/compute_kernel_writer/prototype/examples/common/ExampleScopedKernelWriter.cpp +++ b/compute_kernel_writer/prototype/examples/common/ExampleScopedKernelWriter.cpp @@ -23,6 +23,7 @@ */ #include "ExampleScopedKernelWriter.h" + #include "ExampleKernelWriter.h" ExampleScopedKernelWriter::ExampleScopedKernelWriter(ExampleKernelWriter *writer) diff --git a/compute_kernel_writer/prototype/examples/writer_helper.cpp b/compute_kernel_writer/prototype/examples/writer_helper.cpp index ccef92dcdf..8623afbf50 100644 --- a/compute_kernel_writer/prototype/examples/writer_helper.cpp +++ 
b/compute_kernel_writer/prototype/examples/writer_helper.cpp @@ -23,14 +23,14 @@ */ #include "ckw/KernelWriter.h" -#include "../include/ckw/KernelWriterHelper.h" #include "ckw/TensorTileSampler.h" +#include "../include/ckw/KernelWriterHelper.h" #include using namespace ckw; -TensorTileSampler create_simple_sampler(KernelWriter& writer) +TensorTileSampler create_simple_sampler(KernelWriter &writer) { TensorTileSampler sampler; @@ -65,11 +65,11 @@ TensorTileSampler create_simple_sampler(KernelWriter& writer) int main() { - Kernel kernel("test", GpuTargetLanguage::OpenCL); + Kernel kernel("test", GpuTargetLanguage::OpenCL); KernelWriterHelper writer(kernel); - const TensorInfo src_info(DataType::Fp32, TensorShape({ 1, 1, 1, 1, 1 }), TensorDataLayout::Nhwc, 0); - const TensorInfo dst_info(DataType::Fp32, TensorShape({ 1, 1, 1, 1, 1 }), TensorDataLayout::Nhwc, 1); + const TensorInfo src_info(DataType::Fp32, TensorShape({1, 1, 1, 1, 1}), TensorDataLayout::Nhwc, 0); + const TensorInfo dst_info(DataType::Fp32, TensorShape({1, 1, 1, 1, 1}), TensorDataLayout::Nhwc, 1); auto &src_tensor = writer.declare_tensor_argument("src", src_info); auto &dst_tensor = writer.declare_tensor_argument("dst", dst_info); @@ -77,27 +77,24 @@ int main() const auto sampler = create_simple_sampler(writer); auto &src = writer.declare_tile("src_tile", TileInfo(src_tensor.data_type(), sampler.height(), sampler.width())); - auto &other = writer.declare_tile("other_tile", TileInfo(src_tensor.data_type(), sampler.height(), sampler.width())); + auto &other = + writer.declare_tile("other_tile", TileInfo(src_tensor.data_type(), sampler.height(), sampler.width())); auto &dst = writer.declare_tile("dst_tile", TileInfo(src_tensor.data_type(), sampler.height(), sampler.width())); writer.op_load(src, src_tensor, sampler); writer.op_load(other, src_tensor, sampler); writer.op_load(dst, dst_tensor, sampler); - auto test = dst ^ src ^ other; + auto test = dst ^ src ^ other; auto other_test = logical_and(dst, src, other); writer.op_assign(dst, logical_and(dst, src, other)); writer.op_assign(dst, test); writer.op_assign(dst, other_test); writer.op_assign(dst, operator^(operator^(dst, src), other)); - writer.op_if(exp(src) == dst, [&]{ - writer.op_binary_expression(dst, src, BinaryOp::Add, src); - }).op_else_if(exp(src) > dst, [&]{ - writer.op_binary_expression(dst, src, BinaryOp::Add, src); - }).op_else([&] { - writer.op_assign(dst, src); - }); + writer.op_if(exp(src) == dst, [&] { writer.op_binary_expression(dst, src, BinaryOp::Add, src); }) + .op_else_if(exp(src) > dst, [&] { writer.op_binary_expression(dst, src, BinaryOp::Add, src); }) + .op_else([&] { writer.op_assign(dst, src); }); writer.op_assign(dst, src + src * src); writer.op_assign(dst, src * max(src, dst) + src); @@ -106,13 +103,11 @@ int main() writer.op_assign(dst, src ^ dst); writer.op_assign(dst, ~src); - writer.op_for_loop(dst < src, dst += src, [&]{ - writer.op_assign(dst, src + dst); - }); + writer.op_for_loop(dst < src, dst += src, [&] { writer.op_assign(dst, src + dst); }); writer.op_assign(dst += src); writer.op_assign(dst += exp(src)); std::cout << "======== KERNEL ========" << std::endl; std::cout << writer.generate_code() << std::endl; -} \ No newline at end of file +} diff --git a/compute_kernel_writer/prototype/include/ckw/Error.h b/compute_kernel_writer/prototype/include/ckw/Error.h index b18944eac5..aab713c817 100644 --- a/compute_kernel_writer/prototype/include/ckw/Error.h +++ b/compute_kernel_writer/prototype/include/ckw/Error.h @@ -39,11 +39,11 @@ namespace 
ckw #define CKW_ASSERT_MSG(cond, msg) \ do \ { \ - if(!(cond)) \ + if (!(cond)) \ { \ throw ::std::runtime_error(msg); \ } \ - } while(false) + } while (false) /** If the condition is not met, throw an std::runtime_error. * @@ -56,8 +56,7 @@ namespace ckw * @param[in] precond The condition if is met requires the consequence must also be met. * @param[in] cond The condition that is expected to be true if the precondition is true. */ -#define CKW_ASSERT_IF(precond, cond) \ - CKW_ASSERT_MSG(!(precond) || ((precond) && (cond)), #precond " |-> " #cond) +#define CKW_ASSERT_IF(precond, cond) CKW_ASSERT_MSG(!(precond) || ((precond) && (cond)), #precond " |-> " #cond) /** Mark the variables as unused. * diff --git a/compute_kernel_writer/prototype/include/ckw/KernelArgument.h b/compute_kernel_writer/prototype/include/ckw/KernelArgument.h index af8bcde634..3384a20aef 100644 --- a/compute_kernel_writer/prototype/include/ckw/KernelArgument.h +++ b/compute_kernel_writer/prototype/include/ckw/KernelArgument.h @@ -26,6 +26,7 @@ #define CKW_PROTOTYPE_INCLUDE_CKW_KERNELARGUMENT_H #include "ckw/TensorInfo.h" + #include namespace ckw @@ -98,7 +99,7 @@ private: TensorComponentType tensor_component_type; }; - SubId _sub_id{ 0 }; + SubId _sub_id{0}; }; } // namespace ckw diff --git a/compute_kernel_writer/prototype/include/ckw/KernelWriter.h b/compute_kernel_writer/prototype/include/ckw/KernelWriter.h index fdb5fedc59..f9e0066f91 100644 --- a/compute_kernel_writer/prototype/include/ckw/KernelWriter.h +++ b/compute_kernel_writer/prototype/include/ckw/KernelWriter.h @@ -94,7 +94,9 @@ public: * * @return The @ref TensorOperand object. */ - TensorOperand &declare_tensor_argument(const std::string &name, const TensorInfo &info, TensorStorageType storage_type = TensorStorageType::BufferUint8Ptr); + TensorOperand &declare_tensor_argument(const std::string &name, + const TensorInfo &info, + TensorStorageType storage_type = TensorStorageType::BufferUint8Ptr); /** Declare a compile-time constant scalar argument. * @@ -134,7 +136,10 @@ public: * @param[in] sampler The tensor sampling information. * @param[in] dilation_y Dilation in the Y dimension. */ - void op_load(TileOperand &tile, const TensorOperand &tensor, const TensorTileSampler &sampler, const TileOperand &dilation_y = TileOperand("dil_y", 1)); + void op_load(TileOperand &tile, + const TensorOperand &tensor, + const TensorTileSampler &sampler, + const TileOperand &dilation_y = TileOperand("dil_y", 1)); /** Load the data from the tensor memory to the tile using the indirect buffer approach and respective of the sampling information. * @@ -221,7 +226,10 @@ public: * @param[in] first The first argument tile. * @param[in] second The second argument tile. */ - void op_binary_elementwise_function(const TileOperand &dst, BinaryFunction func, const TileOperand &first, const TileOperand &second); + void op_binary_elementwise_function(const TileOperand &dst, + BinaryFunction func, + const TileOperand &first, + const TileOperand &second); /** Write function applied to scalar value: ` = (, , );`. * @@ -231,7 +239,11 @@ public: * @param[in] second The second argument tile. * @param[in] third The third argument tile. 
*/ - void op_ternary_elementwise_function(const TileOperand &dst, TernaryFunction func, const TileOperand &first, const TileOperand &second, const TileOperand &third); + void op_ternary_elementwise_function(const TileOperand &dst, + TernaryFunction func, + const TileOperand &first, + const TileOperand &second, + const TileOperand &third); /** Write if-statement: `if( ) { }`. * @@ -267,7 +279,13 @@ public: * @param[in, out] update_value The value which is updated at every iteration. * @param[in] body The body of the for-loop. */ - void op_for_loop(const TileOperand &var_name, BinaryOp cond_op, const TileOperand &cond_value_name, const TileOperand &update_var_name, AssignmentOp update_op, const TileOperand &update_value_name, const std::function &body); + void op_for_loop(const TileOperand &var_name, + BinaryOp cond_op, + const TileOperand &cond_value_name, + const TileOperand &update_var_name, + AssignmentOp update_op, + const TileOperand &update_value_name, + const std::function &body); /** Write the return statement: `return;` */ @@ -311,8 +329,8 @@ private: ::std::unique_ptr _impl_attr; ::std::unique_ptr _impl; - int32_t _id_space{ 0 }; - int32_t _max_id_space{ 0 }; + int32_t _id_space{0}; + int32_t _max_id_space{0}; }; } // namespace ckw diff --git a/compute_kernel_writer/prototype/include/ckw/KernelWriterHelper.h b/compute_kernel_writer/prototype/include/ckw/KernelWriterHelper.h index a8be859680..3ba079bbc2 100644 --- a/compute_kernel_writer/prototype/include/ckw/KernelWriterHelper.h +++ b/compute_kernel_writer/prototype/include/ckw/KernelWriterHelper.h @@ -32,8 +32,6 @@ #include #include -#include - /* * By including this header file you will be able to supplement the default * Compute Kernel Writer API with additional syntax to help ease the use of CKW. @@ -154,7 +152,9 @@ struct can_be_assigned : ::std::true_type * @tparam TLeft The type of the destination of the assignment. * @tparam TRight The type of the source assigned to the destination. */ -template ::value && can_be_assigned::value>> +template ::value && can_be_assigned::value>> struct Assignment { TLeft lhs; @@ -173,7 +173,7 @@ struct Assignment template inline Assignment operator+=(TLeft &&lhs, TRight &&rhs) { - return Assignment{ std::forward(lhs), std::forward(rhs), AssignmentOp::Increment }; + return Assignment{std::forward(lhs), std::forward(rhs), AssignmentOp::Increment}; } /** Represents the expression: `\p lhs -= \p rhs`. @@ -187,7 +187,7 @@ inline Assignment operator+=(TLeft &&lhs, TRight &&rhs) template inline Assignment operator-=(TLeft &&lhs, TRight &&rhs) { - return Assignment{ std::forward(lhs), std::forward(rhs), AssignmentOp::Decrement }; + return Assignment{std::forward(lhs), std::forward(rhs), AssignmentOp::Decrement}; } // ================================================== @@ -221,7 +221,7 @@ struct can_be_operand> : ::std::true_type template inline UnaryExpression operator!(TSrc &&src) { - return UnaryExpression{ std::forward(src), UnaryOp::LogicalNot }; + return UnaryExpression{std::forward(src), UnaryOp::LogicalNot}; } /** Represents the expression: `~\p src`. @@ -233,7 +233,7 @@ inline UnaryExpression operator!(TSrc &&src) template inline UnaryExpression operator~(TSrc &&src) { - return UnaryExpression{ std::forward(src), UnaryOp::BitwiseNot }; + return UnaryExpression{std::forward(src), UnaryOp::BitwiseNot}; } // ================================================== @@ -247,7 +247,9 @@ inline UnaryExpression operator~(TSrc &&src) * @tparam TLeft The type of the left argument of the expression. 
* @tparam TRight The type of the right argument of the expression. */ -template ::value && can_be_operand::value>> +template ::value && can_be_operand::value>> struct BinaryExpression { TLeft lhs; @@ -271,7 +273,7 @@ struct can_be_operand> : ::std::true_type template inline BinaryExpression operator+(TLeft &&lhs, TRight &&rhs) { - return BinaryExpression{ std::forward(lhs), std::forward(rhs), BinaryOp::Add }; + return BinaryExpression{std::forward(lhs), std::forward(rhs), BinaryOp::Add}; } /** Represents the expression: `\p lhs - \p rhs`. @@ -285,7 +287,7 @@ inline BinaryExpression operator+(TLeft &&lhs, TRight &&rhs) template inline BinaryExpression operator-(TLeft &&lhs, TRight &&rhs) { - return BinaryExpression{ std::forward(lhs), std::forward(rhs), BinaryOp::Sub }; + return BinaryExpression{std::forward(lhs), std::forward(rhs), BinaryOp::Sub}; } /** Represents the expression: `\p lhs * \p rhs`. @@ -299,7 +301,7 @@ inline BinaryExpression operator-(TLeft &&lhs, TRight &&rhs) template inline BinaryExpression operator*(TLeft &&lhs, TRight &&rhs) { - return BinaryExpression{ std::forward(lhs), std::forward(rhs), BinaryOp::Mul }; + return BinaryExpression{std::forward(lhs), std::forward(rhs), BinaryOp::Mul}; } /** Represents the expression: `\p lhs / \p rhs`. @@ -313,7 +315,7 @@ inline BinaryExpression operator*(TLeft &&lhs, TRight &&rhs) template inline BinaryExpression operator/(TLeft &&lhs, TRight &&rhs) { - return BinaryExpression{ std::forward(lhs), std::forward(rhs), BinaryOp::Div }; + return BinaryExpression{std::forward(lhs), std::forward(rhs), BinaryOp::Div}; } /** Represents the expression: `\p lhs % \p rhs`. @@ -327,7 +329,7 @@ inline BinaryExpression operator/(TLeft &&lhs, TRight &&rhs) template inline BinaryExpression operator%(TLeft &&lhs, TRight &&rhs) { - return BinaryExpression{ std::forward(lhs), std::forward(rhs), BinaryOp::Mod }; + return BinaryExpression{std::forward(lhs), std::forward(rhs), BinaryOp::Mod}; } /** Represents the expression: `\p lhs == \p rhs`. @@ -341,7 +343,7 @@ inline BinaryExpression operator%(TLeft &&lhs, TRight &&rhs) template inline BinaryExpression operator==(TLeft &&lhs, TRight &&rhs) { - return BinaryExpression{ std::forward(lhs), std::forward(rhs), BinaryOp::Equal }; + return BinaryExpression{std::forward(lhs), std::forward(rhs), BinaryOp::Equal}; } /** Represents the expression: `\p lhs < \p rhs`. @@ -355,7 +357,7 @@ inline BinaryExpression operator==(TLeft &&lhs, TRight &&rhs) template inline BinaryExpression operator<(TLeft &&lhs, TRight &&rhs) { - return BinaryExpression{ std::forward(lhs), std::forward(rhs), BinaryOp::Less }; + return BinaryExpression{std::forward(lhs), std::forward(rhs), BinaryOp::Less}; } /** Represents the expression: `\p lhs <= \p rhs`. @@ -369,7 +371,7 @@ inline BinaryExpression operator<(TLeft &&lhs, TRight &&rhs) template inline BinaryExpression operator<=(TLeft &&lhs, TRight &&rhs) { - return BinaryExpression{ std::forward(lhs), std::forward(rhs), BinaryOp::LessEqual }; + return BinaryExpression{std::forward(lhs), std::forward(rhs), BinaryOp::LessEqual}; } /** Represents the expression: `\p lhs > \p rhs`. @@ -383,7 +385,7 @@ inline BinaryExpression operator<=(TLeft &&lhs, TRight &&rhs) template inline BinaryExpression operator>(TLeft &&lhs, TRight &&rhs) { - return BinaryExpression{ std::forward(lhs), std::forward(rhs), BinaryOp::Greater }; + return BinaryExpression{std::forward(lhs), std::forward(rhs), BinaryOp::Greater}; } /** Represents the expression: `\p lhs >= \p rhs`. 
@@ -397,7 +399,7 @@ inline BinaryExpression operator>(TLeft &&lhs, TRight &&rhs) template inline BinaryExpression operator>=(TLeft &&lhs, TRight &&rhs) { - return BinaryExpression{ std::forward(lhs), std::forward(rhs), BinaryOp::GreaterEqual }; + return BinaryExpression{std::forward(lhs), std::forward(rhs), BinaryOp::GreaterEqual}; } /** Represents the expression: `\p lhs ^ \p rhs`. @@ -411,7 +413,7 @@ inline BinaryExpression operator>=(TLeft &&lhs, TRight &&rhs) template inline BinaryExpression operator^(TLeft &&lhs, TRight &&rhs) { - return BinaryExpression{ std::forward(lhs), std::forward(rhs), BinaryOp::BitwiseXOR }; + return BinaryExpression{std::forward(lhs), std::forward(rhs), BinaryOp::BitwiseXOR}; } /** Represents the expression: `\p lhs && \p rhs`. @@ -425,7 +427,7 @@ inline BinaryExpression operator^(TLeft &&lhs, TRight &&rhs) template inline BinaryExpression logical_and(TLeft &&lhs, TRight &&rhs) { - return BinaryExpression{ std::forward(lhs), std::forward(rhs), BinaryOp::LogicalAnd }; + return BinaryExpression{std::forward(lhs), std::forward(rhs), BinaryOp::LogicalAnd}; } /** Represents the expression: `\p lhs && \p rhs`. @@ -440,7 +442,7 @@ template inline BinaryExpression, TOps...> logical_and(TLeft &&lhs, TRight &&rhs, TOps &&...ops) { return logical_and( - BinaryExpression{ std::forward(lhs), std::forward(rhs), BinaryOp::LogicalAnd }, + BinaryExpression{std::forward(lhs), std::forward(rhs), BinaryOp::LogicalAnd}, std::forward(ops)...); } @@ -455,7 +457,7 @@ inline BinaryExpression, TOps...> logical_and(TL template inline BinaryExpression logical_or(TLeft &&lhs, TRight &&rhs) { - return BinaryExpression{ std::forward(lhs), std::forward(rhs), BinaryOp::LogicalOr }; + return BinaryExpression{std::forward(lhs), std::forward(rhs), BinaryOp::LogicalOr}; } /** Represents the expression: `\p lhs || \p rhs`. @@ -470,7 +472,7 @@ template inline BinaryExpression, TOps...> logical_or(TLeft &&lhs, TRight &&rhs, TOps &&...ops) { return logical_or( - BinaryExpression{ std::forward(lhs), std::forward(rhs), BinaryOp::LogicalOr }, + BinaryExpression{std::forward(lhs), std::forward(rhs), BinaryOp::LogicalOr}, std::forward(ops)...); } @@ -505,7 +507,7 @@ struct can_be_operand> : ::std::true_type template UnaryElementwiseFunction exp(TSrc &&src) { - return UnaryElementwiseFunction{ std::forward(src), UnaryFunction::Exp }; + return UnaryElementwiseFunction{std::forward(src), UnaryFunction::Exp}; } /** Represents the expression: `tanh(\p src)`. @@ -517,7 +519,7 @@ UnaryElementwiseFunction exp(TSrc &&src) template UnaryElementwiseFunction tanh(TSrc &&src) { - return UnaryElementwiseFunction{ std::forward(src), UnaryFunction::Tanh }; + return UnaryElementwiseFunction{std::forward(src), UnaryFunction::Tanh}; } /** Represents the expression: `sqrt(\p src)`. @@ -529,7 +531,7 @@ UnaryElementwiseFunction tanh(TSrc &&src) template UnaryElementwiseFunction sqrt(TSrc &&src) { - return UnaryElementwiseFunction{ std::forward(src), UnaryFunction::Sqrt }; + return UnaryElementwiseFunction{std::forward(src), UnaryFunction::Sqrt}; } /** Represents the expression: `erf(\p src)`. @@ -541,7 +543,7 @@ UnaryElementwiseFunction sqrt(TSrc &&src) template UnaryElementwiseFunction erf(TSrc &&src) { - return UnaryElementwiseFunction{ std::forward(src), UnaryFunction::Erf }; + return UnaryElementwiseFunction{std::forward(src), UnaryFunction::Erf}; } /** Represents the expression: `fabs(\p src)`. 
@@ -553,7 +555,7 @@ UnaryElementwiseFunction erf(TSrc &&src) template UnaryElementwiseFunction fabs(TSrc &&src) { - return UnaryElementwiseFunction{ std::forward(src), UnaryFunction::Fabs }; + return UnaryElementwiseFunction{std::forward(src), UnaryFunction::Fabs}; } /** Represents the expression: `log(\p src)`. @@ -565,7 +567,7 @@ UnaryElementwiseFunction fabs(TSrc &&src) template UnaryElementwiseFunction log(TSrc &&src) { - return UnaryElementwiseFunction{ std::forward(src), UnaryFunction::Log }; + return UnaryElementwiseFunction{std::forward(src), UnaryFunction::Log}; } /** Represents the expression: `round(\p src)`. @@ -577,7 +579,7 @@ UnaryElementwiseFunction log(TSrc &&src) template UnaryElementwiseFunction round(TSrc &&src) { - return UnaryElementwiseFunction{ std::forward(src), UnaryFunction::Round }; + return UnaryElementwiseFunction{std::forward(src), UnaryFunction::Round}; } /** Represents the expression: `sizeof(\p src)`. @@ -589,7 +591,7 @@ UnaryElementwiseFunction round(TSrc &&src) template UnaryElementwiseFunction sizeOf(TSrc &&src) { - return UnaryElementwiseFunction{ std::forward(src), UnaryFunction::SizeOf }; + return UnaryElementwiseFunction{std::forward(src), UnaryFunction::SizeOf}; } // ================================================== @@ -603,7 +605,9 @@ UnaryElementwiseFunction sizeOf(TSrc &&src) * @tparam TFirst The type of the left argument of the function. * @tparam TSecond The type of the right argument of the function. */ -template ::value && can_be_operand::value>> +template ::value && can_be_operand::value>> struct BinaryElementwiseFunction { TFirst first; @@ -627,7 +631,8 @@ struct can_be_operand> : ::std::true_ template BinaryElementwiseFunction max(TFirst &&first, TSecond &&second) { - return BinaryElementwiseFunction{ std::forward(first), std::forward(second), BinaryFunction::Max }; + return BinaryElementwiseFunction{std::forward(first), std::forward(second), + BinaryFunction::Max}; } /** Represents the function call: `min(\p first, \p second)`. @@ -641,7 +646,8 @@ BinaryElementwiseFunction max(TFirst &&first, TSecond &&second) template BinaryElementwiseFunction min(TFirst &&first, TSecond &&second) { - return BinaryElementwiseFunction{ std::forward(first), std::forward(second), BinaryFunction::Min }; + return BinaryElementwiseFunction{std::forward(first), std::forward(second), + BinaryFunction::Min}; } // ================================================== @@ -656,7 +662,11 @@ BinaryElementwiseFunction min(TFirst &&first, TSecond &&second) * @tparam TSecond The type of the second argument to the function. * @tparam TThird The type of the third argument to the function. */ -template ::value && can_be_operand::value && can_be_operand::value>> +template ::value && can_be_operand::value && + can_be_operand::value>> struct TernaryElementwiseFunction { TFirst first; @@ -683,7 +693,9 @@ struct can_be_operand> : ::s template TernaryElementwiseFunction select(TFirst &&first, TSecond &&second, TThird &&third) { - return TernaryElementwiseFunction{ std::forward(first), std::forward(second), std::forward(third), TernaryFunction::Select }; + return TernaryElementwiseFunction{std::forward(first), + std::forward(second), + std::forward(third), TernaryFunction::Select}; } /** Helper class used to extend a KernelWriter with additional functionality @@ -715,7 +727,8 @@ public: * @param[in] cond The BinaryExpression representing the condition. * @param[in] body The body of the if-statement. 
*/ - KernelWriterHelper &op_if(const BinaryExpression &cond, const std::function &body) + KernelWriterHelper &op_if(const BinaryExpression &cond, + const std::function &body) { TWriter::op_if(cond.lhs, cond.opcode, cond.rhs, body); return *this; @@ -730,7 +743,8 @@ public: * @param[in] body The body of the if-statement. */ template - KernelWriterHelper &op_if(const BinaryExpression &cond, const std::function &body) + KernelWriterHelper &op_if(const BinaryExpression &cond, + const std::function &body) { auto &tmp1 = declare_temp_tile(cond.lhs.tile_info()); op_assign(tmp1, cond.rhs); @@ -747,7 +761,8 @@ public: * @param[in] body The body of the if-statement. */ template - KernelWriterHelper &op_if(const BinaryExpression &cond, const std::function &body) + KernelWriterHelper &op_if(const BinaryExpression &cond, + const std::function &body) { auto &tmp1 = declare_temp_tile(cond.rhs.tile_info()); op_assign(tmp1, cond.lhs); @@ -766,7 +781,8 @@ public: * @param[in] cond The BinaryExpression representing the condition. * @param[in] body The body of the else-if-statement. */ - KernelWriterHelper &op_else_if(const BinaryExpression &cond, const std::function &body) + KernelWriterHelper &op_else_if(const BinaryExpression &cond, + const std::function &body) { TWriter::op_else_if(cond.lhs, cond.opcode, cond.rhs, body); return *this; @@ -781,7 +797,8 @@ public: * @param[in] body The body of the else-if-statement. */ template - KernelWriterHelper &op_else_if(const BinaryExpression &cond, const std::function &body) + KernelWriterHelper &op_else_if(const BinaryExpression &cond, + const std::function &body) { auto &tmp1 = declare_temp_tile(cond.lhs.tile_info()); op_assign(tmp1, cond.rhs); @@ -798,7 +815,8 @@ public: * @param[in] body The body of the else-if-statement. */ template - KernelWriterHelper &op_else_if(const BinaryExpression &cond, const std::function &body) + KernelWriterHelper &op_else_if(const BinaryExpression &cond, + const std::function &body) { auto &tmp1 = declare_temp_tile(cond.rhs.tile_info()); op_assign(tmp1, cond.lhs); @@ -823,7 +841,9 @@ public: * @param[in] updater The Assignment representing the updater. * @param[in] body The body of the for-loop. */ - void op_for_loop(const BinaryExpression &cond, const Assignment &updater, const std::function &body) + void op_for_loop(const BinaryExpression &cond, + const Assignment &updater, + const std::function &body) { TWriter::op_for_loop(cond.lhs, cond.opcode, cond.rhs, updater.lhs, updater.opcode, updater.rhs, body); } @@ -1029,7 +1049,8 @@ public: * @param[in] dst The tile which is assigned to. * @param[in] exp The TernaryElementwiseFunction representing the expression to be evaluated and assigned. 
*/ - void op_assign(const TileOperand &dst, const TernaryElementwiseFunction &exp) + void op_assign(const TileOperand &dst, + const TernaryElementwiseFunction &exp) { TWriter::op_ternary_elementwise_function(dst, exp.opcode, exp.first, exp.second, exp.third); } @@ -1169,11 +1190,11 @@ public: */ void op_assign(const Assignment &exp) { - if(exp.opcode == AssignmentOp::Increment) + if (exp.opcode == AssignmentOp::Increment) { TWriter::op_binary_expression(exp.lhs, exp.lhs, BinaryOp::Add, exp.rhs); } - else if(exp.opcode == AssignmentOp::Decrement) + else if (exp.opcode == AssignmentOp::Decrement) { TWriter::op_binary_expression(exp.lhs, exp.lhs, BinaryOp::Sub, exp.rhs); } @@ -1192,7 +1213,7 @@ public: { auto &tmp1 = declare_temp_tile(exp.lhs.tile_info()); op_assign(tmp1, exp.rhs); - op_assign(Assignment{ exp.lhs, tmp1, exp.opcode }); + op_assign(Assignment{exp.lhs, tmp1, exp.opcode}); } private: @@ -1241,11 +1262,8 @@ private: template ::value>> TileInfo get_largest_size(const TileInfo &first, const TileInfo &second, const TOps &...ops) { - TileInfo largest = { - first.data_type(), - std::max(first.width(), second.width()), - std::max(first.height(), second.height()) - }; + TileInfo largest = {first.data_type(), std::max(first.width(), second.width()), + std::max(first.height(), second.height())}; return get_largest_size(largest, ops...); } diff --git a/compute_kernel_writer/prototype/include/ckw/OperandBase.h b/compute_kernel_writer/prototype/include/ckw/OperandBase.h index 06d9f82756..9842127339 100644 --- a/compute_kernel_writer/prototype/include/ckw/OperandBase.h +++ b/compute_kernel_writer/prototype/include/ckw/OperandBase.h @@ -26,6 +26,7 @@ #define CKW_PROTOTYPE_INCLUDE_CKW_OPERANDBASE_H #include "ckw/types/DataType.h" + #include namespace ckw diff --git a/compute_kernel_writer/prototype/include/ckw/ScalarValue.h b/compute_kernel_writer/prototype/include/ckw/ScalarValue.h index 16c3f6d441..2a9c42acc8 100644 --- a/compute_kernel_writer/prototype/include/ckw/ScalarValue.h +++ b/compute_kernel_writer/prototype/include/ckw/ScalarValue.h @@ -59,9 +59,9 @@ public: _size = sizeof(T); - if(::std::is_integral::value) + if (::std::is_integral::value) { - if(::std::is_signed::value) + if (::std::is_signed::value) { _type = Type::INT; _value.i64 = value; @@ -90,9 +90,9 @@ public: CKW_ASSERT(::std::is_integral::value || ::std::is_floating_point::value); CKW_ASSERT(sizeof(T) >= _size); - if(::std::is_integral::value) + if (::std::is_integral::value) { - if(::std::is_signed::value) + if (::std::is_signed::value) { CKW_ASSERT(_type == Type::INT || _type == Type::UINT); CKW_ASSERT_IF(_type == Type::UINT, sizeof(T) > _size); diff --git a/compute_kernel_writer/prototype/include/ckw/TensorInfo.h b/compute_kernel_writer/prototype/include/ckw/TensorInfo.h index 55f8101a53..24da7dc8ab 100644 --- a/compute_kernel_writer/prototype/include/ckw/TensorInfo.h +++ b/compute_kernel_writer/prototype/include/ckw/TensorInfo.h @@ -143,10 +143,10 @@ public: int32_t id() const; private: - TensorShape _shape{ { 0 } }; - DataType _dt{ DataType::Unknown }; - TensorDataLayout _dl{ TensorDataLayout::Unknown }; - int32_t _id{ -1 }; + TensorShape _shape{{0}}; + DataType _dt{DataType::Unknown}; + TensorDataLayout _dl{TensorDataLayout::Unknown}; + int32_t _id{-1}; }; } // namespace ckw diff --git a/compute_kernel_writer/prototype/include/ckw/TensorOperand.h b/compute_kernel_writer/prototype/include/ckw/TensorOperand.h index 6d88932c66..c221b449fa 100644 --- a/compute_kernel_writer/prototype/include/ckw/TensorOperand.h +++ 
b/compute_kernel_writer/prototype/include/ckw/TensorOperand.h @@ -139,21 +139,21 @@ private: TensorInfo _info; TensorStorageType _storage_type; - TileOperand *_tile{ nullptr }; + TileOperand *_tile{nullptr}; TensorTileSampler _tile_sampler{}; - ::std::unique_ptr _stride1{ nullptr }; - ::std::unique_ptr _stride2{ nullptr }; - ::std::unique_ptr _stride3{ nullptr }; - ::std::unique_ptr _stride4{ nullptr }; - ::std::unique_ptr _dim0{ nullptr }; - ::std::unique_ptr _dim1{ nullptr }; - ::std::unique_ptr _dim2{ nullptr }; - ::std::unique_ptr _dim3{ nullptr }; - ::std::unique_ptr _dim4{ nullptr }; - ::std::unique_ptr _dim1_dim2{ nullptr }; - ::std::unique_ptr _dim1_dim2_dim3{ nullptr }; - ::std::unique_ptr _offset_first_element_in_bytes{ nullptr }; + ::std::unique_ptr _stride1{nullptr}; + ::std::unique_ptr _stride2{nullptr}; + ::std::unique_ptr _stride3{nullptr}; + ::std::unique_ptr _stride4{nullptr}; + ::std::unique_ptr _dim0{nullptr}; + ::std::unique_ptr _dim1{nullptr}; + ::std::unique_ptr _dim2{nullptr}; + ::std::unique_ptr _dim3{nullptr}; + ::std::unique_ptr _dim4{nullptr}; + ::std::unique_ptr _dim1_dim2{nullptr}; + ::std::unique_ptr _dim1_dim2_dim3{nullptr}; + ::std::unique_ptr _offset_first_element_in_bytes{nullptr}; }; // ================================================================================================= diff --git a/compute_kernel_writer/prototype/include/ckw/TensorTileSampler.h b/compute_kernel_writer/prototype/include/ckw/TensorTileSampler.h index e1bf0c52b8..606dec3535 100644 --- a/compute_kernel_writer/prototype/include/ckw/TensorTileSampler.h +++ b/compute_kernel_writer/prototype/include/ckw/TensorTileSampler.h @@ -26,6 +26,7 @@ #define CKW_PROTOTYPE_INCLUDE_CKW_TENSORTILESAMPLER_H #include "ckw/types/TensorSamplerTypes.h" + #include namespace ckw @@ -55,12 +56,14 @@ public: * @param[in] address_mode_y The address mode of the y dimension. * @param[in] address_mode_z The address mode of the z dimension. */ - TensorTileSampler( - TileOperand &x, TileOperand &y, TileOperand &z, TileOperand &b, - TensorSamplerFormat format, - TensorSamplerAddressModeX address_mode_x, - TensorSamplerAddressModeY address_mode_y, - TensorSamplerAddressModeZ address_mode_z); + TensorTileSampler(TileOperand &x, + TileOperand &y, + TileOperand &z, + TileOperand &b, + TensorSamplerFormat format, + TensorSamplerAddressModeX address_mode_x, + TensorSamplerAddressModeY address_mode_y, + TensorSamplerAddressModeZ address_mode_z); /** Initialize a new instance of @ref TensorSampler class. * @@ -75,13 +78,16 @@ public: * @param[in] address_mode_y The address mode of the y dimension. * @param[in] address_mode_z The address mode of the z dimension. */ - TensorTileSampler( - TileOperand &x, TileOperand &y, TileOperand &z, TileOperand &b, - int32_t height, int32_t width, - TensorSamplerFormat format, - TensorSamplerAddressModeX address_mode_x, - TensorSamplerAddressModeY address_mode_y, - TensorSamplerAddressModeZ address_mode_z); + TensorTileSampler(TileOperand &x, + TileOperand &y, + TileOperand &z, + TileOperand &b, + int32_t height, + int32_t width, + TensorSamplerFormat format, + TensorSamplerAddressModeX address_mode_x, + TensorSamplerAddressModeY address_mode_y, + TensorSamplerAddressModeZ address_mode_z); /** Get the coordinate in the x dimension. 
*/ const TileOperand &x() const; @@ -144,18 +150,18 @@ public: TensorTileSampler &address_mode_z(TensorSamplerAddressModeZ address_mode_z); private: - TileOperand *_x{ nullptr }; - TileOperand *_y{ nullptr }; - TileOperand *_z{ nullptr }; - TileOperand *_b{ nullptr }; - - int32_t _height{ 0 }; - int32_t _width{ 0 }; - - TensorSamplerFormat _format{ TensorSamplerFormat::Unknown }; - TensorSamplerAddressModeX _address_mode_x{ TensorSamplerAddressModeX::Unknown }; - TensorSamplerAddressModeY _address_mode_y{ TensorSamplerAddressModeY::Unknown }; - TensorSamplerAddressModeZ _address_mode_z{ TensorSamplerAddressModeZ::Unknown }; + TileOperand *_x{nullptr}; + TileOperand *_y{nullptr}; + TileOperand *_z{nullptr}; + TileOperand *_b{nullptr}; + + int32_t _height{0}; + int32_t _width{0}; + + TensorSamplerFormat _format{TensorSamplerFormat::Unknown}; + TensorSamplerAddressModeX _address_mode_x{TensorSamplerAddressModeX::Unknown}; + TensorSamplerAddressModeY _address_mode_y{TensorSamplerAddressModeY::Unknown}; + TensorSamplerAddressModeZ _address_mode_z{TensorSamplerAddressModeZ::Unknown}; }; } // namespace ckw diff --git a/compute_kernel_writer/prototype/include/ckw/TileInfo.h b/compute_kernel_writer/prototype/include/ckw/TileInfo.h index de9e47af2b..e0d064169e 100644 --- a/compute_kernel_writer/prototype/include/ckw/TileInfo.h +++ b/compute_kernel_writer/prototype/include/ckw/TileInfo.h @@ -83,7 +83,7 @@ public: DataType data_type() const; private: - DataType _dt{ DataType::Unknown }; + DataType _dt{DataType::Unknown}; TileShape _shape{}; }; diff --git a/compute_kernel_writer/prototype/include/ckw/types/Functions.h b/compute_kernel_writer/prototype/include/ckw/types/Functions.h index bc1f85c188..c6afaa0ac8 100644 --- a/compute_kernel_writer/prototype/include/ckw/types/Functions.h +++ b/compute_kernel_writer/prototype/include/ckw/types/Functions.h @@ -32,14 +32,14 @@ namespace ckw enum class UnaryFunction : int32_t { - Exp = 0x0000, - Tanh = 0x0001, - Sqrt = 0x0002, - Erf = 0x0003, - Fabs = 0x0004, - Log = 0x0006, - Round = 0x0007, - Floor = 0x0008, + Exp = 0x0000, + Tanh = 0x0001, + Sqrt = 0x0002, + Erf = 0x0003, + Fabs = 0x0004, + Log = 0x0006, + Round = 0x0007, + Floor = 0x0008, // Misc SizeOf = 0x0009, @@ -47,8 +47,8 @@ enum class UnaryFunction : int32_t enum class BinaryFunction : int32_t { - Min = 0x0000, - Max = 0x0001, + Min = 0x0000, + Max = 0x0001, }; enum class TernaryFunction : int32_t diff --git a/compute_kernel_writer/prototype/include/ckw/types/Operators.h b/compute_kernel_writer/prototype/include/ckw/types/Operators.h index 43241170a5..b560996837 100644 --- a/compute_kernel_writer/prototype/include/ckw/types/Operators.h +++ b/compute_kernel_writer/prototype/include/ckw/types/Operators.h @@ -69,8 +69,8 @@ enum class BinaryOp : int32_t enum class AssignmentOp : int32_t { // Unary - Increment = 0x0000, // += - Decrement = 0x0001, // -= + Increment = 0x0000, // += + Decrement = 0x0001, // -= }; } // namespace ckw diff --git a/compute_kernel_writer/prototype/include/ckw/types/TensorSamplerTypes.h b/compute_kernel_writer/prototype/include/ckw/types/TensorSamplerTypes.h index 836bd13c95..63405a0764 100644 --- a/compute_kernel_writer/prototype/include/ckw/types/TensorSamplerTypes.h +++ b/compute_kernel_writer/prototype/include/ckw/types/TensorSamplerTypes.h @@ -39,34 +39,38 @@ enum class TensorSamplerFormat : int32_t enum class TensorSamplerAddressModeX : int32_t { - Unknown = 0, - None = 1, // The user guarantees that the X coordinate is always in-bound - OverlappingMin = 2 // (FIXED shapes 
only) Reduce the load/store length when x == 0 (MIN). The load length will be width % original length - // Leftover elements can be handled using overlapping. This involves processing some of the elements in the array twice. + Unknown = 0, + None = 1, // The user guarantees that the X coordinate is always in-bound + OverlappingMin = + 2 // (FIXED shapes only) Reduce the load/store length when x == 0 (MIN). The load length will be width % original length + // Leftover elements can be handled using overlapping. This involves processing some of the elements in the array twice. }; enum class TensorSamplerAddressModeY : int32_t { - Unknown = 0, - None = 1, // The user guarantees that the Y coordinate is always in-bound - OverlappingMin = 2, // (FIXED shapes only) Reduce the load/store length when x == 0 (MIN). The load length will be width % original length - Skip = 3, // Skip the read/write - SkipMinEdgeOnly = 4, // Skip greater than or equal to max only. The user guarantees that the Y coordinate is always >= 0 - SkipMaxEdgeOnly = 5, // Skip less than 0 only - ClampToNearest = 6, // Clamp the coordinate to nearest edge (0 or max value allowed on Y) - ClampToMinEdgeOnly = 7, // Clamp the negative coordinate to 0 only. Therefore, we expect Y to be always < MAX - ClampToMaxEdgeOnly = 8, // Clamp the coordinate to the max value allowed on Y only. We expect Y to be always >= 0 - ClampToBorder = 9, // Clamp to border which always has 0 value + Unknown = 0, + None = 1, // The user guarantees that the Y coordinate is always in-bound + OverlappingMin = + 2, // (FIXED shapes only) Reduce the load/store length when x == 0 (MIN). The load length will be width % original length + Skip = 3, // Skip the read/write + SkipMinEdgeOnly = + 4, // Skip greater than or equal to max only. The user guarantees that the Y coordinate is always >= 0 + SkipMaxEdgeOnly = 5, // Skip less than 0 only + ClampToNearest = 6, // Clamp the coordinate to nearest edge (0 or max value allowed on Y) + ClampToMinEdgeOnly = 7, // Clamp the negative coordinate to 0 only. Therefore, we expect Y to be always < MAX + ClampToMaxEdgeOnly = 8, // Clamp the coordinate to the max value allowed on Y only. We expect Y to be always >= 0 + ClampToBorder = 9, // Clamp to border which always has 0 value ClampToBorderMinEdgeOnly = 10, ClampToBorderMaxEdgeOnly = 11 }; enum class TensorSamplerAddressModeZ : int32_t { - Unknown = 0, - None = 1, // The user guarantees that the Y coordinate is always in-bound - Skip = 3, // Skip the read/write - SkipMinEdgeOnly = 4, // Skip greater than or equal to max only. The user guarantees that the Y coordinate is always >= 0 + Unknown = 0, + None = 1, // The user guarantees that the Y coordinate is always in-bound + Skip = 3, // Skip the read/write + SkipMinEdgeOnly = + 4, // Skip greater than or equal to max only. The user guarantees that the Y coordinate is always >= 0 SkipMaxEdgeOnly = 5, // Skip less than 0 only ClampToNearest = 6, // Clamp the coordinate to nearest edge (0 or max value allowed on Y) ClampToMinEdgeOnly = 7, // Clamp the negative coordinate to 0 only. 
Therefore, we expect Y to be always < MAX diff --git a/compute_kernel_writer/prototype/src/Kernel.cpp b/compute_kernel_writer/prototype/src/Kernel.cpp index 095ac879f1..6228ed17d0 100644 --- a/compute_kernel_writer/prototype/src/Kernel.cpp +++ b/compute_kernel_writer/prototype/src/Kernel.cpp @@ -23,24 +23,27 @@ */ #include "ckw/Kernel.h" + #include "ckw/TensorOperand.h" #include "ckw/types/GpuTargetLanguage.h" + #include "src/Prototype.h" namespace ckw { -Kernel::Kernel(GpuTargetLanguage language) - : Kernel{"unnamed", language} +Kernel::Kernel(GpuTargetLanguage language) : Kernel{"unnamed", language} { } Kernel::Kernel(const char *name, GpuTargetLanguage language) - : _name(name), _kernel(std::make_unique(language)), _operands{}, _tensor_id_operands{} + : _name(name), + _kernel(std::make_unique(language)), + _operands{}, + _tensor_id_operands{} { } - Kernel::~Kernel() { } @@ -50,7 +53,7 @@ const std::string &Kernel::name() const return _name; } -void Kernel::name(const std::string& name) +void Kernel::name(const std::string &name) { _name = name; } @@ -60,14 +63,14 @@ std::vector Kernel::arguments() const const auto impl_args = _kernel->arguments.tensor_argument_declarations(); - for(auto tensor_arg : impl_args) + for (auto tensor_arg : impl_args) { auto tensor = _tensor_id_operands.at(tensor_arg->format().id); arguments.push_back(*tensor); - for(auto component_arg : tensor_arg->component_declarations()) + for (auto component_arg : tensor_arg->component_declarations()) { - switch(component_arg) + switch (component_arg) { case TensorComponentType::OffsetFirstElement: arguments.push_back(tensor->offset_first_element_in_bytes()); diff --git a/compute_kernel_writer/prototype/src/KernelArgument.cpp b/compute_kernel_writer/prototype/src/KernelArgument.cpp index 2b4d7c8cee..24ace28eb3 100644 --- a/compute_kernel_writer/prototype/src/KernelArgument.cpp +++ b/compute_kernel_writer/prototype/src/KernelArgument.cpp @@ -23,14 +23,14 @@ */ #include "ckw/KernelArgument.h" + #include "ckw/Error.h" #include "ckw/TensorOperand.h" namespace ckw { -KernelArgument::KernelArgument(TensorOperand &tensor) - : _type(Type::TensorStorage), _id(tensor.info().id()) +KernelArgument::KernelArgument(TensorOperand &tensor) : _type(Type::TensorStorage), _id(tensor.info().id()) { _sub_id.tensor_storage_type = tensor.storage_type(); } diff --git a/compute_kernel_writer/prototype/src/KernelWriter.cpp b/compute_kernel_writer/prototype/src/KernelWriter.cpp index 5c9a16ee33..9f58d9fefa 100644 --- a/compute_kernel_writer/prototype/src/KernelWriter.cpp +++ b/compute_kernel_writer/prototype/src/KernelWriter.cpp @@ -23,9 +23,11 @@ */ #include "ckw/KernelWriter.h" + #include "ckw/Error.h" #include "ckw/TensorInfo.h" #include "ckw/TensorOperand.h" + #include "src/Prototype.h" #include @@ -38,7 +40,7 @@ namespace inline prototype::TensorInfo create_impl_tensor_info(const TensorInfo &info) { - return prototype::TensorInfo{ info.shape(), info.data_type(), info.data_layout(), info.id() }; + return prototype::TensorInfo{info.shape(), info.data_type(), info.data_layout(), info.id()}; } } // namespace @@ -86,7 +88,8 @@ int32_t KernelWriter::next_id_space() // Tensor and tile declaration // ================================================================================================= -TensorOperand &KernelWriter::declare_tensor_argument(const std::string &name, const TensorInfo &info, TensorStorageType storage_type) +TensorOperand & +KernelWriter::declare_tensor_argument(const std::string &name, const TensorInfo &info, TensorStorageType 
storage_type) { const auto var_name = generate_variable_name(name); @@ -120,13 +123,11 @@ TileOperand &KernelWriter::declare_tile_operand(std::unique_ptr ope auto &operand = _kernel->register_operand(std::move(operand_ptr)); const auto &name = operand.name(); - if(!operand.is_constant()) + if (!operand.is_constant()) { const auto &info = operand.tile_info(); - _impl->declare_tile( - name, - prototype::TileInfo(info.data_type(), info.width(), info.height())); + _impl->declare_tile(name, prototype::TileInfo(info.data_type(), info.width(), info.height())); } else { @@ -140,16 +141,15 @@ TileOperand &KernelWriter::declare_tile_operand(std::unique_ptr ope // Load and store // ================================================================================================= -void KernelWriter::op_load(TileOperand &tile, const TensorOperand &tensor, const TensorTileSampler &sampler, const TileOperand &dilation_y) +void KernelWriter::op_load(TileOperand &tile, + const TensorOperand &tensor, + const TensorTileSampler &sampler, + const TileOperand &dilation_y) { prototype::TensorOperand impl_tensor( tensor.name(), - prototype::GpuSampler{ - sampler.format(), - prototype::to_gpu_tensor_storage(tensor.storage_type()), - sampler.address_mode_x(), - sampler.address_mode_y(), - sampler.address_mode_z() }); + prototype::GpuSampler{sampler.format(), prototype::to_gpu_tensor_storage(tensor.storage_type()), + sampler.address_mode_x(), sampler.address_mode_y(), sampler.address_mode_z()}); auto impl_x = sampler.x().create_impl_operand(_impl.get()); auto impl_y = sampler.y().create_impl_operand(_impl.get()); @@ -167,12 +167,8 @@ void KernelWriter::op_load_indirect(TileOperand &tile, const TensorOperand &tens { prototype::TensorOperand impl_tensor( tensor.name(), - prototype::GpuSampler{ - sampler.format(), - prototype::to_gpu_tensor_storage(tensor.storage_type()), - sampler.address_mode_x(), - sampler.address_mode_y(), - sampler.address_mode_z() }); + prototype::GpuSampler{sampler.format(), prototype::to_gpu_tensor_storage(tensor.storage_type()), + sampler.address_mode_x(), sampler.address_mode_y(), sampler.address_mode_z()}); auto impl_x = sampler.x().create_impl_operand(_impl.get()); auto impl_y = sampler.y().create_impl_operand(_impl.get()); @@ -194,12 +190,8 @@ void KernelWriter::util_get_indirect_buffer(TileOperand &tile, { prototype::TensorOperand impl_tensor( tensor.name(), - prototype::GpuSampler{ - sampler.format(), - prototype::to_gpu_tensor_storage(tensor.storage_type()), - sampler.address_mode_x(), - sampler.address_mode_y(), - sampler.address_mode_z() }); + prototype::GpuSampler{sampler.format(), prototype::to_gpu_tensor_storage(tensor.storage_type()), + sampler.address_mode_x(), sampler.address_mode_y(), sampler.address_mode_z()}); auto impl_x = x.create_impl_operand(_impl.get()); auto impl_y = y.create_impl_operand(_impl.get()); @@ -215,12 +207,8 @@ void KernelWriter::op_store(TensorOperand &tensor, const TileOperand &tile, cons { prototype::TensorOperand impl_tensor( tensor.name(), - prototype::GpuSampler{ - sampler.format(), - prototype::to_gpu_tensor_storage(tensor.storage_type()), - sampler.address_mode_x(), - sampler.address_mode_y(), - sampler.address_mode_z() }); + prototype::GpuSampler{sampler.format(), prototype::to_gpu_tensor_storage(tensor.storage_type()), + sampler.address_mode_x(), sampler.address_mode_y(), sampler.address_mode_z()}); auto impl_src = tile.create_impl_operand(_impl.get()); auto impl_x = sampler.x().create_impl_operand(_impl.get()); auto impl_y = 
sampler.y().create_impl_operand(_impl.get()); @@ -250,7 +238,10 @@ void KernelWriter::op_cast_expression(const TileOperand &dst, const TileOperand _impl->op_cast_expression(impl_dst, impl_src, policy); } -void KernelWriter::op_binary_expression(const TileOperand &dst, const TileOperand &lhs, BinaryOp op, const TileOperand &rhs) +void KernelWriter::op_binary_expression(const TileOperand &dst, + const TileOperand &lhs, + BinaryOp op, + const TileOperand &rhs) { auto impl_lhs = lhs.create_impl_operand(_impl.get()); auto impl_rhs = rhs.create_impl_operand(_impl.get()); @@ -275,7 +266,10 @@ void KernelWriter::op_unary_elementwise_function(const TileOperand &dst, UnaryFu _impl->op_unary_elementwise_function(impl_dst, opcode, impl_src); } -void KernelWriter::op_binary_elementwise_function(const TileOperand &dst, BinaryFunction opcode, const TileOperand &first, const TileOperand &second) +void KernelWriter::op_binary_elementwise_function(const TileOperand &dst, + BinaryFunction opcode, + const TileOperand &first, + const TileOperand &second) { auto impl_dst = dst.create_impl_operand(_impl.get()); auto impl_first = first.create_impl_operand(_impl.get()); @@ -284,7 +278,11 @@ void KernelWriter::op_binary_elementwise_function(const TileOperand &dst, Binary _impl->op_binary_elementwise_function(impl_dst, opcode, impl_first, impl_second); } -void KernelWriter::op_ternary_elementwise_function(const TileOperand &dst, TernaryFunction opcode, const TileOperand &first, const TileOperand &second, const TileOperand &third) +void KernelWriter::op_ternary_elementwise_function(const TileOperand &dst, + TernaryFunction opcode, + const TileOperand &first, + const TileOperand &second, + const TileOperand &third) { auto impl_dst = dst.create_impl_operand(_impl.get()); auto impl_first = first.create_impl_operand(_impl.get()); @@ -305,7 +303,10 @@ void KernelWriter::op_if(const TileOperand &lhs, BinaryOp op, const TileOperand _impl->compound_statement_end(); } -void KernelWriter::op_else_if(const TileOperand &lhs, BinaryOp op, const TileOperand &rhs, const std::function &body) +void KernelWriter::op_else_if(const TileOperand &lhs, + BinaryOp op, + const TileOperand &rhs, + const std::function &body) { auto impl_lhs = lhs.create_impl_operand(_impl.get()); auto impl_rhs = rhs.create_impl_operand(_impl.get()); @@ -324,14 +325,21 @@ void KernelWriter::op_else(const std::function &body) _impl->compound_statement_end(); } -void KernelWriter::op_for_loop(const TileOperand &var_name, BinaryOp cond_op, const TileOperand &cond_value_name, const TileOperand &update_var_name, AssignmentOp update_op, const TileOperand &update_value_name, const std::function &body) +void KernelWriter::op_for_loop(const TileOperand &var_name, + BinaryOp cond_op, + const TileOperand &cond_value_name, + const TileOperand &update_var_name, + AssignmentOp update_op, + const TileOperand &update_value_name, + const std::function &body) { auto impl_var_name = var_name.create_impl_operand(_impl.get()); auto impl_cond_value_name = cond_value_name.create_impl_operand(_impl.get()); auto impl_update_var_name = update_var_name.create_impl_operand(_impl.get()); auto impl_update_value_name = update_value_name.create_impl_operand(_impl.get()); - _impl->op_for_loop_header(impl_var_name, cond_op, impl_cond_value_name, impl_update_var_name, update_op, impl_update_value_name); + _impl->op_for_loop_header(impl_var_name, cond_op, impl_cond_value_name, impl_update_var_name, update_op, + impl_update_value_name); _impl->compound_statement_begin(); body(); 
_impl->compound_statement_end(); diff --git a/compute_kernel_writer/prototype/src/OperandBase.cpp b/compute_kernel_writer/prototype/src/OperandBase.cpp index 59cf846cc7..e0617fdc06 100644 --- a/compute_kernel_writer/prototype/src/OperandBase.cpp +++ b/compute_kernel_writer/prototype/src/OperandBase.cpp @@ -27,8 +27,7 @@ namespace ckw { -OperandBase::OperandBase(const std::string &name) - : _name(name) +OperandBase::OperandBase(const std::string &name) : _name(name) { } diff --git a/compute_kernel_writer/prototype/src/Prototype.h b/compute_kernel_writer/prototype/src/Prototype.h index eb9d7198a9..433eef9e7b 100644 --- a/compute_kernel_writer/prototype/src/Prototype.h +++ b/compute_kernel_writer/prototype/src/Prototype.h @@ -25,12 +25,21 @@ #ifndef CKW_PROTOTYPE_SRC_PROTOTYPE_H #define CKW_PROTOTYPE_SRC_PROTOTYPE_H +#include "ckw/Error.h" +#include "ckw/TensorInfo.h" +#include "ckw/types/ConvertPolicy.h" +#include "ckw/types/DataType.h" +#include "ckw/types/Functions.h" +#include "ckw/types/GpuTargetLanguage.h" +#include "ckw/types/Operators.h" +#include "ckw/types/TensorSamplerTypes.h" + #include #include #include // assert (to be removed) #include #include -#include // int32_t +#include // int32_t #include #include // cout (to be removed) #include @@ -40,15 +49,6 @@ #include #include -#include "ckw/Error.h" -#include "ckw/TensorInfo.h" -#include "ckw/types/ConvertPolicy.h" -#include "ckw/types/DataType.h" -#include "ckw/types/Functions.h" -#include "ckw/types/GpuTargetLanguage.h" -#include "ckw/types/Operators.h" -#include "ckw/types/TensorSamplerTypes.h" - namespace ckw { namespace prototype @@ -83,21 +83,21 @@ enum class GpuExtensions struct TensorInfo { - TensorShape shape{ { 0 } }; - DataType data_type{ DataType::Unknown }; - TensorDataLayout data_layout{ TensorDataLayout::Nhwc }; - int32_t id{ -1 }; + TensorShape shape{{0}}; + DataType data_type{DataType::Unknown}; + TensorDataLayout data_layout{TensorDataLayout::Nhwc}; + int32_t id{-1}; }; struct ComponentAttribute { - GpuCompilationSpeed compilation_speed{ GpuCompilationSpeed::Fast }; - bool overwrite_tile{ true }; + GpuCompilationSpeed compilation_speed{GpuCompilationSpeed::Fast}; + bool overwrite_tile{true}; }; inline std::string data_type_to_cl_type(DataType dt) { - switch(dt) + switch (dt) { case DataType::Fp32: return "float"; @@ -125,7 +125,7 @@ inline std::string data_type_to_cl_type(DataType dt) inline int32_t width_to_cl_vector_size(int32_t width) { - switch(width) + switch (width) { case 1: return 1; @@ -160,7 +160,7 @@ inline std::string get_cl_data_type(DataType dt, int32_t width) std::string data_type; int32_t w = width_to_cl_vector_size(width); data_type += data_type_to_cl_type(dt); - if(w != 1) + if (w != 1) { data_type += std::to_string(w); } @@ -169,7 +169,7 @@ inline std::string get_cl_data_type(DataType dt, int32_t width) inline std::string to_opencl_store(int32_t vector_length) { - if(vector_length != 1) + if (vector_length != 1) { return "vstore" + std::to_string(vector_length) + "("; } @@ -185,24 +185,21 @@ struct TileInfo { } - TileInfo(DataType dt) - : dt(dt), w(1), h(1) + TileInfo(DataType dt) : dt(dt), w(1), h(1) { } - TileInfo(DataType dt, int32_t width) - : dt(dt), w(width), h(1) + TileInfo(DataType dt, int32_t width) : dt(dt), w(width), h(1) { } - TileInfo(DataType dt, int32_t width, int32_t height) - : dt(dt), w(width), h(height) + TileInfo(DataType dt, int32_t width, int32_t height) : dt(dt), w(width), h(height) { } - DataType dt{ DataType::Unknown }; // Data type of the tile - int32_t w{ 0 }; // Width 
(i.e. c0 - portion of the channels) - int32_t h{ 0 }; // Height (i.e. s0 - portion of the spatial dimensions) + DataType dt{DataType::Unknown}; // Data type of the tile + int32_t w{0}; // Width (i.e. c0 - portion of the channels) + int32_t h{0}; // Height (i.e. s0 - portion of the spatial dimensions) }; inline std::ostream &operator<<(std::ostream &o, const TileInfo &a) @@ -213,14 +210,14 @@ inline std::ostream &operator<<(std::ostream &o, const TileInfo &a) struct DataTypeAsString { - std::string str{ "" }; - DataType dt{ DataType::Unknown }; - int32_t size{ 1 }; + std::string str{""}; + DataType dt{DataType::Unknown}; + int32_t size{1}; }; struct ValueAsString { - std::string str{ "" }; + std::string str{""}; DataTypeAsString type{}; }; @@ -276,8 +273,8 @@ public: virtual bool need_declaration() const = 0; protected: - TileInfo _format{}; // Tile format - std::string _basename{ "" }; // Tile name + TileInfo _format{}; // Tile format + std::string _basename{""}; // Tile name }; // A tile is a collection of variables used to express a 2D data. The variables are vectors in the GPU context. @@ -329,7 +326,7 @@ public: t.type.size = 1; // Check required because if the width has only one element, we cannot use .s0 - if(_format.w != 1) + if (_format.w != 1) { // Automatic broadcasting t.str += ".s" + std::to_string(x); @@ -360,10 +357,10 @@ public: t.type.dt = _format.dt; t.type.size = width; - if(_format.w != 1) + if (_format.w != 1) { t.str += ".s"; - for(int i = 0; i < width; ++i) + for (int i = 0; i < width; ++i) { t.str += to_scalar_hex(x_start + i); } @@ -374,7 +371,7 @@ public: std::vector underlying_source_variables() const override { std::vector vars; - for(int32_t y = 0; y < _format.h; ++y) + for (int32_t y = 0; y < _format.h; ++y) { ValueAsString t; t.str = build_variable_name(y); @@ -401,7 +398,7 @@ private: { std::string var_name = _basename; - if(_format.h == 1) + if (_format.h == 1) { return var_name; } @@ -416,7 +413,7 @@ private: std::string to_scalar_hex(int32_t x) const { - switch(x) + switch (x) { case 0: case 1: @@ -461,9 +458,9 @@ public: _data = std::vector>(_format.h, std::vector(_format.w)); - for(int32_t y = 0; y < _format.h; ++y) + for (int32_t y = 0; y < _format.h; ++y) { - for(int32_t x = 0; x < _format.w; ++x) + for (int32_t x = 0; x < _format.w; ++x) { _data[y][x] = in[y][x]; } @@ -501,20 +498,20 @@ public: t.type.dt = _format.dt; t.type.size = width; - if(width > 1) + if (width > 1) { t.str += "((" + get_cl_data_type(_format.dt, width) + ")("; } int32_t x = x_start; - for(; x < width - 1; ++x) + for (; x < width - 1; ++x) { t.str += scalar(x, y).str; t.str += ", "; } t.str += scalar(x, y).str; - if(width > 1) + if (width > 1) { t.str += "))"; } @@ -526,9 +523,9 @@ public: { std::vector vars; - for(int32_t y = 0; y < _format.h; ++y) + for (int32_t y = 0; y < _format.h; ++y) { - for(int32_t x = 0; x < _format.w; ++x) + for (int32_t x = 0; x < _format.w; ++x) { ValueAsString t; t.str = _data[y][x]; @@ -572,7 +569,7 @@ enum class TensorComponentGroup : int32_t inline std::string to_string(TensorComponentType x) { - switch(x) + switch (x) { case TensorComponentType::Unknown: return "Unknown"; @@ -672,7 +669,7 @@ enum class GpuTensorStorage : int32_t inline GpuTensorStorage to_gpu_tensor_storage(TensorStorageType s) { - switch(s) + switch (s) { case TensorStorageType::Unknown: return GpuTensorStorage::Unknown; @@ -694,7 +691,7 @@ inline GpuTensorStorage to_gpu_tensor_storage(TensorStorageType s) inline TensorStorageType to_tensor_storage(GpuTensorStorage s) { - 
switch(s) + switch (s) { case GpuTensorStorage::Unknown: return TensorStorageType::Unknown; @@ -755,23 +752,23 @@ public: // Methods to override std::string component(TensorComponentType x) override { - if((static_cast(x) & static_cast(TensorComponentGroup::Constant))) + if ((static_cast(x) & static_cast(TensorComponentGroup::Constant))) { int32_t idx = static_cast(x) & static_cast(TensorComponentIndex::IndexMask); return std::to_string(idx - 1); } - if(_return_by_value_when_possible) + if (_return_by_value_when_possible) { - if((static_cast(x) & static_cast(TensorComponentGroup::Dimension))) + if ((static_cast(x) & static_cast(TensorComponentGroup::Dimension))) { int32_t idx = static_cast(x) & static_cast(TensorComponentIndex::IndexMask); return std::to_string(_format.shape[idx]); } - if((static_cast(x) & static_cast(TensorComponentGroup::FoldedDimension))) + if ((static_cast(x) & static_cast(TensorComponentGroup::FoldedDimension))) { - switch(x) + switch (x) { case TensorComponentType::Dim1xDim2: return std::to_string(_format.shape[1] * _format.shape[2]); @@ -784,7 +781,7 @@ public: } } - if(std::find(_components_required.begin(), _components_required.end(), x) == _components_required.end()) + if (std::find(_components_required.begin(), _components_required.end(), x) == _components_required.end()) { _components_required.push_back(x); } @@ -804,7 +801,7 @@ public: std::string storage(GpuTensorStorage x) override { - if(std::find(_storage_required.begin(), _storage_required.end(), x) == _storage_required.end()) + if (std::find(_storage_required.begin(), _storage_required.end(), x) == _storage_required.end()) { _storage_required.push_back(x); } @@ -814,7 +811,7 @@ public: std::string storage_type_declaration(GpuTensorStorage x) const override { - switch(x) + switch (x) { case GpuTensorStorage::BufferUint8Ptr: return "__global uchar*"; @@ -848,7 +845,7 @@ private: { std::string var_name = _basename; - switch(x) + switch (x) { case GpuTensorStorage::BufferUint8Ptr: return var_name + "_ptr"; @@ -870,7 +867,7 @@ private: { std::string var_name = _basename; - switch(x) + switch (x) { case TensorComponentType::OffsetFirstElement: return var_name + "_offset_first_element"; @@ -900,9 +897,9 @@ private: return var_name; } - bool _return_by_value_when_possible{ false }; - std::vector _storage_required{}; - std::vector _components_required{}; + bool _return_by_value_when_possible{false}; + std::vector _storage_required{}; + std::vector _components_required{}; }; /** @@ -930,16 +927,16 @@ public: struct RegistryTileTableEntry { - RegistryLevel registry_level{ 0 }; - std::unique_ptr tile_object{ nullptr }; + RegistryLevel registry_level{0}; + std::unique_ptr tile_object{nullptr}; }; struct RegistryTileTypeTableEntry { - RegistryTileType tile_type{ RegistryTileType::Tile }; + RegistryTileType tile_type{RegistryTileType::Tile}; RegistryTileName tile_name{}; - RegistryIdSpace registry_idspace{ 0 }; - RegistryLevel registry_level{ 0 }; + RegistryIdSpace registry_idspace{0}; + RegistryLevel registry_level{0}; }; using RegistryTileTable = std::map>; @@ -1002,7 +999,7 @@ public: auto it = _frags.begin(); - while(it != _frags.end()) + while (it != _frags.end()) { x.push_back(it->first); @@ -1026,7 +1023,7 @@ public: // First check whether a tile with the same name exists IVectorTile *result = (*this)[key_var_name]; assert(result == nullptr); - if(result == nullptr) + if (result == nullptr) { std::unique_ptr tile = std::make_unique(var_name, format); @@ -1058,7 +1055,7 @@ public: // First check whether a tile 
with the same name exists IVectorTile *result = (*this)[key_var_name]; assert(result == nullptr); - if(result == nullptr) + if (result == nullptr) { std::unique_ptr tile = std::make_unique(var_name, format); _frags[key_IdSpace][key_var_name].tile_object = std::move(tile); @@ -1090,7 +1087,7 @@ public: // First check whether a tile with the same name exists IVectorTile *result = (*this)[key_var_name]; assert(result == nullptr); - if(result == nullptr) + if (result == nullptr) { std::unique_ptr tile = std::make_unique(in, dt); _frags[key_IdSpace][key_var_name].tile_object = std::move(tile); @@ -1123,7 +1120,7 @@ public: // First check whether a tile with the same name exists IVectorTile *result = (*this)[key_var_name]; assert(result == nullptr); - if(result == nullptr) + if (result == nullptr) { std::unique_ptr tile = std::make_unique(in, dt); _frags[key_IdSpace][key_var_name].tile_object = std::move(tile); @@ -1153,10 +1150,10 @@ public: IVectorTile *result = nullptr; auto search_IdSpace = _frags.find(key_IdSpace); - if(search_IdSpace != _frags.end()) + if (search_IdSpace != _frags.end()) { auto search_tile = _frags[key_IdSpace].find(key_var_name); - if(search_tile != _frags[key_IdSpace].end()) + if (search_tile != _frags[key_IdSpace].end()) { result = search_tile->second.tile_object.get(); assert(result != nullptr); @@ -1224,7 +1221,7 @@ public: std::map::iterator it = _frag_types[IdSpace].begin(); - while(it != _frag_types[IdSpace].end()) + while (it != _frag_types[IdSpace].end()) { // The following line should be enabled. However, we cannot at this stage // because it used to retrieve the output tile produced by each component. @@ -1259,9 +1256,9 @@ public: // Remove all variables in the local scope std::map::iterator it = _frags[_IdSpace].begin(); - while(it != _frags[_IdSpace].end()) + while (it != _frags[_IdSpace].end()) { - if(it->second.registry_level == _registry_level) + if (it->second.registry_level == _registry_level) { it = _frags[_IdSpace].erase(it); } @@ -1273,9 +1270,9 @@ public: std::map::iterator it_type = _frag_types[_IdSpace].begin(); - while(it_type != _frag_types[_IdSpace].end()) + while (it_type != _frag_types[_IdSpace].end()) { - if(it_type->second.registry_level == _registry_level) + if (it_type->second.registry_level == _registry_level) { it_type = _frag_types[_IdSpace].erase(it_type); } @@ -1302,7 +1299,7 @@ private: std::string generate_tile_name(const std::string &name) { assert(_IdSpace >= 0); - if(_registry_level == 0) + if (_registry_level == 0) { return "_G" + std::to_string(_IdSpace) + "_" + name; } @@ -1314,10 +1311,10 @@ private: RegistryTileTable _frags{}; RegistryTileTypeTable _frag_types{}; - RegistryLevel _registry_level{ 0 }; - RegistryIdSpace _IdSpace{ -1 }; - int32_t _anonymous_frag_count{ 0 }; // Counter used to create the anonymous tiles - GpuTargetLanguage _language{ GpuTargetLanguage::Unknown }; // Gpu programming language + RegistryLevel _registry_level{0}; + RegistryIdSpace _IdSpace{-1}; + int32_t _anonymous_frag_count{0}; // Counter used to create the anonymous tiles + GpuTargetLanguage _language{GpuTargetLanguage::Unknown}; // Gpu programming language }; using TensorEntry = std::unique_ptr; @@ -1388,7 +1385,7 @@ public: auto it = _refs.begin(); - while(it != _refs.end()) + while (it != _refs.end()) { x.push_back(it->first); @@ -1420,12 +1417,12 @@ public: // Check whether a tensor with that tensorID exists auto result = _tensor_arguments.find(tensor_id); - if(result == _tensor_arguments.end()) + if (result == _tensor_arguments.end()) { // 
It means that we haven't added a tensor with that tensor_id yet. Create a IGpuTensorArgument before creating the reference - std::unique_ptr arg = std::make_unique(var_name, x, - return_by_value_when_possible); - _tensor_arguments[tensor_id] = std::move(arg); + std::unique_ptr arg = + std::make_unique(var_name, x, return_by_value_when_possible); + _tensor_arguments[tensor_id] = std::move(arg); } _refs[key_IdSpace][key_var_name] = tensor_id; @@ -1445,15 +1442,15 @@ public: IGpuTensorArgument *result = nullptr; auto search_IdSpace = _refs.find(key_IdSpace); - if(search_IdSpace != _refs.end()) + if (search_IdSpace != _refs.end()) { auto search_tensor_id = _refs[key_IdSpace].find(key_var_name); - if(search_tensor_id != _refs[key_IdSpace].end()) + if (search_tensor_id != _refs[key_IdSpace].end()) { const int32_t tensor_id = search_tensor_id->second; auto search_tensor_argument = _tensor_arguments.find(tensor_id); - if(search_tensor_argument != _tensor_arguments.end()) + if (search_tensor_argument != _tensor_arguments.end()) { result = search_tensor_argument->second.get(); } @@ -1475,7 +1472,7 @@ public: auto it = _tensor_arguments.begin(); - while(it != _tensor_arguments.end()) + while (it != _tensor_arguments.end()) { args.push_back(it->second.get()); it++; @@ -1499,7 +1496,7 @@ public: auto search_IdSpace = _refs.find(key_IdSpace); - if(search_IdSpace != _refs.end()) + if (search_IdSpace != _refs.end()) { auto search_tensor_id = _refs[key_IdSpace].find(key_var_name); @@ -1527,7 +1524,7 @@ public: auto search_IdSpace = _refs.find(key_IdSpace); - if(search_IdSpace != _refs.end()) + if (search_IdSpace != _refs.end()) { auto search_tensor_id = _refs[key_IdSpace].find(key_var_name); @@ -1550,8 +1547,8 @@ private: std::map _tensor_arguments{}; std::map> _refs{}; - int32_t _IdSpace{ -1 }; - GpuTargetLanguage _language{ GpuTargetLanguage::Unknown }; // Gpu programming language + int32_t _IdSpace{-1}; + GpuTargetLanguage _language{GpuTargetLanguage::Unknown}; // Gpu programming language }; enum class OpType : int32_t @@ -1563,7 +1560,7 @@ enum class OpType : int32_t inline std::string to_string(AssignmentOp op) { - switch(op) + switch (op) { case AssignmentOp::Decrement: return "-="; @@ -1577,7 +1574,7 @@ inline std::string to_string(AssignmentOp op) inline std::string to_string(UnaryOp op) { - switch(op) + switch (op) { case UnaryOp::LogicalNot: return "!"; @@ -1593,7 +1590,7 @@ inline std::string to_string(UnaryOp op) inline std::string to_string(BinaryOp op) { - switch(op) + switch (op) { case BinaryOp::Add: return "+"; @@ -1629,7 +1626,7 @@ inline std::string to_string(BinaryOp op) inline std::string binary_op_string(BinaryOp op) { - switch(op) + switch (op) { case BinaryOp::Add: return "add"; @@ -1698,13 +1695,12 @@ struct ScalarTileCoord { } - ScalarTileCoord(int32_t x0, int32_t y0) - : x(x0), y(y0) + ScalarTileCoord(int32_t x0, int32_t y0) : x(x0), y(y0) { } - int32_t x{ -1 }; - int32_t y{ -1 }; + int32_t x{-1}; + int32_t y{-1}; }; /** @@ -1768,7 +1764,7 @@ public: private: std::string _str{}; - OperandType _type{ OperandType::Unknown }; + OperandType _type{OperandType::Unknown}; ScalarTileCoord _coord{}; }; @@ -1778,16 +1774,15 @@ struct GpuSampler { GpuSampler() = default; - TensorSamplerFormat format{ TensorSamplerFormat::Unknown }; - GpuSamplerTensorStorage storage{ GpuSamplerTensorStorage::Unknown }; - TensorSamplerAddressModeX address_mode_x{ TensorSamplerAddressModeX::Unknown }; - TensorSamplerAddressModeY address_mode_y{ TensorSamplerAddressModeY::Unknown }; - TensorSamplerAddressModeZ 
address_mode_z{ TensorSamplerAddressModeZ::Unknown }; + TensorSamplerFormat format{TensorSamplerFormat::Unknown}; + GpuSamplerTensorStorage storage{GpuSamplerTensorStorage::Unknown}; + TensorSamplerAddressModeX address_mode_x{TensorSamplerAddressModeX::Unknown}; + TensorSamplerAddressModeY address_mode_y{TensorSamplerAddressModeY::Unknown}; + TensorSamplerAddressModeZ address_mode_z{TensorSamplerAddressModeZ::Unknown}; }; -inline GpuSampler -create_simple_sampler(const TensorInfo *tensor_info_id, GpuSampler sampler, int32_t step_x, int32_t step_y, - int32_t step_z) +inline GpuSampler create_simple_sampler( + const TensorInfo *tensor_info_id, GpuSampler sampler, int32_t step_x, int32_t step_y, int32_t step_z) { CKW_UNUSED(step_x, step_y, step_z); @@ -1804,7 +1799,7 @@ create_simple_sampler(const TensorInfo *tensor_info_id, GpuSampler sampler, int3 int32_t dim_y = 0; int32_t dim_z = 0; - switch(sampler.format) + switch (sampler.format) { case TensorSamplerFormat::C_W_H: dim_x = tensor[0]; @@ -1822,19 +1817,19 @@ create_simple_sampler(const TensorInfo *tensor_info_id, GpuSampler sampler, int3 break; } - if(dim_x == 1) + if (dim_x == 1) { assert(step_x == 1); dst_sampler.address_mode_x = TensorSamplerAddressModeX::None; } - if(dim_y == 1) + if (dim_y == 1) { assert(step_y == 1); dst_sampler.address_mode_y = TensorSamplerAddressModeY::None; } - if(dim_z == 1) + if (dim_z == 1) { assert(step_z == 1); dst_sampler.address_mode_z = TensorSamplerAddressModeZ::None; @@ -1858,8 +1853,12 @@ public: * @param[in] step_y Increment step in the Y direction. Not necessarily it is the same of m0 of tile! * @param[in] step_z Increment step in the Z direction. Not necessarily it is the same of d0 of tile! */ - void initialize(const TensorInfo *tensor_info_id, GpuSamplerTensorStorage tensor_storage, - TensorSamplerFormat tensor_format, int32_t step_x, int32_t step_y, int32_t step_z) + void initialize(const TensorInfo *tensor_info_id, + GpuSamplerTensorStorage tensor_storage, + TensorSamplerFormat tensor_format, + int32_t step_x, + int32_t step_y, + int32_t step_z) { assert(_is_initialized == false); @@ -1908,13 +1907,13 @@ private: sampler.address_mode_z = TensorSamplerAddressModeZ::None; // In the case of texture, we do not need any special checks at the border - if(tensor_storage == GpuSamplerTensorStorage::BufferUint8Ptr) + if (tensor_storage == GpuSamplerTensorStorage::BufferUint8Ptr) { int32_t dim_x = 0; int32_t dim_y = 0; int32_t dim_z = 0; - switch(tensor_format) + switch (tensor_format) { case TensorSamplerFormat::C_W_H: dim_x = tensor[0]; @@ -1932,17 +1931,17 @@ private: break; } - if((dim_x % _step_x) != 0 && dim_x != 1) + if ((dim_x % _step_x) != 0 && dim_x != 1) { sampler.address_mode_x = TensorSamplerAddressModeX::OverlappingMin; } - if((dim_y % _step_y) != 0 && dim_y != 1) + if ((dim_y % _step_y) != 0 && dim_y != 1) { sampler.address_mode_y = TensorSamplerAddressModeY::ClampToMaxEdgeOnly; } - if((dim_z % _step_z) != 0 && dim_z != 1) + if ((dim_z % _step_z) != 0 && dim_z != 1) { sampler.address_mode_z = TensorSamplerAddressModeZ::ClampToMaxEdgeOnly; } @@ -1952,11 +1951,11 @@ private: } GpuSampler _sampler{}; // GpuSampler - int32_t _step_x{ 1 }; - int32_t _step_y{ 1 }; - int32_t _step_z{ 1 }; - const TensorInfo *_tensor_info_id{ nullptr }; - bool _is_initialized{ false }; + int32_t _step_x{1}; + int32_t _step_y{1}; + int32_t _step_z{1}; + const TensorInfo *_tensor_info_id{nullptr}; + bool _is_initialized{false}; }; /** @@ -1965,8 +1964,7 @@ private: class TensorOperand { public: - 
TensorOperand(const std::string &val, GpuSampler sampler) - : _str(val), _sampler(sampler) + TensorOperand(const std::string &val, GpuSampler sampler) : _str(val), _sampler(sampler) { } @@ -2050,9 +2048,9 @@ private: struct LWS { - int32_t x{ 1 }; - int32_t y{ 1 }; - int32_t z{ 1 }; + int32_t x{1}; + int32_t y{1}; + int32_t z{1}; }; /** @@ -2062,8 +2060,7 @@ struct LWS class OperandUnpacker { public: - OperandUnpacker(GpuTileRegistry &tiles, GpuTensorArgumentRegistry &arguments) - : _tiles(tiles), _arguments(arguments) + OperandUnpacker(GpuTileRegistry &tiles, GpuTensorArgumentRegistry &arguments) : _tiles(tiles), _arguments(arguments) { // Increase the level of the stack to allocate possible temporary tiles _tiles.increment_registry_level(); @@ -2078,26 +2075,26 @@ public: IVectorTile *unpack(const Operand &src) { // Get the tile - if(src.type() == OperandType::Tile) + if (src.type() == OperandType::Tile) { assert(_tiles.has_tile(src.value())); return _tiles[src.value()]; } // Create an anonymous tile with a constant - else if(static_cast(src.type()) & 0x00001000) + else if (static_cast(src.type()) & 0x00001000) { - if(src.type() == OperandType::ScalarTile) + if (src.type() == OperandType::ScalarTile) { ScalarTileCoord coord = src.scalar_tile_coordinate(); assert(_tiles.has_tile(src.value())); assert(coord.x >= 0); assert(coord.y >= 0); auto val = _tiles[src.value()]->scalar(coord.x, coord.y); - return _tiles.insert({ { { val.str } } }, val.type.dt); + return _tiles.insert({{{val.str}}}, val.type.dt); } else { - return _tiles.insert({ { { src.value() } } }, to_tile_data_type(src.type())); + return _tiles.insert({{{src.value()}}}, to_tile_data_type(src.type())); } } // Create an anonymous tile with the tensor component @@ -2107,7 +2104,7 @@ public: auto x = _arguments[src.value()]; const std::string val = x->component(to_tensor_component(src.type())); const DataType dt = x->component_data_type(); - return _tiles.insert({ { { val } } }, dt); + return _tiles.insert({{{val}}}, dt); } } @@ -2119,7 +2116,7 @@ private: TensorComponentType to_tensor_component(OperandType x) { - switch(x) + switch (x) { case OperandType::TensorDim0: return TensorComponentType::Dim0; @@ -2163,8 +2160,7 @@ private: class TensorOperandUnpacker { public: - TensorOperandUnpacker(GpuTensorArgumentRegistry &arguments) - : _arguments(arguments){}; + TensorOperandUnpacker(GpuTensorArgumentRegistry &arguments) : _arguments(arguments){}; IGpuTensorArgument *unpack(const TensorOperand &src) { @@ -2191,9 +2187,11 @@ struct GpuKernel std::string config_id{}; // Unique id, required for the tuning stage std::vector list_lws{}; // LWS to test, required for the tuning stage // Dispatch stage - GpuOutputSampler output_sampler{}; // GpuOutputSampler, required for the dispatch stage - std::vector> list_tensor_storages; // List of tensor storages, required for the dispatch stage - std::vector> list_tensor_components; // List of tensor components (width, stride,..), required for the dispatch stage) + GpuOutputSampler output_sampler{}; // GpuOutputSampler, required for the dispatch stage + std::vector> + list_tensor_storages; // List of tensor storages, required for the dispatch stage + std::vector> + list_tensor_components; // List of tensor components (width, stride,..), required for the dispatch stage) }; // Generate all extension pragmas (hardcoded for now) @@ -2234,13 +2232,13 @@ inline std::string generate_code(GpuKernelWriterDataHolder &in, const std::strin auto tensor_args = in.arguments.tensor_argument_declarations(); - for(auto 
&i : tensor_args) + for (auto &i : tensor_args) { // For each tensor used, get the storage and tensor components auto storages = i->storage_declarations(); auto components = i->component_declarations(); - for(auto &y : storages) + for (auto &y : storages) { std::string str; str += i->storage_type_declaration(y); @@ -2249,7 +2247,7 @@ inline std::string generate_code(GpuKernelWriterDataHolder &in, const std::strin arg_str.push_back(str); } - for(auto &y : components) + for (auto &y : components) { std::string str; str += i->component_type_declaration(); @@ -2259,10 +2257,10 @@ inline std::string generate_code(GpuKernelWriterDataHolder &in, const std::strin } } - for(size_t i = 0; i < arg_str.size(); ++i) + for (size_t i = 0; i < arg_str.size(); ++i) { code += arg_str[i]; - if(i + 1 < arg_str.size()) + if (i + 1 < arg_str.size()) { code += ",\n"; } @@ -2284,13 +2282,12 @@ inline std::string generate_code(GpuKernelWriterDataHolder &in, const std::strin class GpuTensor3dMapper { public: - GpuTensor3dMapper(IGpuTensorArgument *tensor, GpuSampler sampler) - : _sampler(sampler), _tensor(tensor){}; + GpuTensor3dMapper(IGpuTensorArgument *tensor, GpuSampler sampler) : _sampler(sampler), _tensor(tensor){}; std::string tensor_component_x() const { const auto format = _sampler.format; - switch(format) + switch (format) { case TensorSamplerFormat::C_WH_1: case TensorSamplerFormat::C_W_H: @@ -2305,7 +2302,7 @@ public: std::string tensor_component_y() const { const auto format = _sampler.format; - switch(format) + switch (format) { case TensorSamplerFormat::C_WH_1: return _tensor->component(TensorComponentType::Dim1xDim2); @@ -2321,7 +2318,7 @@ public: std::string tensor_component_z() const { const auto format = _sampler.format; - switch(format) + switch (format) { case TensorSamplerFormat::C_WH_1: return "1"; @@ -2337,7 +2334,7 @@ public: std::string tensor_component_stride_y() const { const auto format = _sampler.format; - switch(format) + switch (format) { case TensorSamplerFormat::C_WH_1: case TensorSamplerFormat::C_W_H: @@ -2352,7 +2349,7 @@ public: std::string tensor_component_stride_z() const { const auto format = _sampler.format; - switch(format) + switch (format) { case TensorSamplerFormat::C_WH_1: return "0"; @@ -2368,7 +2365,7 @@ public: std::string tensor_component_stride_batch() const { const auto format = _sampler.format; - switch(format) + switch (format) { case TensorSamplerFormat::C_WH_1: case TensorSamplerFormat::C_W_H: @@ -2384,7 +2381,7 @@ public: { auto t = _tensor->format(); const auto format = _sampler.format; - switch(format) + switch (format) { case TensorSamplerFormat::C_WH_1: case TensorSamplerFormat::C_W_H: @@ -2400,7 +2397,7 @@ public: { auto t = _tensor->format(); const auto format = _sampler.format; - switch(format) + switch (format) { case TensorSamplerFormat::C_WH_1: return (t.shape[1] * t.shape[2]) == 1; @@ -2417,7 +2414,7 @@ public: { auto t = _tensor->format(); const auto format = _sampler.format; - switch(format) + switch (format) { case TensorSamplerFormat::C_WH_1: return true; @@ -2434,7 +2431,7 @@ public: { auto t = _tensor->format(); const auto format = _sampler.format; - switch(format) + switch (format) { case TensorSamplerFormat::C_WH_1: case TensorSamplerFormat::C_W_H: @@ -2463,7 +2460,7 @@ private: struct GpuKernelWriterAttribute { - bool return_tensor_component_by_value{ false }; + bool return_tensor_component_by_value{false}; }; enum class RoundingMode @@ -2489,7 +2486,8 @@ public: virtual void declare_tile(const std::string &name, const TileInfo &info) = 0; 
- virtual void declare_const_tile(const std::string &name, const std::vector> &in, DataType dt) = 0; + virtual void + declare_const_tile(const std::string &name, const std::vector> &in, DataType dt) = 0; virtual void write_text(const std::string &x) = 0; @@ -2498,48 +2496,82 @@ public: virtual void compound_statement_end() = 0; // Operations - virtual void op_get_global_id(const Operand &dst_var, int32_t dim) = 0; + virtual void op_get_global_id(const Operand &dst_var, int32_t dim) = 0; - virtual void op_get_global_coord(const Operand &dst, const Operand &step, const TensorOperand &tensor, int32_t dim) = 0; + virtual void + op_get_global_coord(const Operand &dst, const Operand &step, const TensorOperand &tensor, int32_t dim) = 0; - virtual void op_get_global_batch(const Operand &dst, const TensorOperand &tensor) = 0; + virtual void op_get_global_batch(const Operand &dst, const TensorOperand &tensor) = 0; - virtual void op_get_global_size(const Operand &dst_var, int32_t dim) = 0; + virtual void op_get_global_size(const Operand &dst_var, int32_t dim) = 0; - virtual void op_unary_expression(const Operand &dst, UnaryOp op, const Operand &src) = 0; + virtual void op_unary_expression(const Operand &dst, UnaryOp op, const Operand &src) = 0; - virtual void op_binary_expression(const Operand &dst, const Operand &lhs, BinaryOp op, const Operand &rhs) = 0; + virtual void op_binary_expression(const Operand &dst, const Operand &lhs, BinaryOp op, const Operand &rhs) = 0; - virtual void op_assign(const Operand &dst_name, const Operand &src_name) = 0; + virtual void op_assign(const Operand &dst_name, const Operand &src_name) = 0; - virtual void op_unary_elementwise_function(const Operand &dst_name, UnaryFunction func, const Operand &src_name) = 0; + virtual void + op_unary_elementwise_function(const Operand &dst_name, UnaryFunction func, const Operand &src_name) = 0; - virtual void op_binary_elementwise_function(const Operand &dst_name, BinaryFunction func, const Operand &first_name, const Operand &second_name) = 0; + virtual void op_binary_elementwise_function(const Operand &dst_name, + BinaryFunction func, + const Operand &first_name, + const Operand &second_name) = 0; - virtual void op_ternary_elementwise_function(const Operand &dst_name, TernaryFunction func, const Operand &first_name, const Operand &second_name, const Operand &third_name) = 0; + virtual void op_ternary_elementwise_function(const Operand &dst_name, + TernaryFunction func, + const Operand &first_name, + const Operand &second_name, + const Operand &third_name) = 0; - virtual void op_if_header(const Operand &lhs, BinaryOp op, const Operand &rhs) = 0; + virtual void op_if_header(const Operand &lhs, BinaryOp op, const Operand &rhs) = 0; - virtual void op_else_if_header(const Operand &lhs, BinaryOp op, const Operand &rhs) = 0; + virtual void op_else_if_header(const Operand &lhs, BinaryOp op, const Operand &rhs) = 0; - virtual void op_else_header() = 0; + virtual void op_else_header() = 0; - virtual void op_for_loop_header(const Operand &var_name, BinaryOp cond_op, const Operand &cond_value, const Operand &update_var, AssignmentOp update_op, const Operand &update_value) = 0; + virtual void op_for_loop_header(const Operand &var_name, + BinaryOp cond_op, + const Operand &cond_value, + const Operand &update_var, + AssignmentOp update_op, + const Operand &update_value) = 0; - virtual void op_load_indirect(const TensorOperand &tensor, const Operand &dst, const Operand &x, const Operand &y_indirect, const Operand &z, const Operand &b = 
Operand("0", OperandType::ScalarInt32)) = 0; + virtual void op_load_indirect(const TensorOperand &tensor, + const Operand &dst, + const Operand &x, + const Operand &y_indirect, + const Operand &z, + const Operand &b = Operand("0", OperandType::ScalarInt32)) = 0; - virtual void op_load_immediate(const TensorOperand &tensor, const Operand &dst, const Operand &x, const Operand &y, const Operand &z, const Operand &b = Operand("0", OperandType::ScalarInt32), const Operand &dilation_y = Operand("1", OperandType::ScalarInt32)) = 0; + virtual void op_load_immediate(const TensorOperand &tensor, + const Operand &dst, + const Operand &x, + const Operand &y, + const Operand &z, + const Operand &b = Operand("0", OperandType::ScalarInt32), + const Operand &dilation_y = Operand("1", OperandType::ScalarInt32)) = 0; - virtual void op_store_immediate(const TensorOperand &tensor, const Operand &src, const Operand &x, const Operand &y, const Operand &z, const Operand &b = Operand("0", OperandType::ScalarInt32)) = 0; + virtual void op_store_immediate(const TensorOperand &tensor, + const Operand &src, + const Operand &x, + const Operand &y, + const Operand &z, + const Operand &b = Operand("0", OperandType::ScalarInt32)) = 0; - virtual void op_cast_expression(const Operand &dst, const Operand &src, ConvertPolicy policy) = 0; + virtual void op_cast_expression(const Operand &dst, const Operand &src, ConvertPolicy policy) = 0; - virtual void op_return() = 0; + virtual void op_return() = 0; // Utils // It is the process of converting - virtual void util_get_indirect_buffer(const Operand &dst, const TensorOperand &tensor, const Operand &x, - const Operand &y, const Operand &x_off, const Operand &y_off) = 0; + virtual void util_get_indirect_buffer(const Operand &dst, + const TensorOperand &tensor, + const Operand &x, + const Operand &y, + const Operand &x_off, + const Operand &y_off) = 0; }; enum class GpuLoadStoreType @@ -2586,12 +2618,11 @@ public: ClLoadStoreBufferHelperWriter &operator=(const ClLoadStoreBufferHelperWriter &) = default; - static bool - validate(IGpuKernelWriter *x, GpuTensor3dMapper mapper, GpuLoadStoreType type, IVectorTile *dst) + static bool validate(IGpuKernelWriter *x, GpuTensor3dMapper mapper, GpuLoadStoreType type, IVectorTile *dst) { CKW_UNUSED(x, type, dst); - if(mapper.gpu_sampler().storage != GpuSamplerTensorStorage::BufferUint8Ptr) + if (mapper.gpu_sampler().storage != GpuSamplerTensorStorage::BufferUint8Ptr) { return false; } @@ -2675,10 +2706,10 @@ public: out_of_bound_finalize_y(dst); // The left over load/store will be written in the finalize stage - if(_ls_width_part.size() != 0) + if (_ls_width_part.size() != 0) { int32_t w = 0; - for(auto &p : _ls_width_part) + for (auto &p : _ls_width_part) { const std::string dst0 = _dst->vector(w, p, idx_y).str; const std::string coord_x = _coord_x + " + " + std::to_string(w); @@ -2698,8 +2729,8 @@ public: } private: - IVectorTile *_dst{ nullptr }; - int32_t _ls_width_full{ 0 }; + IVectorTile *_dst{nullptr}; + int32_t _ls_width_full{0}; std::vector _ls_width_part{}; std::vector, std::string>> _leftovers_x{}; std::string _coord_x{}; @@ -2709,13 +2740,13 @@ private: void out_of_bound_initialize_x(std::string &coord) { - if(_mapper.gpu_sampler().address_mode_x == TensorSamplerAddressModeX::OverlappingMin) + if (_mapper.gpu_sampler().address_mode_x == TensorSamplerAddressModeX::OverlappingMin) { auto tensor_format = _mapper.tensor_argument()->format(); auto shape = tensor_format.shape; _ls_width_part = decompose_leftover_ls_vector_width(shape[0] % 
_ls_width_full); - if(_ls_width_part.size() != 0) + if (_ls_width_part.size() != 0) { _writer->write_text("if(" + coord + " > 0)\n"); _writer->compound_statement_begin(); @@ -2725,16 +2756,16 @@ private: void out_of_bound_finalize_x() { - if(_mapper.gpu_sampler().address_mode_x == TensorSamplerAddressModeX::OverlappingMin) + if (_mapper.gpu_sampler().address_mode_x == TensorSamplerAddressModeX::OverlappingMin) { - if(_ls_width_part.size() != 0) + if (_ls_width_part.size() != 0) { _writer->compound_statement_end(); _writer->write_text("else\n"); _writer->compound_statement_begin(); out_of_bound_initialize_z(_coord_orig_z); - for(auto &i : _leftovers_x) + for (auto &i : _leftovers_x) { out_of_bound_initialize_y(i.first.second); _writer->write_text(i.second); @@ -2753,7 +2784,7 @@ private: const auto address_mode_y = _mapper.gpu_sampler().address_mode_y; - switch(address_mode_y) + switch (address_mode_y) { case TensorSamplerAddressModeY::Skip: case TensorSamplerAddressModeY::ClampToBorder: @@ -2799,7 +2830,7 @@ private: { const auto address_mode_y = _mapper.gpu_sampler().address_mode_y; - switch(address_mode_y) + switch (address_mode_y) { case TensorSamplerAddressModeY::ClampToBorder: case TensorSamplerAddressModeY::ClampToBorderMaxEdgeOnly: @@ -2816,7 +2847,7 @@ private: assert(false); } - switch(address_mode_y) + switch (address_mode_y) { case TensorSamplerAddressModeY::ClampToBorder: case TensorSamplerAddressModeY::ClampToBorderMinEdgeOnly: @@ -2841,7 +2872,7 @@ private: const auto address_mode_z = _mapper.gpu_sampler().address_mode_z; - switch(address_mode_z) + switch (address_mode_z) { case TensorSamplerAddressModeZ::Skip: max = _mapper.tensor_component_z(); @@ -2880,7 +2911,7 @@ private: { const auto address_mode_z = _mapper.gpu_sampler().address_mode_z; - switch(address_mode_z) + switch (address_mode_z) { case TensorSamplerAddressModeZ::Skip: case TensorSamplerAddressModeZ::SkipMinEdgeOnly: @@ -2899,7 +2930,7 @@ private: { std::vector x; - switch(ls_leftover_vector_width) + switch (ls_leftover_vector_width) { case 0: break; @@ -2961,13 +2992,13 @@ private: return x; } - std::string to_ls_buffer(GpuLoadStoreType type, int32_t vector_width, const std::string &data, - const std::string &address) + std::string + to_ls_buffer(GpuLoadStoreType type, int32_t vector_width, const std::string &data, const std::string &address) { - switch(type) + switch (type) { case GpuLoadStoreType::Load: - if(vector_width != 1) + if (vector_width != 1) { return data + " = vload" + std::to_string(vector_width) + "(0, " + address + ")"; } @@ -2977,7 +3008,7 @@ private: } break; case GpuLoadStoreType::Store: - if(vector_width != 1) + if (vector_width != 1) { return "vstore" + std::to_string(vector_width) + "(" + data + ", 0, " + address + ")"; } @@ -2993,25 +3024,25 @@ private: } } - std::string to_ls_buffer_address(const std::string &x, const std::string &y, const std::string &z, - const std::string &b) const + std::string + to_ls_buffer_address(const std::string &x, const std::string &y, const std::string &z, const std::string &b) const { - auto tensor_storage = static_cast(_mapper.gpu_sampler().storage); + auto tensor_storage = static_cast(_mapper.gpu_sampler().storage); assert(tensor_storage == GpuTensorStorage::BufferUint8Ptr); - const std::string ptr_buf = _mapper.tensor_argument()->storage(tensor_storage); - const std::string dst_type = get_cl_data_type(_dst->format().dt, 1); + const std::string ptr_buf = _mapper.tensor_argument()->storage(tensor_storage); + const std::string dst_type = 
get_cl_data_type(_dst->format().dt, 1); std::string address; address += "(__global "; address += dst_type; address += "*)("; address += ptr_buf; - if(x != "0" && (_mapper.is_one_component_x() != true)) + if (x != "0" && (_mapper.is_one_component_x() != true)) { address += " + ("; address += x + ") * sizeof(" + dst_type + ")"; } - if(y != "0") + if (y != "0") { const std::string stride_y = _mapper.tensor_component_stride_y(); address += " + ("; @@ -3019,7 +3050,7 @@ private: address += " * "; address += stride_y; } - if(z != "0" && (_mapper.is_one_component_z() != true)) + if (z != "0" && (_mapper.is_one_component_z() != true)) { const std::string stride_z = _mapper.tensor_component_stride_z(); address += " + ("; @@ -3027,7 +3058,7 @@ private: address += " * "; address += stride_z; } - if(b != "0" && (_mapper.is_one_component_batch() != true)) + if (b != "0" && (_mapper.is_one_component_batch() != true)) { const std::string stride_b = _mapper.tensor_component_stride_batch(); address += " + ("; @@ -3043,32 +3074,32 @@ private: class ClLoadStoreImage2dHelperWriter : public IGpuLoadStoreHelperWriter { public: - static bool - validate(IGpuKernelWriter *x, const GpuTensor3dMapper &mapper, GpuLoadStoreType type, IVectorTile *dst) + static bool validate(IGpuKernelWriter *x, const GpuTensor3dMapper &mapper, GpuLoadStoreType type, IVectorTile *dst) { CKW_UNUSED(x); - if(dst->format().w != 4) + if (dst->format().w != 4) { return false; } - if(mapper.gpu_sampler().address_mode_x != TensorSamplerAddressModeX::None) + if (mapper.gpu_sampler().address_mode_x != TensorSamplerAddressModeX::None) { return false; } - if(mapper.gpu_sampler().address_mode_z != TensorSamplerAddressModeZ::None) + if (mapper.gpu_sampler().address_mode_z != TensorSamplerAddressModeZ::None) { return false; } - if(mapper.gpu_sampler().storage != GpuSamplerTensorStorage::Image2dReadOnly && type == GpuLoadStoreType::Load) + if (mapper.gpu_sampler().storage != GpuSamplerTensorStorage::Image2dReadOnly && type == GpuLoadStoreType::Load) { return false; } - if(mapper.gpu_sampler().storage != GpuSamplerTensorStorage::Image2dWriteOnly && type == GpuLoadStoreType::Store) + if (mapper.gpu_sampler().storage != GpuSamplerTensorStorage::Image2dWriteOnly && + type == GpuLoadStoreType::Store) { return false; } - if((dst->format().dt != DataType::Fp32) && (dst->format().dt != DataType::Fp16)) + if ((dst->format().dt != DataType::Fp32) && (dst->format().dt != DataType::Fp16)) { return false; } @@ -3134,8 +3165,8 @@ public: } private: - IVectorTile *_dst{ nullptr }; - int32_t _ls_width_full{ 0 }; + IVectorTile *_dst{nullptr}; + int32_t _ls_width_full{0}; std::string _coord_x{}; std::string _coord_z{}; std::string _coord_b{}; @@ -3146,7 +3177,7 @@ private: const auto address_mode_y = _mapper.gpu_sampler().address_mode_y; - switch(address_mode_y) + switch (address_mode_y) { case TensorSamplerAddressModeY::Skip: max = _mapper.tensor_component_y(); @@ -3182,7 +3213,7 @@ private: const auto address_mode_y = _mapper.gpu_sampler().address_mode_y; - switch(address_mode_y) + switch (address_mode_y) { case TensorSamplerAddressModeY::Skip: case TensorSamplerAddressModeY::SkipMinEdgeOnly: @@ -3195,16 +3226,19 @@ private: } }; - std::string to_ls_image2d(GpuLoadStoreType type, int32_t vector_width, const std::string &data, - const std::string &sampler, const std::string &coord) + std::string to_ls_image2d(GpuLoadStoreType type, + int32_t vector_width, + const std::string &data, + const std::string &sampler, + const std::string &coord) { CKW_UNUSED(vector_width); 
auto tensor_storage = static_cast(_mapper.gpu_sampler().storage); const std::string image2d_obj = _mapper.tensor_argument()->storage(tensor_storage); - const std::string post_fix = _dst->format().dt == DataType::Fp32 ? "f" : "h"; + const std::string post_fix = _dst->format().dt == DataType::Fp32 ? "f" : "h"; - switch(type) + switch (type) { case GpuLoadStoreType::Load: return data + " = read_image" + post_fix + "(" + image2d_obj + ", " + sampler + ", " + coord + ")"; @@ -3223,7 +3257,7 @@ private: { const auto address_mode_y = _mapper.gpu_sampler().address_mode_y; - switch(address_mode_y) + switch (address_mode_y) { case TensorSamplerAddressModeY::None: return "CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST"; @@ -3245,17 +3279,17 @@ private: } } - std::string to_ls_image2d_coord(const std::string &x, const std::string &y, const std::string &z, - const std::string &b) const + std::string + to_ls_image2d_coord(const std::string &x, const std::string &y, const std::string &z, const std::string &b) const { std::string coord_x = "(" + x + ") >> 2"; std::string coord_y = "("; - if(y != "0") + if (y != "0") { coord_y += y; } - if(z != "0" && (_mapper.is_one_component_z() != true)) + if (z != "0" && (_mapper.is_one_component_z() != true)) { const std::string dim = _mapper.tensor_component_y(); coord_y += " + ("; @@ -3263,7 +3297,7 @@ private: coord_y += " * "; coord_y += dim; } - if(b != "0" && (_mapper.is_one_component_batch() != true)) + if (b != "0" && (_mapper.is_one_component_batch() != true)) { const std::string dim0 = _mapper.tensor_component_y(); const std::string dim1 = _mapper.tensor_component_z(); @@ -3292,7 +3326,7 @@ public: create(IGpuKernelWriter *x, const GpuTensor3dMapper &mapper, GpuLoadStoreType type) { const auto tensor_storage = mapper.gpu_sampler().storage; - switch(tensor_storage) + switch (tensor_storage) { case GpuSamplerTensorStorage::BufferUint8Ptr: return std::make_unique(x, mapper, type); @@ -3352,14 +3386,14 @@ public: IVectorTile *x = _data->tiles[name]; - for(auto &t : x->underlying_source_variables()) + for (auto &t : x->underlying_source_variables()) { _data->code += t.type.str + " " + t.str + ";\n"; } } - void declare_const_tile(const std::string &name, const std::vector> &in, - DataType dt) override + void + declare_const_tile(const std::string &name, const std::vector> &in, DataType dt) override { assert(_data->tiles[name] == nullptr); _data->tiles.insert(name, in, dt); @@ -3387,7 +3421,8 @@ public: { assert(dst_var.type() == OperandType::Tile); assert(_data->tiles.has_tile(dst_var.value())); - assert(_data->tiles[dst_var.value()]->format().w == 1 && _data->tiles[dst_var.value()]->format().h == 1); // It must be a scalar variable + assert(_data->tiles[dst_var.value()]->format().w == 1 && + _data->tiles[dst_var.value()]->format().h == 1); // It must be a scalar variable auto var = _data->tiles[dst_var.value()]; @@ -3397,8 +3432,10 @@ public: _data->code += ");\n"; }; - void op_get_global_coord(const Operand &o_dst, const Operand &o_step, const TensorOperand &o_tensor, - int32_t dim) override + void op_get_global_coord(const Operand &o_dst, + const Operand &o_step, + const TensorOperand &o_tensor, + int32_t dim) override { OperandUnpacker operands(_data->tiles, _data->arguments); auto dst = operands.unpack(o_dst); @@ -3412,17 +3449,17 @@ public: GpuTensor3dMapper mapper(tensor, gpu_sampler); - switch(dim) + switch (dim) { case 0: - if(mapper.is_one_component_x()) + if (mapper.is_one_component_x()) { _data->code += dst->scalar(0, 0).str; 
_data->code += " = 0;\n"; } else { - if(mapper.gpu_sampler().address_mode_x == TensorSamplerAddressModeX::OverlappingMin) + if (mapper.gpu_sampler().address_mode_x == TensorSamplerAddressModeX::OverlappingMin) { // Validation: Check: fixed tensor shape // TO BE CHANGED @@ -3441,14 +3478,14 @@ public: } break; case 1: - if(mapper.is_one_component_y()) + if (mapper.is_one_component_y()) { _data->code += dst->scalar(0, 0).str; _data->code += " = 0;\n"; } else { - if(mapper.gpu_sampler().address_mode_y == TensorSamplerAddressModeY::OverlappingMin) + if (mapper.gpu_sampler().address_mode_y == TensorSamplerAddressModeY::OverlappingMin) { } else @@ -3461,7 +3498,7 @@ public: } break; case 2: - if(mapper.is_one_component_z()) + if (mapper.is_one_component_z()) { _data->code += dst->scalar(0, 0).str; _data->code += " = 0;\n"; @@ -3490,7 +3527,7 @@ public: GpuTensor3dMapper mapper(tensor, gpu_sampler); - if(mapper.is_one_component_batch()) + if (mapper.is_one_component_batch()) { _data->code += dst->scalar(0, 0).str; _data->code += " = 0;\n"; @@ -3506,7 +3543,8 @@ public: { assert(dst_var.type() == OperandType::Tile); assert(_data->tiles.has_tile(dst_var.value())); - assert(_data->tiles[dst_var.value()]->format().w == 1 && _data->tiles[dst_var.value()]->format().h == 1); // It must be a scalar variable + assert(_data->tiles[dst_var.value()]->format().w == 1 && + _data->tiles[dst_var.value()]->format().h == 1); // It must be a scalar variable auto var = _data->tiles[dst_var.value()]; @@ -3532,7 +3570,7 @@ public: const std::string src_prefix = broadcast_src_x ? "(" + dt + ")" : ""; // Broadcasting on Y is automatic - for(int32_t y = 0; y < dst_h; ++y) + for (int32_t y = 0; y < dst_h; ++y) { _data->code += dst->vector(y).str; _data->code += " = "; @@ -3542,7 +3580,9 @@ public: } } - void op_binary_expression(const Operand &dst_name, const Operand &lhs_name, BinaryOp op, + void op_binary_expression(const Operand &dst_name, + const Operand &lhs_name, + BinaryOp op, const Operand &rhs_name) override { OperandUnpacker operands(_data->tiles, _data->arguments); @@ -3556,14 +3596,14 @@ public: const int32_t lhs_w = lhs->format().w; const int32_t rhs_w = rhs->format().w; - if(op == BinaryOp::MatMul_Nt_T) + if (op == BinaryOp::MatMul_Nt_T) { assert((dst->format().dt == DataType::Fp32) || (dst->format().dt == DataType::Fp16)); - for(int32_t y = 0; y < dst_h; ++y) + for (int32_t y = 0; y < dst_h; ++y) { - for(int32_t x = 0; x < dst_w; ++x) + for (int32_t x = 0; x < dst_w; ++x) { - for(int32_t k = 0; k < lhs_w; ++k) + for (int32_t k = 0; k < lhs_w; ++k) { _data->code += dst->scalar(x, y).str; _data->code += " = fma("; @@ -3583,12 +3623,14 @@ public: const bool broadcast_lhs_x = dst_w != 1 && lhs_w == 1; const bool broadcast_rhs_x = dst_w != 1 && rhs_w == 1; - const std::string lhs_prefix = broadcast_lhs_x ? "(" + dst->underlying_source_variables()[0].type.str + ")" : ""; - const std::string rhs_prefix = broadcast_rhs_x ? "(" + dst->underlying_source_variables()[0].type.str + ")" : ""; - const std::string op_str = to_string(op); + const std::string lhs_prefix = + broadcast_lhs_x ? "(" + dst->underlying_source_variables()[0].type.str + ")" : ""; + const std::string rhs_prefix = + broadcast_rhs_x ? 
"(" + dst->underlying_source_variables()[0].type.str + ")" : ""; + const std::string op_str = to_string(op); // Broadcasting on Y is automatic - for(int32_t y = 0; y < dst_h; ++y) + for (int32_t y = 0; y < dst_h; ++y) { _data->code += dst->vector(y).str; _data->code += " = "; @@ -3607,13 +3649,13 @@ public: const IVectorTile *src = operands.unpack(o_src); const IVectorTile *dst = operands.unpack(o_dst); // const int32_t dst_w = dst->format().w; - const int32_t dst_h = dst->format().h; - const std::string dt = dst->underlying_source_variables()[0].type.str; - const bool is_float = (dst->format().dt == DataType::Fp32) || (dst->format().dt == DataType::Fp16); - const std::string sat = ((policy == ConvertPolicy::Saturate && !is_float) ? "_sat" : ""); + const int32_t dst_h = dst->format().h; + const std::string dt = dst->underlying_source_variables()[0].type.str; + const bool is_float = (dst->format().dt == DataType::Fp32) || (dst->format().dt == DataType::Fp16); + const std::string sat = ((policy == ConvertPolicy::Saturate && !is_float) ? "_sat" : ""); // Broadcasting on Y is automatic - for(int32_t y = 0; y < dst_h; ++y) + for (int32_t y = 0; y < dst_h; ++y) { _data->code += dst->vector(y).str; _data->code += " = convert_" + dt + sat + "("; @@ -3638,7 +3680,7 @@ public: const std::string src_prefix = broadcast_src_x ? "(" + dt + ")" : ""; // Broadcasting on Y is automatic - for(int32_t y = 0; y < dst_h; ++y) + for (int32_t y = 0; y < dst_h; ++y) { _data->code += dst->vector(y).str; _data->code += " = "; @@ -3647,8 +3689,7 @@ public: } } - void - op_unary_elementwise_function(const Operand &dst_name, UnaryFunction func, const Operand &src_name) override + void op_unary_elementwise_function(const Operand &dst_name, UnaryFunction func, const Operand &src_name) override { OperandUnpacker operands(_data->tiles, _data->arguments); const IVectorTile *src = operands.unpack(src_name); @@ -3665,12 +3706,12 @@ public: const std::string src_prefix = "(" + dt + ")"; // Broadcasting on Y is automatic - for(int32_t y = 0; y < dst_h; ++y) + for (int32_t y = 0; y < dst_h; ++y) { _data->code += dst->vector(y).str; _data->code += " = "; - switch(func) + switch (func) { case UnaryFunction::Exp: _data->code += "exp("; @@ -3708,7 +3749,10 @@ public: } } - void op_binary_elementwise_function(const Operand &dst_name, BinaryFunction func, const Operand &first_name, const Operand &second_name) override + void op_binary_elementwise_function(const Operand &dst_name, + BinaryFunction func, + const Operand &first_name, + const Operand &second_name) override { OperandUnpacker operands(_data->tiles, _data->arguments); const IVectorTile *first = operands.unpack(first_name); @@ -3726,12 +3770,12 @@ public: const bool is_float = (datatype.dt == DataType::Fp32 || datatype.dt == DataType::Fp16); // Broadcasting on Y is automatic - for(int32_t y = 0; y < dst_h; ++y) + for (int32_t y = 0; y < dst_h; ++y) { _data->code += dst->vector(y).str; _data->code += " = "; - switch(func) + switch (func) { case BinaryFunction::Min: _data->code += is_float ? 
"fmin(" : "min("; @@ -3750,7 +3794,11 @@ public: } } - void op_ternary_elementwise_function(const Operand &dst_name, TernaryFunction func, const Operand &first_name, const Operand &second_name, const Operand &third_name) override + void op_ternary_elementwise_function(const Operand &dst_name, + TernaryFunction func, + const Operand &first_name, + const Operand &second_name, + const Operand &third_name) override { OperandUnpacker operands(_data->tiles, _data->arguments); const IVectorTile *first = operands.unpack(first_name); @@ -3758,8 +3806,8 @@ public: const IVectorTile *third = operands.unpack(third_name); const IVectorTile *dst = operands.unpack(dst_name); - const int32_t dst_h = dst->format().h; - const std::string dt = dst->underlying_source_variables()[0].type.str; + const int32_t dst_h = dst->format().h; + const std::string dt = dst->underlying_source_variables()[0].type.str; // Always perform an explicit cast. See similar comments in op_unary_elementwise_function const std::string first_prefix = "(" + dt + ")"; @@ -3767,12 +3815,12 @@ public: const std::string third_prefix = "(" + dt + ")"; // Broadcasting on Y is automatic - for(int32_t y = 0; y < dst_h; ++y) + for (int32_t y = 0; y < dst_h; ++y) { _data->code += dst->vector(y).str; _data->code += " = "; - switch(func) + switch (func) { case TernaryFunction::Select: _data->code += "select("; @@ -3822,7 +3870,12 @@ public: _data->code += "else\n"; } - void op_for_loop_header(const Operand& var_name, BinaryOp cond_op, const Operand& cond_value_name, const Operand &update_var_name, AssignmentOp update_op, const Operand& update_value_name) override + void op_for_loop_header(const Operand &var_name, + BinaryOp cond_op, + const Operand &cond_value_name, + const Operand &update_var_name, + AssignmentOp update_op, + const Operand &update_value_name) override { OperandUnpacker operands(_data->tiles, _data->arguments); const IVectorTile *var = operands.unpack(var_name); @@ -3850,9 +3903,13 @@ public: _data->code += "\n"; } - void op_load_immediate(const TensorOperand &o_tensor, const Operand &o_dst, const Operand &o_x, - const Operand &o_y, const Operand &o_z, const Operand &o_batch_idx, - const Operand &dilation_y) override + void op_load_immediate(const TensorOperand &o_tensor, + const Operand &o_dst, + const Operand &o_x, + const Operand &o_y, + const Operand &o_z, + const Operand &o_batch_idx, + const Operand &dilation_y) override { OperandUnpacker operands(_data->tiles, _data->arguments); @@ -3875,10 +3932,10 @@ public: // Initialize the constant part load_writer->initialize(dst, x, z, b); - for(int i = 0; i < dst->format().h; ++i) + for (int i = 0; i < dst->format().h; ++i) { std::string coord_y = y->scalar(0, 0).str + " + " + std::to_string(i); - if(dil_y->scalar(0, 0).str != "1") + if (dil_y->scalar(0, 0).str != "1") { coord_y += " * " + dil_y->scalar(0, 0).str; } @@ -3888,9 +3945,12 @@ public: load_writer->finalize(); } - void op_load_indirect(const TensorOperand &o_tensor, const Operand &o_dst, const Operand &o_x, - const Operand &o_indirect_h, const Operand &o_z, - const Operand &o_batch_idx) override + void op_load_indirect(const TensorOperand &o_tensor, + const Operand &o_dst, + const Operand &o_x, + const Operand &o_indirect_h, + const Operand &o_z, + const Operand &o_batch_idx) override { OperandUnpacker operands(_data->tiles, _data->arguments); @@ -3912,7 +3972,7 @@ public: // Initialize the constant part load_writer->initialize(dst, x, z, b); - for(int i = 0; i < dst->format().h; ++i) + for (int i = 0; i < dst->format().h; 
++i) { load_writer->write(std::make_pair(i, y_ind->scalar(0, i).str)); } @@ -3920,9 +3980,12 @@ public: load_writer->finalize(); } - void op_store_immediate(const TensorOperand &tensor_name, const Operand &src_name, const Operand &x_name, - const Operand &y_name, const Operand &z_name, - const Operand &batch_index_name) override + void op_store_immediate(const TensorOperand &tensor_name, + const Operand &src_name, + const Operand &x_name, + const Operand &y_name, + const Operand &z_name, + const Operand &batch_index_name) override { OperandUnpacker operands(_data->tiles, _data->arguments); @@ -3946,7 +4009,7 @@ public: int32_t tile_h = src->format().h; - for(int m0 = tile_h - 1; m0 >= 0; m0--) + for (int m0 = tile_h - 1; m0 >= 0; m0--) { store_writer->write(std::make_pair(m0, y->scalar(0, 0).str + " + " + std::to_string(m0))); } @@ -3959,8 +4022,12 @@ public: _data->code += "return;\n"; } - void util_get_indirect_buffer(const Operand &o_dst, const TensorOperand &o_tensor, const Operand &o_x, - const Operand &o_y, const Operand &o_x_off, const Operand &o_y_off) override + void util_get_indirect_buffer(const Operand &o_dst, + const TensorOperand &o_tensor, + const Operand &o_x, + const Operand &o_y, + const Operand &o_x_off, + const Operand &o_y_off) override { OperandUnpacker operands(_data->tiles, _data->arguments); const IVectorTile *dst = operands.unpack(o_dst); @@ -4002,7 +4069,7 @@ public: declare_tile("_y_s", TileInfo(DataType::Int32)); auto x_s = operands.unpack(Operand("_x_s")); auto y_s = operands.unpack(Operand("_y_s")); - for(int i = 0; i < dst->format().h; ++i) + for (int i = 0; i < dst->format().h; ++i) { // x_s = (xi_0 + x_k); // y_s = (yi_0 + y_k); @@ -4060,8 +4127,8 @@ public: } private: - GpuKernelWriterDataHolder *_data{ nullptr }; - GpuKernelWriterAttribute *_attr{ nullptr }; + GpuKernelWriterDataHolder *_data{nullptr}; + GpuKernelWriterAttribute *_attr{nullptr}; }; /** IGpuKernelWriter factory class */ @@ -4074,10 +4141,9 @@ public: * * @return IGpuKernelWriter */ - static std::unique_ptr - create(GpuKernelWriterAttribute *attr, GpuKernelWriterDataHolder *x) + static std::unique_ptr create(GpuKernelWriterAttribute *attr, GpuKernelWriterDataHolder *x) { - switch(x->programming_language()) + switch (x->programming_language()) { case GpuTargetLanguage::OpenCL: return std::make_unique(attr, x); @@ -4094,9 +4160,9 @@ adjust_step(TensorSamplerFormat tensor_format, int32_t step, const TensorInfo *t { auto tensor = tensor_info_id->shape; - int32_t dim[3] = { 0 }; + int32_t dim[3] = {0}; - switch(tensor_format) + switch (tensor_format) { case TensorSamplerFormat::C_W_H: dim[0] = tensor[0]; diff --git a/compute_kernel_writer/prototype/src/TensorOperand.cpp b/compute_kernel_writer/prototype/src/TensorOperand.cpp index c6725d3b26..d1aefbbb71 100644 --- a/compute_kernel_writer/prototype/src/TensorOperand.cpp +++ b/compute_kernel_writer/prototype/src/TensorOperand.cpp @@ -23,10 +23,12 @@ */ #include "ckw/TensorOperand.h" + #include "ckw/Error.h" #include "ckw/Kernel.h" #include "ckw/TensorInfo.h" #include "ckw/TileOperand.h" + #include "src/Prototype.h" namespace ckw @@ -35,9 +37,11 @@ namespace ckw namespace { -TensorComponentOperand &get_or_create_component(TensorOperand &tensor, std::unique_ptr &ptr, TensorComponentType component) +TensorComponentOperand &get_or_create_component(TensorOperand &tensor, + std::unique_ptr &ptr, + TensorComponentType component) { - if(ptr == nullptr) + if (ptr == nullptr) { ptr = std::make_unique(tensor, component); } @@ -59,7 +63,7 @@ 
TensorOperand::TensorOperand(const std::string &name, const TensorInfo &info, Te prototype::Operand TensorOperand::create_impl_operand(prototype::IGpuKernelWriter *writer) const { CKW_UNUSED(writer); - return { name() }; + return {name()}; } const TensorInfo &TensorOperand::info() const @@ -206,9 +210,9 @@ TensorComponentType TensorComponentOperand::component_type() const prototype::Operand TensorComponentOperand::create_impl_operand(prototype::IGpuKernelWriter *writer) const { CKW_UNUSED(writer); - prototype::OperandType type{ prototype::OperandType::Unknown }; + prototype::OperandType type{prototype::OperandType::Unknown}; - switch(_component) + switch (_component) { case TensorComponentType::OffsetFirstElement: type = prototype::OperandType::TensorDataOffset; diff --git a/compute_kernel_writer/prototype/src/TensorTileSampler.cpp b/compute_kernel_writer/prototype/src/TensorTileSampler.cpp index 28e54df3a5..bf9f946ce8 100644 --- a/compute_kernel_writer/prototype/src/TensorTileSampler.cpp +++ b/compute_kernel_writer/prototype/src/TensorTileSampler.cpp @@ -23,6 +23,7 @@ */ #include "ckw/TensorTileSampler.h" + #include "ckw/TileOperand.h" #include "ckw/types/TensorSamplerTypes.h" @@ -33,24 +34,47 @@ TensorTileSampler::TensorTileSampler() { } -TensorTileSampler::TensorTileSampler( - TileOperand &x, TileOperand &y, TileOperand &z, TileOperand &b, - TensorSamplerFormat format, - TensorSamplerAddressModeX address_mode_x, - TensorSamplerAddressModeY address_mode_y, - TensorSamplerAddressModeZ address_mode_z) - : _x(&x), _y(&y), _z(&z), _b(&b), _height(0), _width(0), _format(format), _address_mode_x(address_mode_x), _address_mode_y(address_mode_y), _address_mode_z(address_mode_z) -{ -} - -TensorTileSampler::TensorTileSampler( - TileOperand &x, TileOperand &y, TileOperand &z, TileOperand &b, - int32_t height, int32_t width, - TensorSamplerFormat format, - TensorSamplerAddressModeX address_mode_x, - TensorSamplerAddressModeY address_mode_y, - TensorSamplerAddressModeZ address_mode_z) - : _x(&x), _y(&y), _z(&z), _b(&b), _height(height), _width(width), _format(format), _address_mode_x(address_mode_x), _address_mode_y(address_mode_y), _address_mode_z(address_mode_z) +TensorTileSampler::TensorTileSampler(TileOperand &x, + TileOperand &y, + TileOperand &z, + TileOperand &b, + TensorSamplerFormat format, + TensorSamplerAddressModeX address_mode_x, + TensorSamplerAddressModeY address_mode_y, + TensorSamplerAddressModeZ address_mode_z) + : _x(&x), + _y(&y), + _z(&z), + _b(&b), + _height(0), + _width(0), + _format(format), + _address_mode_x(address_mode_x), + _address_mode_y(address_mode_y), + _address_mode_z(address_mode_z) +{ +} + +TensorTileSampler::TensorTileSampler(TileOperand &x, + TileOperand &y, + TileOperand &z, + TileOperand &b, + int32_t height, + int32_t width, + TensorSamplerFormat format, + TensorSamplerAddressModeX address_mode_x, + TensorSamplerAddressModeY address_mode_y, + TensorSamplerAddressModeZ address_mode_z) + : _x(&x), + _y(&y), + _z(&z), + _b(&b), + _height(height), + _width(width), + _format(format), + _address_mode_x(address_mode_x), + _address_mode_y(address_mode_y), + _address_mode_z(address_mode_z) { } diff --git a/compute_kernel_writer/prototype/src/TileInfo.cpp b/compute_kernel_writer/prototype/src/TileInfo.cpp index 66d8cb1620..273266eedc 100644 --- a/compute_kernel_writer/prototype/src/TileInfo.cpp +++ b/compute_kernel_writer/prototype/src/TileInfo.cpp @@ -26,18 +26,15 @@ namespace ckw { -TileInfo::TileInfo(DataType dt) - : _dt(dt), _shape({ { 1, 1 } }) 
+TileInfo::TileInfo(DataType dt) : _dt(dt), _shape({{1, 1}}) { } -TileInfo::TileInfo(DataType dt, int32_t w) - : _dt(dt), _shape({ { w, 1 } }) +TileInfo::TileInfo(DataType dt, int32_t w) : _dt(dt), _shape({{w, 1}}) { } -TileInfo::TileInfo(DataType dt, int32_t h, int32_t w) - : _dt(dt), _shape({ { w, h } }) +TileInfo::TileInfo(DataType dt, int32_t h, int32_t w) : _dt(dt), _shape({{w, h}}) { } diff --git a/compute_kernel_writer/prototype/src/TileOperand.cpp b/compute_kernel_writer/prototype/src/TileOperand.cpp index 0eb2ca6a64..e09c833d96 100644 --- a/compute_kernel_writer/prototype/src/TileOperand.cpp +++ b/compute_kernel_writer/prototype/src/TileOperand.cpp @@ -23,47 +23,43 @@ */ #include "ckw/TileOperand.h" + #include "ckw/Error.h" + #include "src/Prototype.h" namespace ckw { TileOperand::TileOperand(const std::string &name, const TileInfo &info) - : OperandBase(name), - _info(info), - _value{ std::vector{ "0" } }, - _constant(false) + : OperandBase(name), _info(info), _value{std::vector{"0"}}, _constant(false) { } TileOperand::TileOperand(const std::string &name, DataType data_type) - : OperandBase(name), - _info(TileInfo{ data_type }), - _value{ std::vector{ "0" } }, - _constant(false) + : OperandBase(name), _info(TileInfo{data_type}), _value{std::vector{"0"}}, _constant(false) { } TileOperand::TileOperand(const std::string &name, int32_t value) : OperandBase(name), - _info(TileInfo{ DataType::Int32 }), - _value{ std::vector{ std::to_string(value) } }, + _info(TileInfo{DataType::Int32}), + _value{std::vector{std::to_string(value)}}, _constant(true) { } TileOperand::TileOperand(const std::string &name, float value) : OperandBase(name), - _info(TileInfo{ DataType::Fp32 }), - _value{ std::vector{ std::to_string(value) } }, + _info(TileInfo{DataType::Fp32}), + _value{std::vector{std::to_string(value)}}, _constant(true) { } TileOperand::TileOperand(const std::string &name, const TileContainer &vals, DataType dt) : OperandBase(name), - _info(TileInfo{ dt, static_cast(vals.size()), static_cast(vals[0].size()) }), + _info(TileInfo{dt, static_cast(vals.size()), static_cast(vals[0].size())}), _value(vals), _constant(true) { @@ -73,11 +69,11 @@ prototype::Operand TileOperand::create_impl_operand(prototype::IGpuKernelWriter { CKW_UNUSED(writer); - if(_constant) + if (_constant) { - if(is_scalar()) + if (is_scalar()) { - switch(_info.data_type()) + switch (_info.data_type()) { case DataType::Int32: return prototype::Operand(_value[0][0], prototype::OperandType::ScalarInt32); -- cgit v1.2.1
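
For readers who want to reproduce the whitespace changes visible in the hunks above, they are consistent with roughly the following clang-format 14 options. This is a sketch inferred from the diff itself, not the configuration file actually used, so every value here is an assumption and may differ from the real settings:

    # .clang-format (inferred approximation; values deduced from the hunks above,
    # not copied from the project's actual configuration file)
    ---
    Language:        Cpp
    ColumnLimit:     120                   # long signatures wrap near column 120
    IndentWidth:     4
    UseTab:          Never
    BreakBeforeBraces: Allman              # braces consistently on their own line
    PointerAlignment: Right                # e.g. "const Operand &dst_name"
    SpaceBeforeParens: ControlStatements   # "if(" -> "if (", "switch(" -> "switch ("
    Cpp11BracedListStyle: true             # "{ 3, 10, 20 }" -> "{3, 10, 20}"
    AlignAfterOpenBracket: Align           # parameters aligned under the opening parenthesis
    BinPackParameters: false               # one parameter per line once a signature wraps
    PackConstructorInitializers: CurrentLine  # TileInfo stays on one line; TensorTileSampler
                                              # does not fit, so one initializer per line
    AlignConsecutiveDeclarations: true     # "const int32_t     dst_h" / "const std::string dt"
    AlignConsecutiveAssignments: true
    ...

With such a file at the repository root, running clang-format 14 with `-i` over the C/C++ sources would reproduce most, though likely not all, of the formatting shown in this patch.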