diff options
author | Felix Thomasmathibalan <felixjohnny.thomasmathibalan@arm.com> | 2023-09-27 17:46:17 +0100 |
---|---|---|
committer | felixjohnny.thomasmathibalan <felixjohnny.thomasmathibalan@arm.com> | 2023-09-28 12:08:05 +0000 |
commit | afd38f0c617d6f89b2b4532c6c44f116617e2b6f (patch) | |
tree | 03bc7d5a762099989b16a656fa8d397b490ed70e /compute_kernel_writer/src/cl | |
parent | bdcb4c148ee2fdeaaddf4cf1e57bbb0de02bb894 (diff) | |
download | ComputeLibrary-afd38f0c617d6f89b2b4532c6c44f116617e2b6f.tar.gz |
Apply clang-format on repository
Code is formatted as per a revised clang-format configuration
file (not part of this delivery). clang-format version 14.0.6 is used.
Exclusion List:
- files with .cl extension
- files that are not strictly C/C++ (e.g. Android.bp, Sconscript ...)
And the following directories:
- compute_kernel_writer/validation/
- tests/
- include/
- src/core/NEON/kernels/convolution/
- src/core/NEON/kernels/arm_gemm/
- src/core/NEON/kernels/arm_conv/
- data/
There will be a follow-up for formatting of .cl files and the
files under tests/ and compute_kernel_writer/validation/.
Signed-off-by: Felix Thomasmathibalan <felixjohnny.thomasmathibalan@arm.com>
Change-Id: Ib7eb1fcf4e7537b9feaefcfc15098a804a3fde0a
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10391
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
Diffstat (limited to 'compute_kernel_writer/src/cl')
-rw-r--r-- | compute_kernel_writer/src/cl/CLHelpers.cpp | 77 | ||||
-rw-r--r-- | compute_kernel_writer/src/cl/CLKernelWriter.cpp | 282 | ||||
-rw-r--r-- | compute_kernel_writer/src/cl/CLKernelWriter.h | 104 | ||||
-rw-r--r-- | compute_kernel_writer/src/cl/CLTensorArgument.cpp | 50 | ||||
-rw-r--r-- | compute_kernel_writer/src/cl/CLTensorArgument.h | 6 | ||||
-rw-r--r-- | compute_kernel_writer/src/cl/CLTensorComponent.cpp | 9 | ||||
-rw-r--r-- | compute_kernel_writer/src/cl/CLTensorComponent.h | 5 | ||||
-rw-r--r-- | compute_kernel_writer/src/cl/CLTile.cpp | 41 | ||||
-rw-r--r-- | compute_kernel_writer/src/cl/CLTile.h | 7 | ||||
-rw-r--r-- | compute_kernel_writer/src/cl/helpers/CLMemoryOpBufferHelper.cpp | 79 | ||||
-rw-r--r-- | compute_kernel_writer/src/cl/helpers/CLMemoryOpBufferHelper.h | 23 | ||||
-rw-r--r-- | compute_kernel_writer/src/cl/helpers/CLMemoryOpImage2dHelper.cpp | 51 | ||||
-rw-r--r-- | compute_kernel_writer/src/cl/helpers/CLMemoryOpImage2dHelper.h | 16 | ||||
-rw-r--r-- | compute_kernel_writer/src/cl/helpers/ICLMemoryOpHelper.h | 21 |
14 files changed, 457 insertions, 314 deletions
diff --git a/compute_kernel_writer/src/cl/CLHelpers.cpp b/compute_kernel_writer/src/cl/CLHelpers.cpp index ff4408b1a3..8e4a932764 100644 --- a/compute_kernel_writer/src/cl/CLHelpers.cpp +++ b/compute_kernel_writer/src/cl/CLHelpers.cpp @@ -28,6 +28,7 @@ #include "ckw/types/DataType.h" #include "ckw/types/Operators.h" #include "ckw/types/TensorStorageType.h" + #include "src/types/DataTypeHelpers.h" namespace ckw @@ -35,7 +36,7 @@ namespace ckw bool cl_validate_vector_length(int32_t len) { bool valid_vector_length = true; - if(len < 1 || len > 16 || (len > 4 && len < 8) || (len > 8 && len < 16)) + if (len < 1 || len > 16 || (len > 4 && len < 8) || (len > 8 && len < 16)) { valid_vector_length = false; } @@ -44,14 +45,14 @@ bool cl_validate_vector_length(int32_t len) std::string cl_get_variable_datatype_as_string(DataType dt, int32_t len) { - if(cl_validate_vector_length(len) == false) + if (cl_validate_vector_length(len) == false) { CKW_THROW_MSG("Unsupported vector length"); return ""; } std::string res; - switch(dt) + switch (dt) { case DataType::Fp32: res += "float"; @@ -85,7 +86,7 @@ std::string cl_get_variable_datatype_as_string(DataType dt, int32_t len) return ""; } - if(len > 1) + if (len > 1) { res += std::to_string(len); } @@ -95,7 +96,7 @@ std::string cl_get_variable_datatype_as_string(DataType dt, int32_t len) int32_t cl_round_up_to_nearest_valid_vector_width(int32_t width) { - switch(width) + switch (width) { case 1: return 1; @@ -128,7 +129,7 @@ int32_t cl_round_up_to_nearest_valid_vector_width(int32_t width) std::string cl_get_variable_storagetype_as_string(TensorStorageType storage) { std::string res; - switch(storage) + switch (storage) { case TensorStorageType::BufferUint8Ptr: res += "__global uchar*"; @@ -148,7 +149,7 @@ std::string cl_get_variable_storagetype_as_string(TensorStorageType storage) std::string cl_get_assignment_op_as_string(AssignmentOp op) { - switch(op) + switch (op) { case AssignmentOp::Increment: return "+="; @@ -163,34 +164,34 @@ 
std::string cl_get_assignment_op_as_string(AssignmentOp op) std::tuple<bool, std::string> cl_get_unary_op(UnaryOp op) { - switch(op) + switch (op) { case UnaryOp::LogicalNot: - return { false, "!" }; + return {false, "!"}; case UnaryOp::BitwiseNot: - return { false, "~" }; + return {false, "~"}; case UnaryOp::Exp: - return { true, "exp" }; + return {true, "exp"}; case UnaryOp::Tanh: - return { true, "tanh" }; + return {true, "tanh"}; case UnaryOp::Sqrt: - return { true, "sqrt" }; + return {true, "sqrt"}; case UnaryOp::Erf: - return { true, "erf" }; + return {true, "erf"}; case UnaryOp::Fabs: - return { true, "fabs" }; + return {true, "fabs"}; case UnaryOp::Log: - return { true, "log" }; + return {true, "log"}; case UnaryOp::Round: - return { true, "round" }; + return {true, "round"}; default: CKW_THROW_MSG("Unsupported unary operation!"); @@ -201,52 +202,52 @@ std::tuple<bool, std::string> cl_get_binary_op(BinaryOp op, DataType data_type) { const auto is_float = is_data_type_float(data_type); - switch(op) + switch (op) { case BinaryOp::Add: - return { false, "+" }; + return {false, "+"}; case BinaryOp::Sub: - return { false, "-" }; + return {false, "-"}; case BinaryOp::Mul: - return { false, "*" }; + return {false, "*"}; case BinaryOp::Div: - return { false, "/" }; + return {false, "/"}; case BinaryOp::Mod: - return { false, "%" }; + return {false, "%"}; case BinaryOp::Equal: - return { false, "==" }; + return {false, "=="}; case BinaryOp::Less: - return { false, "<" }; + return {false, "<"}; case BinaryOp::LessEqual: - return { false, "<=" }; + return {false, "<="}; case BinaryOp::Greater: - return { false, ">" }; + return {false, ">"}; case BinaryOp::GreaterEqual: - return { false, ">=" }; + return {false, ">="}; case BinaryOp::LogicalAnd: - return { false, "&&" }; + return {false, "&&"}; case BinaryOp::LogicalOr: - return { false, "||" }; + return {false, "||"}; case BinaryOp::BitwiseXOR: - return { false, "^" }; + return {false, "^"}; case BinaryOp::Min: - 
return { true, is_float ? "fmin" : "min" }; + return {true, is_float ? "fmin" : "min"}; case BinaryOp::Max: - return { true, is_float ? "fmax" : "max" }; + return {true, is_float ? "fmax" : "max"}; default: CKW_THROW_MSG("Unsupported binary operator/function!"); @@ -255,13 +256,13 @@ std::tuple<bool, std::string> cl_get_binary_op(BinaryOp op, DataType data_type) std::tuple<bool, std::string> cl_get_ternary_op(TernaryOp op) { - switch(op) + switch (op) { case TernaryOp::Select: - return { true, "select" }; + return {true, "select"}; case TernaryOp::Clamp: - return { true, "clamp" }; + return {true, "clamp"}; default: CKW_THROW_MSG("Unsupported ternary function!"); @@ -273,7 +274,7 @@ std::string cl_data_type_rounded_up_to_valid_vector_width(DataType dt, int32_t w std::string data_type; const int32_t w = cl_round_up_to_nearest_valid_vector_width(width); data_type += cl_get_variable_datatype_as_string(dt, 1); - if(w != 1) + if (w != 1) { data_type += std::to_string(w); } @@ -284,7 +285,7 @@ std::vector<int32_t> cl_decompose_vector_width(int32_t vector_width) { std::vector<int32_t> x; - switch(vector_width) + switch (vector_width) { case 0: break; diff --git a/compute_kernel_writer/src/cl/CLKernelWriter.cpp b/compute_kernel_writer/src/cl/CLKernelWriter.cpp index 2db9c139b7..62e6853a7a 100644 --- a/compute_kernel_writer/src/cl/CLKernelWriter.cpp +++ b/compute_kernel_writer/src/cl/CLKernelWriter.cpp @@ -31,14 +31,15 @@ #include "ckw/types/DataType.h" #include "ckw/types/MemoryOperation.h" #include "ckw/types/TargetLanguage.h" -#include "src/ITensorComponent.h" -#include "src/TileView.h" + #include "src/cl/CLHelpers.h" #include "src/cl/CLTensorArgument.h" #include "src/cl/CLTile.h" #include "src/cl/helpers/CLMemoryOpBufferHelper.h" #include "src/cl/helpers/CLMemoryOpImage2dHelper.h" #include "src/cl/helpers/ICLMemoryOpHelper.h" +#include "src/ITensorComponent.h" +#include "src/TileView.h" #include "src/types/DataTypeHelpers.h" #include <algorithm> @@ -63,14 +64,14 @@ 
std::unique_ptr<Kernel> CLKernelWriter::emit_kernel(const std::string &name) // Create the list of arguments. std::vector<KernelArgument> arguments; - for(const auto &tensor : _tensors) + for (const auto &tensor : _tensors) { const auto tensor_id = tensor->info().id(); const auto storages = tensor->storages(); const auto components = tensor->components(); - for(const auto &storage : storages) + for (const auto &storage : storages) { code += cl_get_variable_storagetype_as_string(storage.type); code += " "; @@ -80,7 +81,7 @@ std::unique_ptr<Kernel> CLKernelWriter::emit_kernel(const std::string &name) arguments.emplace_back(tensor_id, storage.type); } - for(const auto &component : components) + for (const auto &component : components) { const auto &tile = component->tile(); const auto &tile_info = tile.info(); @@ -96,7 +97,7 @@ std::unique_ptr<Kernel> CLKernelWriter::emit_kernel(const std::string &name) } } - if(code.size() >= 2 && code[code.size() - 2] == ',' && code[code.size() - 1] == '\n') + if (code.size() >= 2 && code[code.size() - 2] == ',' && code[code.size() - 1] == '\n') { // Remove the last comma in the argument list. code.pop_back(); @@ -127,11 +128,12 @@ void CLKernelWriter::op_assign(const TileOperand &dst, const TileOperand &src) const std::string src_prefix = broadcast_src_x ? "(" + data_type_str + ")" : ""; CKW_ASSERT_MSG(src_view.data_type() == dst_view.data_type(), "Source and destination type must match."); - CKW_ASSERT_MSG(src_view.height() == dst_h || src_view.height() == 1, "Tile height must match or source is broadcasting in y dimension."); + CKW_ASSERT_MSG(src_view.height() == dst_h || src_view.height() == 1, + "Tile height must match or source is broadcasting in y dimension."); CKW_ASSERT_MSG(src_w == dst_w || src_w == 1, "Tile width must match or source is broadcasting in x dimension."); // Broadcasting on y dimension is automatic (see CLTile::vector). 
- for(int32_t y = 0; y < dst_h; ++y) + for (int32_t y = 0; y < dst_h; ++y) { append_code(dst_view.vector(y).str, " = ", src_prefix, src_view.vector(y).str, ";\n"); } @@ -158,13 +160,15 @@ void CLKernelWriter::op_cast(const TileOperand &dst, const TileOperand &src, Con const std::string prefix = broadcast_x ? "(" + dst_type_str + ")" : ""; CKW_ASSERT_MSG(src_view.data_type() != dst_view.data_type(), "Source and destination type must be different."); - CKW_ASSERT_MSG(src_view.height() == dst_h || src_view.height() == 1, "Tile height must match or source is broadcasting in y dimension."); + CKW_ASSERT_MSG(src_view.height() == dst_h || src_view.height() == 1, + "Tile height must match or source is broadcasting in y dimension."); CKW_ASSERT_MSG(src_w == dst_w || src_w == 1, "Tile width must match or source is broadcasting in x dimension."); // Broadcasting on y dimension is automatic (see CLTile::vector). - for(int32_t y = 0; y < dst_h; ++y) + for (int32_t y = 0; y < dst_h; ++y) { - append_code(dst_view.vector(y).str, " = ", prefix, "convert_", convert_type_str, sat, "(", src_view.vector(y).str, ");\n"); + append_code(dst_view.vector(y).str, " = ", prefix, "convert_", convert_type_str, sat, "(", + src_view.vector(y).str, ");\n"); } } @@ -189,11 +193,12 @@ void CLKernelWriter::op_unary(const TileOperand &dst, UnaryOp op, const TileOper const auto op_suffix = op_is_func ? ")" : ""; CKW_ASSERT_MSG(src_view.data_type() == dst_view.data_type(), "Source and destination type must match."); - CKW_ASSERT_MSG(src_view.height() == dst_h || src_view.height() == 1, "Tile height must match or source is broadcasting in y dimension."); + CKW_ASSERT_MSG(src_view.height() == dst_h || src_view.height() == 1, + "Tile height must match or source is broadcasting in y dimension."); CKW_ASSERT_MSG(src_w == dst_w || src_w == 1, "Tile width must match or source is broadcasting in x dimension."); // Broadcasting on y dimension is automatic (see CLTile::vector). 
- for(int32_t y = 0; y < dst_h; ++y) + for (int32_t y = 0; y < dst_h; ++y) { append_code(dst_view.vector(y).str, " = ", src_prefix, op_prefix, src_view.vector(y).str, op_suffix, ";\n"); } @@ -214,27 +219,28 @@ void CLKernelWriter::op_binary(const TileOperand &dst, BinaryOp op, const TileOp CKW_ASSERT_MSG(lhs_view.data_type() == rhs_view.data_type(), "LHS and RHS type must match."); - CKW_ASSERT_MSG(lhs_view.height() == dst_h || lhs_view.height() == 1, "LHS tile height must match or source is broadcasting in y dimension."); - CKW_ASSERT_MSG(rhs_view.height() == dst_h || rhs_view.height() == 1, "RHS tile height must match or source is broadcasting in y dimension."); + CKW_ASSERT_MSG(lhs_view.height() == dst_h || lhs_view.height() == 1, + "LHS tile height must match or source is broadcasting in y dimension."); + CKW_ASSERT_MSG(rhs_view.height() == dst_h || rhs_view.height() == 1, + "RHS tile height must match or source is broadcasting in y dimension."); - CKW_ASSERT_MSG(lhs_w == dst_w || lhs_w == 1, "LHS tile width must match destination or LHS is broadcasting in x dimension."); - CKW_ASSERT_MSG(rhs_w == dst_w || rhs_w == 1, "RHS tile width must match destination or RHS is broadcasting in x dimension."); + CKW_ASSERT_MSG(lhs_w == dst_w || lhs_w == 1, + "LHS tile width must match destination or LHS is broadcasting in x dimension."); + CKW_ASSERT_MSG(rhs_w == dst_w || rhs_w == 1, + "RHS tile width must match destination or RHS is broadcasting in x dimension."); - if(op == BinaryOp::MatMul_Nt_T) + if (op == BinaryOp::MatMul_Nt_T) { CKW_ASSERT(is_data_type_float(data_type)); - for(int32_t y = 0; y < dst_h; ++y) + for (int32_t y = 0; y < dst_h; ++y) { - for(int32_t x = 0; x < dst_w; ++x) + for (int32_t x = 0; x < dst_w; ++x) { - for(int32_t k = 0; k < lhs_w; ++k) + for (int32_t k = 0; k < lhs_w; ++k) { - append_code( - dst_view.scalar(x, y).str, " = fma(", - lhs_view.scalar(k, y).str, ", ", - rhs_view.scalar(k, x).str, ", ", - dst_view.scalar(x, y).str, ");\n"); + 
append_code(dst_view.scalar(x, y).str, " = fma(", lhs_view.scalar(k, y).str, ", ", + rhs_view.scalar(k, x).str, ", ", dst_view.scalar(x, y).str, ");\n"); } } } @@ -258,14 +264,16 @@ void CLKernelWriter::op_binary(const TileOperand &dst, BinaryOp op, const TileOp const std::string op_suffix = op_is_func ? ");\n" : ";\n"; // Broadcasting on y dimension is automatic (see CLTile::vector). - for(int32_t y = 0; y < dst_h; ++y) + for (int32_t y = 0; y < dst_h; ++y) { - append_code(dst_view.vector(y).str, op_prefix, lhs_prefix, lhs_view.vector(y).str, op_separator, rhs_prefix, rhs_view.vector(y).str, op_suffix); + append_code(dst_view.vector(y).str, op_prefix, lhs_prefix, lhs_view.vector(y).str, op_separator, rhs_prefix, + rhs_view.vector(y).str, op_suffix); } } } -void CLKernelWriter::op_ternary(const TileOperand &dst, TernaryOp op, const TileOperand &first, const TileOperand &second, const TileOperand &third) +void CLKernelWriter::op_ternary( + const TileOperand &dst, TernaryOp op, const TileOperand &first, const TileOperand &second, const TileOperand &third) { const auto dst_view = to_cl_tile_view(dst); const auto first_view = to_cl_tile_view(first); @@ -297,37 +305,42 @@ void CLKernelWriter::op_ternary(const TileOperand &dst, TernaryOp op, const Tile CKW_ASSERT_MSG(second_view.data_type() == dst_view.data_type(), "2nd source and destination type must match."); CKW_ASSERT_MSG(third_view.data_type() == dst_view.data_type(), "3rd source and destination type must match."); - CKW_ASSERT_MSG(first_view.height() == dst_h || first_view.height() == 1, "1st tile height must match or source is broadcasting in y dimension."); - CKW_ASSERT_MSG(second_view.height() == dst_h || second_view.height() == 1, "2nd tile height must match or source is broadcasting in y dimension."); - CKW_ASSERT_MSG(third_view.height() == dst_h || third_view.height() == 1, "3rd tile height must match or source is broadcasting in y dimension."); + CKW_ASSERT_MSG(first_view.height() == dst_h || 
first_view.height() == 1, + "1st tile height must match or source is broadcasting in y dimension."); + CKW_ASSERT_MSG(second_view.height() == dst_h || second_view.height() == 1, + "2nd tile height must match or source is broadcasting in y dimension."); + CKW_ASSERT_MSG(third_view.height() == dst_h || third_view.height() == 1, + "3rd tile height must match or source is broadcasting in y dimension."); - CKW_ASSERT_MSG(first_w == dst_w || first_w == 1, "1st tile width must match or source is broadcasting in x dimension."); - CKW_ASSERT_MSG(second_w == dst_w || second_w == 1, "2nd tile width must match or source is broadcasting in x dimension."); - CKW_ASSERT_MSG(third_w == dst_w || third_w == 1, "3rd tile width must match or source is broadcasting in x dimension."); + CKW_ASSERT_MSG(first_w == dst_w || first_w == 1, + "1st tile width must match or source is broadcasting in x dimension."); + CKW_ASSERT_MSG(second_w == dst_w || second_w == 1, + "2nd tile width must match or source is broadcasting in x dimension."); + CKW_ASSERT_MSG(third_w == dst_w || third_w == 1, + "3rd tile width must match or source is broadcasting in x dimension."); // Broadcasting on y dimension is automatic (see CLTile::vector). 
- for(int32_t y = 0; y < dst_h; ++y) + for (int32_t y = 0; y < dst_h; ++y) { - append_code( - dst_view.vector(y).str, " = ", op_name, "(", - first_prefix, first_view.vector(y).str, ", ", - second_prefix, second_view.vector(y).str, ", ", - third_prefix, third_view.vector(y).str, ");\n"); + append_code(dst_view.vector(y).str, " = ", op_name, "(", first_prefix, first_view.vector(y).str, ", ", + second_prefix, second_view.vector(y).str, ", ", third_prefix, third_view.vector(y).str, ");\n"); } } -void CLKernelWriter::op_if_generic(const TileOperand &lhs, BinaryOp op, const TileOperand &rhs, const std::function<void()> &body, bool is_else_if) +void CLKernelWriter::op_if_generic( + const TileOperand &lhs, BinaryOp op, const TileOperand &rhs, const std::function<void()> &body, bool is_else_if) { const auto lhs_view = to_cl_tile_view(lhs); const auto rhs_view = to_cl_tile_view(rhs); const auto op_name = std::get<1>(cl_get_binary_op(op, lhs_view.data_type())); - CKW_ASSERT(op == BinaryOp::Less || op == BinaryOp::LessEqual || op == BinaryOp::Equal || op == BinaryOp::GreaterEqual || op == BinaryOp::Greater); + CKW_ASSERT(op == BinaryOp::Less || op == BinaryOp::LessEqual || op == BinaryOp::Equal || + op == BinaryOp::GreaterEqual || op == BinaryOp::Greater); CKW_ASSERT(lhs_view.is_scalar()); CKW_ASSERT(rhs_view.is_scalar()); - if(is_else_if) + if (is_else_if) { append_code("else "); } @@ -337,12 +350,18 @@ void CLKernelWriter::op_if_generic(const TileOperand &lhs, BinaryOp op, const Ti append_code("}\n"); } -void CLKernelWriter::op_if(const TileOperand &lhs, BinaryOp op, const TileOperand &rhs, const std::function<void()> &body) +void CLKernelWriter::op_if(const TileOperand &lhs, + BinaryOp op, + const TileOperand &rhs, + const std::function<void()> &body) { op_if_generic(lhs, op, rhs, body, false /* is_else_if */); } -void CLKernelWriter::op_else_if(const TileOperand &lhs, BinaryOp op, const TileOperand &rhs, const std::function<void()> &body) +void 
CLKernelWriter::op_else_if(const TileOperand &lhs, + BinaryOp op, + const TileOperand &rhs, + const std::function<void()> &body) { op_if_generic(lhs, op, rhs, body, true /* is_else_if */); } @@ -354,10 +373,13 @@ void CLKernelWriter::op_else(const std::function<void()> &body) append_code("}\n"); } -void CLKernelWriter::op_for_loop( - const TileOperand &var, BinaryOp cond_op, const TileOperand &cond_value, - const TileOperand &update_var, AssignmentOp update_op, const TileOperand &update_value, - const std::function<void()> &body) +void CLKernelWriter::op_for_loop(const TileOperand &var, + BinaryOp cond_op, + const TileOperand &cond_value, + const TileOperand &update_var, + AssignmentOp update_op, + const TileOperand &update_value, + const std::function<void()> &body) { const auto var_view = to_cl_tile_view(var); const auto cond_value_view = to_cl_tile_view(cond_value); @@ -373,11 +395,12 @@ void CLKernelWriter::op_for_loop( CKW_ASSERT(update_var_view.data_type() == update_value_view.data_type()); const auto cond_op_name = std::get<1>(cl_get_binary_op(cond_op, var_view.data_type())); - CKW_ASSERT(cond_op == BinaryOp::Less || cond_op == BinaryOp::LessEqual || cond_op == BinaryOp::Equal || cond_op == BinaryOp::GreaterEqual || cond_op == BinaryOp::Greater); + CKW_ASSERT(cond_op == BinaryOp::Less || cond_op == BinaryOp::LessEqual || cond_op == BinaryOp::Equal || + cond_op == BinaryOp::GreaterEqual || cond_op == BinaryOp::Greater); - append_code( - "for (; ", var_view.scalar(0, 0).str, " ", cond_op_name, " ", cond_value_view.scalar(0, 0).str, "; ", - update_var_view.scalar(0, 0).str, " ", cl_get_assignment_op_as_string(update_op), " ", update_value_view.scalar(0, 0).str, ")\n{\n"); + append_code("for (; ", var_view.scalar(0, 0).str, " ", cond_op_name, " ", cond_value_view.scalar(0, 0).str, "; ", + update_var_view.scalar(0, 0).str, " ", cl_get_assignment_op_as_string(update_op), " ", + update_value_view.scalar(0, 0).str, ")\n{\n"); write_body(body); append_code("}\n"); } 
@@ -404,7 +427,7 @@ void CLKernelWriter::op_print(const std::string &prefix, const std::vector<TileO std::string format_code; std::string args_code; - for(auto &op : operands) + for (auto &op : operands) { const auto tile_view = to_cl_tile_view(op); @@ -416,12 +439,12 @@ void CLKernelWriter::op_print(const std::string &prefix, const std::vector<TileO // Construct the format specifier to print out one row of the tile. std::string row_format("%"); - if(width > 1) + if (width > 1) { row_format += "v" + std::to_string(width); } - switch(data_type) + switch (data_type) { case DataType::Fp32: row_format += "hlg"; @@ -452,7 +475,7 @@ void CLKernelWriter::op_print(const std::string &prefix, const std::vector<TileO CKW_THROW_MSG("Unsupported data type!"); } - if(width > 1) + if (width > 1) { row_format = "[" + row_format + "]"; } @@ -460,14 +483,14 @@ void CLKernelWriter::op_print(const std::string &prefix, const std::vector<TileO // Construct the format specifier for the printf statement. format_code += name + " = "; - if(height == 1) + if (height == 1) { format_code += row_format; } else { format_code += "[" + row_format; - for(int32_t row = 1; row < height; ++row) + for (int32_t row = 1; row < height; ++row) { format_code += ", " + row_format; } @@ -477,7 +500,7 @@ void CLKernelWriter::op_print(const std::string &prefix, const std::vector<TileO format_code += "\\n"; // Construct the variable arguments for the printf statement. 
- for(int32_t row = 0; row < height; ++row) + for (int32_t row = 0; row < height; ++row) { args_code += ", " + tile_view.vector(row).str; } @@ -527,19 +550,14 @@ TileOperand CLKernelWriter::declare_tile(const std::string &name, const TileInfo const int32_t width = tile_info.width(); const DataType data_type = tile_info.data_type(); - CKW_ASSERT_MSG( - std::find_if( - _tiles.begin(), _tiles.end(), - [=](const std::unique_ptr<CLTile> &e) - { - return e->name() == fullname; - }) - == _tiles.end(), - "There is already a tile with name: " + fullname); + CKW_ASSERT_MSG(std::find_if(_tiles.begin(), _tiles.end(), + [=](const std::unique_ptr<CLTile> &e) + { return e->name() == fullname; }) == _tiles.end(), + "There is already a tile with name: " + fullname); auto tile = std::make_unique<CLTile>(fullname, tile_info); - for(int32_t row = 0; row < height; ++row) + for (int32_t row = 0; row < height; ++row) { const std::string cl_type = cl_get_variable_datatype_as_string(data_type, width); append_code(cl_type, " ", tile->vector(row).str, ";\n"); @@ -578,40 +596,40 @@ TileView<CLTile> CLKernelWriter::to_cl_tile_view(const TileOperand &operand) con { bool found = false; - for(const auto &t : _tiles) + for (const auto &t : _tiles) { - if(&tile == t.get()) + if (&tile == t.get()) { found = true; break; } } - for(const auto &t : _constant_tiles) + for (const auto &t : _constant_tiles) { - if(&tile == t.get()) + if (&tile == t.get()) { found = true; break; } } - if(!found) + if (!found) { - for(const auto &t : _tensors) + for (const auto &t : _tensors) { const auto components = t->components(); - for(const auto component : components) + for (const auto component : components) { - if(&tile == &component->tile()) + if (&tile == &component->tile()) { found = true; break; } } - if(found) + if (found) { break; } @@ -622,66 +640,106 @@ TileView<CLTile> CLKernelWriter::to_cl_tile_view(const TileOperand &operand) con } #endif // COMPUTE_KERNEL_WRITER_ASSERTS_ENABLED - return { 
static_cast<CLTile &>(tile), area }; + return {static_cast<CLTile &>(tile), area}; } -void CLKernelWriter::op_load(const TileOperand &tile_op, const TensorOperand &tensor_op, TensorSampler &sampler, - const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch) +void CLKernelWriter::op_load(const TileOperand &tile_op, + const TensorOperand &tensor_op, + TensorSampler &sampler, + const TileOperand &x, + const TileOperand &y, + const TileOperand &z, + const TileOperand &batch) { - const CLTile dilation_x({ { "1" } }, DataType::Int32); - const CLTile dilation_y({ { "1" } }, DataType::Int32); + const CLTile dilation_x({{"1"}}, DataType::Int32); + const CLTile dilation_y({{"1"}}, DataType::Int32); - op_load_store(MemoryOperation::Load, tile_op, tensor_op, sampler, x, y, z, batch, dilation_x, dilation_y, false /* indirect buffer */); + op_load_store(MemoryOperation::Load, tile_op, tensor_op, sampler, x, y, z, batch, dilation_x, dilation_y, + false /* indirect buffer */); } -void CLKernelWriter::op_load_dilated(const TileOperand &tile_op, const TensorOperand &tensor_op, TensorSampler &sampler, - const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch, - const TileOperand &dilation_x, const TileOperand &dilation_y) +void CLKernelWriter::op_load_dilated(const TileOperand &tile_op, + const TensorOperand &tensor_op, + TensorSampler &sampler, + const TileOperand &x, + const TileOperand &y, + const TileOperand &z, + const TileOperand &batch, + const TileOperand &dilation_x, + const TileOperand &dilation_y) { const auto dil_x_view = to_cl_tile_view(dilation_x); const auto dil_y_view = to_cl_tile_view(dilation_y); - op_load_store(MemoryOperation::Load, tile_op, tensor_op, sampler, x, y, z, batch, dil_x_view, dil_y_view, false /* indirect buffer */); + op_load_store(MemoryOperation::Load, tile_op, tensor_op, sampler, x, y, z, batch, dil_x_view, dil_y_view, + false /* indirect buffer */); } -void 
CLKernelWriter::op_store(const TensorOperand &tensor_op, const TileOperand &tile_op, TensorSampler &sampler, - const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch) +void CLKernelWriter::op_store(const TensorOperand &tensor_op, + const TileOperand &tile_op, + TensorSampler &sampler, + const TileOperand &x, + const TileOperand &y, + const TileOperand &z, + const TileOperand &batch) { - const CLTile dilation_x({ { "1" } }, DataType::Int32); - const CLTile dilation_y({ { "1" } }, DataType::Int32); + const CLTile dilation_x({{"1"}}, DataType::Int32); + const CLTile dilation_y({{"1"}}, DataType::Int32); - op_load_store(MemoryOperation::Store, tile_op, tensor_op, sampler, x, y, z, batch, dilation_x, dilation_y, false /* indirect buffer */); + op_load_store(MemoryOperation::Store, tile_op, tensor_op, sampler, x, y, z, batch, dilation_x, dilation_y, + false /* indirect buffer */); } -void CLKernelWriter::op_store_dilated(const TensorOperand &tensor_op, const TileOperand &tile_op, TensorSampler &sampler, - const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch, - const TileOperand &dilation_x, const TileOperand &dilation_y) +void CLKernelWriter::op_store_dilated(const TensorOperand &tensor_op, + const TileOperand &tile_op, + TensorSampler &sampler, + const TileOperand &x, + const TileOperand &y, + const TileOperand &z, + const TileOperand &batch, + const TileOperand &dilation_x, + const TileOperand &dilation_y) { const auto dil_x_view = to_cl_tile_view(dilation_x); const auto dil_y_view = to_cl_tile_view(dilation_y); - op_load_store(MemoryOperation::Store, tile_op, tensor_op, sampler, x, y, z, batch, dil_x_view, dil_y_view, false /* indirect buffer */); + op_load_store(MemoryOperation::Store, tile_op, tensor_op, sampler, x, y, z, batch, dil_x_view, dil_y_view, + false /* indirect buffer */); } -void CLKernelWriter::op_load_indirect(const TileOperand &tile_op, const TensorOperand &tensor_op, 
TensorSampler &sampler, - const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch) +void CLKernelWriter::op_load_indirect(const TileOperand &tile_op, + const TensorOperand &tensor_op, + TensorSampler &sampler, + const TileOperand &x, + const TileOperand &y, + const TileOperand &z, + const TileOperand &batch) { - const CLTile dilation_x({ { "1" } }, DataType::Int32); - const CLTile dilation_y({ { "1" } }, DataType::Int32); + const CLTile dilation_x({{"1"}}, DataType::Int32); + const CLTile dilation_y({{"1"}}, DataType::Int32); - op_load_store(MemoryOperation::Load, tile_op, tensor_op, sampler, x, y, z, batch, dilation_x, dilation_y, true /* indirect buffer */); + op_load_store(MemoryOperation::Load, tile_op, tensor_op, sampler, x, y, z, batch, dilation_x, dilation_y, + true /* indirect buffer */); } -void CLKernelWriter::op_load_store(MemoryOperation op, const TileOperand &tile_op, const TensorOperand &tensor_op, TensorSampler &sampler, - const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch, - const TileView<CLTile> &dilation_x, const TileView<CLTile> &dilation_y, bool indirect_buffer) +void CLKernelWriter::op_load_store(MemoryOperation op, + const TileOperand &tile_op, + const TensorOperand &tensor_op, + TensorSampler &sampler, + const TileOperand &x, + const TileOperand &y, + const TileOperand &z, + const TileOperand &batch, + const TileView<CLTile> &dilation_x, + const TileView<CLTile> &dilation_y, + bool indirect_buffer) { CKW_UNUSED(dilation_x); CKW_ASSERT(dilation_x.is_scalar()); CKW_ASSERT(dilation_y.is_scalar()); CKW_ASSERT(dilation_x.scalar(0, 0).str == "((int)(1))"); // Dilation in x dimension is not implemented yet - if(indirect_buffer) + if (indirect_buffer) { CKW_ASSERT(dilation_y.scalar(0, 0).str == "((int)(1))" && dilation_x.scalar(0, 0).str == "((int)(1))"); } @@ -689,7 +747,7 @@ void CLKernelWriter::op_load_store(MemoryOperation op, const TileOperand &tile_o ITensor &tensor = 
get_tensor(tensor_op); std::unique_ptr<ICLMemoryOpHelper> helper; - switch(sampler.storage()) + switch (sampler.storage()) { case TensorStorageType::BufferUint8Ptr: helper = std::make_unique<CLMemoryOpBufferHelper>(this, &tensor, &sampler, op); @@ -717,13 +775,13 @@ void CLKernelWriter::op_load_store(MemoryOperation op, const TileOperand &tile_o helper->initialize(&tile, &x_tile, &z_tile, &batch_tile); - for(int row = 0; row < tile.info().height(); ++row) + for (int row = 0; row < tile.info().height(); ++row) { - if(!indirect_buffer) + if (!indirect_buffer) { std::string coord_y = y_tile.scalar(0, 0).str + " + " + std::to_string(row); - if(dilation_y.scalar(0, 0).str != "((int)(1))") + if (dilation_y.scalar(0, 0).str != "((int)(1))") { coord_y += " * " + dilation_y.scalar(0, 0).str; } diff --git a/compute_kernel_writer/src/cl/CLKernelWriter.h b/compute_kernel_writer/src/cl/CLKernelWriter.h index d7cf24d5e6..6485bae512 100644 --- a/compute_kernel_writer/src/cl/CLKernelWriter.h +++ b/compute_kernel_writer/src/cl/CLKernelWriter.h @@ -26,6 +26,7 @@ #define CKW_SRC_CL_CLKERNELWRITER_H #include "ckw/KernelWriter.h" + #include "src/TileView.h" #include <memory> @@ -73,7 +74,11 @@ public: void op_binary(const TileOperand &dst, BinaryOp op, const TileOperand &first, const TileOperand &second) override; - void op_ternary(const TileOperand &dst, TernaryOp op, const TileOperand &first, const TileOperand &second, const TileOperand &third) override; + void op_ternary(const TileOperand &dst, + TernaryOp op, + const TileOperand &first, + const TileOperand &second, + const TileOperand &third) override; // ============================================================================================= // Flow control @@ -81,14 +86,18 @@ public: void op_if(const TileOperand &lhs, BinaryOp op, const TileOperand &rhs, const std::function<void()> &body) override; - void op_else_if(const TileOperand &lhs, BinaryOp op, const TileOperand &rhs, const std::function<void()> &body) override; + 
void + op_else_if(const TileOperand &lhs, BinaryOp op, const TileOperand &rhs, const std::function<void()> &body) override; void op_else(const std::function<void()> &body) override; - void op_for_loop( - const TileOperand &var, BinaryOp cond_op, const TileOperand &cond_value, - const TileOperand &update_var, AssignmentOp update_op, const TileOperand &update_value, - const std::function<void()> &body) override; + void op_for_loop(const TileOperand &var, + BinaryOp cond_op, + const TileOperand &cond_value, + const TileOperand &update_var, + AssignmentOp update_op, + const TileOperand &update_value, + const std::function<void()> &body) override; void op_return() override; @@ -132,26 +141,49 @@ public: // Memory Operations // ============================================================================================= - void op_load( - const TileOperand &tile_op, const TensorOperand &tensor_op, TensorSampler &sampler, - const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch) override; - - void op_load_dilated( - const TileOperand &tile_op, const TensorOperand &tensor_op, TensorSampler &sampler, - const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch, - const TileOperand &dilation_x, const TileOperand &dilation_y) override; - - void op_store( - const TensorOperand &tensor_op, const TileOperand &tile_op, TensorSampler &sampler, - const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch) override; - - void op_store_dilated( - const TensorOperand &tensor_op, const TileOperand &tile_op, TensorSampler &sampler, - const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch, - const TileOperand &dilation_x, const TileOperand &dilation_y) override; - - void op_load_indirect(const TileOperand &tile_op, const TensorOperand &tensor_op, TensorSampler &sampler, - const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand 
&batch) override; + void op_load(const TileOperand &tile_op, + const TensorOperand &tensor_op, + TensorSampler &sampler, + const TileOperand &x, + const TileOperand &y, + const TileOperand &z, + const TileOperand &batch) override; + + void op_load_dilated(const TileOperand &tile_op, + const TensorOperand &tensor_op, + TensorSampler &sampler, + const TileOperand &x, + const TileOperand &y, + const TileOperand &z, + const TileOperand &batch, + const TileOperand &dilation_x, + const TileOperand &dilation_y) override; + + void op_store(const TensorOperand &tensor_op, + const TileOperand &tile_op, + TensorSampler &sampler, + const TileOperand &x, + const TileOperand &y, + const TileOperand &z, + const TileOperand &batch) override; + + void op_store_dilated(const TensorOperand &tensor_op, + const TileOperand &tile_op, + TensorSampler &sampler, + const TileOperand &x, + const TileOperand &y, + const TileOperand &z, + const TileOperand &batch, + const TileOperand &dilation_x, + const TileOperand &dilation_y) override; + + void op_load_indirect(const TileOperand &tile_op, + const TensorOperand &tensor_op, + TensorSampler &sampler, + const TileOperand &x, + const TileOperand &y, + const TileOperand &z, + const TileOperand &batch) override; protected: /** Return a tile view containing a reference to @ref CLTile object and the active area. 
@@ -181,9 +213,17 @@ protected: // For helper functions private: /** Helper method to consolidate all load/store logic in this class */ - void op_load_store(MemoryOperation op, const TileOperand &tile_op, const TensorOperand &tensor_op, TensorSampler &sampler, - const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch, - const TileView<CLTile> &dilation_x, const TileView<CLTile> &dilation_y, bool indirect_buffer); + void op_load_store(MemoryOperation op, + const TileOperand &tile_op, + const TensorOperand &tensor_op, + TensorSampler &sampler, + const TileOperand &x, + const TileOperand &y, + const TileOperand &z, + const TileOperand &batch, + const TileView<CLTile> &dilation_x, + const TileView<CLTile> &dilation_y, + bool indirect_buffer); /** This function is the generic function to write both `if` and `else if` blocks. * @@ -195,7 +235,11 @@ private: * @param[in] body The function that writes the body of the else-if block. * @param[in] is_else_if True if this is an `else if` block, otherwise this is an `if` block. 
*/ - void op_if_generic(const TileOperand &lhs, BinaryOp op, const TileOperand &rhs, const std::function<void()> &body, bool is_else_if); + void op_if_generic(const TileOperand &lhs, + BinaryOp op, + const TileOperand &rhs, + const std::function<void()> &body, + bool is_else_if); // For attributes private: diff --git a/compute_kernel_writer/src/cl/CLTensorArgument.cpp b/compute_kernel_writer/src/cl/CLTensorArgument.cpp index 7d4dc958df..e53de2830d 100644 --- a/compute_kernel_writer/src/cl/CLTensorArgument.cpp +++ b/compute_kernel_writer/src/cl/CLTensorArgument.cpp @@ -23,11 +23,13 @@ */ #include "src/cl/CLTensorArgument.h" + #include "ckw/Error.h" -#include "src/ITensorArgument.h" -#include "src/ITensorComponent.h" + #include "src/cl/CLHelpers.h" #include "src/cl/CLTensorComponent.h" +#include "src/ITensorArgument.h" +#include "src/ITensorComponent.h" #include "src/types/TensorComponentType.h" #include <algorithm> @@ -48,25 +50,23 @@ CLTensorComponent &CLTensorArgument::cl_component(TensorComponentType x) { // Return the component if it has already been created. 
{ - const auto it = std::find_if( - _components_used.begin(), _components_used.end(), - [=](const std::unique_ptr<CLTensorComponent> &item) - { - return item->component_type() == x; - }); + const auto it = + std::find_if(_components_used.begin(), _components_used.end(), + [=](const std::unique_ptr<CLTensorComponent> &item) { return item->component_type() == x; }); - if(it != _components_used.end()) + if (it != _components_used.end()) { return **it; } } - if(_return_dims_by_value) + if (_return_dims_by_value) { uint32_t component_type = static_cast<uint32_t>(x); - const bool is_dimension = (component_type & static_cast<uint32_t>(TensorComponentBitmask::Dimension)) != 0; - const bool is_folded_dimensions = (component_type & static_cast<uint32_t>(TensorComponentBitmask::FoldedDimensions)) != 0; + const bool is_dimension = (component_type & static_cast<uint32_t>(TensorComponentBitmask::Dimension)) != 0; + const bool is_folded_dimensions = + (component_type & static_cast<uint32_t>(TensorComponentBitmask::FoldedDimensions)) != 0; constexpr auto bitmask_all = static_cast<uint32_t>(TensorComponentIndexBitmask::All); constexpr auto bitmask_index_0 = static_cast<uint32_t>(TensorComponentIndexBitmask::Index0); @@ -83,16 +83,16 @@ CLTensorComponent &CLTensorArgument::cl_component(TensorComponentType x) CKW_ASSERT(bitmask_index_2 == bitmask_index_3 >> 4); // If we have a dimension or folded dimensions, we can return the corresponding value if it is not dynamic (not equal to -1) - if(is_dimension == true || is_folded_dimensions == true) + if (is_dimension == true || is_folded_dimensions == true) { component_type = component_type & bitmask_all; int32_t idx = 1; - for(int32_t i = 0; i < tensor_component_index_max_count; ++i) + for (int32_t i = 0; i < tensor_component_index_max_count; ++i) { uint32_t dim_idx = component_type & bitmask_index_0; - if(dim_idx == 0) + if (dim_idx == 0) { // Stop at the first nibble containing 0 break; @@ -104,7 +104,7 @@ CLTensorComponent 
&CLTensorArgument::cl_component(TensorComponentType x) // Get the dimension value const int32_t dim_val = _info.shape()[dim_idx]; - if(dim_val == kDynamicTensorDimensionValue) + if (dim_val == kDynamicTensorDimensionValue) { // We cannot return the dimension by value if it is dynamic. // Therefore, force the idx variable to kDynamicTensorDimensionValue and break the loop. @@ -118,7 +118,7 @@ CLTensorComponent &CLTensorArgument::cl_component(TensorComponentType x) component_type >>= 4; } - if(idx != kDynamicTensorDimensionValue) + if (idx != kDynamicTensorDimensionValue) { _components_used.emplace_back(std::make_unique<CLTensorComponent>(*this, x, idx)); @@ -141,14 +141,10 @@ TensorStorageVariable &CLTensorArgument::storage(TensorStorageType x) { // Return the storage if it has already been created. { - const auto it = std::find_if( - _storages_used.begin(), _storages_used.end(), - [=](const TensorStorageVariable &item) - { - return item.type == x; - }); + const auto it = std::find_if(_storages_used.begin(), _storages_used.end(), + [=](const TensorStorageVariable &item) { return item.type == x; }); - if(it != _storages_used.end()) + if (it != _storages_used.end()) { return *it; } @@ -167,7 +163,7 @@ std::string CLTensorArgument::create_storage_name(TensorStorageType x) const { std::string var_name = _basename; - switch(x) + switch (x) { case TensorStorageType::BufferUint8Ptr: var_name += "_ptr"; @@ -198,9 +194,9 @@ std::vector<const ITensorComponent *> CLTensorArgument::components() const { std::vector<const ITensorComponent *> components; - for(const auto &component : _components_used) + for (const auto &component : _components_used) { - if(component->is_assignable()) + if (component->is_assignable()) { components.push_back(component.get()); } diff --git a/compute_kernel_writer/src/cl/CLTensorArgument.h b/compute_kernel_writer/src/cl/CLTensorArgument.h index 4cbbee21ee..35df51422e 100644 --- a/compute_kernel_writer/src/cl/CLTensorArgument.h +++ 
b/compute_kernel_writer/src/cl/CLTensorArgument.h @@ -26,7 +26,9 @@ #include "ckw/types/TensorComponentType.h" #include "ckw/types/TensorStorageType.h" + #include "src/ITensor.h" + #include <memory> #include <string> #include <vector> @@ -67,7 +69,7 @@ public: * unlike @ref CLTensorComponent::component which is for the public API and only returns * a reference to a generic @ref ITile object. */ - CLTensorComponent& cl_component(TensorComponentType component_type); + CLTensorComponent &cl_component(TensorComponentType component_type); // Inherited method overridden TensorStorageVariable &storage(TensorStorageType x) override; @@ -78,7 +80,7 @@ public: private: std::string create_storage_name(TensorStorageType x) const; - bool _return_dims_by_value{ false }; + bool _return_dims_by_value{false}; std::vector<TensorStorageVariable> _storages_used{}; std::vector<std::unique_ptr<CLTensorComponent>> _components_used{}; }; diff --git a/compute_kernel_writer/src/cl/CLTensorComponent.cpp b/compute_kernel_writer/src/cl/CLTensorComponent.cpp index c29b307748..dbe2036768 100644 --- a/compute_kernel_writer/src/cl/CLTensorComponent.cpp +++ b/compute_kernel_writer/src/cl/CLTensorComponent.cpp @@ -23,8 +23,10 @@ */ #include "src/cl/CLTensorComponent.h" + #include "ckw/Error.h" #include "ckw/types/TensorComponentType.h" + #include "src/cl/CLTensorArgument.h" #include "src/cl/CLTile.h" @@ -38,7 +40,7 @@ std::string create_component_name(const std::string &name, TensorComponentType x { std::string var_name(name); - switch(x) + switch (x) { case TensorComponentType::OffsetFirstElement: var_name += "_offset_first_element"; @@ -93,12 +95,13 @@ std::string create_component_name(const std::string &name, TensorComponentType x } // namespace CLTensorComponent::CLTensorComponent(const CLTensorArgument &tensor, TensorComponentType component_type) - : CLTile(create_component_name(tensor.name(), component_type), TileInfo(DataType::Int32)), _component_type(component_type) + : 
CLTile(create_component_name(tensor.name(), component_type), TileInfo(DataType::Int32)), + _component_type(component_type) { } CLTensorComponent::CLTensorComponent(const CLTensorArgument &tensor, TensorComponentType component_type, int32_t value) - : CLTile({ { std::to_string(value) } }, DataType::Int32), _component_type(component_type) + : CLTile({{std::to_string(value)}}, DataType::Int32), _component_type(component_type) { CKW_UNUSED(tensor); } diff --git a/compute_kernel_writer/src/cl/CLTensorComponent.h b/compute_kernel_writer/src/cl/CLTensorComponent.h index 42a42666dc..731597ebbf 100644 --- a/compute_kernel_writer/src/cl/CLTensorComponent.h +++ b/compute_kernel_writer/src/cl/CLTensorComponent.h @@ -26,8 +26,9 @@ #define CKW_SRC_CL_CLTENSORCOMPONENT_H #include "ckw/types/TensorComponentType.h" -#include "src/ITensorComponent.h" + #include "src/cl/CLTile.h" +#include "src/ITensorComponent.h" namespace ckw { @@ -72,7 +73,7 @@ public: TensorComponentType component_type() const override; private: - TensorComponentType _component_type{ TensorComponentType::Unknown }; + TensorComponentType _component_type{TensorComponentType::Unknown}; }; } // namespace ckw diff --git a/compute_kernel_writer/src/cl/CLTile.cpp b/compute_kernel_writer/src/cl/CLTile.cpp index 0cce69a9e1..f6e271e813 100644 --- a/compute_kernel_writer/src/cl/CLTile.cpp +++ b/compute_kernel_writer/src/cl/CLTile.cpp @@ -21,20 +21,20 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ +#include "src/cl/CLTile.h" + #include "ckw/Error.h" #include "ckw/TileInfo.h" -#include "src/Helpers.h" #include "src/cl/CLHelpers.h" -#include "src/cl/CLTile.h" +#include "src/Helpers.h" #include <algorithm> #include <vector> namespace ckw { -CLTile::CLTile(const std::string &name, const TileInfo &info) - : _is_constant(false) +CLTile::CLTile(const std::string &name, const TileInfo &info) : _is_constant(false) { validate_tile_info(info); @@ -42,8 +42,7 @@ CLTile::CLTile(const std::string &name, const TileInfo &info) _info = info; } -CLTile::CLTile(const TileContainer &vals, DataType dt) - : _is_constant(true) +CLTile::CLTile(const TileContainer &vals, DataType dt) : _is_constant(true) { const int32_t w = vals[0].size(); const int32_t h = vals.size(); @@ -56,9 +55,9 @@ CLTile::CLTile(const TileContainer &vals, DataType dt) _vals = TileContainer(h, std::vector<std::string>(w)); - for(int32_t y = 0; y < h; ++y) + for (int32_t y = 0; y < h; ++y) { - for(int32_t x = 0; x < w; ++x) + for (int32_t x = 0; x < w; ++x) { _vals[y][x] = vals[y][x]; } @@ -81,7 +80,7 @@ TileVariable CLTile::scalar(int32_t row, int32_t col) const col = clamp(col, static_cast<int32_t>(0), _info.width() - 1); row = clamp(row, static_cast<int32_t>(0), _info.height() - 1); - if(_is_constant) + if (_is_constant) { // We can use the vector method to retrieve the scalar variable stored in the constant tile return vector(row, col, 1); @@ -94,7 +93,7 @@ TileVariable CLTile::scalar(int32_t row, int32_t col) const t.desc.len = 1; // This check is required because if the width has only one element, we cannot use .s0 - if(_info.width() != 1) + if (_info.width() != 1) { // Automatic broadcasting t.str += ".s" + dec_to_hex_as_string(col); @@ -109,7 +108,7 @@ TileVariable CLTile::vector(int32_t row) const // Clamp to nearest valid edge row = clamp(row, static_cast<int32_t>(0), _info.height() - 1); - if(_is_constant) + if (_is_constant) { return vector(row, 0, _info.width()); } @@ -138,14 +137,14 @@ 
TileVariable CLTile::vector(int32_t row, int32_t col_start, int32_t width) const t.desc.dt = _info.data_type(); t.desc.len = width; - if(_is_constant) + if (_is_constant) { // The vector has the following form: ((data_typeN)(val0, val1,..., ValN-1)) t.str = "((" + cl_get_variable_datatype_as_string(t.desc.dt, width) + ")"; t.str += "("; int32_t col = col_start; - for(; col < width - 1; ++col) + for (; col < width - 1; ++col) { t.str += _vals[row][col]; t.str += ", "; @@ -157,10 +156,10 @@ TileVariable CLTile::vector(int32_t row, int32_t col_start, int32_t width) const { t.str = create_var_name(row); - if(_info.width() != 1 && _info.width() != width) + if (_info.width() != 1 && _info.width() != width) { t.str += ".s"; - for(int i = 0; i < width; ++i) + for (int i = 0; i < width; ++i) { t.str += dec_to_hex_as_string(col_start + i); } @@ -174,11 +173,11 @@ std::vector<TileVariable> CLTile::all() const { std::vector<TileVariable> vars; - if(_is_constant) + if (_is_constant) { - for(int32_t y = 0; y < _info.height(); ++y) + for (int32_t y = 0; y < _info.height(); ++y) { - for(int32_t x = 0; x < _info.width(); ++x) + for (int32_t x = 0; x < _info.width(); ++x) { // We can use the vector method to retrieve all the scalar variables stored in the constant tile TileVariable t = vector(y, x, 1); @@ -188,7 +187,7 @@ std::vector<TileVariable> CLTile::all() const } else { - for(int32_t y = 0; y < _info.height(); ++y) + for (int32_t y = 0; y < _info.height(); ++y) { TileVariable t; t.str = create_var_name(y); @@ -211,7 +210,7 @@ std::string CLTile::create_var_name(int32_t row) const std::string var_name = _basename; // If a scalar variable, we do not append the row index - if(_info.height() > 1) + if (_info.height() > 1) { var_name += "__"; var_name += std::to_string(row); @@ -222,7 +221,7 @@ std::string CLTile::create_var_name(int32_t row) const std::vector<int32_t> CLTile::supported_vector_lengths() const { - return std::vector<int32_t>{ 1, 2, 3, 4, 8, 16 }; + return 
std::vector<int32_t>{1, 2, 3, 4, 8, 16}; } void CLTile::validate_tile_info(const TileInfo &info) const diff --git a/compute_kernel_writer/src/cl/CLTile.h b/compute_kernel_writer/src/cl/CLTile.h index 1fb0fc9dbe..498cf51034 100644 --- a/compute_kernel_writer/src/cl/CLTile.h +++ b/compute_kernel_writer/src/cl/CLTile.h @@ -25,6 +25,7 @@ #define COMPUTE_KERNEL_WRITER_SRC_CL_CLTILE_H #include "src/ITile.h" + #include <string> namespace ckw @@ -75,9 +76,9 @@ private: std::string create_var_name(int32_t row) const; - TileInfo _info{ DataType::Unknown }; - std::string _basename{ "" }; - bool _is_constant{ false }; + TileInfo _info{DataType::Unknown}; + std::string _basename{""}; + bool _is_constant{false}; TileContainer _vals{}; }; } // namespace ckw diff --git a/compute_kernel_writer/src/cl/helpers/CLMemoryOpBufferHelper.cpp b/compute_kernel_writer/src/cl/helpers/CLMemoryOpBufferHelper.cpp index f906bcd4b1..a98ebed8fa 100644 --- a/compute_kernel_writer/src/cl/helpers/CLMemoryOpBufferHelper.cpp +++ b/compute_kernel_writer/src/cl/helpers/CLMemoryOpBufferHelper.cpp @@ -28,20 +28,25 @@ #include "ckw/types/MemoryOperation.h" #include "ckw/types/TensorStorageType.h" -#include "src/ITensor.h" -#include "src/Tensor3dMapper.h" #include "src/cl/CLHelpers.h" #include "src/cl/CLKernelWriter.h" #include "src/cl/CLTensorArgument.h" #include "src/cl/CLTile.h" +#include "src/ITensor.h" +#include "src/Tensor3dMapper.h" namespace ckw { -bool CLMemoryOpBufferHelper::validate(const CLKernelWriter *writer, const ITensor *tensor, const TensorSampler *sampler, const Tensor3dMapper *mapper, MemoryOperation op, const CLTile *dst) +bool CLMemoryOpBufferHelper::validate(const CLKernelWriter *writer, + const ITensor *tensor, + const TensorSampler *sampler, + const Tensor3dMapper *mapper, + MemoryOperation op, + const CLTile *dst) { CKW_UNUSED(writer, tensor, mapper, op, dst); - if(sampler->storage() != TensorStorageType::BufferUint8Ptr) + if (sampler->storage() != TensorStorageType::BufferUint8Ptr) 
{ return false; } @@ -97,15 +102,15 @@ bool CLMemoryOpBufferHelper::validate(const CLKernelWriter *writer, const ITenso */ void CLMemoryOpBufferHelper::initialize(const CLTile *dst, const CLTile *x, const CLTile *z, const CLTile *b) { - _dst = dst; + _dst = dst; CKW_ASSERT(validate(_writer, _tensor, _sampler, _mapper.get(), _op, _dst)); _ls_width_full = dst->info().width(); - _coord_x = x->scalar(0, 0).str; - _coord_z = z->scalar(0, 0).str; - _coord_b = b->scalar(0, 0).str; - _coord_orig_z = _coord_z; + _coord_x = x->scalar(0, 0).str; + _coord_z = z->scalar(0, 0).str; + _coord_b = b->scalar(0, 0).str; + _coord_orig_z = _coord_z; out_of_bound_initialize_x(_coord_x); out_of_bound_initialize_z(_coord_z); @@ -126,10 +131,10 @@ void CLMemoryOpBufferHelper::write_row(int32_t row_id, const std::string &coord_ out_of_bound_finalize_y(dst); // The left over load/store will be written in the finalize stage - if(_ls_width_part.size() != 0) + if (_ls_width_part.size() != 0) { int32_t col_start = 0; - for(int32_t partial_width : _ls_width_part) + for (int32_t partial_width : _ls_width_part) { const std::string dst = _dst->vector(row_id, col_start, partial_width).str; const std::string coord_x = _coord_x + " + " + std::to_string(col_start); @@ -150,13 +155,13 @@ void CLMemoryOpBufferHelper::finalize() void CLMemoryOpBufferHelper::out_of_bound_initialize_x(const std::string &coord) { - if(_sampler->address_mode_x() == TensorSamplerAddressModeX::OverlappingMin) + if (_sampler->address_mode_x() == TensorSamplerAddressModeX::OverlappingMin) { - TensorInfo tensor_info = _tensor->info(); - TensorShape shape = tensor_info.shape(); + TensorInfo tensor_info = _tensor->info(); + TensorShape shape = tensor_info.shape(); _ls_width_part = cl_decompose_vector_width(shape[0] % _ls_width_full); - if(_ls_width_part.size() != 0) + if (_ls_width_part.size() != 0) { _writer->op_write_raw_code("if(" + coord + " > 0)\n{\n"); } @@ -165,14 +170,14 @@ void 
CLMemoryOpBufferHelper::out_of_bound_initialize_x(const std::string &coord) void CLMemoryOpBufferHelper::out_of_bound_finalize_x() { - if(_sampler->address_mode_x() == TensorSamplerAddressModeX::OverlappingMin) + if (_sampler->address_mode_x() == TensorSamplerAddressModeX::OverlappingMin) { - if(_ls_width_part.size() != 0) + if (_ls_width_part.size() != 0) { _writer->op_write_raw_code("}\nelse\n{\n"); out_of_bound_initialize_z(_coord_orig_z); - for(LeftoverDescriptor leftover_desc : _leftovers_x) + for (LeftoverDescriptor leftover_desc : _leftovers_x) { out_of_bound_initialize_y(leftover_desc.coord); _writer->op_write_raw_code(leftover_desc.statement); @@ -191,7 +196,7 @@ void CLMemoryOpBufferHelper::out_of_bound_initialize_y(const std::string &coord) const TensorSamplerAddressModeY address_mode_y = _sampler->address_mode_y(); - switch(address_mode_y) + switch (address_mode_y) { case TensorSamplerAddressModeY::ClampToBorderMaxOnly: // Not to be moved outside the case because it marks the relevant tensor component as used even if we dont't use the variable @@ -212,7 +217,7 @@ void CLMemoryOpBufferHelper::out_of_bound_finalize_y(const std::string &dst) { const TensorSamplerAddressModeY address_mode_y = _sampler->address_mode_y(); - switch(address_mode_y) + switch (address_mode_y) { case TensorSamplerAddressModeY::ClampToBorderMaxOnly: _writer->op_write_raw_code("}\nelse\n{\n"); @@ -234,7 +239,7 @@ void CLMemoryOpBufferHelper::out_of_bound_initialize_z(const std::string &coord) CKW_UNUSED(coord); const TensorSamplerAddressModeZ address_mode_z = _sampler->address_mode_z(); - switch(address_mode_z) + switch (address_mode_z) { case TensorSamplerAddressModeZ::None: break; @@ -247,7 +252,7 @@ void CLMemoryOpBufferHelper::out_of_bound_finalize_z() { const TensorSamplerAddressModeZ address_mode_z = _sampler->address_mode_z(); - switch(address_mode_z) + switch (address_mode_z) { case TensorSamplerAddressModeZ::None: break; @@ -256,13 +261,15 @@ void 
CLMemoryOpBufferHelper::out_of_bound_finalize_z() } } -std::string CLMemoryOpBufferHelper::to_statement(MemoryOperation op, int32_t vector_width, const std::string &data, - const std::string &address) const +std::string CLMemoryOpBufferHelper::to_statement(MemoryOperation op, + int32_t vector_width, + const std::string &data, + const std::string &address) const { - switch(op) + switch (op) { case MemoryOperation::Load: - if(vector_width != 1) + if (vector_width != 1) { return data + " = vload" + std::to_string(vector_width) + "(0, " + address + ")"; } @@ -272,7 +279,7 @@ std::string CLMemoryOpBufferHelper::to_statement(MemoryOperation op, int32_t vec } break; case MemoryOperation::Store: - if(vector_width != 1) + if (vector_width != 1) { return "vstore" + std::to_string(vector_width) + "(" + data + ", 0, " + address + ")"; } @@ -288,26 +295,28 @@ std::string CLMemoryOpBufferHelper::to_statement(MemoryOperation op, int32_t vec return ""; } -std::string CLMemoryOpBufferHelper::to_buffer_address(const std::string &x, const std::string &y, const std::string &z, - const std::string &b) const +std::string CLMemoryOpBufferHelper::to_buffer_address(const std::string &x, + const std::string &y, + const std::string &z, + const std::string &b) const { TensorStorageType tensor_storage = _sampler->storage(); CKW_ASSERT(tensor_storage == TensorStorageType::BufferUint8Ptr); - const std::string ptr_buf = _tensor->storage(tensor_storage).val; - const std::string dst_type = cl_data_type_rounded_up_to_valid_vector_width(_dst->info().data_type(), 1); + const std::string ptr_buf = _tensor->storage(tensor_storage).val; + const std::string dst_type = cl_data_type_rounded_up_to_valid_vector_width(_dst->info().data_type(), 1); std::string address; address += "(__global "; address += dst_type; address += "*)("; address += ptr_buf; - if(x != "0" && (_mapper->dim_x().str != "1")) + if (x != "0" && (_mapper->dim_x().str != "1")) { address += " + ("; address += x + ") * sizeof(" + dst_type + 
")"; } - if(y != "0") + if (y != "0") { const std::string stride_y = _mapper->stride_y().str; address += " + ("; @@ -315,7 +324,7 @@ std::string CLMemoryOpBufferHelper::to_buffer_address(const std::string &x, cons address += " * "; address += stride_y; } - if(z != "0" && (_mapper->dim_z().str != "1")) + if (z != "0" && (_mapper->dim_z().str != "1")) { const std::string stride_z = _mapper->stride_z().str; address += " + ("; @@ -323,7 +332,7 @@ std::string CLMemoryOpBufferHelper::to_buffer_address(const std::string &x, cons address += " * "; address += stride_z; } - if(b != "0" && (_mapper->dim_batch().str != "1")) + if (b != "0" && (_mapper->dim_batch().str != "1")) { const std::string stride_b = _mapper->stride_batch().str; address += " + ("; diff --git a/compute_kernel_writer/src/cl/helpers/CLMemoryOpBufferHelper.h b/compute_kernel_writer/src/cl/helpers/CLMemoryOpBufferHelper.h index 9bcd571a81..4e1a842fe1 100644 --- a/compute_kernel_writer/src/cl/helpers/CLMemoryOpBufferHelper.h +++ b/compute_kernel_writer/src/cl/helpers/CLMemoryOpBufferHelper.h @@ -27,9 +27,9 @@ #include "src/cl/helpers/ICLMemoryOpHelper.h" +#include <cstdint> #include <string> #include <vector> -#include <cstdint> namespace ckw { @@ -65,20 +65,25 @@ private: struct LeftoverDescriptor { LeftoverDescriptor(const std::string &dst, const std::string &coord, const std::string &statement) - : dst(dst), coord(coord), statement(statement) + : dst(dst), coord(coord), statement(statement) { } - std::string dst{}; // Describes the destination tile or part of it - std::string coord{}; // Describes the coordinate to be used in boundary checks - std::string statement{}; // Describes the memory operation statement + std::string dst{}; // Describes the destination tile or part of it + std::string coord{}; // Describes the coordinate to be used in boundary checks + std::string statement{}; // Describes the memory operation statement }; std::vector<int32_t> _ls_width_part{}; std::vector<LeftoverDescriptor> 
_leftovers_x{}; std::string _coord_orig_z{}; - static bool validate(const CLKernelWriter *writer, const ITensor *tensor, const TensorSampler *sampler, const Tensor3dMapper *mapper, MemoryOperation op, const CLTile *dst); + static bool validate(const CLKernelWriter *writer, + const ITensor *tensor, + const TensorSampler *sampler, + const Tensor3dMapper *mapper, + MemoryOperation op, + const CLTile *dst); void out_of_bound_initialize_x(const std::string &coord); void out_of_bound_finalize_x(); @@ -87,8 +92,10 @@ private: void out_of_bound_initialize_z(const std::string &coord); void out_of_bound_finalize_z(); - std::string to_statement(MemoryOperation op, int32_t vector_width, const std::string &data, const std::string &address) const; - std::string to_buffer_address(const std::string &x, const std::string &y, const std::string &z, const std::string &b) const; + std::string + to_statement(MemoryOperation op, int32_t vector_width, const std::string &data, const std::string &address) const; + std::string + to_buffer_address(const std::string &x, const std::string &y, const std::string &z, const std::string &b) const; }; } // namespace ckw diff --git a/compute_kernel_writer/src/cl/helpers/CLMemoryOpImage2dHelper.cpp b/compute_kernel_writer/src/cl/helpers/CLMemoryOpImage2dHelper.cpp index 55f88f4136..b7d146bdee 100644 --- a/compute_kernel_writer/src/cl/helpers/CLMemoryOpImage2dHelper.cpp +++ b/compute_kernel_writer/src/cl/helpers/CLMemoryOpImage2dHelper.cpp @@ -28,11 +28,11 @@ #include "ckw/types/MemoryOperation.h" #include "ckw/types/TensorStorageType.h" -#include "src/ITensor.h" -#include "src/Tensor3dMapper.h" #include "src/cl/CLKernelWriter.h" #include "src/cl/CLTensorArgument.h" #include "src/cl/CLTile.h" +#include "src/ITensor.h" +#include "src/Tensor3dMapper.h" namespace ckw { @@ -66,31 +66,36 @@ void CLMemoryOpImage2dHelper::finalize() { } -bool CLMemoryOpImage2dHelper::validate(const CLKernelWriter *writer, const ITensor *tensor, const TensorSampler *sampler, 
const Tensor3dMapper *mapper, MemoryOperation op, const CLTile *dst) +bool CLMemoryOpImage2dHelper::validate(const CLKernelWriter *writer, + const ITensor *tensor, + const TensorSampler *sampler, + const Tensor3dMapper *mapper, + MemoryOperation op, + const CLTile *dst) { CKW_UNUSED(writer, tensor, mapper); - if(dst->info().width() != 4) + if (dst->info().width() != 4) { return false; } - if(sampler->address_mode_x() != TensorSamplerAddressModeX::None) + if (sampler->address_mode_x() != TensorSamplerAddressModeX::None) { return false; } - if(sampler->address_mode_z() != TensorSamplerAddressModeZ::None) + if (sampler->address_mode_z() != TensorSamplerAddressModeZ::None) { return false; } - if(sampler->storage() != TensorStorageType::Texture2dReadOnly && op == MemoryOperation::Load) + if (sampler->storage() != TensorStorageType::Texture2dReadOnly && op == MemoryOperation::Load) { return false; } - if(sampler->storage() != TensorStorageType::Texture2dWriteOnly && op == MemoryOperation::Store) + if (sampler->storage() != TensorStorageType::Texture2dWriteOnly && op == MemoryOperation::Store) { return false; } - if((dst->info().data_type() != DataType::Fp32) && (dst->info().data_type() != DataType::Fp16)) + if ((dst->info().data_type() != DataType::Fp32) && (dst->info().data_type() != DataType::Fp16)) { return false; } @@ -102,7 +107,7 @@ void CLMemoryOpImage2dHelper::out_of_bound_initialize_y(const std::string &coord CKW_UNUSED(coord); const TensorSamplerAddressModeY address_mode_y = _sampler->address_mode_y(); - switch(address_mode_y) + switch (address_mode_y) { case TensorSamplerAddressModeY::SkipLessThanZero: _writer->op_write_raw_code("if(" + coord + " >= 0)\n{\n"); @@ -118,7 +123,7 @@ void CLMemoryOpImage2dHelper::out_of_bound_initialize_y(const std::string &coord void CLMemoryOpImage2dHelper::out_of_bound_finalize_y() { const TensorSamplerAddressModeY address_mode_y = _sampler->address_mode_y(); - switch(address_mode_y) + switch (address_mode_y) { case 
TensorSamplerAddressModeY::SkipLessThanZero: _writer->op_write_raw_code("}\n"); @@ -131,15 +136,19 @@ void CLMemoryOpImage2dHelper::out_of_bound_finalize_y() } } -std::string CLMemoryOpImage2dHelper::to_ls_image2d(MemoryOperation op, int32_t vector_width, const std::string &data, const std::string &sampler, const std::string &address) const +std::string CLMemoryOpImage2dHelper::to_ls_image2d(MemoryOperation op, + int32_t vector_width, + const std::string &data, + const std::string &sampler, + const std::string &address) const { CKW_UNUSED(vector_width); const TensorStorageType tensor_storage = _sampler->storage(); - const std::string image2d_obj = _tensor->storage(tensor_storage).val; - const std::string post_fix = _dst->info().data_type() == DataType::Fp32 ? "f" : "h"; + const std::string image2d_obj = _tensor->storage(tensor_storage).val; + const std::string post_fix = _dst->info().data_type() == DataType::Fp32 ? "f" : "h"; - switch(op) + switch (op) { case MemoryOperation::Load: return data + " = read_image" + post_fix + "(" + image2d_obj + ", " + sampler + ", " + address + ")"; @@ -155,7 +164,7 @@ std::string CLMemoryOpImage2dHelper::to_ls_image2d_sampler() const { const auto address_mode_y = _sampler->address_mode_y(); - switch(address_mode_y) + switch (address_mode_y) { case TensorSamplerAddressModeY::None: return "CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST"; @@ -167,17 +176,19 @@ std::string CLMemoryOpImage2dHelper::to_ls_image2d_sampler() const } } -std::string CLMemoryOpImage2dHelper::to_ls_image2d_address(const std::string &x, const std::string &y, const std::string &z, +std::string CLMemoryOpImage2dHelper::to_ls_image2d_address(const std::string &x, + const std::string &y, + const std::string &z, const std::string &b) const { std::string coord_x = "(" + x + ") >> 2"; std::string coord_y = "("; - if(y != "0") + if (y != "0") { coord_y += y; } - if(z != "0" && (_mapper->dim_z().str != "1")) + if (z != "0" && (_mapper->dim_z().str 
!= "1")) { const std::string dim = _mapper->dim_y().str; coord_y += " + ("; @@ -185,7 +196,7 @@ std::string CLMemoryOpImage2dHelper::to_ls_image2d_address(const std::string &x, coord_y += " * "; coord_y += dim; } - if(b != "0" && (_mapper->dim_batch().str != "1")) + if (b != "0" && (_mapper->dim_batch().str != "1")) { const std::string dim0 = _mapper->dim_y().str; const std::string dim1 = _mapper->dim_z().str; diff --git a/compute_kernel_writer/src/cl/helpers/CLMemoryOpImage2dHelper.h b/compute_kernel_writer/src/cl/helpers/CLMemoryOpImage2dHelper.h index 73bede7789..fd9b097a24 100644 --- a/compute_kernel_writer/src/cl/helpers/CLMemoryOpImage2dHelper.h +++ b/compute_kernel_writer/src/cl/helpers/CLMemoryOpImage2dHelper.h @@ -59,14 +59,24 @@ public: void finalize() override; private: - static bool validate(const CLKernelWriter *writer, const ITensor *tensor, const TensorSampler *sampler, const Tensor3dMapper *mapper, MemoryOperation op, const CLTile *dst); + static bool validate(const CLKernelWriter *writer, + const ITensor *tensor, + const TensorSampler *sampler, + const Tensor3dMapper *mapper, + MemoryOperation op, + const CLTile *dst); void out_of_bound_initialize_y(const std::string &coord); void out_of_bound_finalize_y(); - std::string to_ls_image2d(MemoryOperation op, int32_t vector_width, const std::string &data, const std::string &sampler, const std::string &address) const; + std::string to_ls_image2d(MemoryOperation op, + int32_t vector_width, + const std::string &data, + const std::string &sampler, + const std::string &address) const; std::string to_ls_image2d_sampler() const; - std::string to_ls_image2d_address(const std::string &x, const std::string &y, const std::string &z, const std::string &b) const; + std::string + to_ls_image2d_address(const std::string &x, const std::string &y, const std::string &z, const std::string &b) const; }; } // namespace ckw diff --git a/compute_kernel_writer/src/cl/helpers/ICLMemoryOpHelper.h 
b/compute_kernel_writer/src/cl/helpers/ICLMemoryOpHelper.h index 7f363431e8..f46fee9750 100644 --- a/compute_kernel_writer/src/cl/helpers/ICLMemoryOpHelper.h +++ b/compute_kernel_writer/src/cl/helpers/ICLMemoryOpHelper.h @@ -26,6 +26,7 @@ #define CKW_SRC_CL_HELPERS_ICLMEMORYOPHELPER_H #include "ckw/TensorSampler.h" + #include "src/Tensor3dMapper.h" #include <cstdint> @@ -98,16 +99,16 @@ public: virtual void finalize() = 0; protected: - CLKernelWriter *_writer{ nullptr }; - ITensor *_tensor{ nullptr }; - TensorSampler *_sampler{ nullptr }; - MemoryOperation _op; - std::unique_ptr<Tensor3dMapper> _mapper{ nullptr }; - const CLTile *_dst{ nullptr }; - int32_t _ls_width_full{ 0 }; - std::string _coord_x{}; - std::string _coord_z{}; - std::string _coord_b{}; + CLKernelWriter *_writer{nullptr}; + ITensor *_tensor{nullptr}; + TensorSampler *_sampler{nullptr}; + MemoryOperation _op; + std::unique_ptr<Tensor3dMapper> _mapper{nullptr}; + const CLTile *_dst{nullptr}; + int32_t _ls_width_full{0}; + std::string _coord_x{}; + std::string _coord_z{}; + std::string _coord_b{}; }; } // namespace ckw |