diff options
author | Gunes Bayir <gunes.bayir@arm.com> | 2024-01-17 16:07:03 +0000 |
---|---|---|
committer | Viet-Hoa Do <viet-hoa.do@arm.com> | 2024-02-01 16:00:34 +0000 |
commit | 2b9fa593a0a172bf36a02b5cdb840c6b9b361d7c (patch) | |
tree | a4e2d5ce46443a79a0778e4960462ce3edf106ec /compute_kernel_writer/src | |
parent | 7ab7fca87cca8775f82b0e9efec6a40975910c17 (diff) | |
download | ComputeLibrary-2b9fa593a0a172bf36a02b5cdb840c6b9b361d7c.tar.gz |
Use the stable CKW API in the GPU dynamic fusion backend
- Refactor all kernels to work with the CKW stable API
- Add support for sub-tile in the op_load/op_store CKW operator
- Fix mismatch in resize
- Add comments in all kernels written with CKW to help developers
understand the structure of the code
- Add texture image support in depthwise convolution written with CKW
- Add support for different block sizes in depthwise convolution
- Remove the use of the dynamic fusion helper functions.
- Add support for floor in the op_unary() of CKW
Resolves: COMPMID-6708, COMPMID-6743, COMPMID-6530
Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Signed-off-by: Gunes Bayir <gunes.bayir@arm.com>
Signed-off-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Signed-off-by: Jakub Sujak <jakub.sujak@arm.com>
Change-Id: I8104ce4d04a3138a1aeb0b84940e1f1c89e76069
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10914
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Jakub Sujak <jakub.sujak@arm.com>
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'compute_kernel_writer/src')
-rw-r--r-- | compute_kernel_writer/src/KernelWriter.cpp | 3 | ||||
-rw-r--r-- | compute_kernel_writer/src/TensorOperand.cpp | 58 | ||||
-rw-r--r-- | compute_kernel_writer/src/TensorSampler.cpp | 2 | ||||
-rw-r--r-- | compute_kernel_writer/src/TileOperand.cpp | 14 | ||||
-rw-r--r-- | compute_kernel_writer/src/TileView.h | 20 | ||||
-rw-r--r-- | compute_kernel_writer/src/cl/CLHelpers.cpp | 3 | ||||
-rw-r--r-- | compute_kernel_writer/src/cl/CLKernelWriter.cpp | 88 | ||||
-rw-r--r-- | compute_kernel_writer/src/cl/CLTensorArgument.h | 3 | ||||
-rw-r--r-- | compute_kernel_writer/src/cl/helpers/CLMemoryOpBufferHelper.cpp | 43 | ||||
-rw-r--r-- | compute_kernel_writer/src/cl/helpers/CLMemoryOpBufferHelper.h | 34 | ||||
-rw-r--r-- | compute_kernel_writer/src/cl/helpers/CLMemoryOpImage2dHelper.cpp | 35 | ||||
-rw-r--r-- | compute_kernel_writer/src/cl/helpers/CLMemoryOpImage2dHelper.h | 28 | ||||
-rw-r--r-- | compute_kernel_writer/src/cl/helpers/ICLMemoryOpHelper.h | 24 | ||||
-rw-r--r-- | compute_kernel_writer/src/types/ConstantData.cpp | 28 |
14 files changed, 262 insertions, 121 deletions
diff --git a/compute_kernel_writer/src/KernelWriter.cpp b/compute_kernel_writer/src/KernelWriter.cpp index a478231c09..92a36746ce 100644 --- a/compute_kernel_writer/src/KernelWriter.cpp +++ b/compute_kernel_writer/src/KernelWriter.cpp @@ -107,7 +107,8 @@ TensorOperand KernelWriter::create_tensor_operand(ITensor &tensor) ITensor &KernelWriter::get_tensor(const TensorOperand &operand) { - return operand._tensor; + CKW_ASSERT(operand._tensor != nullptr); + return *operand._tensor; } const std::vector<std::vector<std::string>> &KernelWriter::get_values(const ConstantData &data) diff --git a/compute_kernel_writer/src/TensorOperand.cpp b/compute_kernel_writer/src/TensorOperand.cpp index bf11d0d332..94997537d8 100644 --- a/compute_kernel_writer/src/TensorOperand.cpp +++ b/compute_kernel_writer/src/TensorOperand.cpp @@ -21,91 +21,115 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ - #include "ckw/TensorOperand.h" +#include "ckw/Error.h" + #include "src/ITensor.h" namespace ckw { -TensorOperand::TensorOperand(ITensor &tensor) : _tensor(tensor) +TensorOperand::TensorOperand() : _tensor(nullptr) +{ +} + +TensorOperand::TensorOperand(ITensor &tensor) : _tensor(&tensor) +{ +} + +bool TensorOperand::is_valid() const { + return _tensor != nullptr; } const TensorInfo &TensorOperand::info() const { - return _tensor.info(); + CKW_ASSERT(is_valid() == true); + return _tensor->info(); } TileOperand TensorOperand::stride0() { - return TileOperand(_tensor.component(TensorComponentType::Stride0)); + CKW_ASSERT(is_valid() == true); + return TileOperand(_tensor->component(TensorComponentType::Stride0)); } TileOperand TensorOperand::stride1() { - return TileOperand(_tensor.component(TensorComponentType::Stride1)); + CKW_ASSERT(is_valid() == true); + return TileOperand(_tensor->component(TensorComponentType::Stride1)); } TileOperand TensorOperand::stride2() { - return TileOperand(_tensor.component(TensorComponentType::Stride2)); + CKW_ASSERT(is_valid() == true); + return TileOperand(_tensor->component(TensorComponentType::Stride2)); } TileOperand TensorOperand::stride3() { - return TileOperand(_tensor.component(TensorComponentType::Stride3)); + CKW_ASSERT(is_valid() == true); + return TileOperand(_tensor->component(TensorComponentType::Stride3)); } TileOperand TensorOperand::stride4() { - return TileOperand(_tensor.component(TensorComponentType::Stride4)); + CKW_ASSERT(is_valid() == true); + return TileOperand(_tensor->component(TensorComponentType::Stride4)); } TileOperand TensorOperand::dim0() { - return TileOperand(_tensor.component(TensorComponentType::Dim0)); + return TileOperand(_tensor->component(TensorComponentType::Dim0)); } TileOperand TensorOperand::dim1() { - return TileOperand(_tensor.component(TensorComponentType::Dim1)); + CKW_ASSERT(is_valid() == true); + return TileOperand(_tensor->component(TensorComponentType::Dim1)); } TileOperand TensorOperand::dim2() { - return TileOperand(_tensor.component(TensorComponentType::Dim2)); + CKW_ASSERT(is_valid() == true); + return TileOperand(_tensor->component(TensorComponentType::Dim2)); } TileOperand TensorOperand::dim3() { - return TileOperand(_tensor.component(TensorComponentType::Dim3)); + CKW_ASSERT(is_valid() == true); + return TileOperand(_tensor->component(TensorComponentType::Dim3)); } TileOperand TensorOperand::dim4() { - return TileOperand(_tensor.component(TensorComponentType::Dim4)); + CKW_ASSERT(is_valid() == true); + return TileOperand(_tensor->component(TensorComponentType::Dim4)); } TileOperand TensorOperand::dim1_dim2() { - return TileOperand(_tensor.component(TensorComponentType::Dim1xDim2)); + CKW_ASSERT(is_valid() == true); + return TileOperand(_tensor->component(TensorComponentType::Dim1xDim2)); } TileOperand TensorOperand::dim1_dim2_dim3() { - return TileOperand(_tensor.component(TensorComponentType::Dim1xDim2xDim3)); + CKW_ASSERT(is_valid() == true); + return TileOperand(_tensor->component(TensorComponentType::Dim1xDim2xDim3)); } TileOperand TensorOperand::dim2_dim3() { - return TileOperand(_tensor.component(TensorComponentType::Dim2xDim3)); + CKW_ASSERT(is_valid() == true); + return TileOperand(_tensor->component(TensorComponentType::Dim2xDim3)); } TileOperand TensorOperand::offset_first_element_in_bytes() { - return TileOperand(_tensor.component(TensorComponentType::OffsetFirstElement)); + CKW_ASSERT(is_valid() == true); + return TileOperand(_tensor->component(TensorComponentType::OffsetFirstElement)); } } // namespace ckw diff --git a/compute_kernel_writer/src/TensorSampler.cpp b/compute_kernel_writer/src/TensorSampler.cpp index 91d5af2fd0..e81c5f9d66 100644 --- a/compute_kernel_writer/src/TensorSampler.cpp +++ b/compute_kernel_writer/src/TensorSampler.cpp @@ -27,6 +27,8 @@ namespace ckw { +TensorSampler::TensorSampler() = default; + TensorSampler::TensorSampler(TensorStorageType storage, TensorSamplerFormat format, TensorSamplerAddressModeX address_mode_x, diff --git a/compute_kernel_writer/src/TileOperand.cpp b/compute_kernel_writer/src/TileOperand.cpp index 865ef85a13..8ced6cfe3f 100644 --- a/compute_kernel_writer/src/TileOperand.cpp +++ b/compute_kernel_writer/src/TileOperand.cpp @@ -31,6 +31,10 @@ namespace ckw { +TileOperand::TileOperand() : _tile(nullptr), _row_start(0), _row_end(0), _col_start(0), _col_end(0) +{ +} + TileOperand::TileOperand(ITile &tile) : _tile(&tile), _row_start(0), _row_end(tile.info().height()), _col_start(0), _col_end(tile.info().width()) { @@ -46,6 +50,16 @@ TileOperand::TileOperand( CKW_ASSERT(col_end > col_start && col_end <= _tile->info().width()); } +bool TileOperand::is_valid() const +{ + return _tile != nullptr; +} + +const TileInfo &TileOperand::tile_info() const +{ + return _tile->info(); +} + TileOperand TileOperand::tile(int32_t row_start, int32_t row_end, int32_t col_start, int32_t col_end) const { CKW_ASSERT(row_start >= 0 && _row_start + row_start < _row_end); diff --git a/compute_kernel_writer/src/TileView.h b/compute_kernel_writer/src/TileView.h index 50ae66b389..42854ac823 100644 --- a/compute_kernel_writer/src/TileView.h +++ b/compute_kernel_writer/src/TileView.h @@ -78,6 +78,10 @@ template <typename T> class TileView { public: + /** Default constructor */ + TileView() : _tile(nullptr), _area(0, 0, 0, 0) + { + } /** Create a tile view that refers to the whole tile. * * @param[in] tile The tile object. @@ -179,6 +183,22 @@ public: col_end() == _tile->info().width(); } + /** Set the rectangular active area. + * + * @param[in] area The rectangular active area. + */ + TileView &area(const TileArea &area) + { + _area = area; + return *this; + } + + /** Get the tile area */ + TileArea area() const + { + return _area; + } + private: const T *_tile; TileArea _area; diff --git a/compute_kernel_writer/src/cl/CLHelpers.cpp b/compute_kernel_writer/src/cl/CLHelpers.cpp index 8e4a932764..252c5cdfcb 100644 --- a/compute_kernel_writer/src/cl/CLHelpers.cpp +++ b/compute_kernel_writer/src/cl/CLHelpers.cpp @@ -193,6 +193,9 @@ std::tuple<bool, std::string> cl_get_unary_op(UnaryOp op) case UnaryOp::Round: return {true, "round"}; + case UnaryOp::Floor: + return {true, "floor"}; + default: CKW_THROW_MSG("Unsupported unary operation!"); } diff --git a/compute_kernel_writer/src/cl/CLKernelWriter.cpp b/compute_kernel_writer/src/cl/CLKernelWriter.cpp index 62e6853a7a..8b4876b6a7 100644 --- a/compute_kernel_writer/src/cl/CLKernelWriter.cpp +++ b/compute_kernel_writer/src/cl/CLKernelWriter.cpp @@ -47,6 +47,25 @@ #include <tuple> #include <vector> +namespace +{ +std::string generate_cl_extensions() +{ + std::string ext = R"( +#if defined(cl_khr_fp16) +#pragma OPENCL EXTENSION cl_khr_fp16 : enable +#endif // defined(cl_khr_fp16) + +#if defined(cl_arm_printf) +#pragma OPENCL EXTENSION cl_arm_printf : enable +#endif // defined(cl_arm_printf); + +#define inf (INFINITY) +)"; + return ext; +} +} // namespace + namespace ckw { @@ -56,7 +75,7 @@ CLKernelWriter::~CLKernelWriter() = default; std::unique_ptr<Kernel> CLKernelWriter::emit_kernel(const std::string &name) { std::string code; - + code += generate_cl_extensions(); code += "__kernel void "; code += name; code += "\n(\n"; @@ -154,21 +173,31 @@ void CLKernelWriter::op_cast(const TileOperand &dst, const TileOperand &src, Con const auto dst_type_str = cl_get_variable_datatype_as_string(dst_type, dst_w); const std::string sat = policy == ConvertPolicy::Saturate ? "_sat" : ""; + CKW_ASSERT_IF(policy == ConvertPolicy::Saturate, !is_data_type_float(dst_type)); const auto broadcast_x = dst_w != 1 && src_w == 1; const std::string prefix = broadcast_x ? "(" + dst_type_str + ")" : ""; - CKW_ASSERT_MSG(src_view.data_type() != dst_view.data_type(), "Source and destination type must be different."); CKW_ASSERT_MSG(src_view.height() == dst_h || src_view.height() == 1, "Tile height must match or source is broadcasting in y dimension."); CKW_ASSERT_MSG(src_w == dst_w || src_w == 1, "Tile width must match or source is broadcasting in x dimension."); // Broadcasting on y dimension is automatic (see CLTile::vector). - for (int32_t y = 0; y < dst_h; ++y) + if (src_view.data_type() == dst_view.data_type()) + { + for (int32_t y = 0; y < dst_h; ++y) + { + append_code(dst_view.vector(y).str, " = ", src_view.vector(y).str, ";\n"); + } + } + else { - append_code(dst_view.vector(y).str, " = ", prefix, "convert_", convert_type_str, sat, "(", - src_view.vector(y).str, ");\n"); + for (int32_t y = 0; y < dst_h; ++y) + { + append_code(dst_view.vector(y).str, " = ", prefix, "convert_", convert_type_str, sat, "(", + src_view.vector(y).str, ");\n"); + } } } @@ -219,18 +248,12 @@ void CLKernelWriter::op_binary(const TileOperand &dst, BinaryOp op, const TileOp CKW_ASSERT_MSG(lhs_view.data_type() == rhs_view.data_type(), "LHS and RHS type must match."); - CKW_ASSERT_MSG(lhs_view.height() == dst_h || lhs_view.height() == 1, - "LHS tile height must match or source is broadcasting in y dimension."); - CKW_ASSERT_MSG(rhs_view.height() == dst_h || rhs_view.height() == 1, - "RHS tile height must match or source is broadcasting in y dimension."); - - CKW_ASSERT_MSG(lhs_w == dst_w || lhs_w == 1, - "LHS tile width must match destination or LHS is broadcasting in x dimension."); - CKW_ASSERT_MSG(rhs_w == dst_w || rhs_w == 1, - "RHS tile width must match destination or RHS is broadcasting in x dimension."); - if (op == BinaryOp::MatMul_Nt_T) { + CKW_ASSERT_MSG(lhs_view.height() == dst_h, "LHS tile height must match the DST tile height"); + CKW_ASSERT_MSG(rhs_view.height() == dst_w, "RHS tile height must match the DST tile width"); + CKW_ASSERT_MSG(lhs_view.width() == rhs_view.width(), "LHS tile width must match the LHS tile width"); + CKW_ASSERT(is_data_type_float(data_type)); for (int32_t y = 0; y < dst_h; ++y) @@ -239,14 +262,24 @@ void CLKernelWriter::op_binary(const TileOperand &dst, BinaryOp op, const TileOp { for (int32_t k = 0; k < lhs_w; ++k) { - append_code(dst_view.scalar(x, y).str, " = fma(", lhs_view.scalar(k, y).str, ", ", - rhs_view.scalar(k, x).str, ", ", dst_view.scalar(x, y).str, ");\n"); + append_code(dst_view.scalar(y, x).str, " = fma(", lhs_view.scalar(y, k).str, ", ", + rhs_view.scalar(x, k).str, ", ", dst_view.scalar(y, x).str, ");\n"); } } } } else { + CKW_ASSERT_MSG(lhs_view.height() == dst_h || lhs_view.height() == 1, + "LHS tile height must match or source is broadcasting in y dimension."); + CKW_ASSERT_MSG(rhs_view.height() == dst_h || rhs_view.height() == 1, + "RHS tile height must match or source is broadcasting in y dimension."); + + CKW_ASSERT_MSG(lhs_w == dst_w || lhs_w == 1, + "LHS tile width must match destination or LHS is broadcasting in x dimension."); + CKW_ASSERT_MSG(rhs_w == dst_w || rhs_w == 1, + "RHS tile width must match destination or RHS is broadcasting in x dimension."); + const auto op_info = cl_get_binary_op(op, data_type); const auto op_is_func = std::get<0>(op_info); const auto &op_name = std::get<1>(op_info); @@ -746,36 +779,35 @@ void CLKernelWriter::op_load_store(MemoryOperation op, ITensor &tensor = get_tensor(tensor_op); + const auto tile = to_cl_tile_view(tile_op); + const auto x_tile = to_cl_tile_view(x).full_tile(); + const auto y_tile = to_cl_tile_view(y).full_tile(); + const auto z_tile = to_cl_tile_view(z).full_tile(); + const auto batch_tile = to_cl_tile_view(batch).full_tile(); + std::unique_ptr<ICLMemoryOpHelper> helper; switch (sampler.storage()) { case TensorStorageType::BufferUint8Ptr: - helper = std::make_unique<CLMemoryOpBufferHelper>(this, &tensor, &sampler, op); + helper = std::make_unique<CLMemoryOpBufferHelper>(this, &tensor, &sampler, op, tile); break; case TensorStorageType::Texture2dReadOnly: case TensorStorageType::Texture2dWriteOnly: - helper = std::make_unique<CLMemoryOpImage2dHelper>(this, &tensor, &sampler, op); + helper = std::make_unique<CLMemoryOpImage2dHelper>(this, &tensor, &sampler, op, tile); break; default: CKW_THROW_MSG("Unsupported tensor storage"); } - // Load/store op doesn't support sub-tile access. - const auto tile = to_cl_tile_view(tile_op).full_tile(); - const auto x_tile = to_cl_tile_view(x).full_tile(); - const auto y_tile = to_cl_tile_view(y).full_tile(); - const auto z_tile = to_cl_tile_view(z).full_tile(); - const auto batch_tile = to_cl_tile_view(batch).full_tile(); - CKW_ASSERT(x_tile.is_scalar()); CKW_ASSERT(z_tile.is_scalar()); CKW_ASSERT_IF(indirect_buffer, y_tile.info().width() == 1); CKW_ASSERT_IF(!indirect_buffer, y_tile.is_scalar()); CKW_ASSERT(batch_tile.is_scalar()); - helper->initialize(&tile, &x_tile, &z_tile, &batch_tile); + helper->initialize(&x_tile, &z_tile, &batch_tile); - for (int row = 0; row < tile.info().height(); ++row) + for (int row = 0; row < tile.height(); ++row) { if (!indirect_buffer) { diff --git a/compute_kernel_writer/src/cl/CLTensorArgument.h b/compute_kernel_writer/src/cl/CLTensorArgument.h index 35df51422e..a79cf340bb 100644 --- a/compute_kernel_writer/src/cl/CLTensorArgument.h +++ b/compute_kernel_writer/src/cl/CLTensorArgument.h @@ -27,6 +27,7 @@ #include "ckw/types/TensorComponentType.h" #include "ckw/types/TensorStorageType.h" +#include "src/cl/CLTensorComponent.h" #include "src/ITensor.h" #include <memory> @@ -39,8 +40,6 @@ namespace ckw class TensorInfo; class ITensorComponent; -class CLTensorComponent; -class CLTensorStorage; /** OpenCL specific tensor argument * Internally, the object keeps track of the components and storages used to minimize the number diff --git a/compute_kernel_writer/src/cl/helpers/CLMemoryOpBufferHelper.cpp b/compute_kernel_writer/src/cl/helpers/CLMemoryOpBufferHelper.cpp index a98ebed8fa..7d16f35fbe 100644 --- a/compute_kernel_writer/src/cl/helpers/CLMemoryOpBufferHelper.cpp +++ b/compute_kernel_writer/src/cl/helpers/CLMemoryOpBufferHelper.cpp @@ -34,15 +34,16 @@ #include "src/cl/CLTile.h" #include "src/ITensor.h" #include "src/Tensor3dMapper.h" +#include "src/TileView.h" namespace ckw { -bool CLMemoryOpBufferHelper::validate(const CLKernelWriter *writer, - const ITensor *tensor, - const TensorSampler *sampler, - const Tensor3dMapper *mapper, - MemoryOperation op, - const CLTile *dst) +bool CLMemoryOpBufferHelper::validate(const CLKernelWriter *writer, + const ITensor *tensor, + const TensorSampler *sampler, + const Tensor3dMapper *mapper, + MemoryOperation op, + const TileView<CLTile> &dst) { CKW_UNUSED(writer, tensor, mapper, op, dst); @@ -100,17 +101,14 @@ bool CLMemoryOpBufferHelper::validate(const CLKernelWriter *writer, * The outermost block is x, then z and then y. This is why, if/else's covering for y are initialized * at each row write. In some addressing modes, such as None, no if/else conditions are written. */ -void CLMemoryOpBufferHelper::initialize(const CLTile *dst, const CLTile *x, const CLTile *z, const CLTile *b) +void CLMemoryOpBufferHelper::initialize(const CLTile *x, const CLTile *z, const CLTile *b) { - _dst = dst; - CKW_ASSERT(validate(_writer, _tensor, _sampler, _mapper.get(), _op, _dst)); - _ls_width_full = dst->info().width(); - _coord_x = x->scalar(0, 0).str; - _coord_z = z->scalar(0, 0).str; - _coord_b = b->scalar(0, 0).str; - _coord_orig_z = _coord_z; + _coord_x = x->scalar(0, 0).str; + _coord_z = z->scalar(0, 0).str; + _coord_b = b->scalar(0, 0).str; + _coord_orig_z = _coord_z; out_of_bound_initialize_x(_coord_x); out_of_bound_initialize_z(_coord_z); @@ -121,7 +119,7 @@ void CLMemoryOpBufferHelper::write_row(int32_t row_id, const std::string &coord_ // The only check required is on Y. out_of_bound_initialize_y(coord_y); - const std::string dst = _dst->vector(row_id).str; + const std::string dst = _dst.vector(row_id).str; const std::string address = to_buffer_address(_coord_x, coord_y, _coord_z, _coord_b); const std::string ls_buf = to_statement(_op, _ls_width_full, dst, address); @@ -133,10 +131,17 @@ void CLMemoryOpBufferHelper::write_row(int32_t row_id, const std::string &coord_ // The left over load/store will be written in the finalize stage if (_ls_width_part.size() != 0) { - int32_t col_start = 0; + int32_t col_start = 0; + const TileArea original_area = _dst.area(); + for (int32_t partial_width : _ls_width_part) { - const std::string dst = _dst->vector(row_id, col_start, partial_width).str; + // Set the active area + const TileArea area(original_area.row_start(), original_area.row_end(), col_start, + col_start + partial_width); + _dst.area(area); + + const std::string dst = _dst.vector(row_id).str; const std::string coord_x = _coord_x + " + " + std::to_string(col_start); const std::string address = to_buffer_address(coord_x, coord_y, _coord_z, _coord_b); const std::string statement = to_statement(_op, partial_width, dst, address); @@ -144,6 +149,8 @@ void CLMemoryOpBufferHelper::write_row(int32_t row_id, const std::string &coord_ col_start += partial_width; } + // Restore the original area + _dst.area(original_area); } } @@ -304,7 +311,7 @@ std::string CLMemoryOpBufferHelper::to_buffer_address(const std::string &x, CKW_ASSERT(tensor_storage == TensorStorageType::BufferUint8Ptr); const std::string ptr_buf = _tensor->storage(tensor_storage).val; - const std::string dst_type = cl_data_type_rounded_up_to_valid_vector_width(_dst->info().data_type(), 1); + const std::string dst_type = cl_data_type_rounded_up_to_valid_vector_width(_dst.data_type(), 1); std::string address; address += "(__global "; diff --git a/compute_kernel_writer/src/cl/helpers/CLMemoryOpBufferHelper.h b/compute_kernel_writer/src/cl/helpers/CLMemoryOpBufferHelper.h index 4e1a842fe1..a6b3272f32 100644 --- a/compute_kernel_writer/src/cl/helpers/CLMemoryOpBufferHelper.h +++ b/compute_kernel_writer/src/cl/helpers/CLMemoryOpBufferHelper.h @@ -22,8 +22,8 @@ * SOFTWARE. */ -#ifndef CKW_SRC_CL_CLMEMORYOPBUFFERHELPER_H -#define CKW_SRC_CL_CLMEMORYOPBUFFERHELPER_H +#ifndef CKW_SRC_CL_HELPERS_CLMEMORYOPBUFFERHELPER_H +#define CKW_SRC_CL_HELPERS_CLMEMORYOPBUFFERHELPER_H #include "src/cl/helpers/ICLMemoryOpHelper.h" @@ -37,6 +37,8 @@ namespace ckw // Forward Declarations class CLKernelWriter; class CLTile; +template <class CLTile> +class TileView; enum class MemoryOperation; /** Helper class to write memory operations (like load/store) in OpenCL @@ -45,19 +47,23 @@ class CLMemoryOpBufferHelper : public ICLMemoryOpHelper { public: /** Constructor similar to @ref ICLMemoryOpHelper() */ - CLMemoryOpBufferHelper(CLKernelWriter *writer, ITensor *tensor, TensorSampler *sampler, MemoryOperation op) - : ICLMemoryOpHelper(writer, tensor, sampler, op) + CLMemoryOpBufferHelper(CLKernelWriter *writer, + ITensor *tensor, + TensorSampler *sampler, + MemoryOperation op, + const TileView<CLTile> &dst) + : ICLMemoryOpHelper(writer, tensor, sampler, op, dst) { } /** Copy constructor */ - CLMemoryOpBufferHelper(const CLMemoryOpBufferHelper &) = default; + CLMemoryOpBufferHelper(const CLMemoryOpBufferHelper &) = delete; /** Assignment operator overload */ - CLMemoryOpBufferHelper &operator=(const CLMemoryOpBufferHelper &) = default; + CLMemoryOpBufferHelper &operator=(const CLMemoryOpBufferHelper &) = delete; // Methods overridden - void initialize(const CLTile *dst, const CLTile *x, const CLTile *z, const CLTile *b) override; + void initialize(const CLTile *x, const CLTile *z, const CLTile *b) override; void write_row(int32_t row_id, const std::string &coord_y) override; void finalize() override; @@ -78,12 +84,12 @@ private: std::vector<LeftoverDescriptor> _leftovers_x{}; std::string _coord_orig_z{}; - static bool validate(const CLKernelWriter *writer, - const ITensor *tensor, - const TensorSampler *sampler, - const Tensor3dMapper *mapper, - MemoryOperation op, - const CLTile *dst); + static bool validate(const CLKernelWriter *writer, + const ITensor *tensor, + const TensorSampler *sampler, + const Tensor3dMapper *mapper, + MemoryOperation op, + const TileView<CLTile> &dst); void out_of_bound_initialize_x(const std::string &coord); void out_of_bound_finalize_x(); @@ -99,4 +105,4 @@ private: }; } // namespace ckw -#endif /* CKW_SRC_CL_CLMEMORYOPBUFFERHELPER_H */ +#endif // CKW_SRC_CL_HELPERS_CLMEMORYOPBUFFERHELPER_H diff --git a/compute_kernel_writer/src/cl/helpers/CLMemoryOpImage2dHelper.cpp b/compute_kernel_writer/src/cl/helpers/CLMemoryOpImage2dHelper.cpp index b7d146bdee..f392cd89cc 100644 --- a/compute_kernel_writer/src/cl/helpers/CLMemoryOpImage2dHelper.cpp +++ b/compute_kernel_writer/src/cl/helpers/CLMemoryOpImage2dHelper.cpp @@ -33,18 +33,15 @@ #include "src/cl/CLTile.h" #include "src/ITensor.h" #include "src/Tensor3dMapper.h" +#include "src/TileView.h" namespace ckw { -void CLMemoryOpImage2dHelper::initialize(const CLTile *dst, const CLTile *x, const CLTile *z, const CLTile *b) +void CLMemoryOpImage2dHelper::initialize(const CLTile *x, const CLTile *z, const CLTile *b) { - CKW_ASSERT(validate(_writer, _tensor, _sampler, _mapper.get(), _op, dst)); - - _dst = dst; - _ls_width_full = dst->info().width(); - _coord_x = x->scalar(0, 0).str; - _coord_z = z->scalar(0, 0).str; - _coord_b = b->scalar(0, 0).str; + _coord_x = x->scalar(0, 0).str; + _coord_z = z->scalar(0, 0).str; + _coord_b = b->scalar(0, 0).str; } void CLMemoryOpImage2dHelper::write_row(int32_t row_id, const std::string &coord_y) @@ -52,7 +49,7 @@ void CLMemoryOpImage2dHelper::write_row(int32_t row_id, const std::string &coord // The only check required is on Y. out_of_bound_initialize_y(coord_y); - const std::string dst = _dst->vector(row_id).str; + const std::string dst = _dst.vector(row_id).str; const std::string sampler = to_ls_image2d_sampler(); const std::string coord = to_ls_image2d_address(_coord_x, coord_y, _coord_z, _coord_b); const std::string ls_buf = to_ls_image2d(_op, _ls_width_full, dst, sampler, coord); @@ -66,16 +63,16 @@ void CLMemoryOpImage2dHelper::finalize() { } -bool CLMemoryOpImage2dHelper::validate(const CLKernelWriter *writer, - const ITensor *tensor, - const TensorSampler *sampler, - const Tensor3dMapper *mapper, - MemoryOperation op, - const CLTile *dst) +bool CLMemoryOpImage2dHelper::validate(const CLKernelWriter *writer, + const ITensor *tensor, + const TensorSampler *sampler, + const Tensor3dMapper *mapper, + MemoryOperation op, + const TileView<CLTile> &dst) { CKW_UNUSED(writer, tensor, mapper); - if (dst->info().width() != 4) + if (dst.width() != 4) { return false; } @@ -95,7 +92,7 @@ bool CLMemoryOpImage2dHelper::validate(const CLKernelWriter *writer, { return false; } - if ((dst->info().data_type() != DataType::Fp32) && (dst->info().data_type() != DataType::Fp16)) + if ((dst.data_type() != DataType::Fp32) && (dst.data_type() != DataType::Fp16)) { return false; } @@ -143,10 +140,12 @@ std::string CLMemoryOpImage2dHelper::to_ls_image2d(MemoryOperation op, const std::string &address) const { CKW_UNUSED(vector_width); + CKW_ASSERT_MSG(_dst.data_type() == DataType::Fp32 || _dst.data_type() == DataType::Fp16, + "Image2d only supports floating-point data type"); const TensorStorageType tensor_storage = _sampler->storage(); const std::string image2d_obj = _tensor->storage(tensor_storage).val; - const std::string post_fix = _dst->info().data_type() == DataType::Fp32 ? "f" : "h"; + const std::string post_fix = _dst.data_type() == DataType::Fp32 ? "f" : "h"; switch (op) { diff --git a/compute_kernel_writer/src/cl/helpers/CLMemoryOpImage2dHelper.h b/compute_kernel_writer/src/cl/helpers/CLMemoryOpImage2dHelper.h index fd9b097a24..6c42c132d9 100644 --- a/compute_kernel_writer/src/cl/helpers/CLMemoryOpImage2dHelper.h +++ b/compute_kernel_writer/src/cl/helpers/CLMemoryOpImage2dHelper.h @@ -35,6 +35,8 @@ namespace ckw // Forward Declarations class CLKernelWriter; class CLTile; +template <class CLTile> +class TileView; enum class MemoryOperation; /** Helper class to write memory operations (like load/store) in OpenCL for Image2d type */ @@ -42,29 +44,33 @@ class CLMemoryOpImage2dHelper : public ICLMemoryOpHelper { public: /** Constructor similar to @ref ICLMemoryOpHelper() */ - CLMemoryOpImage2dHelper(CLKernelWriter *writer, ITensor *tensor, TensorSampler *sampler, MemoryOperation op) - : ICLMemoryOpHelper(writer, tensor, sampler, op) + CLMemoryOpImage2dHelper(CLKernelWriter *writer, + ITensor *tensor, + TensorSampler *sampler, + MemoryOperation op, + const TileView<CLTile> &dst) + : ICLMemoryOpHelper(writer, tensor, sampler, op, dst) { } /** Copy constructor */ - CLMemoryOpImage2dHelper(const CLMemoryOpImage2dHelper &) = default; + CLMemoryOpImage2dHelper(const CLMemoryOpImage2dHelper &) = delete; /** Assignment operator overload */ - CLMemoryOpImage2dHelper &operator=(const CLMemoryOpImage2dHelper &) = default; + CLMemoryOpImage2dHelper &operator=(const CLMemoryOpImage2dHelper &) = delete; // Methods overridden - void initialize(const CLTile *dst, const CLTile *x, const CLTile *z, const CLTile *b) override; + void initialize(const CLTile *x, const CLTile *z, const CLTile *b) override; void write_row(int32_t row_id, const std::string &coord_y) override; void finalize() override; private: - static bool validate(const CLKernelWriter *writer, - const ITensor *tensor, - const TensorSampler *sampler, - const Tensor3dMapper *mapper, - MemoryOperation op, - const CLTile *dst); + static bool validate(const CLKernelWriter *writer, + const ITensor *tensor, + const TensorSampler *sampler, + const Tensor3dMapper *mapper, + MemoryOperation op, + const TileView<CLTile> &dst); void out_of_bound_initialize_y(const std::string &coord); void out_of_bound_finalize_y(); diff --git a/compute_kernel_writer/src/cl/helpers/ICLMemoryOpHelper.h b/compute_kernel_writer/src/cl/helpers/ICLMemoryOpHelper.h index f46fee9750..a5b679ac03 100644 --- a/compute_kernel_writer/src/cl/helpers/ICLMemoryOpHelper.h +++ b/compute_kernel_writer/src/cl/helpers/ICLMemoryOpHelper.h @@ -28,6 +28,7 @@ #include "ckw/TensorSampler.h" #include "src/Tensor3dMapper.h" +#include "src/TileView.h" #include <cstdint> #include <memory> @@ -55,18 +56,24 @@ public: * @param[in] tensor @ref ckw::ITensor object to perform the memory operation on * @param[in] sampler @ref ckw::TensorSampler object that tells how to sample a tensor * @param[in] op The memory operation to be done (e.g. Load/Store) + * @param[in] dst The tile to perform the memory operation on */ - ICLMemoryOpHelper(CLKernelWriter *writer, ITensor *tensor, TensorSampler *sampler, MemoryOperation op) - : _writer(writer), _tensor(tensor), _sampler(sampler), _op(op) + ICLMemoryOpHelper(CLKernelWriter *writer, + ITensor *tensor, + TensorSampler *sampler, + MemoryOperation op, + const TileView<CLTile> &dst) + : _writer(writer), _tensor(tensor), _sampler(sampler), _op(op), _dst(dst) { - _mapper = std::make_unique<Tensor3dMapper>(tensor, sampler->format()); + _mapper = std::make_unique<Tensor3dMapper>(tensor, sampler->format()); + _ls_width_full = _dst.width(); } /** Copy constructor */ - ICLMemoryOpHelper(const ICLMemoryOpHelper &) = default; + ICLMemoryOpHelper(const ICLMemoryOpHelper &) = delete; /** Assignment operator overload */ - ICLMemoryOpHelper &operator=(const ICLMemoryOpHelper &) = default; + ICLMemoryOpHelper &operator=(const ICLMemoryOpHelper &) = delete; /** Destructor */ virtual ~ICLMemoryOpHelper() = default; @@ -75,12 +82,11 @@ public: * the batch offset as a tile object, and initializes the code inside * the writer object. * - * @param[in] dst tile object to perform the memory operation on * @param[in] x tile object that describes the x-coordinate of the tensor involved * @param[in] z tile object that describes the z-coordinate of the tensor involved * @param[in] b tile object that describes the batch offset of the tensor involved */ - virtual void initialize(const CLTile *dst, const CLTile *x, const CLTile *z, const CLTile *b) = 0; + virtual void initialize(const CLTile *x, const CLTile *z, const CLTile *b) = 0; /** Method that writes the actual code to the writer that performs the mentioned memory * operation on the tile initialized. It writes the code for a specific row given in the @@ -104,7 +110,7 @@ protected: TensorSampler *_sampler{nullptr}; MemoryOperation _op; std::unique_ptr<Tensor3dMapper> _mapper{nullptr}; - const CLTile *_dst{nullptr}; + TileView<CLTile> _dst{}; int32_t _ls_width_full{0}; std::string _coord_x{}; std::string _coord_z{}; @@ -112,4 +118,4 @@ protected: }; } // namespace ckw -#endif /* CKW_SRC_CL_HELPERS_ICLMEMORYOPHELPER_H */ +#endif // CKW_SRC_CL_HELPERS_ICLMEMORYOPHELPER_H diff --git a/compute_kernel_writer/src/types/ConstantData.cpp b/compute_kernel_writer/src/types/ConstantData.cpp index 67b1103860..6d15eab407 100644 --- a/compute_kernel_writer/src/types/ConstantData.cpp +++ b/compute_kernel_writer/src/types/ConstantData.cpp @@ -31,7 +31,7 @@ namespace ckw namespace { template <typename T> -inline typename std::enable_if<std::is_same<T, float>::value, std::string>::type to_str(T value) +typename std::enable_if<std::is_same<T, float>::value, std::string>::type to_str(T value) { std::stringstream ss; ss << std::scientific << std::setprecision(std::numeric_limits<T>::max_digits10) << value; @@ -39,14 +39,14 @@ inline typename std::enable_if<std::is_same<T, float>::value, std::string>::type } template <typename T> -inline typename std::enable_if<!std::is_same<T, float>::value && !std::is_same<T, bool>::value, std::string>::type +typename std::enable_if<!std::is_same<T, float>::value && !std::is_same<T, bool>::value, std::string>::type to_str(T value) { return std::to_string(value); } template <typename T> -inline typename std::enable_if<std::is_same<T, bool>::value, std::string>::type to_str(T value) +typename std::enable_if<std::is_same<T, bool>::value, std::string>::type to_str(T value) { return std::to_string((int)value); } @@ -72,6 +72,24 @@ ConstantData::ConstantData(std::initializer_list<std::initializer_list<T>> value } template <typename T> +ConstantData::ConstantData(const std::vector<std::vector<T>> &values, DataType data_type) : _data_type(data_type) +{ + CKW_ASSERT(validate<T>(data_type)); + CKW_ASSERT(values.size() > 0); + + for (auto value_arr : values) + { + // Each row must have the same number of elements + CKW_ASSERT(value_arr.size() == (*values.begin()).size()); + + StringVector vec; + std::transform(value_arr.begin(), value_arr.end(), std::back_inserter(vec), [](T val) { return to_str(val); }); + + _values.push_back(std::move(vec)); + } +} + +template <typename T> bool ConstantData::validate(DataType data_type) { switch (data_type) @@ -100,6 +118,10 @@ template ConstantData::ConstantData(std::initializer_list<std::initializer_list< template ConstantData::ConstantData(std::initializer_list<std::initializer_list<uint32_t>>, DataType); template ConstantData::ConstantData(std::initializer_list<std::initializer_list<bool>>, DataType); template ConstantData::ConstantData(std::initializer_list<std::initializer_list<float>>, DataType); +template ConstantData::ConstantData(const std::vector<std::vector<int32_t>> &, DataType); +template ConstantData::ConstantData(const std::vector<std::vector<uint32_t>> &, DataType); +template ConstantData::ConstantData(const std::vector<std::vector<bool>> &, DataType); +template ConstantData::ConstantData(const std::vector<std::vector<float>> &, DataType); template bool ConstantData::validate<int32_t>(DataType); template bool ConstantData::validate<uint32_t>(DataType); |