diff options
Diffstat (limited to 'compute_kernel_writer/src/cl/helpers')
5 files changed, 94 insertions, 70 deletions
diff --git a/compute_kernel_writer/src/cl/helpers/CLMemoryOpBufferHelper.cpp b/compute_kernel_writer/src/cl/helpers/CLMemoryOpBufferHelper.cpp index a98ebed8fa..7d16f35fbe 100644 --- a/compute_kernel_writer/src/cl/helpers/CLMemoryOpBufferHelper.cpp +++ b/compute_kernel_writer/src/cl/helpers/CLMemoryOpBufferHelper.cpp @@ -34,15 +34,16 @@ #include "src/cl/CLTile.h" #include "src/ITensor.h" #include "src/Tensor3dMapper.h" +#include "src/TileView.h" namespace ckw { -bool CLMemoryOpBufferHelper::validate(const CLKernelWriter *writer, - const ITensor *tensor, - const TensorSampler *sampler, - const Tensor3dMapper *mapper, - MemoryOperation op, - const CLTile *dst) +bool CLMemoryOpBufferHelper::validate(const CLKernelWriter *writer, + const ITensor *tensor, + const TensorSampler *sampler, + const Tensor3dMapper *mapper, + MemoryOperation op, + const TileView<CLTile> &dst) { CKW_UNUSED(writer, tensor, mapper, op, dst); @@ -100,17 +101,14 @@ bool CLMemoryOpBufferHelper::validate(const CLKernelWriter *writer, * The outermost block is x, then z and then y. This is why, if/else's covering for y are initialized * at each row write. In some addressing modes, such as None, no if/else conditions are written. */ -void CLMemoryOpBufferHelper::initialize(const CLTile *dst, const CLTile *x, const CLTile *z, const CLTile *b) +void CLMemoryOpBufferHelper::initialize(const CLTile *x, const CLTile *z, const CLTile *b) { - _dst = dst; - CKW_ASSERT(validate(_writer, _tensor, _sampler, _mapper.get(), _op, _dst)); - _ls_width_full = dst->info().width(); - _coord_x = x->scalar(0, 0).str; - _coord_z = z->scalar(0, 0).str; - _coord_b = b->scalar(0, 0).str; - _coord_orig_z = _coord_z; + _coord_x = x->scalar(0, 0).str; + _coord_z = z->scalar(0, 0).str; + _coord_b = b->scalar(0, 0).str; + _coord_orig_z = _coord_z; out_of_bound_initialize_x(_coord_x); out_of_bound_initialize_z(_coord_z); @@ -121,7 +119,7 @@ void CLMemoryOpBufferHelper::write_row(int32_t row_id, const std::string &coord_ // The only check required is on Y. out_of_bound_initialize_y(coord_y); - const std::string dst = _dst->vector(row_id).str; + const std::string dst = _dst.vector(row_id).str; const std::string address = to_buffer_address(_coord_x, coord_y, _coord_z, _coord_b); const std::string ls_buf = to_statement(_op, _ls_width_full, dst, address); @@ -133,10 +131,17 @@ void CLMemoryOpBufferHelper::write_row(int32_t row_id, const std::string &coord_ // The left over load/store will be written in the finalize stage if (_ls_width_part.size() != 0) { - int32_t col_start = 0; + int32_t col_start = 0; + const TileArea original_area = _dst.area(); + for (int32_t partial_width : _ls_width_part) { - const std::string dst = _dst->vector(row_id, col_start, partial_width).str; + // Set the active area + const TileArea area(original_area.row_start(), original_area.row_end(), col_start, + col_start + partial_width); + _dst.area(area); + + const std::string dst = _dst.vector(row_id).str; const std::string coord_x = _coord_x + " + " + std::to_string(col_start); const std::string address = to_buffer_address(coord_x, coord_y, _coord_z, _coord_b); const std::string statement = to_statement(_op, partial_width, dst, address); @@ -144,6 +149,8 @@ void CLMemoryOpBufferHelper::write_row(int32_t row_id, const std::string &coord_ col_start += partial_width; } + // Restore the original area + _dst.area(original_area); } } @@ -304,7 +311,7 @@ std::string CLMemoryOpBufferHelper::to_buffer_address(const std::string &x, CKW_ASSERT(tensor_storage == TensorStorageType::BufferUint8Ptr); const std::string ptr_buf = _tensor->storage(tensor_storage).val; - const std::string dst_type = cl_data_type_rounded_up_to_valid_vector_width(_dst->info().data_type(), 1); + const std::string dst_type = cl_data_type_rounded_up_to_valid_vector_width(_dst.data_type(), 1); std::string address; address += "(__global "; diff --git a/compute_kernel_writer/src/cl/helpers/CLMemoryOpBufferHelper.h b/compute_kernel_writer/src/cl/helpers/CLMemoryOpBufferHelper.h index 4e1a842fe1..a6b3272f32 100644 --- a/compute_kernel_writer/src/cl/helpers/CLMemoryOpBufferHelper.h +++ b/compute_kernel_writer/src/cl/helpers/CLMemoryOpBufferHelper.h @@ -22,8 +22,8 @@ * SOFTWARE. */ -#ifndef CKW_SRC_CL_CLMEMORYOPBUFFERHELPER_H -#define CKW_SRC_CL_CLMEMORYOPBUFFERHELPER_H +#ifndef CKW_SRC_CL_HELPERS_CLMEMORYOPBUFFERHELPER_H +#define CKW_SRC_CL_HELPERS_CLMEMORYOPBUFFERHELPER_H #include "src/cl/helpers/ICLMemoryOpHelper.h" @@ -37,6 +37,8 @@ namespace ckw // Forward Declarations class CLKernelWriter; class CLTile; +template <class CLTile> +class TileView; enum class MemoryOperation; /** Helper class to write memory operations (like load/store) in OpenCL @@ -45,19 +47,23 @@ class CLMemoryOpBufferHelper : public ICLMemoryOpHelper { public: /** Constructor similar to @ref ICLMemoryOpHelper() */ - CLMemoryOpBufferHelper(CLKernelWriter *writer, ITensor *tensor, TensorSampler *sampler, MemoryOperation op) - : ICLMemoryOpHelper(writer, tensor, sampler, op) + CLMemoryOpBufferHelper(CLKernelWriter *writer, + ITensor *tensor, + TensorSampler *sampler, + MemoryOperation op, + const TileView<CLTile> &dst) + : ICLMemoryOpHelper(writer, tensor, sampler, op, dst) { } /** Copy constructor */ - CLMemoryOpBufferHelper(const CLMemoryOpBufferHelper &) = default; + CLMemoryOpBufferHelper(const CLMemoryOpBufferHelper &) = delete; /** Assignment operator overload */ - CLMemoryOpBufferHelper &operator=(const CLMemoryOpBufferHelper &) = default; + CLMemoryOpBufferHelper &operator=(const CLMemoryOpBufferHelper &) = delete; // Methods overridden - void initialize(const CLTile *dst, const CLTile *x, const CLTile *z, const CLTile *b) override; + void initialize(const CLTile *x, const CLTile *z, const CLTile *b) override; void write_row(int32_t row_id, const std::string &coord_y) override; void finalize() override; @@ -78,12 +84,12 @@ private: std::vector<LeftoverDescriptor> _leftovers_x{}; std::string _coord_orig_z{}; - static bool validate(const CLKernelWriter *writer, - const ITensor *tensor, - const TensorSampler *sampler, - const Tensor3dMapper *mapper, - MemoryOperation op, - const CLTile *dst); + static bool validate(const CLKernelWriter *writer, + const ITensor *tensor, + const TensorSampler *sampler, + const Tensor3dMapper *mapper, + MemoryOperation op, + const TileView<CLTile> &dst); void out_of_bound_initialize_x(const std::string &coord); void out_of_bound_finalize_x(); @@ -99,4 +105,4 @@ private: }; } // namespace ckw -#endif /* CKW_SRC_CL_CLMEMORYOPBUFFERHELPER_H */ +#endif // CKW_SRC_CL_HELPERS_CLMEMORYOPBUFFERHELPER_H diff --git a/compute_kernel_writer/src/cl/helpers/CLMemoryOpImage2dHelper.cpp b/compute_kernel_writer/src/cl/helpers/CLMemoryOpImage2dHelper.cpp index b7d146bdee..f392cd89cc 100644 --- a/compute_kernel_writer/src/cl/helpers/CLMemoryOpImage2dHelper.cpp +++ b/compute_kernel_writer/src/cl/helpers/CLMemoryOpImage2dHelper.cpp @@ -33,18 +33,15 @@ #include "src/cl/CLTile.h" #include "src/ITensor.h" #include "src/Tensor3dMapper.h" +#include "src/TileView.h" namespace ckw { -void CLMemoryOpImage2dHelper::initialize(const CLTile *dst, const CLTile *x, const CLTile *z, const CLTile *b) +void CLMemoryOpImage2dHelper::initialize(const CLTile *x, const CLTile *z, const CLTile *b) { - CKW_ASSERT(validate(_writer, _tensor, _sampler, _mapper.get(), _op, dst)); - - _dst = dst; - _ls_width_full = dst->info().width(); - _coord_x = x->scalar(0, 0).str; - _coord_z = z->scalar(0, 0).str; - _coord_b = b->scalar(0, 0).str; + _coord_x = x->scalar(0, 0).str; + _coord_z = z->scalar(0, 0).str; + _coord_b = b->scalar(0, 0).str; } void CLMemoryOpImage2dHelper::write_row(int32_t row_id, const std::string &coord_y) @@ -52,7 +49,7 @@ void CLMemoryOpImage2dHelper::write_row(int32_t row_id, const std::string &coord // The only check required is on Y. out_of_bound_initialize_y(coord_y); - const std::string dst = _dst->vector(row_id).str; + const std::string dst = _dst.vector(row_id).str; const std::string sampler = to_ls_image2d_sampler(); const std::string coord = to_ls_image2d_address(_coord_x, coord_y, _coord_z, _coord_b); const std::string ls_buf = to_ls_image2d(_op, _ls_width_full, dst, sampler, coord); @@ -66,16 +63,16 @@ void CLMemoryOpImage2dHelper::finalize() { } -bool CLMemoryOpImage2dHelper::validate(const CLKernelWriter *writer, - const ITensor *tensor, - const TensorSampler *sampler, - const Tensor3dMapper *mapper, - MemoryOperation op, - const CLTile *dst) +bool CLMemoryOpImage2dHelper::validate(const CLKernelWriter *writer, + const ITensor *tensor, + const TensorSampler *sampler, + const Tensor3dMapper *mapper, + MemoryOperation op, + const TileView<CLTile> &dst) { CKW_UNUSED(writer, tensor, mapper); - if (dst->info().width() != 4) + if (dst.width() != 4) { return false; } @@ -95,7 +92,7 @@ bool CLMemoryOpImage2dHelper::validate(const CLKernelWriter *writer, { return false; } - if ((dst->info().data_type() != DataType::Fp32) && (dst->info().data_type() != DataType::Fp16)) + if ((dst.data_type() != DataType::Fp32) && (dst.data_type() != DataType::Fp16)) { return false; } @@ -143,10 +140,12 @@ std::string CLMemoryOpImage2dHelper::to_ls_image2d(MemoryOperation op, const std::string &address) const { CKW_UNUSED(vector_width); + CKW_ASSERT_MSG(_dst.data_type() == DataType::Fp32 || _dst.data_type() == DataType::Fp16, + "Image2d only supports floating-point data type"); const TensorStorageType tensor_storage = _sampler->storage(); const std::string image2d_obj = _tensor->storage(tensor_storage).val; - const std::string post_fix = _dst->info().data_type() == DataType::Fp32 ? "f" : "h"; + const std::string post_fix = _dst.data_type() == DataType::Fp32 ? "f" : "h"; switch (op) { diff --git a/compute_kernel_writer/src/cl/helpers/CLMemoryOpImage2dHelper.h b/compute_kernel_writer/src/cl/helpers/CLMemoryOpImage2dHelper.h index fd9b097a24..6c42c132d9 100644 --- a/compute_kernel_writer/src/cl/helpers/CLMemoryOpImage2dHelper.h +++ b/compute_kernel_writer/src/cl/helpers/CLMemoryOpImage2dHelper.h @@ -35,6 +35,8 @@ namespace ckw // Forward Declarations class CLKernelWriter; class CLTile; +template <class CLTile> +class TileView; enum class MemoryOperation; /** Helper class to write memory operations (like load/store) in OpenCL for Image2d type */ @@ -42,29 +44,33 @@ class CLMemoryOpImage2dHelper : public ICLMemoryOpHelper { public: /** Constructor similar to @ref ICLMemoryOpHelper() */ - CLMemoryOpImage2dHelper(CLKernelWriter *writer, ITensor *tensor, TensorSampler *sampler, MemoryOperation op) - : ICLMemoryOpHelper(writer, tensor, sampler, op) + CLMemoryOpImage2dHelper(CLKernelWriter *writer, + ITensor *tensor, + TensorSampler *sampler, + MemoryOperation op, + const TileView<CLTile> &dst) + : ICLMemoryOpHelper(writer, tensor, sampler, op, dst) { } /** Copy constructor */ - CLMemoryOpImage2dHelper(const CLMemoryOpImage2dHelper &) = default; + CLMemoryOpImage2dHelper(const CLMemoryOpImage2dHelper &) = delete; /** Assignment operator overload */ - CLMemoryOpImage2dHelper &operator=(const CLMemoryOpImage2dHelper &) = default; + CLMemoryOpImage2dHelper &operator=(const CLMemoryOpImage2dHelper &) = delete; // Methods overridden - void initialize(const CLTile *dst, const CLTile *x, const CLTile *z, const CLTile *b) override; + void initialize(const CLTile *x, const CLTile *z, const CLTile *b) override; void write_row(int32_t row_id, const std::string &coord_y) override; void finalize() override; private: - static bool validate(const CLKernelWriter *writer, - const ITensor *tensor, - const TensorSampler *sampler, - const Tensor3dMapper *mapper, - MemoryOperation op, - const CLTile *dst); + static bool validate(const CLKernelWriter *writer, + const ITensor *tensor, + const TensorSampler *sampler, + const Tensor3dMapper *mapper, + MemoryOperation op, + const TileView<CLTile> &dst); void out_of_bound_initialize_y(const std::string &coord); void out_of_bound_finalize_y(); diff --git a/compute_kernel_writer/src/cl/helpers/ICLMemoryOpHelper.h b/compute_kernel_writer/src/cl/helpers/ICLMemoryOpHelper.h index f46fee9750..a5b679ac03 100644 --- a/compute_kernel_writer/src/cl/helpers/ICLMemoryOpHelper.h +++ b/compute_kernel_writer/src/cl/helpers/ICLMemoryOpHelper.h @@ -28,6 +28,7 @@ #include "ckw/TensorSampler.h" #include "src/Tensor3dMapper.h" +#include "src/TileView.h" #include <cstdint> #include <memory> @@ -55,18 +56,24 @@ public: * @param[in] tensor @ref ckw::ITensor object to perform the memory operation on * @param[in] sampler @ref ckw::TensorSampler object that tells how to sample a tensor * @param[in] op The memory operation to be done (e.g. Load/Store) + * @param[in] dst The tile to perform the memory operation on */ - ICLMemoryOpHelper(CLKernelWriter *writer, ITensor *tensor, TensorSampler *sampler, MemoryOperation op) - : _writer(writer), _tensor(tensor), _sampler(sampler), _op(op) + ICLMemoryOpHelper(CLKernelWriter *writer, + ITensor *tensor, + TensorSampler *sampler, + MemoryOperation op, + const TileView<CLTile> &dst) + : _writer(writer), _tensor(tensor), _sampler(sampler), _op(op), _dst(dst) { - _mapper = std::make_unique<Tensor3dMapper>(tensor, sampler->format()); + _mapper = std::make_unique<Tensor3dMapper>(tensor, sampler->format()); + _ls_width_full = _dst.width(); } /** Copy constructor */ - ICLMemoryOpHelper(const ICLMemoryOpHelper &) = default; + ICLMemoryOpHelper(const ICLMemoryOpHelper &) = delete; /** Assignment operator overload */ - ICLMemoryOpHelper &operator=(const ICLMemoryOpHelper &) = default; + ICLMemoryOpHelper &operator=(const ICLMemoryOpHelper &) = delete; /** Destructor */ virtual ~ICLMemoryOpHelper() = default; @@ -75,12 +82,11 @@ public: * the batch offset as a tile object, and initializes the code inside * the writer object. * - * @param[in] dst tile object to perform the memory operation on * @param[in] x tile object that describes the x-coordinate of the tensor involved * @param[in] z tile object that describes the z-coordinate of the tensor involved * @param[in] b tile object that describes the batch offset of the tensor involved */ - virtual void initialize(const CLTile *dst, const CLTile *x, const CLTile *z, const CLTile *b) = 0; + virtual void initialize(const CLTile *x, const CLTile *z, const CLTile *b) = 0; /** Method that writes the actual code to the writer that performs the mentioned memory * operation on the tile initialized. It writes the code for a specific row given in the @@ -104,7 +110,7 @@ protected: TensorSampler *_sampler{nullptr}; MemoryOperation _op; std::unique_ptr<Tensor3dMapper> _mapper{nullptr}; - const CLTile *_dst{nullptr}; + TileView<CLTile> _dst{}; int32_t _ls_width_full{0}; std::string _coord_x{}; std::string _coord_z{}; @@ -112,4 +118,4 @@ protected: }; } // namespace ckw -#endif /* CKW_SRC_CL_HELPERS_ICLMEMORYOPHELPER_H */ +#endif // CKW_SRC_CL_HELPERS_ICLMEMORYOPHELPER_H |