aboutsummaryrefslogtreecommitdiff
path: root/compute_kernel_writer/src/cl
diff options
context:
space:
mode:
Diffstat (limited to 'compute_kernel_writer/src/cl')
-rw-r--r--compute_kernel_writer/src/cl/CLHelpers.cpp43
-rw-r--r--compute_kernel_writer/src/cl/CLHelpers.h21
-rw-r--r--compute_kernel_writer/src/cl/CLKernelWriter.cpp146
-rw-r--r--compute_kernel_writer/src/cl/CLKernelWriter.h45
-rw-r--r--compute_kernel_writer/src/cl/CLTile.cpp4
5 files changed, 223 insertions, 36 deletions
diff --git a/compute_kernel_writer/src/cl/CLHelpers.cpp b/compute_kernel_writer/src/cl/CLHelpers.cpp
index 08108e383f..f62e1c28e6 100644
--- a/compute_kernel_writer/src/cl/CLHelpers.cpp
+++ b/compute_kernel_writer/src/cl/CLHelpers.cpp
@@ -21,10 +21,13 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#include "src/cl/CLHelpers.h"
+
#include "ckw/Error.h"
#include "ckw/types/DataType.h"
#include "ckw/types/TensorStorageType.h"
+#include "src/types/DataTypeHelpers.h"
namespace ckw
{
@@ -142,10 +145,46 @@ std::string cl_get_variable_storagetype_as_string(TensorStorageType storage)
return res;
}
+/** Translate a unary operation into its OpenCL spelling.
+ *
+ * The first element of the result is false when the operation is written as a
+ * prefix operator and true when it is written as a function call; the second
+ * element is the operator symbol or the function name.
+ *
+ * Operations without a mapping are reported through CKW_THROW_MSG.
+ */
+std::tuple<bool, std::string> cl_get_unary_op(UnaryOp op)
+{
+    using OpInfo = std::tuple<bool, std::string>;
+
+    switch(op)
+    {
+        // Prefix operators.
+        case UnaryOp::LogicalNot:
+            return OpInfo(false, "!");
+        case UnaryOp::BitwiseNot:
+            return OpInfo(false, "~");
+
+        // Function-style operations.
+        case UnaryOp::Exp:
+            return OpInfo(true, "exp");
+        case UnaryOp::Tanh:
+            return OpInfo(true, "tanh");
+        case UnaryOp::Sqrt:
+            return OpInfo(true, "sqrt");
+        case UnaryOp::Erf:
+            return OpInfo(true, "erf");
+        case UnaryOp::Fabs:
+            return OpInfo(true, "fabs");
+        case UnaryOp::Log:
+            return OpInfo(true, "log");
+        case UnaryOp::Round:
+            return OpInfo(true, "round");
+
+        default:
+            CKW_THROW_MSG("Unsupported unary operation!");
+    }
+}
+
std::string cl_data_type_rounded_up_to_valid_vector_width(DataType dt, int32_t width)
{
- std::string data_type;
- const int32_t w = cl_round_up_to_nearest_valid_vector_width(width);
+ std::string data_type;
+ const int32_t w = cl_round_up_to_nearest_valid_vector_width(width);
data_type += cl_get_variable_datatype_as_string(dt, 1);
if(w != 1)
{
diff --git a/compute_kernel_writer/src/cl/CLHelpers.h b/compute_kernel_writer/src/cl/CLHelpers.h
index 669424088e..3c1a7724e2 100644
--- a/compute_kernel_writer/src/cl/CLHelpers.h
+++ b/compute_kernel_writer/src/cl/CLHelpers.h
@@ -24,8 +24,11 @@
#ifndef CKW_SRC_CL_CLHELPERS_H
#define CKW_SRC_CL_CLHELPERS_H
+#include "ckw/types/Operators.h"
+
#include <cstdint>
#include <string>
+#include <tuple>
#include <vector>
/** OpenCL specific helper functions */
@@ -52,6 +55,24 @@ bool cl_validate_vector_length(int32_t len);
*/
std::string cl_get_variable_datatype_as_string(DataType dt, int32_t len);
+/** Return the assignment operator in OpenCL language.
+ *
+ * @param[in] op The assignment operator.
+ *
+ * @return The operator in OpenCL language as a string.
+ */
+std::string cl_get_assignment_op_as_string(AssignmentOp op);
+
+/** Return the information about the unary operation.
+ *
+ * The result contains:
+ * - is_func: true if it's a function and false if it's a unary operator in OpenCL language.
+ * - str: the function name or the operator in OpenCL language.
+ *
+ * @param[in] op The unary operator.
+ */
+std::tuple<bool, std::string> cl_get_unary_op(UnaryOp op);
+
/** Helper function to return the OpenCL vector size that accommodates the desired width
*
* @param[in] width The desired width
diff --git a/compute_kernel_writer/src/cl/CLKernelWriter.cpp b/compute_kernel_writer/src/cl/CLKernelWriter.cpp
index b4df5c5f50..33d16da926 100644
--- a/compute_kernel_writer/src/cl/CLKernelWriter.cpp
+++ b/compute_kernel_writer/src/cl/CLKernelWriter.cpp
@@ -23,6 +23,7 @@
*/
#include "src/cl/CLKernelWriter.h"
+
#include "ckw/Error.h"
#include "ckw/Kernel.h"
#include "ckw/TensorSampler.h"
@@ -37,6 +38,9 @@
#include "src/cl/helpers/CLMemoryOpImage2dHelper.h"
#include "src/cl/helpers/ICLMemoryOpHelper.h"
+#include "src/types/DataTypeHelpers.h"
+
+#include <algorithm>
#include <cstdint>
namespace ckw
@@ -106,7 +110,95 @@ std::unique_ptr<Kernel> CLKernelWriter::emit_kernel(const std::string &name)
return std::make_unique<Kernel>(TargetLanguage::OpenCL, arguments, code);
}
-void CLKernelWriter::comment(const std::string &text)
+/** Emit one `dst = src;` statement per row of the destination tile.
+ *
+ * A source that is one element wide is broadcast along x by prefixing it with
+ * a cast to the destination vector type.
+ */
+void CLKernelWriter::op_assign(const TileOperand &dst, const TileOperand &src)
+{
+    const auto &lhs = to_cl_tile(dst);
+    const auto &rhs = to_cl_tile(src);
+
+    const auto lhs_width  = lhs.info().width();
+    const auto lhs_height = lhs.info().height();
+    const auto rhs_width  = rhs.info().width();
+
+    const auto lhs_type_name = cl_get_variable_datatype_as_string(lhs.info().data_type(), lhs_width);
+
+    // The cast is only emitted when a scalar source feeds a vector destination.
+    const bool        needs_x_broadcast = (rhs_width == 1) && (lhs_width != 1);
+    const std::string cast_prefix       = needs_x_broadcast ? ("(" + lhs_type_name + ")") : std::string();
+
+    CKW_ASSERT_MSG(src_tile_type_matches(lhs, rhs), "Source and destination type must match.");
+    CKW_ASSERT_MSG(rhs.info().height() == lhs_height || rhs.info().height() == 1, "Tile height must match or source is broadcasting in y dimension.");
+    CKW_ASSERT_MSG(rhs_width == lhs_width || rhs_width == 1, "Tile width must match or source is broadcasting in x dimension.");
+
+    // Broadcasting on y dimension is automatic (see CLTile::vector).
+    for(int32_t row = 0; row < lhs_height; ++row)
+    {
+        append_code(lhs.vector(row).str, " = ", cast_prefix, rhs.vector(row).str, ";\n");
+    }
+}
+
+/** Emit one `dst = convert_<type>[_sat](src);` statement per row of the destination tile.
+ *
+ * The conversion is generated at the source width; a source that is one
+ * element wide is then broadcast along x with a cast to the destination
+ * vector type. The `_sat` suffix is appended for ConvertPolicy::Saturate.
+ */
+void CLKernelWriter::op_cast(const TileOperand &dst, const TileOperand &src, ConvertPolicy policy)
+{
+    const auto &lhs = to_cl_tile(dst);
+    const auto &rhs = to_cl_tile(src);
+
+    const auto lhs_width  = lhs.info().width();
+    const auto lhs_height = lhs.info().height();
+    const auto rhs_width  = rhs.info().width();
+    const auto lhs_type   = lhs.info().data_type();
+
+    // Destination element type at the source width (conversion) and at the destination width (broadcast cast).
+    const auto convert_type_name = cl_get_variable_datatype_as_string(lhs_type, rhs_width);
+    const auto lhs_type_name     = cl_get_variable_datatype_as_string(lhs_type, lhs_width);
+
+    const bool        saturate   = (policy == ConvertPolicy::Saturate);
+    const std::string convert_fn = "convert_" + convert_type_name + (saturate ? "_sat" : "");
+    CKW_ASSERT_IF(saturate, !is_data_type_float(lhs_type));
+
+    const bool        needs_x_broadcast = (rhs_width == 1) && (lhs_width != 1);
+    const std::string cast_prefix       = needs_x_broadcast ? ("(" + lhs_type_name + ")") : std::string();
+
+    CKW_ASSERT_MSG(rhs.info().data_type() != lhs.info().data_type(), "Source and destination type must be different.");
+    CKW_ASSERT_MSG(rhs.info().height() == lhs_height || rhs.info().height() == 1, "Tile height must match or source is broadcasting in y dimension.");
+    CKW_ASSERT_MSG(rhs_width == lhs_width || rhs_width == 1, "Tile width must match or source is broadcasting in x dimension.");
+
+    // Broadcasting on y dimension is automatic (see CLTile::vector).
+    for(int32_t row = 0; row < lhs_height; ++row)
+    {
+        append_code(lhs.vector(row).str, " = ", cast_prefix, convert_fn, "(", rhs.vector(row).str, ");\n");
+    }
+}
+
+/** Emit one `dst = <op>src;` or `dst = <op>(src);` statement per row of the destination tile.
+ *
+ * The spelling of the operation comes from cl_get_unary_op(): prefix operators
+ * are written directly in front of the source, function-style operations wrap
+ * the source in a call. A source that is one element wide is broadcast along x
+ * with a cast to the destination vector type.
+ *
+ * @param[in] dst The destination tile.
+ * @param[in] src The source tile; must have the same data type as @p dst.
+ * @param[in] op  The unary operation to apply.
+ */
+void CLKernelWriter::op_unary(const TileOperand &dst, const TileOperand &src, UnaryOp op)
+{
+    const auto &dst_tile = to_cl_tile(dst);
+    const auto &src_tile = to_cl_tile(src);
+
+    const auto dst_w = dst_tile.info().width();
+    const auto dst_h = dst_tile.info().height();
+    const auto src_w = src_tile.info().width();
+
+    const auto data_type_str   = cl_get_variable_datatype_as_string(dst_tile.info().data_type(), dst_w);
+    const auto broadcast_src_x = dst_w != 1 && src_w == 1;
+
+    const std::string src_prefix = broadcast_src_x ? "(" + data_type_str + ")" : "";
+
+    const auto  op_info    = cl_get_unary_op(op);
+    const auto  op_is_func = std::get<0>(op_info);
+    const auto &op_name    = std::get<1>(op_info);
+    const auto  op_prefix  = op_is_func ? op_name + "(" : op_name;
+    const auto  op_suffix  = op_is_func ? ")" : "";
+
+    CKW_ASSERT_MSG(src_tile.info().data_type() == dst_tile.info().data_type(), "Source and destination type must match.");
+    // Only the operator forms (logical/bitwise not) are restricted to integer tiles.
+    // Function-style operations such as exp or tanh must remain usable on floating-point tiles.
+    CKW_ASSERT_MSG(op_is_func || !is_data_type_float(src_tile.info().data_type()), "Logical and bitwise not only work with integer.");
+    CKW_ASSERT_MSG(src_tile.info().height() == dst_h || src_tile.info().height() == 1, "Tile height must match or source is broadcasting in y dimension.");
+    CKW_ASSERT_MSG(src_w == dst_w || src_w == 1, "Tile width must match or source is broadcasting in x dimension.");
+
+    // Broadcasting on y dimension is automatic (see CLTile::vector).
+    for(int32_t y = 0; y < dst_h; ++y)
+    {
+        append_code(dst_tile.vector(y).str, " = ", src_prefix, op_prefix, src_tile.vector(y).str, op_suffix, ";\n");
+    }
+}
+
+void CLKernelWriter::op_comment(const std::string &text)
{
#ifdef COMPUTE_KERNEL_WRITER_DEBUG_ENABLED
@@ -147,13 +239,24 @@ TileOperand CLKernelWriter::declare_tile(const std::string &name, const TileInfo
const int32_t width = tile_info.width();
const DataType data_type = tile_info.data_type();
+ CKW_ASSERT_MSG(
+ std::find_if(
+ _tiles.begin(), _tiles.end(),
+ [=](const std::unique_ptr<CLTile> &e)
+ {
+ return e->name() == fullname;
+ })
+ == _tiles.end(),
+ "Tile name must be unique.");
+
+ auto tile = std::make_unique<CLTile>(fullname, tile_info);
+
for(int32_t row = 0; row < height; ++row)
{
const std::string cl_type = cl_get_variable_datatype_as_string(data_type, width);
- append_code(cl_type, " ", fullname, std::to_string(row), ";\n");
+ append_code(cl_type, " ", tile->vector(row).str, ";\n");
}
- auto tile = std::make_unique<CLTile>(name, tile_info);
const auto operand = create_tile_operand(*tile);
_tiles.insert(std::move(tile));
@@ -169,10 +272,12 @@ void CLKernelWriter::op_write_raw_code(const std::string &raw_code)
const CLTile &CLKernelWriter::to_cl_tile(const TileOperand &operand)
{
const auto &tile = get_tile(operand);
+
#ifdef COMPUTE_KERNEL_WRITER_ASSERTS_ENABLED
// Check if the tile is a CLTile created by this kernel writer.
{
bool found = false;
+
for(const auto &t : _tiles)
{
if(&tile == t.get())
@@ -181,11 +286,13 @@ const CLTile &CLKernelWriter::to_cl_tile(const TileOperand &operand)
break;
}
}
+
if(!found)
{
for(const auto &t : _tensors)
{
const auto components = t->components();
+
for(const auto component : components)
{
if(&tile == &component->tile())
@@ -194,16 +301,23 @@ const CLTile &CLKernelWriter::to_cl_tile(const TileOperand &operand)
break;
}
}
+
+ if(found)
+ {
+ break;
+ }
}
}
+
CKW_ASSERT_MSG(found, "The tile is not found!");
}
#endif // COMPUTE_KERNEL_WRITER_ASSERTS_ENABLED
+
return static_cast<const CLTile &>(tile);
}
void CLKernelWriter::op_load(const TileOperand &tile_op, const TensorOperand &tensor_op, TensorSampler &sampler,
- const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch)
+ const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch)
{
const CLTile dilation_x("1", DataType::Int32);
const CLTile dilation_y("1", DataType::Int32);
@@ -212,8 +326,8 @@ void CLKernelWriter::op_load(const TileOperand &tile_op, const TensorOperand &te
}
void CLKernelWriter::op_load_dilated(const TileOperand &tile_op, const TensorOperand &tensor_op, TensorSampler &sampler,
- const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch,
- const TileOperand &dilation_x, const TileOperand &dilation_y)
+ const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch,
+ const TileOperand &dilation_x, const TileOperand &dilation_y)
{
const auto &dil_x_tile = to_cl_tile(dilation_x);
const auto &dil_y_tile = to_cl_tile(dilation_y);
@@ -222,7 +336,7 @@ void CLKernelWriter::op_load_dilated(const TileOperand &tile_op, const TensorOpe
}
void CLKernelWriter::op_store(const TensorOperand &tensor_op, const TileOperand &tile_op, TensorSampler &sampler,
- const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch)
+ const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch)
{
const CLTile dilation_x("1", DataType::Int32);
const CLTile dilation_y("1", DataType::Int32);
@@ -231,8 +345,8 @@ void CLKernelWriter::op_store(const TensorOperand &tensor_op, const TileOperand
}
void CLKernelWriter::op_store_dilated(const TensorOperand &tensor_op, const TileOperand &tile_op, TensorSampler &sampler,
- const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch,
- const TileOperand &dilation_x, const TileOperand &dilation_y)
+ const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch,
+ const TileOperand &dilation_x, const TileOperand &dilation_y)
{
const auto &dil_x_tile = to_cl_tile(dilation_x);
const auto &dil_y_tile = to_cl_tile(dilation_y);
@@ -241,11 +355,11 @@ void CLKernelWriter::op_store_dilated(const TensorOperand &tensor_op, const Tile
}
void CLKernelWriter::op_load_store(MemoryOperation op, const TileOperand &tile_op, const TensorOperand &tensor_op, TensorSampler &sampler,
- const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch,
- const CLTile &dilation_x, const CLTile &dilation_y)
+ const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch,
+ const CLTile &dilation_x, const CLTile &dilation_y)
{
CKW_UNUSED(dilation_x);
- CKW_ASSERT(dilation_x.scalar(0,0).str == "1"); // Dilation in x dimension is not implemented yet
+ CKW_ASSERT(dilation_x.scalar(0, 0).str == "1"); // Dilation in x dimension is not implemented yet
ITensor &tensor = get_tensor(tensor_op);
@@ -263,10 +377,10 @@ void CLKernelWriter::op_load_store(MemoryOperation op, const TileOperand &tile_o
CKW_THROW_MSG("Unsupported tensor storage");
}
- const auto &tile = to_cl_tile(tile_op);
- const auto &x_tile = to_cl_tile(x);
- const auto &y_tile = to_cl_tile(y);
- const auto &z_tile = to_cl_tile(z);
+ const auto &tile = to_cl_tile(tile_op);
+ const auto &x_tile = to_cl_tile(x);
+ const auto &y_tile = to_cl_tile(y);
+ const auto &z_tile = to_cl_tile(z);
const auto &batch_tile = to_cl_tile(batch);
helper->initialize(&tile, &x_tile, &z_tile, &batch_tile);
diff --git a/compute_kernel_writer/src/cl/CLKernelWriter.h b/compute_kernel_writer/src/cl/CLKernelWriter.h
index a40698d7bb..ea455a7fdd 100644
--- a/compute_kernel_writer/src/cl/CLKernelWriter.h
+++ b/compute_kernel_writer/src/cl/CLKernelWriter.h
@@ -57,13 +57,21 @@ public:
~CLKernelWriter();
// =============================================================================================
+ // Data processing
+ // =============================================================================================
+
+ void op_assign(const TileOperand &dst, const TileOperand &src) override;
+
+ void op_cast(const TileOperand &dst, const TileOperand &src, ConvertPolicy policy) override;
+
+ void op_unary(const TileOperand &dst, const TileOperand &src, UnaryOp op) override;
+
+ // =============================================================================================
// Misc
// =============================================================================================
- /** Similar to @ref KernelWriter::comment() */
- void comment(const std::string &text) override;
+ void op_comment(const std::string &text) override;
- /** Similar to @ref KernelWriter::op_write_raw_code() */
void op_write_raw_code(const std::string &raw_code) override;
// =============================================================================================
@@ -92,14 +100,16 @@ public:
*
* Similar to @ref KernelWriter::op_load()
*/
- void op_load(const TileOperand &tile_op, const TensorOperand &tensor_op, TensorSampler &sampler,
+ void op_load(
+ const TileOperand &tile_op, const TensorOperand &tensor_op, TensorSampler &sampler,
const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch) override;
/** Load the data from the tensor memory to the tile in a dilated way using the sampling information.
*
* Similar to @ref KernelWriter::op_load_dilated()
*/
- void op_load_dilated(const TileOperand &tile_op, const TensorOperand &tensor_op, TensorSampler &sampler,
+ void op_load_dilated(
+ const TileOperand &tile_op, const TensorOperand &tensor_op, TensorSampler &sampler,
const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch,
const TileOperand &dilation_x, const TileOperand &dilation_y) override;
@@ -107,18 +117,26 @@ public:
*
* Similar to @ref KernelWriter::op_store()
*/
- void op_store(const TensorOperand &tensor_op, const TileOperand &tile_op, TensorSampler &sampler,
+ void op_store(
+ const TensorOperand &tensor_op, const TileOperand &tile_op, TensorSampler &sampler,
const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch) override;
/** Store the data to the tensor memory from the tile in a dilated way using the sampling information.
*
* Similar to @ref KernelWriter::op_store_dilated()
*/
- void op_store_dilated(const TensorOperand &tensor_op, const TileOperand &tile_op, TensorSampler &sampler,
+ void op_store_dilated(
+ const TensorOperand &tensor_op, const TileOperand &tile_op, TensorSampler &sampler,
const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch,
const TileOperand &dilation_x, const TileOperand &dilation_y) override;
protected:
+ /** Return @ref CLTile object from the @ref TileOperand object.
+ *
+ * This function performs appropriate check before doing type casting.
+ */
+ const CLTile &to_cl_tile(const TileOperand &operand);
+
/** Append the specified code to the kernel body source code. */
template <typename T, typename... TArgs>
void append_code(T &&code, TArgs &&...args)
@@ -137,20 +155,15 @@ protected:
/** Get the current kernel body source code. */
const std::string &body_source_code() const;
-// For helper functions
+ // For helper functions
private:
- /** Return @ref CLTile object from the @ref TileOperand object.
- *
- * This function performs appropriate check before doing type casting.
- */
- const CLTile &to_cl_tile(const TileOperand &operand);
-
/** Helper function to consolidate all load/store logic in this class */
- void op_load_store(MemoryOperation op, const TileOperand &tile_op, const TensorOperand &tensor_op, TensorSampler &sampler,
+ void op_load_store(
+ MemoryOperation op, const TileOperand &tile_op, const TensorOperand &tensor_op, TensorSampler &sampler,
const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch,
const CLTile &dilation_x, const CLTile &dilation_y);
-// For attributes
+ // For attributes
private:
/** This string contains the kernel body source code, not the full CL source code.
* The full source code will only be generated when the user calls @ref KernelWriter::emit_kernel.
diff --git a/compute_kernel_writer/src/cl/CLTile.cpp b/compute_kernel_writer/src/cl/CLTile.cpp
index 013ac4c276..556db0f47b 100644
--- a/compute_kernel_writer/src/cl/CLTile.cpp
+++ b/compute_kernel_writer/src/cl/CLTile.cpp
@@ -210,7 +210,7 @@ std::string CLTile::create_var_name(int32_t row) const
// If a scalar variable, we do not append the row index
if(_info.height() > 1)
{
- var_name += "_";
+ var_name += "__";
var_name += std::to_string(row);
}
@@ -229,4 +229,4 @@ void CLTile::validate_tile_info(const TileInfo &info) const
CKW_ASSERT_MSG(info.data_type() != DataType::Unknown, "DataType::Unknown is not supported");
}
-} // namespace ckw \ No newline at end of file
+} // namespace ckw