aboutsummaryrefslogtreecommitdiff
path: root/compute_kernel_writer
diff options
context:
space:
mode:
authorJakub Sujak <jakub.sujak@arm.com>2023-07-31 13:36:58 +0100
committerJakub Sujak <jakub.sujak@arm.com>2023-08-07 08:44:17 +0000
commite1c96e7e6dbf5314676fc81831e2ccb34a031ea1 (patch)
treef69ee0ac5dd6b29de0041293b279804bcecf0df7 /compute_kernel_writer
parent78ce2730ecd2f1e666cdd10263bf054c0b740a9c (diff)
downloadComputeLibrary-e1c96e7e6dbf5314676fc81831e2ccb34a031ea1.tar.gz
Port DirectConv2d to CKW backend
Ports the direct convolution 2D kernel from the experimental Dynamic Fusion interface to use the new Compute Kernel Writer backend for OpenCL code generation. Support is for FP16/FP32 only. Resolves: COMPMID-6259 Change-Id: Ia8d7b9cb789737b22b1d877cd798a73eda0ce4ab Signed-off-by: Jakub Sujak <jakub.sujak@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10059 Reviewed-by: Gunes Bayir <gunes.bayir@arm.com> Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'compute_kernel_writer')
-rw-r--r--compute_kernel_writer/CMakeLists.txt4
-rw-r--r--compute_kernel_writer/prototype/include/ckw/KernelWriter.h29
-rw-r--r--compute_kernel_writer/prototype/include/ckw/TileOperand.h25
-rw-r--r--compute_kernel_writer/prototype/src/KernelWriter.cpp58
-rw-r--r--compute_kernel_writer/prototype/src/Prototype.h18
-rw-r--r--compute_kernel_writer/prototype/src/TileOperand.cpp57
6 files changed, 160 insertions, 31 deletions
diff --git a/compute_kernel_writer/CMakeLists.txt b/compute_kernel_writer/CMakeLists.txt
index 1e82f9c6b3..9a97563025 100644
--- a/compute_kernel_writer/CMakeLists.txt
+++ b/compute_kernel_writer/CMakeLists.txt
@@ -102,7 +102,7 @@ target_compile_options(ckw
${CKW_CXX_FLAGS}
"$<$<CXX_COMPILER_ID:GNU>:${GNU_WARNINGS}>"
"$<$<CONFIG:Debug>:${CKW_ASSERTS_OPTS}>"
- "$<$<BOOL:${CKW_ASSERTS}>:${CKW_ASSERTS_OPTS}>"
+ "$<$<BOOL:${CKW_ENABLE_ASSERTS}>:${CKW_ASSERTS_OPTS}>"
# Set CMAKE_CXX_FLAGS last so user can overwrite options
${CMAKE_CXX_FLAGS}
PRIVATE
@@ -113,7 +113,7 @@ target_compile_options(ckw
target_compile_definitions(ckw PUBLIC
$<$<CONFIG:Debug>:COMPUTE_KERNEL_WRITER_DEBUG_ENABLED>
$<$<CONFIG:Debug>:COMPUTE_KERNEL_WRITER_ASSERTS_ENABLED>
- $<$<BOOL:${CKW_ASSERTS}>:COMPUTE_KERNEL_WRITER_ASSERTS_ENABLED>
+ $<$<BOOL:${CKW_ENABLE_ASSERTS}>:COMPUTE_KERNEL_WRITER_ASSERTS_ENABLED>
$<$<BOOL:${CKW_ENABLE_OPENCL}>:COMPUTE_KERNEL_WRITER_OPENCL_ENABLED>
)
diff --git a/compute_kernel_writer/prototype/include/ckw/KernelWriter.h b/compute_kernel_writer/prototype/include/ckw/KernelWriter.h
index c116e62650..72f85c78aa 100644
--- a/compute_kernel_writer/prototype/include/ckw/KernelWriter.h
+++ b/compute_kernel_writer/prototype/include/ckw/KernelWriter.h
@@ -129,11 +129,38 @@ public:
/** Load the data from the tensor memory to the tile using the sampling information.
*
+ * @param[out] tile The tile to be loaded.
+ * @param[in] tensor The tensor to be read.
+ * @param[in] sampler The tensor sampling information.
+ * @param[in] dilation_y Dilation in the Y dimension.
+ */
+ void op_load(TileOperand &tile, const TensorOperand &tensor, const TensorTileSampler &sampler, const TileOperand &dilation_y = TileOperand("dil_y", 1));
+
+ /** Load the data from the tensor memory to the tile using the indirect buffer approach, according to the sampling information.
+ *
* @param[out] tile The tile to be loaded.
* @param[in] tensor The tensor to be read.
* @param[in] sampler The tensor sampling information.
*/
- void op_load(TileOperand &tile, TensorOperand &tensor, const TensorTileSampler &sampler);
+ void op_load_indirect(TileOperand &tile, const TensorOperand &tensor, const TensorTileSampler &sampler);
+
+ /** Construct an indirection buffer in @p tile containing the precalculated addresses of elements in the source tensor.
+ *
+ * @param[out] tile The tile to be loaded.
+ * @param[in] tensor The tensor to be read.
+ * @param[in] sampler The tensor sampling information.
+ * @param[in] x The X coordinate.
+ * @param[in] y The Y coordinate.
+ * @param[in] x_off Offset in the X dimension.
+ * @param[in] y_off Offset in the Y dimension.
+ */
+ void util_get_indirect_buffer(TileOperand &tile,
+ const TensorOperand &tensor,
+ const TensorTileSampler &sampler,
+ const TileOperand &x,
+ const TileOperand &y,
+ const TileOperand &x_off,
+ const TileOperand &y_off);
/** Store the tile to the tensor using the specified sampling information.
*
diff --git a/compute_kernel_writer/prototype/include/ckw/TileOperand.h b/compute_kernel_writer/prototype/include/ckw/TileOperand.h
index c071707a45..24ee373a24 100644
--- a/compute_kernel_writer/prototype/include/ckw/TileOperand.h
+++ b/compute_kernel_writer/prototype/include/ckw/TileOperand.h
@@ -37,6 +37,8 @@ namespace ckw
class Kernel;
+using TileContainer = std::vector<std::vector<std::string>>;
+
/** Tile operand which can be either scalar, vector or 2D tile. */
class TileOperand : public OperandBase
{
@@ -69,6 +71,13 @@ public:
*/
TileOperand(const ::std::string &name, float value);
+ /** Initialize a new instance of @ref TileOperand for compile-time constant variable.
+ *
+ * @param[in] name The name of the tile.
+ * @param[in] value The value of the tile.
+ */
+ TileOperand(const ::std::string &name, const ::std::vector<std::vector<std::string>> &value, DataType dt);
+
/** Prohibit copy of tile operand. */
TileOperand(const TileOperand &) = delete;
@@ -96,13 +105,21 @@ public:
/** Get the scalar value of the tile.
*
* The tile must have the shape of 1, 1 (i.e. scalar).
+ *
+ * @return Scalar value as a string.
+ */
+ std::string scalar_value() const;
+
+ /** Get the values of the tile.
+ *
+ * @return 2D container of values.
*/
- ScalarValue scalar_value() const;
+ const TileContainer &value() const;
private:
- TileInfo _info;
- ScalarValue _value{};
- bool _constant;
+ TileInfo _info;
+ TileContainer _value{};
+ bool _constant;
};
} // namespace ckw
diff --git a/compute_kernel_writer/prototype/src/KernelWriter.cpp b/compute_kernel_writer/prototype/src/KernelWriter.cpp
index 9122e518b4..f29cf12802 100644
--- a/compute_kernel_writer/prototype/src/KernelWriter.cpp
+++ b/compute_kernel_writer/prototype/src/KernelWriter.cpp
@@ -128,6 +128,10 @@ TileOperand &KernelWriter::declare_tile_operand(std::unique_ptr<TileOperand> ope
name,
prototype::TileInfo(info.data_type(), info.width(), info.height()));
}
+ else
+ {
+ _impl->declare_const_tile(name, operand.value(), operand.data_type());
+ }
return operand;
}
@@ -136,7 +140,7 @@ TileOperand &KernelWriter::declare_tile_operand(std::unique_ptr<TileOperand> ope
// Load and store
// =================================================================================================
-void KernelWriter::op_load(TileOperand &tile, TensorOperand &tensor, const TensorTileSampler &sampler)
+void KernelWriter::op_load(TileOperand &tile, const TensorOperand &tensor, const TensorTileSampler &sampler, const TileOperand &dilation_y)
{
prototype::TensorOperand impl_tensor(
tensor.name(),
@@ -152,9 +156,59 @@ void KernelWriter::op_load(TileOperand &tile, TensorOperand &tensor, const Tenso
auto impl_z = sampler.z().create_impl_operand(_impl.get());
auto impl_b = sampler.b().create_impl_operand(_impl.get());
+ auto impl_dilation_y = dilation_y.create_impl_operand(_impl.get());
+
+ auto impl_dst = tile.create_impl_operand(_impl.get());
+
+ _impl->op_load_immediate(impl_tensor, impl_dst, impl_x, impl_y, impl_z, impl_b, impl_dilation_y);
+}
+
+void KernelWriter::op_load_indirect(TileOperand &tile, const TensorOperand &tensor, const TensorTileSampler &sampler)
+{
+ prototype::TensorOperand impl_tensor(
+ tensor.name(),
+ prototype::GpuSampler{
+ sampler.format(),
+ prototype::to_gpu_tensor_storage(tensor.storage_type()),
+ sampler.address_mode_x(),
+ sampler.address_mode_y(),
+ sampler.address_mode_z() });
+
+ auto impl_x = sampler.x().create_impl_operand(_impl.get());
+ auto impl_y = sampler.y().create_impl_operand(_impl.get());
+ auto impl_z = sampler.z().create_impl_operand(_impl.get());
+ auto impl_b = sampler.b().create_impl_operand(_impl.get());
+
+ auto impl_dst = tile.create_impl_operand(_impl.get());
+
+ _impl->op_load_indirect(impl_tensor, impl_dst, impl_x, impl_y, impl_z, impl_b);
+}
+
+void KernelWriter::util_get_indirect_buffer(TileOperand &tile,
+ const TensorOperand &tensor,
+ const TensorTileSampler &sampler,
+ const TileOperand &x,
+ const TileOperand &y,
+ const TileOperand &x_off,
+ const TileOperand &y_off)
+{
+ prototype::TensorOperand impl_tensor(
+ tensor.name(),
+ prototype::GpuSampler{
+ sampler.format(),
+ prototype::to_gpu_tensor_storage(tensor.storage_type()),
+ sampler.address_mode_x(),
+ sampler.address_mode_y(),
+ sampler.address_mode_z() });
+
+ auto impl_x = x.create_impl_operand(_impl.get());
+ auto impl_y = y.create_impl_operand(_impl.get());
+ auto impl_x_off = x_off.create_impl_operand(_impl.get());
+ auto impl_y_off = y_off.create_impl_operand(_impl.get());
+
auto impl_dst = tile.create_impl_operand(_impl.get());
- _impl->op_load_immediate(impl_tensor, impl_dst, impl_x, impl_y, impl_z, impl_b);
+ _impl->util_get_indirect_buffer(impl_dst, impl_tensor, impl_x, impl_y, impl_x_off, impl_y_off);
}
void KernelWriter::op_store(TensorOperand &tensor, const TileOperand &tile, const TensorTileSampler &sampler)
diff --git a/compute_kernel_writer/prototype/src/Prototype.h b/compute_kernel_writer/prototype/src/Prototype.h
index a8dc7fbfdb..2b519471ac 100644
--- a/compute_kernel_writer/prototype/src/Prototype.h
+++ b/compute_kernel_writer/prototype/src/Prototype.h
@@ -3009,7 +3009,7 @@ private:
address += " + (";
address += x + ") * sizeof(" + dst_type + ")";
}
- if(y != "0" && (_mapper.is_one_component_y() != true))
+ if(y != "0")
{
const std::string stride_y = _mapper.tensor_component_stride_y();
address += " + (";
@@ -3249,7 +3249,7 @@ private:
std::string coord_x = "(" + x + ") >> 2";
std::string coord_y = "(";
- if(y != "0" && (_mapper.is_one_component_y() != true))
+ if(y != "0")
{
coord_y += y;
}
@@ -4024,13 +4024,6 @@ public:
_data->code += ", ";
_data->code += x_s->scalar(0, i).str;
_data->code += " >= 0);\n";
- // mi_0 = select(wxh, mi_0, y_s >= 0);
- _data->code += dst->scalar(0, i).str;
- _data->code += " = select(-1, ";
- _data->code += dst->scalar(0, i).str;
- _data->code += ", ";
- _data->code += y_s->scalar(0, i).str;
- _data->code += " >= 0);\n";
// mi_0 = select(wxh, mi_0, x_s < width);
_data->code += dst->scalar(0, i).str;
_data->code += " = select(-1, ";
@@ -4039,6 +4032,13 @@ public:
_data->code += x_s->scalar(0, i).str;
_data->code += " < ";
_data->code += width + ");\n";
+ // mi_0 = select(wxh, mi_0, y_s >= 0);
+ _data->code += dst->scalar(0, i).str;
+ _data->code += " = select(-1, ";
+ _data->code += dst->scalar(0, i).str;
+ _data->code += ", ";
+ _data->code += y_s->scalar(0, i).str;
+ _data->code += " >= 0);\n";
// mi_0 = select(wxh, mi_0, y_s < height);
_data->code += dst->scalar(0, i).str;
_data->code += " = select(-1, ";
diff --git a/compute_kernel_writer/prototype/src/TileOperand.cpp b/compute_kernel_writer/prototype/src/TileOperand.cpp
index fcb3cb6415..bf6a15b9df 100644
--- a/compute_kernel_writer/prototype/src/TileOperand.cpp
+++ b/compute_kernel_writer/prototype/src/TileOperand.cpp
@@ -30,22 +30,42 @@ namespace ckw
{
TileOperand::TileOperand(const std::string &name, const TileInfo &info)
- : OperandBase(name), _info(info), _value{ 0 }, _constant(false)
+ : OperandBase(name),
+ _info(info),
+ _value{ std::vector<std::string>{ "0" } },
+ _constant(false)
{
}
TileOperand::TileOperand(const std::string &name, DataType data_type)
- : OperandBase(name), _info(TileInfo{ data_type }), _value(0), _constant(false)
+ : OperandBase(name),
+ _info(TileInfo{ data_type }),
+ _value{ std::vector<std::string>{ "0" } },
+ _constant(false)
{
}
TileOperand::TileOperand(const std::string &name, int32_t value)
- : OperandBase(name), _info(TileInfo{ DataType::Int32 }), _value(value), _constant(true)
+ : OperandBase(name),
+ _info(TileInfo{ DataType::Int32 }),
+ _value{ std::vector<std::string>{ std::to_string(value) } },
+ _constant(true)
{
}
TileOperand::TileOperand(const std::string &name, float value)
- : OperandBase(name), _info(TileInfo{ DataType::Fp32 }), _value(value), _constant(true)
+ : OperandBase(name),
+ _info(TileInfo{ DataType::Fp32 }),
+ _value{ std::vector<std::string>{ std::to_string(value) } },
+ _constant(true)
+{
+}
+
+TileOperand::TileOperand(const std::string &name, const TileContainer &vals, DataType dt)
+ : OperandBase(name),
+ _info(TileInfo{ dt, static_cast<int32_t>(vals.size()), static_cast<int32_t>(vals[0].size()) }),
+ _value(vals),
+ _constant(true)
{
}
@@ -55,17 +75,23 @@ prototype::Operand TileOperand::create_impl_operand(prototype::IGpuKernelWriter
if(_constant)
{
- switch(_info.data_type())
+ if(is_scalar())
{
- case DataType::Int32:
- return prototype::Operand(std::to_string(_value.get<int32_t>()),
- prototype::OperandType::ScalarInt32);
+ switch(_info.data_type())
+ {
+ case DataType::Int32:
+ return prototype::Operand(_value[0][0], prototype::OperandType::ScalarInt32);
- case DataType::Fp32:
- return prototype::Operand(std::to_string(_value.get<float>()), prototype::OperandType::ScalarFp32);
+ case DataType::Fp32:
+ return prototype::Operand(_value[0][0], prototype::OperandType::ScalarFp32);
- default:
- CKW_ASSERT(false);
+ default:
+ CKW_ASSERT(false);
+ }
+ }
+ else
+ {
+ return prototype::Operand(name());
}
}
else
@@ -94,11 +120,16 @@ bool TileOperand::is_scalar() const
return _info.width() == 1 && _info.height() == 1;
}
-ScalarValue TileOperand::scalar_value() const
+std::string TileOperand::scalar_value() const
{
CKW_ASSERT(is_scalar());
CKW_ASSERT(is_constant());
+ return _value[0][0];
+}
+
+const TileContainer &TileOperand::value() const
+{
return _value;
}