aboutsummaryrefslogtreecommitdiff
path: root/compute_kernel_writer/src/cl
diff options
context:
space:
mode:
author: Gunes Bayir <gunes.bayir@arm.com> 2023-08-17 11:04:02 +0100
committer: Gunes Bayir <gunes.bayir@arm.com> 2023-08-30 15:45:59 +0000
commit: d5f9a1cf9f0340f3e6bf9ff00156fc2adb1fdca9 (patch)
tree: af23cff1cb3a504ee51676cd9bfc74b75934fef2 /compute_kernel_writer/src/cl
parent: 91cb7336400acc857e20086a23692f99fe11be9c (diff)
download: ComputeLibrary-d5f9a1cf9f0340f3e6bf9ff00156fc2adb1fdca9.tar.gz
Implement indirect load for buffer and CLImage
Add KernelWriter API functions for loading from an indirect buffer.

Resolves: COMPMID-6390
Signed-off-by: Gunes Bayir <gunes.bayir@arm.com>
Change-Id: I45dbf88b25ec5caf2b458657ef20aacac9924745
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10192
Reviewed-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'compute_kernel_writer/src/cl')
-rw-r--r-- compute_kernel_writer/src/cl/CLKernelWriter.cpp 52
-rw-r--r-- compute_kernel_writer/src/cl/CLKernelWriter.h 26
-rw-r--r-- compute_kernel_writer/src/cl/helpers/CLMemoryOpBufferHelper.cpp 6
-rw-r--r-- compute_kernel_writer/src/cl/helpers/CLMemoryOpImage2dHelper.cpp 7
4 files changed, 60 insertions, 31 deletions
diff --git a/compute_kernel_writer/src/cl/CLKernelWriter.cpp b/compute_kernel_writer/src/cl/CLKernelWriter.cpp
index a946b989d7..4074da7912 100644
--- a/compute_kernel_writer/src/cl/CLKernelWriter.cpp
+++ b/compute_kernel_writer/src/cl/CLKernelWriter.cpp
@@ -42,6 +42,7 @@
#include <algorithm>
#include <cstdint>
+#include <vector>
namespace ckw
{
@@ -628,7 +629,7 @@ void CLKernelWriter::op_load(const TileOperand &tile_op, const TensorOperand &te
const CLTile dilation_x({ { "1" } }, DataType::Int32);
const CLTile dilation_y({ { "1" } }, DataType::Int32);
- op_load_store(MemoryOperation::Load, tile_op, tensor_op, sampler, x, y, z, batch, dilation_x, dilation_y);
+ op_load_store(MemoryOperation::Load, tile_op, tensor_op, sampler, x, y, z, batch, dilation_x, dilation_y, false /* indirect buffer */);
}
void CLKernelWriter::op_load_dilated(const TileOperand &tile_op, const TensorOperand &tensor_op, TensorSampler &sampler,
@@ -638,7 +639,7 @@ void CLKernelWriter::op_load_dilated(const TileOperand &tile_op, const TensorOpe
const auto &dil_x_tile = to_cl_tile(dilation_x);
const auto &dil_y_tile = to_cl_tile(dilation_y);
- op_load_store(MemoryOperation::Load, tile_op, tensor_op, sampler, x, y, z, batch, dil_x_tile, dil_y_tile);
+ op_load_store(MemoryOperation::Load, tile_op, tensor_op, sampler, x, y, z, batch, dil_x_tile, dil_y_tile, false /* indirect buffer */);
}
void CLKernelWriter::op_store(const TensorOperand &tensor_op, const TileOperand &tile_op, TensorSampler &sampler,
@@ -647,7 +648,7 @@ void CLKernelWriter::op_store(const TensorOperand &tensor_op, const TileOperand
const CLTile dilation_x({ { "1" } }, DataType::Int32);
const CLTile dilation_y({ { "1" } }, DataType::Int32);
- op_load_store(MemoryOperation::Store, tile_op, tensor_op, sampler, x, y, z, batch, dilation_x, dilation_y);
+ op_load_store(MemoryOperation::Store, tile_op, tensor_op, sampler, x, y, z, batch, dilation_x, dilation_y, false /* indirect buffer */);
}
void CLKernelWriter::op_store_dilated(const TensorOperand &tensor_op, const TileOperand &tile_op, TensorSampler &sampler,
@@ -657,16 +658,32 @@ void CLKernelWriter::op_store_dilated(const TensorOperand &tensor_op, const Tile
const auto &dil_x_tile = to_cl_tile(dilation_x);
const auto &dil_y_tile = to_cl_tile(dilation_y);
- op_load_store(MemoryOperation::Store, tile_op, tensor_op, sampler, x, y, z, batch, dil_x_tile, dil_y_tile);
+ op_load_store(MemoryOperation::Store, tile_op, tensor_op, sampler, x, y, z, batch, dil_x_tile, dil_y_tile, false /* indirect buffer */);
+}
+
+void CLKernelWriter::op_load_indirect(const TileOperand &tile_op, const TensorOperand &tensor_op, TensorSampler &sampler,
+ const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch)
+{
+ const CLTile dilation_x({ { "1" } }, DataType::Int32);
+ const CLTile dilation_y({ { "1" } }, DataType::Int32);
+
+ op_load_store(MemoryOperation::Load, tile_op, tensor_op, sampler, x, y, z, batch, dilation_x, dilation_y, true /* indirect buffer */);
}
void CLKernelWriter::op_load_store(MemoryOperation op, const TileOperand &tile_op, const TensorOperand &tensor_op, TensorSampler &sampler,
- const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch,
- const CLTile &dilation_x, const CLTile &dilation_y)
+ const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch,
+ const CLTile &dilation_x, const CLTile &dilation_y, bool indirect_buffer)
{
CKW_UNUSED(dilation_x);
+ CKW_ASSERT(dilation_x.is_scalar());
+ CKW_ASSERT(dilation_y.is_scalar());
CKW_ASSERT(dilation_x.scalar(0, 0).str == "((int)(1))"); // Dilation in x dimension is not implemented yet
+ if(indirect_buffer)
+ {
+ CKW_ASSERT(dilation_y.scalar(0,0).str == "((int)(1))" && dilation_x.scalar(0,0).str == "((int)(1))");
+ }
+
ITensor &tensor = get_tensor(tensor_op);
std::unique_ptr<ICLMemoryOpHelper> helper;
@@ -689,18 +706,31 @@ void CLKernelWriter::op_load_store(MemoryOperation op, const TileOperand &tile_o
const auto &z_tile = to_cl_tile(z);
const auto &batch_tile = to_cl_tile(batch);
+ CKW_ASSERT(x_tile.is_scalar());
+ CKW_ASSERT(z_tile.is_scalar());
+ CKW_ASSERT_IF(indirect_buffer, y_tile.info().width() == 1);
+ CKW_ASSERT_IF(!indirect_buffer, y_tile.is_scalar());
+ CKW_ASSERT(batch_tile.is_scalar());
+
helper->initialize(&tile, &x_tile, &z_tile, &batch_tile);
for(int row = 0; row < tile.info().height(); ++row)
{
- std::string coord_y = y_tile.scalar(0, 0).str + " + " + std::to_string(row);
+ if(!indirect_buffer)
+ {
+ std::string coord_y = y_tile.scalar(0, 0).str + " + " + std::to_string(row);
+
+ if(dilation_y.scalar(0, 0).str != "((int)(1))")
+ {
+ coord_y += " * " + dilation_y.scalar(0, 0).str;
+ }
- if(dilation_y.scalar(0, 0).str != "1")
+ helper->write_row(row, coord_y);
+ }
+ else
{
- coord_y += " * " + dilation_y.scalar(0, 0).str;
+ helper->write_row(row, y_tile.scalar(row, 0).str);
}
-
- helper->write_row(row, coord_y);
}
helper->finalize();
diff --git a/compute_kernel_writer/src/cl/CLKernelWriter.h b/compute_kernel_writer/src/cl/CLKernelWriter.h
index c494847944..1e2e5dc910 100644
--- a/compute_kernel_writer/src/cl/CLKernelWriter.h
+++ b/compute_kernel_writer/src/cl/CLKernelWriter.h
@@ -131,40 +131,27 @@ public:
// Memory Operations
// =============================================================================================
- /** Load the data from the tensor memory to the tile using the sampling information.
- *
- * Similar to @ref KernelWriter::op_load()
- */
void op_load(
const TileOperand &tile_op, const TensorOperand &tensor_op, TensorSampler &sampler,
const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch) override;
- /** Load the data from the tensor memory to the tile in a dilated way using the sampling information.
- *
- * Similar to @ref KernelWriter::op_load_dilated()
- */
void op_load_dilated(
const TileOperand &tile_op, const TensorOperand &tensor_op, TensorSampler &sampler,
const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch,
const TileOperand &dilation_x, const TileOperand &dilation_y) override;
- /** Store the data to the tensor memory from the tile using the sampling information.
- *
- * Similar to @ref KernelWriter::op_store()
- */
void op_store(
const TensorOperand &tensor_op, const TileOperand &tile_op, TensorSampler &sampler,
const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch) override;
- /** Store the data to the tensor memory from the tile in a dilated way using the sampling information.
- *
- * Similar to @ref KernelWriter::op_store_dilated()
- */
void op_store_dilated(
const TensorOperand &tensor_op, const TileOperand &tile_op, TensorSampler &sampler,
const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch,
const TileOperand &dilation_x, const TileOperand &dilation_y) override;
+ void op_load_indirect(const TileOperand &tile_op, const TensorOperand &tensor_op, TensorSampler &sampler,
+ const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch) override;
+
protected:
/** Return @ref CLTile object from the @ref TileOperand object.
*
@@ -192,11 +179,10 @@ protected:
// For helper functions
private:
- /** Helper function to consolidate all load/store logic in this class */
- void op_load_store(
- MemoryOperation op, const TileOperand &tile_op, const TensorOperand &tensor_op, TensorSampler &sampler,
+ /** Helper method to consolidate all load/store logic in this class */
+ void op_load_store(MemoryOperation op, const TileOperand &tile_op, const TensorOperand &tensor_op, TensorSampler &sampler,
const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch,
- const CLTile &dilation_x, const CLTile &dilation_y);
+ const CLTile &dilation_x, const CLTile &dilation_y, bool indirect_buffer);
/** This function is the generic function to write both `if` and `else if` blocks.
*
diff --git a/compute_kernel_writer/src/cl/helpers/CLMemoryOpBufferHelper.cpp b/compute_kernel_writer/src/cl/helpers/CLMemoryOpBufferHelper.cpp
index e50418711e..f906bcd4b1 100644
--- a/compute_kernel_writer/src/cl/helpers/CLMemoryOpBufferHelper.cpp
+++ b/compute_kernel_writer/src/cl/helpers/CLMemoryOpBufferHelper.cpp
@@ -198,6 +198,9 @@ void CLMemoryOpBufferHelper::out_of_bound_initialize_y(const std::string &coord)
max = _mapper->dim_y().str;
_writer->op_write_raw_code("if(" + coord + " < " + max + ")\n{\n");
break;
+ case TensorSamplerAddressModeY::SkipLessThanZero:
+ _writer->op_write_raw_code("if(" + coord + " >= 0)\n{\n");
+ break;
case TensorSamplerAddressModeY::None:
break;
default:
@@ -216,6 +219,9 @@ void CLMemoryOpBufferHelper::out_of_bound_finalize_y(const std::string &dst)
_writer->op_write_raw_code(dst);
_writer->op_write_raw_code(" = 0.0f;\n}\n");
break;
+ case TensorSamplerAddressModeY::SkipLessThanZero:
+ _writer->op_write_raw_code("}\n");
+ break;
case TensorSamplerAddressModeY::None:
break;
default:
diff --git a/compute_kernel_writer/src/cl/helpers/CLMemoryOpImage2dHelper.cpp b/compute_kernel_writer/src/cl/helpers/CLMemoryOpImage2dHelper.cpp
index a5f0c17c16..55f88f4136 100644
--- a/compute_kernel_writer/src/cl/helpers/CLMemoryOpImage2dHelper.cpp
+++ b/compute_kernel_writer/src/cl/helpers/CLMemoryOpImage2dHelper.cpp
@@ -104,6 +104,9 @@ void CLMemoryOpImage2dHelper::out_of_bound_initialize_y(const std::string &coord
const TensorSamplerAddressModeY address_mode_y = _sampler->address_mode_y();
switch(address_mode_y)
{
+ case TensorSamplerAddressModeY::SkipLessThanZero:
+ _writer->op_write_raw_code("if(" + coord + " >= 0)\n{\n");
+ break;
case TensorSamplerAddressModeY::ClampToBorderMaxOnly:
case TensorSamplerAddressModeY::None:
break;
@@ -117,6 +120,9 @@ void CLMemoryOpImage2dHelper::out_of_bound_finalize_y()
const TensorSamplerAddressModeY address_mode_y = _sampler->address_mode_y();
switch(address_mode_y)
{
+ case TensorSamplerAddressModeY::SkipLessThanZero:
+ _writer->op_write_raw_code("}\n");
+ break;
case TensorSamplerAddressModeY::ClampToBorderMaxOnly:
case TensorSamplerAddressModeY::None:
break;
@@ -153,6 +159,7 @@ std::string CLMemoryOpImage2dHelper::to_ls_image2d_sampler() const
{
case TensorSamplerAddressModeY::None:
return "CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST";
+ case TensorSamplerAddressModeY::SkipLessThanZero:
case TensorSamplerAddressModeY::ClampToBorderMaxOnly:
return "CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST";
default: