diff options
author | Gunes Bayir <gunes.bayir@arm.com> | 2023-08-17 11:04:02 +0100 |
---|---|---|
committer | Gunes Bayir <gunes.bayir@arm.com> | 2023-08-30 15:45:59 +0000 |
commit | d5f9a1cf9f0340f3e6bf9ff00156fc2adb1fdca9 (patch) | |
tree | af23cff1cb3a504ee51676cd9bfc74b75934fef2 /compute_kernel_writer/src/cl/CLKernelWriter.cpp | |
parent | 91cb7336400acc857e20086a23692f99fe11be9c (diff) | |
download | ComputeLibrary-d5f9a1cf9f0340f3e6bf9ff00156fc2adb1fdca9.tar.gz |
Implement indirect load for buffer and CLImage
Add KernelWriter API functions for loading from an indirect buffer
Resolves: COMPMID-6390
Signed-off-by: Gunes Bayir <gunes.bayir@arm.com>
Change-Id: I45dbf88b25ec5caf2b458657ef20aacac9924745
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10192
Reviewed-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'compute_kernel_writer/src/cl/CLKernelWriter.cpp')
-rw-r--r-- | compute_kernel_writer/src/cl/CLKernelWriter.cpp | 52 |
1 file changed, 41 insertions, 11 deletions
diff --git a/compute_kernel_writer/src/cl/CLKernelWriter.cpp b/compute_kernel_writer/src/cl/CLKernelWriter.cpp index a946b989d7..4074da7912 100644 --- a/compute_kernel_writer/src/cl/CLKernelWriter.cpp +++ b/compute_kernel_writer/src/cl/CLKernelWriter.cpp @@ -42,6 +42,7 @@ #include <algorithm> #include <cstdint> +#include <vector> namespace ckw { @@ -628,7 +629,7 @@ void CLKernelWriter::op_load(const TileOperand &tile_op, const TensorOperand &te const CLTile dilation_x({ { "1" } }, DataType::Int32); const CLTile dilation_y({ { "1" } }, DataType::Int32); - op_load_store(MemoryOperation::Load, tile_op, tensor_op, sampler, x, y, z, batch, dilation_x, dilation_y); + op_load_store(MemoryOperation::Load, tile_op, tensor_op, sampler, x, y, z, batch, dilation_x, dilation_y, false /* indirect buffer */); } void CLKernelWriter::op_load_dilated(const TileOperand &tile_op, const TensorOperand &tensor_op, TensorSampler &sampler, @@ -638,7 +639,7 @@ void CLKernelWriter::op_load_dilated(const TileOperand &tile_op, const TensorOpe const auto &dil_x_tile = to_cl_tile(dilation_x); const auto &dil_y_tile = to_cl_tile(dilation_y); - op_load_store(MemoryOperation::Load, tile_op, tensor_op, sampler, x, y, z, batch, dil_x_tile, dil_y_tile); + op_load_store(MemoryOperation::Load, tile_op, tensor_op, sampler, x, y, z, batch, dil_x_tile, dil_y_tile, false /* indirect buffer */); } void CLKernelWriter::op_store(const TensorOperand &tensor_op, const TileOperand &tile_op, TensorSampler &sampler, @@ -647,7 +648,7 @@ void CLKernelWriter::op_store(const TensorOperand &tensor_op, const TileOperand const CLTile dilation_x({ { "1" } }, DataType::Int32); const CLTile dilation_y({ { "1" } }, DataType::Int32); - op_load_store(MemoryOperation::Store, tile_op, tensor_op, sampler, x, y, z, batch, dilation_x, dilation_y); + op_load_store(MemoryOperation::Store, tile_op, tensor_op, sampler, x, y, z, batch, dilation_x, dilation_y, false /* indirect buffer */); } void CLKernelWriter::op_store_dilated(const 
TensorOperand &tensor_op, const TileOperand &tile_op, TensorSampler &sampler, @@ -657,16 +658,32 @@ void CLKernelWriter::op_store_dilated(const TensorOperand &tensor_op, const Tile const auto &dil_x_tile = to_cl_tile(dilation_x); const auto &dil_y_tile = to_cl_tile(dilation_y); - op_load_store(MemoryOperation::Store, tile_op, tensor_op, sampler, x, y, z, batch, dil_x_tile, dil_y_tile); + op_load_store(MemoryOperation::Store, tile_op, tensor_op, sampler, x, y, z, batch, dil_x_tile, dil_y_tile, false /* indirect buffer */); +} + +void CLKernelWriter::op_load_indirect(const TileOperand &tile_op, const TensorOperand &tensor_op, TensorSampler &sampler, + const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch) +{ + const CLTile dilation_x({ { "1" } }, DataType::Int32); + const CLTile dilation_y({ { "1" } }, DataType::Int32); + + op_load_store(MemoryOperation::Load, tile_op, tensor_op, sampler, x, y, z, batch, dilation_x, dilation_y, true /* indirect buffer */); } void CLKernelWriter::op_load_store(MemoryOperation op, const TileOperand &tile_op, const TensorOperand &tensor_op, TensorSampler &sampler, - const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch, - const CLTile &dilation_x, const CLTile &dilation_y) + const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch, + const CLTile &dilation_x, const CLTile &dilation_y, bool indirect_buffer) { CKW_UNUSED(dilation_x); + CKW_ASSERT(dilation_x.is_scalar()); + CKW_ASSERT(dilation_y.is_scalar()); CKW_ASSERT(dilation_x.scalar(0, 0).str == "((int)(1))"); // Dilation in x dimension is not implemented yet + if(indirect_buffer) + { + CKW_ASSERT(dilation_y.scalar(0,0).str == "((int)(1))" && dilation_x.scalar(0,0).str == "((int)(1))"); + } + ITensor &tensor = get_tensor(tensor_op); std::unique_ptr<ICLMemoryOpHelper> helper; @@ -689,18 +706,31 @@ void CLKernelWriter::op_load_store(MemoryOperation op, const TileOperand &tile_o 
const auto &z_tile = to_cl_tile(z); const auto &batch_tile = to_cl_tile(batch); + CKW_ASSERT(x_tile.is_scalar()); + CKW_ASSERT(z_tile.is_scalar()); + CKW_ASSERT_IF(indirect_buffer, y_tile.info().width() == 1); + CKW_ASSERT_IF(!indirect_buffer, y_tile.is_scalar()); + CKW_ASSERT(batch_tile.is_scalar()); + helper->initialize(&tile, &x_tile, &z_tile, &batch_tile); for(int row = 0; row < tile.info().height(); ++row) { - std::string coord_y = y_tile.scalar(0, 0).str + " + " + std::to_string(row); + if(!indirect_buffer) + { + std::string coord_y = y_tile.scalar(0, 0).str + " + " + std::to_string(row); + + if(dilation_y.scalar(0, 0).str != "((int)(1))") + { + coord_y += " * " + dilation_y.scalar(0, 0).str; + } - if(dilation_y.scalar(0, 0).str != "1") + helper->write_row(row, coord_y); + } + else { - coord_y += " * " + dilation_y.scalar(0, 0).str; + helper->write_row(row, y_tile.scalar(row, 0).str); } - - helper->write_row(row, coord_y); } helper->finalize(); |