aboutsummaryrefslogtreecommitdiff
path: root/compute_kernel_writer/src/cl/CLKernelWriter.cpp
diff options
context:
space:
mode:
authorGunes Bayir <gunes.bayir@arm.com>2023-08-17 11:04:02 +0100
committerGunes Bayir <gunes.bayir@arm.com>2023-08-30 15:45:59 +0000
commitd5f9a1cf9f0340f3e6bf9ff00156fc2adb1fdca9 (patch)
treeaf23cff1cb3a504ee51676cd9bfc74b75934fef2 /compute_kernel_writer/src/cl/CLKernelWriter.cpp
parent91cb7336400acc857e20086a23692f99fe11be9c (diff)
downloadComputeLibrary-d5f9a1cf9f0340f3e6bf9ff00156fc2adb1fdca9.tar.gz
Implement indirect load for buffer and CLImage
Add KernelWriter API functions for loading from an indirect buffer Resolves: COMPMID-6390 Signed-off-by: Gunes Bayir <gunes.bayir@arm.com> Change-Id: I45dbf88b25ec5caf2b458657ef20aacac9924745 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10192 Reviewed-by: Viet-Hoa Do <viet-hoa.do@arm.com> Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'compute_kernel_writer/src/cl/CLKernelWriter.cpp')
-rw-r--r--compute_kernel_writer/src/cl/CLKernelWriter.cpp52
1 files changed, 41 insertions, 11 deletions
diff --git a/compute_kernel_writer/src/cl/CLKernelWriter.cpp b/compute_kernel_writer/src/cl/CLKernelWriter.cpp
index a946b989d7..4074da7912 100644
--- a/compute_kernel_writer/src/cl/CLKernelWriter.cpp
+++ b/compute_kernel_writer/src/cl/CLKernelWriter.cpp
@@ -42,6 +42,7 @@
#include <algorithm>
#include <cstdint>
+#include <vector>
namespace ckw
{
@@ -628,7 +629,7 @@ void CLKernelWriter::op_load(const TileOperand &tile_op, const TensorOperand &te
const CLTile dilation_x({ { "1" } }, DataType::Int32);
const CLTile dilation_y({ { "1" } }, DataType::Int32);
- op_load_store(MemoryOperation::Load, tile_op, tensor_op, sampler, x, y, z, batch, dilation_x, dilation_y);
+ op_load_store(MemoryOperation::Load, tile_op, tensor_op, sampler, x, y, z, batch, dilation_x, dilation_y, false /* indirect buffer */);
}
void CLKernelWriter::op_load_dilated(const TileOperand &tile_op, const TensorOperand &tensor_op, TensorSampler &sampler,
@@ -638,7 +639,7 @@ void CLKernelWriter::op_load_dilated(const TileOperand &tile_op, const TensorOpe
const auto &dil_x_tile = to_cl_tile(dilation_x);
const auto &dil_y_tile = to_cl_tile(dilation_y);
- op_load_store(MemoryOperation::Load, tile_op, tensor_op, sampler, x, y, z, batch, dil_x_tile, dil_y_tile);
+ op_load_store(MemoryOperation::Load, tile_op, tensor_op, sampler, x, y, z, batch, dil_x_tile, dil_y_tile, false /* indirect buffer */);
}
void CLKernelWriter::op_store(const TensorOperand &tensor_op, const TileOperand &tile_op, TensorSampler &sampler,
@@ -647,7 +648,7 @@ void CLKernelWriter::op_store(const TensorOperand &tensor_op, const TileOperand
const CLTile dilation_x({ { "1" } }, DataType::Int32);
const CLTile dilation_y({ { "1" } }, DataType::Int32);
- op_load_store(MemoryOperation::Store, tile_op, tensor_op, sampler, x, y, z, batch, dilation_x, dilation_y);
+ op_load_store(MemoryOperation::Store, tile_op, tensor_op, sampler, x, y, z, batch, dilation_x, dilation_y, false /* indirect buffer */);
}
void CLKernelWriter::op_store_dilated(const TensorOperand &tensor_op, const TileOperand &tile_op, TensorSampler &sampler,
@@ -657,16 +658,32 @@ void CLKernelWriter::op_store_dilated(const TensorOperand &tensor_op, const Tile
const auto &dil_x_tile = to_cl_tile(dilation_x);
const auto &dil_y_tile = to_cl_tile(dilation_y);
- op_load_store(MemoryOperation::Store, tile_op, tensor_op, sampler, x, y, z, batch, dil_x_tile, dil_y_tile);
+ op_load_store(MemoryOperation::Store, tile_op, tensor_op, sampler, x, y, z, batch, dil_x_tile, dil_y_tile, false /* indirect buffer */);
+}
+
+void CLKernelWriter::op_load_indirect(const TileOperand &tile_op, const TensorOperand &tensor_op, TensorSampler &sampler,
+ const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch)
+{
+ const CLTile dilation_x({ { "1" } }, DataType::Int32);
+ const CLTile dilation_y({ { "1" } }, DataType::Int32);
+
+ op_load_store(MemoryOperation::Load, tile_op, tensor_op, sampler, x, y, z, batch, dilation_x, dilation_y, true /* indirect buffer */);
}
void CLKernelWriter::op_load_store(MemoryOperation op, const TileOperand &tile_op, const TensorOperand &tensor_op, TensorSampler &sampler,
- const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch,
- const CLTile &dilation_x, const CLTile &dilation_y)
+ const TileOperand &x, const TileOperand &y, const TileOperand &z, const TileOperand &batch,
+ const CLTile &dilation_x, const CLTile &dilation_y, bool indirect_buffer)
{
CKW_UNUSED(dilation_x);
+ CKW_ASSERT(dilation_x.is_scalar());
+ CKW_ASSERT(dilation_y.is_scalar());
CKW_ASSERT(dilation_x.scalar(0, 0).str == "((int)(1))"); // Dilation in x dimension is not implemented yet
+ if(indirect_buffer)
+ {
+ CKW_ASSERT(dilation_y.scalar(0,0).str == "((int)(1))" && dilation_x.scalar(0,0).str == "((int)(1))");
+ }
+
ITensor &tensor = get_tensor(tensor_op);
std::unique_ptr<ICLMemoryOpHelper> helper;
@@ -689,18 +706,31 @@ void CLKernelWriter::op_load_store(MemoryOperation op, const TileOperand &tile_o
const auto &z_tile = to_cl_tile(z);
const auto &batch_tile = to_cl_tile(batch);
+ CKW_ASSERT(x_tile.is_scalar());
+ CKW_ASSERT(z_tile.is_scalar());
+ CKW_ASSERT_IF(indirect_buffer, y_tile.info().width() == 1);
+ CKW_ASSERT_IF(!indirect_buffer, y_tile.is_scalar());
+ CKW_ASSERT(batch_tile.is_scalar());
+
helper->initialize(&tile, &x_tile, &z_tile, &batch_tile);
for(int row = 0; row < tile.info().height(); ++row)
{
- std::string coord_y = y_tile.scalar(0, 0).str + " + " + std::to_string(row);
+ if(!indirect_buffer)
+ {
+ std::string coord_y = y_tile.scalar(0, 0).str + " + " + std::to_string(row);
+
+ if(dilation_y.scalar(0, 0).str != "((int)(1))")
+ {
+ coord_y += " * " + dilation_y.scalar(0, 0).str;
+ }
- if(dilation_y.scalar(0, 0).str != "1")
+ helper->write_row(row, coord_y);
+ }
+ else
{
- coord_y += " * " + dilation_y.scalar(0, 0).str;
+ helper->write_row(row, y_tile.scalar(row, 0).str);
}
-
- helper->write_row(row, coord_y);
}
helper->finalize();