Diffstat (limited to 'compute_kernel_writer/src/cl/helpers/CLMemoryOpBufferHelper.cpp')
-rw-r--r--  compute_kernel_writer/src/cl/helpers/CLMemoryOpBufferHelper.cpp | 79
1 file changed, 44 insertions(+), 35 deletions(-)
diff --git a/compute_kernel_writer/src/cl/helpers/CLMemoryOpBufferHelper.cpp b/compute_kernel_writer/src/cl/helpers/CLMemoryOpBufferHelper.cpp
index f906bcd4b1..a98ebed8fa 100644
--- a/compute_kernel_writer/src/cl/helpers/CLMemoryOpBufferHelper.cpp
+++ b/compute_kernel_writer/src/cl/helpers/CLMemoryOpBufferHelper.cpp
@@ -28,20 +28,25 @@
#include "ckw/types/MemoryOperation.h"
#include "ckw/types/TensorStorageType.h"
-#include "src/ITensor.h"
-#include "src/Tensor3dMapper.h"
#include "src/cl/CLHelpers.h"
#include "src/cl/CLKernelWriter.h"
#include "src/cl/CLTensorArgument.h"
#include "src/cl/CLTile.h"
+#include "src/ITensor.h"
+#include "src/Tensor3dMapper.h"
namespace ckw
{
-bool CLMemoryOpBufferHelper::validate(const CLKernelWriter *writer, const ITensor *tensor, const TensorSampler *sampler, const Tensor3dMapper *mapper, MemoryOperation op, const CLTile *dst)
+bool CLMemoryOpBufferHelper::validate(const CLKernelWriter *writer,
+ const ITensor *tensor,
+ const TensorSampler *sampler,
+ const Tensor3dMapper *mapper,
+ MemoryOperation op,
+ const CLTile *dst)
{
CKW_UNUSED(writer, tensor, mapper, op, dst);
- if(sampler->storage() != TensorStorageType::BufferUint8Ptr)
+ if (sampler->storage() != TensorStorageType::BufferUint8Ptr)
{
return false;
}
@@ -97,15 +102,15 @@ bool CLMemoryOpBufferHelper::validate(const CLKernelWriter *writer, const ITenso
*/
void CLMemoryOpBufferHelper::initialize(const CLTile *dst, const CLTile *x, const CLTile *z, const CLTile *b)
{
- _dst = dst;
+ _dst = dst;
CKW_ASSERT(validate(_writer, _tensor, _sampler, _mapper.get(), _op, _dst));
_ls_width_full = dst->info().width();
- _coord_x = x->scalar(0, 0).str;
- _coord_z = z->scalar(0, 0).str;
- _coord_b = b->scalar(0, 0).str;
- _coord_orig_z = _coord_z;
+ _coord_x = x->scalar(0, 0).str;
+ _coord_z = z->scalar(0, 0).str;
+ _coord_b = b->scalar(0, 0).str;
+ _coord_orig_z = _coord_z;
out_of_bound_initialize_x(_coord_x);
out_of_bound_initialize_z(_coord_z);
@@ -126,10 +131,10 @@ void CLMemoryOpBufferHelper::write_row(int32_t row_id, const std::string &coord_
out_of_bound_finalize_y(dst);
// The left over load/store will be written in the finalize stage
- if(_ls_width_part.size() != 0)
+ if (_ls_width_part.size() != 0)
{
int32_t col_start = 0;
- for(int32_t partial_width : _ls_width_part)
+ for (int32_t partial_width : _ls_width_part)
{
const std::string dst = _dst->vector(row_id, col_start, partial_width).str;
const std::string coord_x = _coord_x + " + " + std::to_string(col_start);
@@ -150,13 +155,13 @@ void CLMemoryOpBufferHelper::finalize()
void CLMemoryOpBufferHelper::out_of_bound_initialize_x(const std::string &coord)
{
- if(_sampler->address_mode_x() == TensorSamplerAddressModeX::OverlappingMin)
+ if (_sampler->address_mode_x() == TensorSamplerAddressModeX::OverlappingMin)
{
- TensorInfo tensor_info = _tensor->info();
- TensorShape shape = tensor_info.shape();
+ TensorInfo tensor_info = _tensor->info();
+ TensorShape shape = tensor_info.shape();
_ls_width_part = cl_decompose_vector_width(shape[0] % _ls_width_full);
- if(_ls_width_part.size() != 0)
+ if (_ls_width_part.size() != 0)
{
_writer->op_write_raw_code("if(" + coord + " > 0)\n{\n");
}
@@ -165,14 +170,14 @@ void CLMemoryOpBufferHelper::out_of_bound_initialize_x(const std::string &coord)
void CLMemoryOpBufferHelper::out_of_bound_finalize_x()
{
- if(_sampler->address_mode_x() == TensorSamplerAddressModeX::OverlappingMin)
+ if (_sampler->address_mode_x() == TensorSamplerAddressModeX::OverlappingMin)
{
- if(_ls_width_part.size() != 0)
+ if (_ls_width_part.size() != 0)
{
_writer->op_write_raw_code("}\nelse\n{\n");
out_of_bound_initialize_z(_coord_orig_z);
- for(LeftoverDescriptor leftover_desc : _leftovers_x)
+ for (LeftoverDescriptor leftover_desc : _leftovers_x)
{
out_of_bound_initialize_y(leftover_desc.coord);
_writer->op_write_raw_code(leftover_desc.statement);
@@ -191,7 +196,7 @@ void CLMemoryOpBufferHelper::out_of_bound_initialize_y(const std::string &coord)
const TensorSamplerAddressModeY address_mode_y = _sampler->address_mode_y();
- switch(address_mode_y)
+ switch (address_mode_y)
{
case TensorSamplerAddressModeY::ClampToBorderMaxOnly:
// Not to be moved outside the case because it marks the relevant tensor component as used even if we don't use the variable
@@ -212,7 +217,7 @@ void CLMemoryOpBufferHelper::out_of_bound_finalize_y(const std::string &dst)
{
const TensorSamplerAddressModeY address_mode_y = _sampler->address_mode_y();
- switch(address_mode_y)
+ switch (address_mode_y)
{
case TensorSamplerAddressModeY::ClampToBorderMaxOnly:
_writer->op_write_raw_code("}\nelse\n{\n");
@@ -234,7 +239,7 @@ void CLMemoryOpBufferHelper::out_of_bound_initialize_z(const std::string &coord)
CKW_UNUSED(coord);
const TensorSamplerAddressModeZ address_mode_z = _sampler->address_mode_z();
- switch(address_mode_z)
+ switch (address_mode_z)
{
case TensorSamplerAddressModeZ::None:
break;
@@ -247,7 +252,7 @@ void CLMemoryOpBufferHelper::out_of_bound_finalize_z()
{
const TensorSamplerAddressModeZ address_mode_z = _sampler->address_mode_z();
- switch(address_mode_z)
+ switch (address_mode_z)
{
case TensorSamplerAddressModeZ::None:
break;
@@ -256,13 +261,15 @@ void CLMemoryOpBufferHelper::out_of_bound_finalize_z()
}
}
-std::string CLMemoryOpBufferHelper::to_statement(MemoryOperation op, int32_t vector_width, const std::string &data,
- const std::string &address) const
+std::string CLMemoryOpBufferHelper::to_statement(MemoryOperation op,
+ int32_t vector_width,
+ const std::string &data,
+ const std::string &address) const
{
- switch(op)
+ switch (op)
{
case MemoryOperation::Load:
- if(vector_width != 1)
+ if (vector_width != 1)
{
return data + " = vload" + std::to_string(vector_width) + "(0, " + address + ")";
}
@@ -272,7 +279,7 @@ std::string CLMemoryOpBufferHelper::to_statement(MemoryOperation op, int32_t vec
}
break;
case MemoryOperation::Store:
- if(vector_width != 1)
+ if (vector_width != 1)
{
return "vstore" + std::to_string(vector_width) + "(" + data + ", 0, " + address + ")";
}
@@ -288,26 +295,28 @@ std::string CLMemoryOpBufferHelper::to_statement(MemoryOperation op, int32_t vec
return "";
}
-std::string CLMemoryOpBufferHelper::to_buffer_address(const std::string &x, const std::string &y, const std::string &z,
- const std::string &b) const
+std::string CLMemoryOpBufferHelper::to_buffer_address(const std::string &x,
+ const std::string &y,
+ const std::string &z,
+ const std::string &b) const
{
TensorStorageType tensor_storage = _sampler->storage();
CKW_ASSERT(tensor_storage == TensorStorageType::BufferUint8Ptr);
- const std::string ptr_buf = _tensor->storage(tensor_storage).val;
- const std::string dst_type = cl_data_type_rounded_up_to_valid_vector_width(_dst->info().data_type(), 1);
+ const std::string ptr_buf = _tensor->storage(tensor_storage).val;
+ const std::string dst_type = cl_data_type_rounded_up_to_valid_vector_width(_dst->info().data_type(), 1);
std::string address;
address += "(__global ";
address += dst_type;
address += "*)(";
address += ptr_buf;
- if(x != "0" && (_mapper->dim_x().str != "1"))
+ if (x != "0" && (_mapper->dim_x().str != "1"))
{
address += " + (";
address += x + ") * sizeof(" + dst_type + ")";
}
- if(y != "0")
+ if (y != "0")
{
const std::string stride_y = _mapper->stride_y().str;
address += " + (";
@@ -315,7 +324,7 @@ std::string CLMemoryOpBufferHelper::to_buffer_address(const std::string &x, cons
address += " * ";
address += stride_y;
}
- if(z != "0" && (_mapper->dim_z().str != "1"))
+ if (z != "0" && (_mapper->dim_z().str != "1"))
{
const std::string stride_z = _mapper->stride_z().str;
address += " + (";
@@ -323,7 +332,7 @@ std::string CLMemoryOpBufferHelper::to_buffer_address(const std::string &x, cons
address += " * ";
address += stride_z;
}
- if(b != "0" && (_mapper->dim_batch().str != "1"))
+ if (b != "0" && (_mapper->dim_batch().str != "1"))
{
const std::string stride_b = _mapper->stride_batch().str;
address += " + (";