diff options
Diffstat (limited to 'src/core/CL/kernels')
-rw-r--r-- | src/core/CL/kernels/CLTileKernel.cpp | 13 |
1 file changed, 8 insertions, 5 deletions
diff --git a/src/core/CL/kernels/CLTileKernel.cpp b/src/core/CL/kernels/CLTileKernel.cpp index 9c678a3f7e..3e7015cfd2 100644 --- a/src/core/CL/kernels/CLTileKernel.cpp +++ b/src/core/CL/kernels/CLTileKernel.cpp @@ -80,11 +80,13 @@ void CLTileKernel::configure(const CLCompileContext &compile_context, const ICLT _input = input; _output = output; - const DataType data_type = input->info()->data_type(); - const int vec_size_x = 16 / input->info()->element_size(); - const int input_width_x = input->info()->tensor_shape().x(); - const unsigned int offset = ceil_to_multiple(input_width_x, vec_size_x) - input_width_x; - const bool multi_access_x = (input_width_x / vec_size_x > 0); + const DataType data_type = input->info()->data_type(); + const int vec_size_x = 16 / input->info()->element_size(); + const int input_width_x = input->info()->tensor_shape().x(); + const unsigned int input_width_ceil = ceil_to_multiple(input_width_x, vec_size_x); + const unsigned int input_width_tiles = input_width_ceil / vec_size_x; + const unsigned int offset = input_width_ceil - input_width_x; + const bool multi_access_x = (input_width_x / vec_size_x > 0); // Create kernel CLBuildOptions build_opts; @@ -96,6 +98,7 @@ void CLTileKernel::configure(const CLCompileContext &compile_context, const ICLT build_opts.add_option("-DDST_DEPTH=" + support::cpp11::to_string(output->info()->dimension(2))); build_opts.add_option_if(multi_access_x, "-DOFFSET=" + support::cpp11::to_string(offset)); build_opts.add_option_if(multi_access_x, "-DVEC_SIZE=" + support::cpp11::to_string(vec_size_x)); + build_opts.add_option_if(multi_access_x, "-DSRC_WIDTH_TILES=" + support::cpp11::to_string(input_width_tiles)); _kernel = create_kernel(compile_context, "tile", build_opts.options()); // Configure window without padding |