From 0c19f59279a88384074635bf273a99001602ed21 Mon Sep 17 00:00:00 2001
From: Viet-Hoa Do
Date: Tue, 1 Aug 2023 14:42:41 +0100
Subject: Fix CL Tile operator

Resolves: COMPMID-6404

Signed-off-by: Viet-Hoa Do
Change-Id: I75aebe620567ed50817747589bbe8cfb63715a7b
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10036
Tested-by: Arm Jenkins
Reviewed-by: TeresaARM
Reviewed-by: Anitha Raj
Reviewed-by: Pablo Marquez Tello
Comments-Addressed: Arm Jenkins
Benchmark: Arm Jenkins
---
 src/core/CL/kernels/CLTileKernel.cpp | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

(limited to 'src/core/CL/kernels')

diff --git a/src/core/CL/kernels/CLTileKernel.cpp b/src/core/CL/kernels/CLTileKernel.cpp
index 9c678a3f7e..3e7015cfd2 100644
--- a/src/core/CL/kernels/CLTileKernel.cpp
+++ b/src/core/CL/kernels/CLTileKernel.cpp
@@ -80,11 +80,13 @@ void CLTileKernel::configure(const CLCompileContext &compile_context, const ICLT
     _input = input;
     _output = output;
 
-    const DataType data_type = input->info()->data_type();
-    const int vec_size_x = 16 / input->info()->element_size();
-    const int input_width_x = input->info()->tensor_shape().x();
-    const unsigned int offset = ceil_to_multiple(input_width_x, vec_size_x) - input_width_x;
-    const bool multi_access_x = (input_width_x / vec_size_x > 0);
+    const DataType data_type = input->info()->data_type();
+    const int vec_size_x = 16 / input->info()->element_size();
+    const int input_width_x = input->info()->tensor_shape().x();
+    const unsigned int input_width_ceil = ceil_to_multiple(input_width_x, vec_size_x);
+    const unsigned int input_width_tiles = input_width_ceil / vec_size_x;
+    const unsigned int offset = input_width_ceil - input_width_x;
+    const bool multi_access_x = (input_width_x / vec_size_x > 0);
 
     // Create kernel
     CLBuildOptions build_opts;
@@ -96,6 +98,7 @@ void CLTileKernel::configure(const CLCompileContext &compile_context, const ICLT
     build_opts.add_option("-DDST_DEPTH=" + support::cpp11::to_string(output->info()->dimension(2)));
     build_opts.add_option_if(multi_access_x, "-DOFFSET=" + support::cpp11::to_string(offset));
     build_opts.add_option_if(multi_access_x, "-DVEC_SIZE=" + support::cpp11::to_string(vec_size_x));
+    build_opts.add_option_if(multi_access_x, "-DSRC_WIDTH_TILES=" + support::cpp11::to_string(input_width_tiles));
     _kernel = create_kernel(compile_context, "tile", build_opts.options());
 
     // Configure window without padding
-- 
cgit v1.2.1