Diffstat (limited to 'src/core/CL/kernels/CLTileKernel.cpp')
-rw-r--r--  src/core/CL/kernels/CLTileKernel.cpp  47
1 file changed, 26 insertions, 21 deletions
diff --git a/src/core/CL/kernels/CLTileKernel.cpp b/src/core/CL/kernels/CLTileKernel.cpp
index c0c3d2e2ee..fa996c4008 100644
--- a/src/core/CL/kernels/CLTileKernel.cpp
+++ b/src/core/CL/kernels/CLTileKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -22,8 +22,11 @@
* SOFTWARE.
*/
#include "src/core/CL/kernels/CLTileKernel.h"
+
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "arm_compute/core/utils/StringUtils.h"
+
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
@@ -38,15 +41,13 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c
ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN);
ARM_COMPUTE_RETURN_ERROR_ON(multiples.size() > 4);
ARM_COMPUTE_RETURN_ERROR_ON(multiples.empty());
- ARM_COMPUTE_RETURN_ERROR_ON(std::any_of(multiples.begin(), multiples.end(), [](uint32_t e)
- {
- return e == 0;
- }));
+ ARM_COMPUTE_RETURN_ERROR_ON(std::any_of(multiples.begin(), multiples.end(), [](uint32_t e) { return e == 0; }));
// Validate output if initialized
- if(output->total_size() != 0)
+ if (output->total_size() != 0)
{
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(misc::shape_calculator::compute_tiled_shape(input->tensor_shape(), multiples), output->tensor_shape());
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(
+ misc::shape_calculator::compute_tiled_shape(input->tensor_shape(), multiples), output->tensor_shape());
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
}
@@ -54,9 +55,9 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c
}
} // namespace
-CLTileKernel::CLTileKernel()
- : _input(nullptr), _output(nullptr)
+CLTileKernel::CLTileKernel() : _input(nullptr), _output(nullptr)
{
+ _type = CLKernelType::ELEMENTWISE;
}
void CLTileKernel::configure(const ICLTensor *input, ICLTensor *output, const Multiples &multiples)
@@ -64,7 +65,10 @@ void CLTileKernel::configure(const ICLTensor *input, ICLTensor *output, const Mu
configure(CLKernelLibrary::get().get_compile_context(), input, output, multiples);
}
-void CLTileKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const Multiples &multiples)
+void CLTileKernel::configure(const CLCompileContext &compile_context,
+ const ICLTensor *input,
+ ICLTensor *output,
+ const Multiples &multiples)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
@@ -78,11 +82,13 @@ void CLTileKernel::configure(const CLCompileContext &compile_context, const ICLT
_input = input;
_output = output;
- const DataType data_type = input->info()->data_type();
- const int vec_size_x = 16 / input->info()->element_size();
- const int input_width_x = input->info()->tensor_shape().x();
- const unsigned int offset = ceil_to_multiple(input_width_x, vec_size_x) - input_width_x;
- const bool multi_access_x = (input_width_x / vec_size_x > 0);
+ const DataType data_type = input->info()->data_type();
+ const int vec_size_x = 16 / input->info()->element_size();
+ const int input_width_x = input->info()->tensor_shape().x();
+ const unsigned int input_width_ceil = ceil_to_multiple(input_width_x, vec_size_x);
+ const unsigned int input_width_tiles = input_width_ceil / vec_size_x;
+ const unsigned int offset = input_width_ceil - input_width_x;
+ const bool multi_access_x = (input_width_x / vec_size_x > 0);
// Create kernel
CLBuildOptions build_opts;
@@ -94,20 +100,20 @@ void CLTileKernel::configure(const CLCompileContext &compile_context, const ICLT
build_opts.add_option("-DDST_DEPTH=" + support::cpp11::to_string(output->info()->dimension(2)));
build_opts.add_option_if(multi_access_x, "-DOFFSET=" + support::cpp11::to_string(offset));
build_opts.add_option_if(multi_access_x, "-DVEC_SIZE=" + support::cpp11::to_string(vec_size_x));
+ build_opts.add_option_if(multi_access_x, "-DSRC_WIDTH_TILES=" + support::cpp11::to_string(input_width_tiles));
_kernel = create_kernel(compile_context, "tile", build_opts.options());
// Configure window without padding
Window win = calculate_max_window(*output->info());
- if(multi_access_x)
+ if (multi_access_x)
{
// If multi-access is enabled, no thread should cross the tile boundaries. This means we need
// as many threads as those to cover a single tile times multiples[0]. Note that if threads
// do not cross the boundaries of the tiles, they won't cross the boundaries of the last tile, and
// we don't need to pad the output
const unsigned int size_win_x = ceil_to_multiple(input->info()->dimension(0), vec_size_x) * multiples[0];
- win.set(Window::DimX,
- Window::Dimension(win.x().start(), size_win_x, vec_size_x));
+ win.set(Window::DimX, Window::Dimension(win.x().start(), size_win_x, vec_size_x));
}
ICLKernel::configure_internal(win);
@@ -116,7 +122,7 @@ void CLTileKernel::configure(const CLCompileContext &compile_context, const ICLT
_config_id = "tile";
_config_id += "_";
_config_id += lower_string(string_from_data_type(input->info()->data_type()));
- for(unsigned int i = 0; i < multiples.size(); ++i)
+ for (unsigned int i = 0; i < multiples.size(); ++i)
{
_config_id += "_";
_config_id += support::cpp11::to_string(input->info()->dimension(i));
@@ -145,7 +151,6 @@ void CLTileKernel::run(const Window &window, cl::CommandQueue &queue)
add_4D_tensor_argument(idx, _input, slice);
add_4D_tensor_argument(idx, _output, slice);
enqueue(queue, *this, slice, lws_hint());
- }
- while(collapsed.slide_window_slice_4D(slice));
+ } while (collapsed.slide_window_slice_4D(slice));
}
} // namespace arm_compute
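
For context, a minimal standalone sketch (not part of the patch) of the width/offset arithmetic from which the new SRC_WIDTH_TILES build option is derived in the hunk above. ceil_to_multiple is re-implemented locally for the sketch rather than taken from the library headers, and the FP16 input width of 21 elements is an illustrative assumption only.

#include <cstdio>

// Local stand-in for the library's ceil_to_multiple utility:
// rounds `value` up to the next multiple of `divisor`.
static unsigned int ceil_to_multiple(unsigned int value, unsigned int divisor)
{
    return ((value + divisor - 1) / divisor) * divisor;
}

int main()
{
    // Assumed example: FP16 input (element_size == 2) that is 21 elements wide.
    const unsigned int element_size  = 2;
    const unsigned int input_width_x = 21;

    const unsigned int vec_size_x        = 16 / element_size;                           // 8 elements per 16-byte access
    const unsigned int input_width_ceil  = ceil_to_multiple(input_width_x, vec_size_x); // 24
    const unsigned int input_width_tiles = input_width_ceil / vec_size_x;               // 3 vector accesses per row (-> SRC_WIDTH_TILES)
    const unsigned int offset            = input_width_ceil - input_width_x;            // 3 elements the last access rewinds by (-> OFFSET)
    const bool         multi_access_x    = (input_width_x / vec_size_x > 0);            // true: at least one full vector fits

    std::printf("vec_size_x=%u tiles=%u offset=%u multi_access=%d\n",
                vec_size_x, input_width_tiles, offset, multi_access_x);
    return 0;
}

With these numbers the window in the patch would span ceil_to_multiple(21, 8) * multiples[0] positions in X with a step of 8, so no work-item crosses a tile boundary and the output needs no padding, matching the comment in the configure() hunk.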