From 343722b441e2be9a156745db2daab305edae0db6 Mon Sep 17 00:00:00 2001
From: Michalis Spyrou
Date: Tue, 5 Jun 2018 13:04:40 +0100
Subject: COMPMID-1143 - Excessive padding in CLReductionOperationKernel when configuring window

Now max padding is equal to 15 instead of 127.
If input width is less than 128 we decrease the number of threads in the WG.

Change-Id: I5ff0b6fd8cb46143ba49e745ec9ad01f691bdd80
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/134152
Tested-by: Jenkins
Reviewed-by: Georgios Pinitas
---
 src/core/CL/kernels/CLReductionOperationKernel.cpp | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'src/core/CL/kernels/CLReductionOperationKernel.cpp')

diff --git a/src/core/CL/kernels/CLReductionOperationKernel.cpp b/src/core/CL/kernels/CLReductionOperationKernel.cpp
index 355dc7948e..c44fced3e3 100644
--- a/src/core/CL/kernels/CLReductionOperationKernel.cpp
+++ b/src/core/CL/kernels/CLReductionOperationKernel.cpp
@@ -101,14 +101,18 @@ void CLReductionOperationKernel::configure(const ICLTensor *input, ICLTensor *ou
     ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), axis, op));
 
     const unsigned int num_elems_processed_per_iteration = 16;
-    const unsigned int border_width = ((input->info()->dimension(0) % 128) != 0) ? 128 - input->info()->dimension(0) % 128 : 0;
+    const unsigned int border_width = ((input->info()->dimension(0) % 16) != 0) ? 16 - input->info()->dimension(0) % 16 : 0;
+    const unsigned int num_of_threads = ((input->info()->dimension(0) + border_width) / 16);
 
     _input = input;
     _output = output;
     _reduction_axis = axis;
     _op = op;
-    _lws_hint = cl::NDRange(8);
-    _border_size = BorderSize(0, border_width, 0, 0);
+
+    // Set the number of WG based on the input size. If input width is < 128
+    // we can use fewer threads than 8.
+    _lws_hint = cl::NDRange(std::min(8U, num_of_threads));
+    _border_size = BorderSize(0, border_width, 0, 0);
 
     // Set build options
     std::set<std::string> build_opts;
@@ -164,7 +168,7 @@ void CLReductionOperationKernel::run(const Window &window, cl::CommandQueue &que
     Window out_slice = out_window.first_slice_window_2D();
 
     // Reshape window
-    const unsigned int border_width = ((in_slice.x().end() % 128) != 0) ? 128 - in_slice.x().end() % 128 : 0;
+    const unsigned int border_width = ((in_slice.x().end() % 16) != 0) ? 16 - in_slice.x().end() % 16 : 0;
     in_slice.set(Window::DimX, Window::Dimension(in_slice.x().start(), in_slice.x().end() + border_width, in_slice.x().step()));
 
     // Set local sums buffer
-- 
cgit v1.2.1