diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/core/CL/kernels/CLHistogramKernel.cpp | 16 |
-rw-r--r-- | src/core/NEON/kernels/NEHistogramKernel.cpp | 12 |
2 files changed, 15 insertions, 13 deletions
diff --git a/src/core/CL/kernels/CLHistogramKernel.cpp b/src/core/CL/kernels/CLHistogramKernel.cpp
index 87ee5fb74e..7b715abb36 100644
--- a/src/core/CL/kernels/CLHistogramKernel.cpp
+++ b/src/core/CL/kernels/CLHistogramKernel.cpp
@@ -115,18 +115,20 @@ void CLHistogramKernel::run(const Window &window, cl::CommandQueue &queue)
     ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
     ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(ICLKernel::window(), window);
 
-    if(_input->info()->dimension(0) < pixels_per_item)
-    {
-        return;
-    }
-
+    // TODO (COMPMID-679): Add CLMemFill
     _output->map(queue, true);
     ARM_COMPUTE_ERROR_ON(_output->buffer() == nullptr);
     memset(_output->buffer(), 0, _output->size());
     _output->unmap(queue);
 
-    Window slice = window.first_slice_window_2D();
-    cl::NDRange lws = cl::NDRange(local_x_size, 1);
+    if(_input->info()->dimension(0) < pixels_per_item)
+    {
+        return;
+    }
+
+    Window slice = window.first_slice_window_2D();
+    const unsigned int gws_x = (window.x().end() - window.x().start()) / window.x().step();
+    cl::NDRange lws = (local_x_size < gws_x) ? cl::NDRange(local_x_size, 1) : cl::NDRange(1, 1);
 
     do
     {
diff --git a/src/core/NEON/kernels/NEHistogramKernel.cpp b/src/core/NEON/kernels/NEHistogramKernel.cpp
index 6e402ae604..02de566f6a 100644
--- a/src/core/NEON/kernels/NEHistogramKernel.cpp
+++ b/src/core/NEON/kernels/NEHistogramKernel.cpp
@@ -87,8 +87,8 @@ void NEHistogramKernel::histogram_U8(Window win, const ThreadInfo &info)
         }
     };
 
-    const unsigned int x_start = win.x().start();
-    const unsigned int x_end = win.x().end();
+    const int x_start = win.x().start();
+    const int x_end = win.x().end();
 
     // Handle X dimension manually to split into two loops
     // First one will use vector operations, second one processes the left over
@@ -100,7 +100,7 @@ void NEHistogramKernel::histogram_U8(Window win, const ThreadInfo &info)
     // Calculate local histogram
     execute_window_loop(win, [&](const Coordinates &)
     {
-        unsigned int x = x_start;
+        int x = x_start;
 
         // Vector loop
         for(; x <= x_end - 8; x += 8)
@@ -136,8 +136,8 @@ void NEHistogramKernel::histogram_fixed_U8(Window win, const ThreadInfo &info)
 
     std::array<uint32_t, _max_range_size> local_hist{ { 0 } };
 
-    const unsigned int x_start = win.x().start();
-    const unsigned int x_end = win.x().end();
+    const int x_start = win.x().start();
+    const int x_end = win.x().end();
 
     // Handle X dimension manually to split into two loops
     // First one will use vector operations, second one processes the left over
@@ -149,7 +149,7 @@ void NEHistogramKernel::histogram_fixed_U8(Window win, const ThreadInfo &info)
     // Calculate local histogram
     execute_window_loop(win, [&](const Coordinates &)
     {
-        unsigned int x = x_start;
+        int x = x_start;
 
         // Vector loop
         for(; x <= x_end - 8; x += 8)