aboutsummaryrefslogtreecommitdiff
path: root/src/core/CL/kernels/CLReductionOperationKernel.cpp
diff options
context:
space:
mode:
author Michalis Spyrou <michalis.spyrou@arm.com> 2018-01-26 15:06:19 +0000
committer Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:44:23 +0000
commit f6402dd37092c842d1de9998b23640caf12f227b (patch)
tree 217284ffe083ea6af5cb5bc2e7f348090122ff16 /src/core/CL/kernels/CLReductionOperationKernel.cpp
parent bd0e61238b2126e990d7811750ad4511ec2ccbd1 (diff)
download ComputeLibrary-f6402dd37092c842d1de9998b23640caf12f227b.tar.gz
COMPMID-834 Fix arm_compute_nightly_validation getting killed
Changed CLReductionOperationKernel: each kernel now computes a 2D slice instead of a 1D slice. This reduces the memory footprint for a 4k input image from around 1.6 GB to a few MB. The large footprint was caused by the __local memory and was probably the cause of this bug.
Change-Id: I71ac71ff09b041c945a134177600f0f3475e48cf
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/117835
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Tested-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'src/core/CL/kernels/CLReductionOperationKernel.cpp')
-rw-r--r-- src/core/CL/kernels/CLReductionOperationKernel.cpp 14
1 files changed, 7 insertions, 7 deletions
diff --git a/src/core/CL/kernels/CLReductionOperationKernel.cpp b/src/core/CL/kernels/CLReductionOperationKernel.cpp
index 18a8e353d7..1dd5eb97ec 100644
--- a/src/core/CL/kernels/CLReductionOperationKernel.cpp
+++ b/src/core/CL/kernels/CLReductionOperationKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -118,8 +118,8 @@ void CLReductionOperationKernel::run(const Window &window, cl::CommandQueue &que
out_window.set(Window::DimX, Window::Dimension(0, 0, 0));
// Get first input and output slices
- Window in_slice = window.first_slice_window_1D();
- Window out_slice = out_window.first_slice_window_1D();
+ Window in_slice = window.first_slice_window_2D();
+ Window out_slice = out_window.first_slice_window_2D();
// Reshape window
const unsigned int border_width = ((in_slice.x().end() % 128) != 0) ? 128 - in_slice.x().end() % 128 : 0;
@@ -127,14 +127,14 @@ void CLReductionOperationKernel::run(const Window &window, cl::CommandQueue &que
// Set local sums buffer
unsigned int local_sum_size = _lws_hint[0] * _input->info()->element_size();
- _kernel.setArg(num_arguments_per_1D_tensor() * 2, local_sum_size, nullptr);
+ _kernel.setArg(num_arguments_per_2D_tensor() * 2, local_sum_size, nullptr);
do
{
unsigned int idx = 0;
- add_1D_tensor_argument(idx, _input, in_slice);
- add_1D_tensor_argument(idx, _output, out_slice);
+ add_2D_tensor_argument(idx, _input, in_slice);
+ add_2D_tensor_argument(idx, _output, out_slice);
enqueue(queue, *this, in_slice, _lws_hint);
}
- while(window.slide_window_slice_1D(in_slice) && window.slide_window_slice_1D(out_slice));
+ while(window.slide_window_slice_2D(in_slice) && window.slide_window_slice_2D(out_slice));
}