about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Manuel Bottini <manuel.bottini@arm.com> 2020-01-28 17:25:48 +0000
committer: Manuel Bottini <manuel.bottini@arm.com> 2020-02-03 16:55:07 +0000
commit: 5c829cab49b0dcffd893065af05f0bf028e7a548 (patch)
tree: e534e4377985499d6af7ab2c3ffd8b96af2c9ea0
parent: 94672fb2af6535adc6ea7fe8b8498580ad8cf3f4 (diff)
download: ComputeLibrary-5c829cab49b0dcffd893065af05f0bf028e7a548.tar.gz
COMPMID-3045: CTS failures in ARGMAX/MIN
Change-Id: I35276a3d95dc99a7f4dea00e89c8ed206a5f13f1
Signed-off-by: Manuel Bottini <manuel.bottini@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/2669
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michalis Spyrou <michalis.spyrou@arm.com>
-rw-r--r-- src/core/CL/cl_kernels/arg_min_max.cl 9
-rw-r--r-- src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp 2
-rw-r--r-- tests/validation/CL/ArgMinMax.cpp 8
3 files changed, 12 insertions, 7 deletions
diff --git a/src/core/CL/cl_kernels/arg_min_max.cl b/src/core/CL/cl_kernels/arg_min_max.cl
index 06dcc8ddde..104d30d8f3 100644
--- a/src/core/CL/cl_kernels/arg_min_max.cl
+++ b/src/core/CL/cl_kernels/arg_min_max.cl
@@ -254,10 +254,15 @@ __kernel void arg_min_max_x(
barrier(CLK_LOCAL_MEM_FENCE);
+ // Looking for the next highest power of 2 (maximum value of lsize is 8)
+ unsigned int middle = lsize - 1;
+ middle |= middle >> 1;
+ middle |= middle >> 2;
+ middle += 1;
// Perform parallel reduction
- for(unsigned int i = lsize >> 1; i > 0; i >>= 1)
+ for(unsigned int i = middle; i > 0; i >>= 1)
{
- if(lid < i)
+ if( lid < i && lid + i < lsize)
{
DATA_TYPE tmp0 = *(src_in_row + local_results[lid]);
DATA_TYPE tmp1 = *(src_in_row + local_results[lid + i]);
diff --git a/src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp b/src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp
index bdb7ab76d5..4845d60487 100644
--- a/src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp
+++ b/src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp
@@ -198,8 +198,6 @@ void CLArgMinMaxLayerKernel::run(const Window &window, cl::CommandQueue &queue)
Window out_slice = out_window.first_slice_window_2D();
// Reshape window
- const unsigned int border_width = ((in_slice.x().end() % vector_size) != 0) ? vector_size - in_slice.x().end() % vector_size : 0;
- in_slice.set(Window::DimX, Window::Dimension(in_slice.x().start(), in_slice.x().end() + border_width, in_slice.x().step()));
const unsigned int num_tensors = _prev_output != nullptr ? 3 : 2;
// Set local sums buffer
diff --git a/tests/validation/CL/ArgMinMax.cpp b/tests/validation/CL/ArgMinMax.cpp
index 4bee942d70..e5decb86d3 100644
--- a/tests/validation/CL/ArgMinMax.cpp
+++ b/tests/validation/CL/ArgMinMax.cpp
@@ -47,12 +47,14 @@ namespace
const auto ArgMinMaxSmallDataset = framework::dataset::make("Shape",
{
TensorShape{ 2U, 7U, 1U, 3U },
- TensorShape{ 128U, 64U, 21U, 3U },
+ TensorShape{ 149U, 5U, 1U, 2U },
+ TensorShape{ 166U, 5U, 1U, 2U },
+ TensorShape{ 322U, 5U, 1U, 2U },
+ TensorShape{ 128U, 5U, 21U, 3U },
TensorShape{ 2560, 2U, 2U, 2U },
});
-const auto ArgMinMaxLargeDataset = framework::dataset::make("Shape",
-{ TensorShape{ 517U, 123U, 13U, 2U } });
+const auto ArgMinMaxLargeDataset = framework::dataset::make("Shape", { TensorShape{ 517U, 123U, 13U, 2U } });
} // namespace
TEST_SUITE(CL)
TEST_SUITE(ArgMinMax)