From 5c829cab49b0dcffd893065af05f0bf028e7a548 Mon Sep 17 00:00:00 2001 From: Manuel Bottini Date: Tue, 28 Jan 2020 17:25:48 +0000 Subject: COMPMID-3045: CTS failures in ARGMAX/MIN Change-Id: I35276a3d95dc99a7f4dea00e89c8ed206a5f13f1 Signed-off-by: Manuel Bottini Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/2669 Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins Reviewed-by: Michalis Spyrou --- src/core/CL/cl_kernels/arg_min_max.cl | 9 +++++++-- src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp | 2 -- tests/validation/CL/ArgMinMax.cpp | 8 +++++--- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/src/core/CL/cl_kernels/arg_min_max.cl b/src/core/CL/cl_kernels/arg_min_max.cl index 06dcc8ddde..104d30d8f3 100644 --- a/src/core/CL/cl_kernels/arg_min_max.cl +++ b/src/core/CL/cl_kernels/arg_min_max.cl @@ -254,10 +254,15 @@ __kernel void arg_min_max_x( barrier(CLK_LOCAL_MEM_FENCE); + // Looking for the next highest power of 2 (maximum value of lsize is 8) + unsigned int middle = lsize - 1; + middle |= middle >> 1; + middle |= middle >> 2; + middle += 1; // Perform parallel reduction - for(unsigned int i = lsize >> 1; i > 0; i >>= 1) + for(unsigned int i = middle; i > 0; i >>= 1) { - if(lid < i) + if( lid < i && lid + i < lsize) { DATA_TYPE tmp0 = *(src_in_row + local_results[lid]); DATA_TYPE tmp1 = *(src_in_row + local_results[lid + i]); diff --git a/src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp b/src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp index bdb7ab76d5..4845d60487 100644 --- a/src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp +++ b/src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp @@ -198,8 +198,6 @@ void CLArgMinMaxLayerKernel::run(const Window &window, cl::CommandQueue &queue) Window out_slice = out_window.first_slice_window_2D(); // Reshape window - const unsigned int border_width = ((in_slice.x().end() % vector_size) != 0) ? vector_size - in_slice.x().end() % vector_size : 0; - in_slice.set(Window::DimX, Window::Dimension(in_slice.x().start(), in_slice.x().end() + border_width, in_slice.x().step())); const unsigned int num_tensors = _prev_output != nullptr ? 3 : 2; // Set local sums buffer diff --git a/tests/validation/CL/ArgMinMax.cpp b/tests/validation/CL/ArgMinMax.cpp index 4bee942d70..e5decb86d3 100644 --- a/tests/validation/CL/ArgMinMax.cpp +++ b/tests/validation/CL/ArgMinMax.cpp @@ -47,12 +47,14 @@ namespace const auto ArgMinMaxSmallDataset = framework::dataset::make("Shape", { TensorShape{ 2U, 7U, 1U, 3U }, - TensorShape{ 128U, 64U, 21U, 3U }, + TensorShape{ 149U, 5U, 1U, 2U }, + TensorShape{ 166U, 5U, 1U, 2U }, + TensorShape{ 322U, 5U, 1U, 2U }, + TensorShape{ 128U, 5U, 21U, 3U }, TensorShape{ 2560, 2U, 2U, 2U }, }); -const auto ArgMinMaxLargeDataset = framework::dataset::make("Shape", -{ TensorShape{ 517U, 123U, 13U, 2U } }); +const auto ArgMinMaxLargeDataset = framework::dataset::make("Shape", { TensorShape{ 517U, 123U, 13U, 2U } }); } // namespace TEST_SUITE(CL) TEST_SUITE(ArgMinMax) -- cgit v1.2.1