From a25582c4e7dddd26419e0a3316614e8309928934 Mon Sep 17 00:00:00 2001 From: Viet-Hoa Do Date: Wed, 15 Mar 2023 16:52:05 +0000 Subject: Fix the gather layer indices check * If the index is out-of-bound, both CPU and GPU implementations of the gather layer will output 0. Resolves: COMPMID-5964 Signed-off-by: Viet-Hoa Do Change-Id: Ib029b3acfb31452f2097c8c75448fb2697cfa332 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9487 Tested-by: Arm Jenkins Reviewed-by: Pablo Marquez Tello Comments-Addressed: Arm Jenkins Benchmark: Arm Jenkins --- arm_compute/runtime/CL/functions/CLGather.h | 8 ++--- arm_compute/runtime/NEON/functions/NEGather.h | 4 +-- src/core/CL/cl_kernels/common/gather.cl | 16 +++++---- src/core/CL/kernels/CLGatherKernel.cpp | 3 +- src/core/NEON/kernels/NEGatherKernel.cpp | 49 +++++++-------------------- tests/validation/fixtures/GatherFixture.h | 7 ++-- tests/validation/reference/Gather.cpp | 36 ++++++++++++++------ 7 files changed, 60 insertions(+), 63 deletions(-) diff --git a/arm_compute/runtime/CL/functions/CLGather.h b/arm_compute/runtime/CL/functions/CLGather.h index 7a57c7358c..0f1ccbad08 100644 --- a/arm_compute/runtime/CL/functions/CLGather.h +++ b/arm_compute/runtime/CL/functions/CLGather.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -49,7 +49,7 @@ public: * |All |All | * * @param[in] input Source tensor. Supported tensor rank: up to 4. Data type supported: All. - * @param[in] indices Indices tensor. Supported tensor rank: up to 1. Must be one of the following types: U32/S32. Each value must be in range [0, input.shape[@p axis]) + * @param[in] indices Indices tensor. Supported tensor rank: up to 1. Must be one of the following types: U32/S32. Each value must be in range [0, input.shape[@p axis]), otherwise the result will become unpredictable. * @param[out] output Destination tensor. Data type supported: Same as @p input * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Defaults to 0 */ @@ -58,7 +58,7 @@ public: * * @param[in] compile_context The compile context to be used. * @param[in] input Source tensor. Supported tensor rank: up to 4. Data type supported: All. - * @param[in] indices Indices tensor. Supported tensor rank: up to 1. Must be one of the following types: U32/S32. Each value must be in range [0, input.shape[@p axis]) + * @param[in] indices Indices tensor. Supported tensor rank: up to 1. Must be one of the following types: U32/S32. Each value must be in range [0, input.shape[@p axis]), otherwise the result will become unpredictable. * @param[out] output Destination tensor. Data type supported: Same as @p input * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Defaults to 0 */ @@ -67,7 +67,7 @@ public: /** Static function to check if given info will lead to a valid configuration of @ref CLGatherKernel * * @param[in] input Source tensor info. Supported tensor rank: up to 4. Data type supported: All. - * @param[in] indices Indices tensor info. Supported tensor rank: up to 4. Must be one of the following types: U32/S32. Each value must be in range [0, input.shape[@p axis]) + * @param[in] indices Indices tensor info. Supported tensor rank: up to 4. Must be one of the following types: U32/S32. Each value must be in range [0, input.shape[@p axis]), otherwise the result will become unpredictable. * @param[in] output Destination tensor info. Data type supported: Same as @p input * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Defaults to 0 * diff --git a/arm_compute/runtime/NEON/functions/NEGather.h b/arm_compute/runtime/NEON/functions/NEGather.h index 8253e986df..9c7ae0134d 100644 --- a/arm_compute/runtime/NEON/functions/NEGather.h +++ b/arm_compute/runtime/NEON/functions/NEGather.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022 Arm Limited. + * Copyright (c) 2019-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -49,7 +49,7 @@ public: * |All |All | * * @param[in] input Source tensor. Supported tensor rank: up to 4. Data type supported: All - * @param[in] indices Indices tensor. Supported tensor rank: up to 3. Must be one of the following type: U32/S32. Each value Must be in range [0, input.shape[@p axis]) + * @param[in] indices Indices tensor. Supported tensor rank: up to 3. Must be one of the following type: U32/S32. Each value must be in range [0, input.shape[@p axis]), otherwise the result will become unpredictable. * @note The "axis" must be in the range [0, input.rank -1] when indices is a vector, and must be 1 when indices is a 2D or 3D tensor. * @param[out] output Destination tensor. Data type supported: Same as @p input * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Defaults to 0 diff --git a/src/core/CL/cl_kernels/common/gather.cl b/src/core/CL/cl_kernels/common/gather.cl index 76eaefa92e..a47c8a7bb7 100644 --- a/src/core/CL/cl_kernels/common/gather.cl +++ b/src/core/CL/cl_kernels/common/gather.cl @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -73,19 +73,23 @@ __kernel void gather( #if AXIS == 0 const uint index = *(__global const uint *)vector_offset(&indices, px); - __global const uchar *input_addr = tensor4D_offset(&input, index, py, pz, pw); + const uint safe_index = select((uint)0, index, index < INDEX_LIMIT); + __global const uchar *input_addr = tensor4D_offset(&input, safe_index, py, pz, pw); #elif AXIS == 1 const uint index = *(__global const uint *)vector_offset(&indices, py); - __global const uchar *input_addr = tensor4D_offset(&input, px, index, pz, pw); + const uint safe_index = select((uint)0, index, index < INDEX_LIMIT); + __global const uchar *input_addr = tensor4D_offset(&input, px, safe_index, pz, pw); #elif AXIS == 2 const uint index = *(__global const uint *)vector_offset(&indices, pz); - __global const uchar *input_addr = tensor4D_offset(&input, px, py, index, pw); + const uint safe_index = select((uint)0, index, index < INDEX_LIMIT); + __global const uchar *input_addr = tensor4D_offset(&input, px, py, safe_index, pw); #elif AXIS == 3 const uint index = *(__global const uint *)vector_offset(&indices, pw); - __global const uchar *input_addr = tensor4D_offset(&input, px, py, pz, index); + const uint safe_index = select((uint)0, index, index < INDEX_LIMIT); + __global const uchar *input_addr = tensor4D_offset(&input, px, py, pz, safe_index); #endif //AXIS - *(__global DATA_TYPE *)output.ptr = *((__global const DATA_TYPE *)input_addr); + *(__global DATA_TYPE *)output.ptr = select((DATA_TYPE)0, *((__global const DATA_TYPE *)input_addr), (DATA_TYPE)(index < INDEX_LIMIT)); } #endif //defined(DATA_TYPE) && defined(AXIS) \ No newline at end of file diff --git a/src/core/CL/kernels/CLGatherKernel.cpp b/src/core/CL/kernels/CLGatherKernel.cpp index b49e6351a2..31a9a3bba4 100644 --- a/src/core/CL/kernels/CLGatherKernel.cpp +++ b/src/core/CL/kernels/CLGatherKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -104,6 +104,7 @@ void CLGatherKernel::configure(const CLCompileContext &compile_context, const IC build_opts.add_option("-DOUTPUT_DIM_Z=" + support::cpp11::to_string(output->info()->dimension(2))); build_opts.add_option("-DINPUT_DIM_Z=" + support::cpp11::to_string(input->info()->dimension(2))); build_opts.add_option("-DAXIS=" + support::cpp11::to_string(_axis)); + build_opts.add_option("-DINDEX_LIMIT=" + support::cpp11::to_string(input->info()->tensor_shape()[_axis])); // Create kernel _kernel = create_kernel(compile_context, "gather", build_opts.options()); diff --git a/src/core/NEON/kernels/NEGatherKernel.cpp b/src/core/NEON/kernels/NEGatherKernel.cpp index d361eb93fd..11332ffac8 100644 --- a/src/core/NEON/kernels/NEGatherKernel.cpp +++ b/src/core/NEON/kernels/NEGatherKernel.cpp @@ -37,26 +37,6 @@ namespace arm_compute { namespace { -/** Validate the indices - * - * Validate that indices are not negative - * - * @param[in] indices Indices tensor info. - */ - -template -void validate_indices(const ITensor *indices) -{ - Window window; - window.use_tensor_dimensions(indices->info()->tensor_shape()); - execute_window_loop(window, [&](const Coordinates & id) - { - const auto i = *(reinterpret_cast(indices->ptr_to_element(id))); - ARM_COMPUTE_UNUSED(i); - ARM_COMPUTE_ERROR_ON(i < 0); - }); -} - Status validate_arguments(const ITensorInfo *input, const ITensorInfo *indices, const ITensorInfo *output, int axis) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, indices, output); @@ -108,6 +88,8 @@ void NEGatherKernel::gather_common(const Window &window, const ThreadInfo &info) const auto window_end_x = window.x().end(); auto window_size_x = src_info->element_size(); + const auto idx_limit = static_cast(src_info->tensor_shape()[_axis]); + if(_axis != 0) { dst_win.set(0, Window::Dimension(window_start_x, window_start_x + 1, 1)); @@ -131,9 +113,17 @@ void NEGatherKernel::gather_common(const Window &window, const ThreadInfo &info) execute_window_loop(dst_win, [&](const Coordinates &) { const auto idx = *reinterpret_cast(idx_it.ptr()); - const auto src_ptr = src_it.ptr() + idx * chunk_stride; - std::copy_n(src_ptr, window_size_x, dst_it.ptr()); + if(idx >= 0 && idx < idx_limit) + { + const auto src_ptr = src_it.ptr() + idx * chunk_stride; + + std::copy_n(src_ptr, window_size_x, dst_it.ptr()); + } + else + { + std::fill_n(dst_it.ptr(), window_size_x, 0); + } }, src_it, idx_it, dst_it); } @@ -214,21 +204,6 @@ void NEGatherKernel::run(const Window &window, const ThreadInfo &info) ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON(_func == nullptr); - switch(_indices->info()->data_type()) - { - case DataType::U32: - validate_indices(_indices); - break; - - case DataType::S32: - validate_indices(_indices); - break; - - default: - ARM_COMPUTE_ERROR("Not supported"); - break; - } - (this->*_func)(window, info); } diff --git a/tests/validation/fixtures/GatherFixture.h b/tests/validation/fixtures/GatherFixture.h index 452a201f82..f6f70023b9 100644 --- a/tests/validation/fixtures/GatherFixture.h +++ b/tests/validation/fixtures/GatherFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -67,7 +67,10 @@ protected: std::mt19937 gen(library->seed()); uint32_t *indices_ptr = static_cast(indices.data()); - std::uniform_int_distribution dist_index(0, input_shape[actual_axis] - 1); + // 10% of the time the index is out-of-range. + uint32_t max_index = input_shape[actual_axis] + input_shape[actual_axis] / 9 + 1; + + std::uniform_int_distribution dist_index(0, max_index - 1); //Let's consider 1D indices for(unsigned int ind = 0; ind < indices_shape[0]; ind++) { diff --git a/tests/validation/reference/Gather.cpp b/tests/validation/reference/Gather.cpp index 12d1a3cd3c..c90c04f8cc 100644 --- a/tests/validation/reference/Gather.cpp +++ b/tests/validation/reference/Gather.cpp @@ -46,10 +46,14 @@ SimpleTensor gather(const SimpleTensor &src, const SimpleTensor const auto indices_ptr = static_cast(indices.data()); const auto dst_ptr = static_cast(dst.data()); + const uint32_t index_limit = src.shape()[actual_axis]; + Window win; win.use_tensor_dimensions(dst_shape); execute_window_loop(win, [&](const Coordinates &dst_coords) { + const auto dst_addr = coords2index(dst.shape(), dst_coords); + // Calculate the coordinates of the index value. Coordinates idx_coords; @@ -58,23 +62,33 @@ SimpleTensor gather(const SimpleTensor &src, const SimpleTensor idx_coords.set(i, dst_coords[i + actual_axis]); } - // Calculate the coordinates of the source data. - Coordinates src_coords; + const auto index = indices_ptr[coords2index(indices.shape(), idx_coords)]; - for(size_t i = 0; i < actual_axis; ++i) + if(index < index_limit) { - src_coords.set(i, dst_coords[i]); - } + // Calculate the coordinates of the source data. + Coordinates src_coords; + + for(size_t i = 0; i < actual_axis; ++i) + { + src_coords.set(i, dst_coords[i]); + } - src_coords.set(actual_axis, indices_ptr[coords2index(indices.shape(), idx_coords)]); + src_coords.set(actual_axis, index); - for(size_t i = actual_axis + 1; i < src.shape().num_dimensions(); ++i) + for(size_t i = actual_axis + 1; i < src.shape().num_dimensions(); ++i) + { + src_coords.set(i, dst_coords[i + indices.shape().num_dimensions() - 1]); + } + + // Copy the data. + const auto src_addr = coords2index(src.shape(), src_coords); + dst_ptr[dst_addr] = src_ptr[src_addr]; + } + else { - src_coords.set(i, dst_coords[i + indices.shape().num_dimensions() - 1]); + dst_ptr[dst_addr] = 0; } - - // Copy the data. - dst_ptr[coords2index(dst.shape(), dst_coords)] = src_ptr[coords2index(src.shape(), src_coords)]; }); return dst; -- cgit v1.2.1