From a25582c4e7dddd26419e0a3316614e8309928934 Mon Sep 17 00:00:00 2001 From: Viet-Hoa Do Date: Wed, 15 Mar 2023 16:52:05 +0000 Subject: Fix the gather layer indices check * If the index is out-of-bound, both CPU and GPU implementations of the gather layer will output 0. Resolves: COMPMID-5964 Signed-off-by: Viet-Hoa Do Change-Id: Ib029b3acfb31452f2097c8c75448fb2697cfa332 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9487 Tested-by: Arm Jenkins Reviewed-by: Pablo Marquez Tello Comments-Addressed: Arm Jenkins Benchmark: Arm Jenkins --- src/core/CL/cl_kernels/common/gather.cl | 16 +++++++---- src/core/CL/kernels/CLGatherKernel.cpp | 3 +- src/core/NEON/kernels/NEGatherKernel.cpp | 49 ++++++++------------------------ 3 files changed, 24 insertions(+), 44 deletions(-) (limited to 'src/core') diff --git a/src/core/CL/cl_kernels/common/gather.cl b/src/core/CL/cl_kernels/common/gather.cl index 76eaefa92e..a47c8a7bb7 100644 --- a/src/core/CL/cl_kernels/common/gather.cl +++ b/src/core/CL/cl_kernels/common/gather.cl @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -73,19 +73,23 @@ __kernel void gather( #if AXIS == 0 const uint index = *(__global const uint *)vector_offset(&indices, px); - __global const uchar *input_addr = tensor4D_offset(&input, index, py, pz, pw); + const uint safe_index = select((uint)0, index, index < INDEX_LIMIT); + __global const uchar *input_addr = tensor4D_offset(&input, safe_index, py, pz, pw); #elif AXIS == 1 const uint index = *(__global const uint *)vector_offset(&indices, py); - __global const uchar *input_addr = tensor4D_offset(&input, px, index, pz, pw); + const uint safe_index = select((uint)0, index, index < INDEX_LIMIT); + __global const uchar *input_addr = tensor4D_offset(&input, px, safe_index, pz, pw); #elif AXIS == 2 const uint index = *(__global const uint *)vector_offset(&indices, pz); - __global const uchar *input_addr = tensor4D_offset(&input, px, py, index, pw); + const uint safe_index = select((uint)0, index, index < INDEX_LIMIT); + __global const uchar *input_addr = tensor4D_offset(&input, px, py, safe_index, pw); #elif AXIS == 3 const uint index = *(__global const uint *)vector_offset(&indices, pw); - __global const uchar *input_addr = tensor4D_offset(&input, px, py, pz, index); + const uint safe_index = select((uint)0, index, index < INDEX_LIMIT); + __global const uchar *input_addr = tensor4D_offset(&input, px, py, pz, safe_index); #endif //AXIS - *(__global DATA_TYPE *)output.ptr = *((__global const DATA_TYPE *)input_addr); + *(__global DATA_TYPE *)output.ptr = select((DATA_TYPE)0, *((__global const DATA_TYPE *)input_addr), (DATA_TYPE)(index < INDEX_LIMIT)); } #endif //defined(DATA_TYPE) && defined(AXIS) \ No newline at end of file diff --git a/src/core/CL/kernels/CLGatherKernel.cpp b/src/core/CL/kernels/CLGatherKernel.cpp index b49e6351a2..31a9a3bba4 100644 --- a/src/core/CL/kernels/CLGatherKernel.cpp +++ b/src/core/CL/kernels/CLGatherKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -104,6 +104,7 @@ void CLGatherKernel::configure(const CLCompileContext &compile_context, const IC build_opts.add_option("-DOUTPUT_DIM_Z=" + support::cpp11::to_string(output->info()->dimension(2))); build_opts.add_option("-DINPUT_DIM_Z=" + support::cpp11::to_string(input->info()->dimension(2))); build_opts.add_option("-DAXIS=" + support::cpp11::to_string(_axis)); + build_opts.add_option("-DINDEX_LIMIT=" + support::cpp11::to_string(input->info()->tensor_shape()[_axis])); // Create kernel _kernel = create_kernel(compile_context, "gather", build_opts.options()); diff --git a/src/core/NEON/kernels/NEGatherKernel.cpp b/src/core/NEON/kernels/NEGatherKernel.cpp index d361eb93fd..11332ffac8 100644 --- a/src/core/NEON/kernels/NEGatherKernel.cpp +++ b/src/core/NEON/kernels/NEGatherKernel.cpp @@ -37,26 +37,6 @@ namespace arm_compute { namespace { -/** Validate the indices - * - * Validate that indices are not negative - * - * @param[in] indices Indices tensor info. - */ - -template -void validate_indices(const ITensor *indices) -{ - Window window; - window.use_tensor_dimensions(indices->info()->tensor_shape()); - execute_window_loop(window, [&](const Coordinates & id) - { - const auto i = *(reinterpret_cast(indices->ptr_to_element(id))); - ARM_COMPUTE_UNUSED(i); - ARM_COMPUTE_ERROR_ON(i < 0); - }); -} - Status validate_arguments(const ITensorInfo *input, const ITensorInfo *indices, const ITensorInfo *output, int axis) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, indices, output); @@ -108,6 +88,8 @@ void NEGatherKernel::gather_common(const Window &window, const ThreadInfo &info) const auto window_end_x = window.x().end(); auto window_size_x = src_info->element_size(); + const auto idx_limit = static_cast(src_info->tensor_shape()[_axis]); + if(_axis != 0) { dst_win.set(0, Window::Dimension(window_start_x, window_start_x + 1, 1)); @@ -131,9 +113,17 @@ void NEGatherKernel::gather_common(const Window &window, const ThreadInfo &info) execute_window_loop(dst_win, [&](const Coordinates &) { const auto idx = *reinterpret_cast(idx_it.ptr()); - const auto src_ptr = src_it.ptr() + idx * chunk_stride; - std::copy_n(src_ptr, window_size_x, dst_it.ptr()); + if(idx >= 0 && idx < idx_limit) + { + const auto src_ptr = src_it.ptr() + idx * chunk_stride; + + std::copy_n(src_ptr, window_size_x, dst_it.ptr()); + } + else + { + std::fill_n(dst_it.ptr(), window_size_x, 0); + } }, src_it, idx_it, dst_it); } @@ -214,21 +204,6 @@ void NEGatherKernel::run(const Window &window, const ThreadInfo &info) ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON(_func == nullptr); - switch(_indices->info()->data_type()) - { - case DataType::U32: - validate_indices(_indices); - break; - - case DataType::S32: - validate_indices(_indices); - break; - - default: - ARM_COMPUTE_ERROR("Not supported"); - break; - } - (this->*_func)(window, info); } -- cgit v1.2.1