aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--arm_compute/runtime/CL/functions/CLGather.h8
-rw-r--r--arm_compute/runtime/NEON/functions/NEGather.h4
-rw-r--r--src/core/CL/cl_kernels/common/gather.cl16
-rw-r--r--src/core/CL/kernels/CLGatherKernel.cpp3
-rw-r--r--src/core/NEON/kernels/NEGatherKernel.cpp49
-rw-r--r--tests/validation/fixtures/GatherFixture.h7
-rw-r--r--tests/validation/reference/Gather.cpp36
7 files changed, 60 insertions, 63 deletions
diff --git a/arm_compute/runtime/CL/functions/CLGather.h b/arm_compute/runtime/CL/functions/CLGather.h
index 7a57c7358c..0f1ccbad08 100644
--- a/arm_compute/runtime/CL/functions/CLGather.h
+++ b/arm_compute/runtime/CL/functions/CLGather.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -49,7 +49,7 @@ public:
* |All |All |
*
* @param[in] input Source tensor. Supported tensor rank: up to 4. Data type supported: All.
- * @param[in] indices Indices tensor. Supported tensor rank: up to 1. Must be one of the following types: U32/S32. Each value must be in range [0, input.shape[@p axis])
+ * @param[in] indices Indices tensor. Supported tensor rank: up to 1. Must be one of the following types: U32/S32. Each value must be in range [0, input.shape[@p axis]), otherwise the result will become unpredictable.
* @param[out] output Destination tensor. Data type supported: Same as @p input
* @param[in] axis (Optional) The axis in @p input to gather @p indices from. Defaults to 0
*/
@@ -58,7 +58,7 @@ public:
*
* @param[in] compile_context The compile context to be used.
* @param[in] input Source tensor. Supported tensor rank: up to 4. Data type supported: All.
- * @param[in] indices Indices tensor. Supported tensor rank: up to 1. Must be one of the following types: U32/S32. Each value must be in range [0, input.shape[@p axis])
+ * @param[in] indices Indices tensor. Supported tensor rank: up to 1. Must be one of the following types: U32/S32. Each value must be in range [0, input.shape[@p axis]), otherwise the result will become unpredictable.
* @param[out] output Destination tensor. Data type supported: Same as @p input
* @param[in] axis (Optional) The axis in @p input to gather @p indices from. Defaults to 0
*/
@@ -67,7 +67,7 @@ public:
/** Static function to check if given info will lead to a valid configuration of @ref CLGatherKernel
*
* @param[in] input Source tensor info. Supported tensor rank: up to 4. Data type supported: All.
- * @param[in] indices Indices tensor info. Supported tensor rank: up to 4. Must be one of the following types: U32/S32. Each value must be in range [0, input.shape[@p axis])
+ * @param[in] indices Indices tensor info. Supported tensor rank: up to 4. Must be one of the following types: U32/S32. Each value must be in range [0, input.shape[@p axis]), otherwise the result will become unpredictable.
* @param[in] output Destination tensor info. Data type supported: Same as @p input
* @param[in] axis (Optional) The axis in @p input to gather @p indices from. Defaults to 0
*
diff --git a/arm_compute/runtime/NEON/functions/NEGather.h b/arm_compute/runtime/NEON/functions/NEGather.h
index 8253e986df..9c7ae0134d 100644
--- a/arm_compute/runtime/NEON/functions/NEGather.h
+++ b/arm_compute/runtime/NEON/functions/NEGather.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2022 Arm Limited.
+ * Copyright (c) 2019-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -49,7 +49,7 @@ public:
* |All |All |
*
* @param[in] input Source tensor. Supported tensor rank: up to 4. Data type supported: All
- * @param[in] indices Indices tensor. Supported tensor rank: up to 3. Must be one of the following type: U32/S32. Each value Must be in range [0, input.shape[@p axis])
+ * @param[in] indices Indices tensor. Supported tensor rank: up to 3. Must be one of the following type: U32/S32. Each value must be in range [0, input.shape[@p axis]), otherwise the result will become unpredictable.
* @note The "axis" must be in the range [0, input.rank -1] when indices is a vector, and must be 1 when indices is a 2D or 3D tensor.
* @param[out] output Destination tensor. Data type supported: Same as @p input
* @param[in] axis (Optional) The axis in @p input to gather @p indices from. Defaults to 0
diff --git a/src/core/CL/cl_kernels/common/gather.cl b/src/core/CL/cl_kernels/common/gather.cl
index 76eaefa92e..a47c8a7bb7 100644
--- a/src/core/CL/cl_kernels/common/gather.cl
+++ b/src/core/CL/cl_kernels/common/gather.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -73,19 +73,23 @@ __kernel void gather(
#if AXIS == 0
const uint index = *(__global const uint *)vector_offset(&indices, px);
- __global const uchar *input_addr = tensor4D_offset(&input, index, py, pz, pw);
+ const uint safe_index = select((uint)0, index, index < INDEX_LIMIT);
+ __global const uchar *input_addr = tensor4D_offset(&input, safe_index, py, pz, pw);
#elif AXIS == 1
const uint index = *(__global const uint *)vector_offset(&indices, py);
- __global const uchar *input_addr = tensor4D_offset(&input, px, index, pz, pw);
+ const uint safe_index = select((uint)0, index, index < INDEX_LIMIT);
+ __global const uchar *input_addr = tensor4D_offset(&input, px, safe_index, pz, pw);
#elif AXIS == 2
const uint index = *(__global const uint *)vector_offset(&indices, pz);
- __global const uchar *input_addr = tensor4D_offset(&input, px, py, index, pw);
+ const uint safe_index = select((uint)0, index, index < INDEX_LIMIT);
+ __global const uchar *input_addr = tensor4D_offset(&input, px, py, safe_index, pw);
#elif AXIS == 3
const uint index = *(__global const uint *)vector_offset(&indices, pw);
- __global const uchar *input_addr = tensor4D_offset(&input, px, py, pz, index);
+ const uint safe_index = select((uint)0, index, index < INDEX_LIMIT);
+ __global const uchar *input_addr = tensor4D_offset(&input, px, py, pz, safe_index);
#endif //AXIS
- *(__global DATA_TYPE *)output.ptr = *((__global const DATA_TYPE *)input_addr);
+ *(__global DATA_TYPE *)output.ptr = select((DATA_TYPE)0, *((__global const DATA_TYPE *)input_addr), (DATA_TYPE)(index < INDEX_LIMIT));
}
#endif //defined(DATA_TYPE) && defined(AXIS) \ No newline at end of file
diff --git a/src/core/CL/kernels/CLGatherKernel.cpp b/src/core/CL/kernels/CLGatherKernel.cpp
index b49e6351a2..31a9a3bba4 100644
--- a/src/core/CL/kernels/CLGatherKernel.cpp
+++ b/src/core/CL/kernels/CLGatherKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -104,6 +104,7 @@ void CLGatherKernel::configure(const CLCompileContext &compile_context, const IC
build_opts.add_option("-DOUTPUT_DIM_Z=" + support::cpp11::to_string(output->info()->dimension(2)));
build_opts.add_option("-DINPUT_DIM_Z=" + support::cpp11::to_string(input->info()->dimension(2)));
build_opts.add_option("-DAXIS=" + support::cpp11::to_string(_axis));
+ build_opts.add_option("-DINDEX_LIMIT=" + support::cpp11::to_string(input->info()->tensor_shape()[_axis]));
// Create kernel
_kernel = create_kernel(compile_context, "gather", build_opts.options());
diff --git a/src/core/NEON/kernels/NEGatherKernel.cpp b/src/core/NEON/kernels/NEGatherKernel.cpp
index d361eb93fd..11332ffac8 100644
--- a/src/core/NEON/kernels/NEGatherKernel.cpp
+++ b/src/core/NEON/kernels/NEGatherKernel.cpp
@@ -37,26 +37,6 @@ namespace arm_compute
{
namespace
{
-/** Validate the indices
- *
- * Validate that indices are not negative
- *
- * @param[in] indices Indices tensor info.
- */
-
-template <typename U>
-void validate_indices(const ITensor *indices)
-{
- Window window;
- window.use_tensor_dimensions(indices->info()->tensor_shape());
- execute_window_loop(window, [&](const Coordinates & id)
- {
- const auto i = *(reinterpret_cast<int32_t *>(indices->ptr_to_element(id)));
- ARM_COMPUTE_UNUSED(i);
- ARM_COMPUTE_ERROR_ON(i < 0);
- });
-}
-
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *indices, const ITensorInfo *output, int axis)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, indices, output);
@@ -108,6 +88,8 @@ void NEGatherKernel::gather_common(const Window &window, const ThreadInfo &info)
const auto window_end_x = window.x().end();
auto window_size_x = src_info->element_size();
+ const auto idx_limit = static_cast<TIndex>(src_info->tensor_shape()[_axis]);
+
if(_axis != 0)
{
dst_win.set(0, Window::Dimension(window_start_x, window_start_x + 1, 1));
@@ -131,9 +113,17 @@ void NEGatherKernel::gather_common(const Window &window, const ThreadInfo &info)
execute_window_loop(dst_win, [&](const Coordinates &) {
const auto idx = *reinterpret_cast<const TIndex *>(idx_it.ptr());
- const auto src_ptr = src_it.ptr() + idx * chunk_stride;
- std::copy_n(src_ptr, window_size_x, dst_it.ptr());
+ if(idx >= 0 && idx < idx_limit)
+ {
+ const auto src_ptr = src_it.ptr() + idx * chunk_stride;
+
+ std::copy_n(src_ptr, window_size_x, dst_it.ptr());
+ }
+ else
+ {
+ std::fill_n(dst_it.ptr(), window_size_x, 0);
+ }
}, src_it, idx_it, dst_it);
}
@@ -214,21 +204,6 @@ void NEGatherKernel::run(const Window &window, const ThreadInfo &info)
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
ARM_COMPUTE_ERROR_ON(_func == nullptr);
- switch(_indices->info()->data_type())
- {
- case DataType::U32:
- validate_indices<uint32_t>(_indices);
- break;
-
- case DataType::S32:
- validate_indices<int32_t>(_indices);
- break;
-
- default:
- ARM_COMPUTE_ERROR("Not supported");
- break;
- }
-
(this->*_func)(window, info);
}
diff --git a/tests/validation/fixtures/GatherFixture.h b/tests/validation/fixtures/GatherFixture.h
index 452a201f82..f6f70023b9 100644
--- a/tests/validation/fixtures/GatherFixture.h
+++ b/tests/validation/fixtures/GatherFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -67,7 +67,10 @@ protected:
std::mt19937 gen(library->seed());
uint32_t *indices_ptr = static_cast<uint32_t *>(indices.data());
- std::uniform_int_distribution<uint32_t> dist_index(0, input_shape[actual_axis] - 1);
+ // 10% of the time the index is out-of-range.
+ uint32_t max_index = input_shape[actual_axis] + input_shape[actual_axis] / 9 + 1;
+
+ std::uniform_int_distribution<uint32_t> dist_index(0, max_index - 1);
//Let's consider 1D indices
for(unsigned int ind = 0; ind < indices_shape[0]; ind++)
{
diff --git a/tests/validation/reference/Gather.cpp b/tests/validation/reference/Gather.cpp
index 12d1a3cd3c..c90c04f8cc 100644
--- a/tests/validation/reference/Gather.cpp
+++ b/tests/validation/reference/Gather.cpp
@@ -46,10 +46,14 @@ SimpleTensor<T> gather(const SimpleTensor<T> &src, const SimpleTensor<uint32_t>
const auto indices_ptr = static_cast<const uint32_t *>(indices.data());
const auto dst_ptr = static_cast<T *>(dst.data());
+ const uint32_t index_limit = src.shape()[actual_axis];
+
Window win;
win.use_tensor_dimensions(dst_shape);
execute_window_loop(win, [&](const Coordinates &dst_coords) {
+ const auto dst_addr = coords2index(dst.shape(), dst_coords);
+
// Calculate the coordinates of the index value.
Coordinates idx_coords;
@@ -58,23 +62,33 @@ SimpleTensor<T> gather(const SimpleTensor<T> &src, const SimpleTensor<uint32_t>
idx_coords.set(i, dst_coords[i + actual_axis]);
}
- // Calculate the coordinates of the source data.
- Coordinates src_coords;
+ const auto index = indices_ptr[coords2index(indices.shape(), idx_coords)];
- for(size_t i = 0; i < actual_axis; ++i)
+ if(index < index_limit)
{
- src_coords.set(i, dst_coords[i]);
- }
+ // Calculate the coordinates of the source data.
+ Coordinates src_coords;
+
+ for(size_t i = 0; i < actual_axis; ++i)
+ {
+ src_coords.set(i, dst_coords[i]);
+ }
- src_coords.set(actual_axis, indices_ptr[coords2index(indices.shape(), idx_coords)]);
+ src_coords.set(actual_axis, index);
- for(size_t i = actual_axis + 1; i < src.shape().num_dimensions(); ++i)
+ for(size_t i = actual_axis + 1; i < src.shape().num_dimensions(); ++i)
+ {
+ src_coords.set(i, dst_coords[i + indices.shape().num_dimensions() - 1]);
+ }
+
+ // Copy the data.
+ const auto src_addr = coords2index(src.shape(), src_coords);
+ dst_ptr[dst_addr] = src_ptr[src_addr];
+ }
+ else
{
- src_coords.set(i, dst_coords[i + indices.shape().num_dimensions() - 1]);
+ dst_ptr[dst_addr] = 0;
}
-
- // Copy the data.
- dst_ptr[coords2index(dst.shape(), dst_coords)] = src_ptr[coords2index(src.shape(), src_coords)];
});
return dst;