aboutsummaryrefslogtreecommitdiff
path: root/src/core/CL
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/CL')
-rw-r--r-- src/core/CL/cl_kernels/common/gather.cl | 16
-rw-r--r-- src/core/CL/kernels/CLGatherKernel.cpp | 3
2 files changed, 12 insertions, 7 deletions
diff --git a/src/core/CL/cl_kernels/common/gather.cl b/src/core/CL/cl_kernels/common/gather.cl
index 76eaefa92e..a47c8a7bb7 100644
--- a/src/core/CL/cl_kernels/common/gather.cl
+++ b/src/core/CL/cl_kernels/common/gather.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -73,19 +73,23 @@ __kernel void gather(
#if AXIS == 0
const uint index = *(__global const uint *)vector_offset(&indices, px);
- __global const uchar *input_addr = tensor4D_offset(&input, index, py, pz, pw);
+ const uint safe_index = select((uint)0, index, index < INDEX_LIMIT);
+ __global const uchar *input_addr = tensor4D_offset(&input, safe_index, py, pz, pw);
#elif AXIS == 1
const uint index = *(__global const uint *)vector_offset(&indices, py);
- __global const uchar *input_addr = tensor4D_offset(&input, px, index, pz, pw);
+ const uint safe_index = select((uint)0, index, index < INDEX_LIMIT);
+ __global const uchar *input_addr = tensor4D_offset(&input, px, safe_index, pz, pw);
#elif AXIS == 2
const uint index = *(__global const uint *)vector_offset(&indices, pz);
- __global const uchar *input_addr = tensor4D_offset(&input, px, py, index, pw);
+ const uint safe_index = select((uint)0, index, index < INDEX_LIMIT);
+ __global const uchar *input_addr = tensor4D_offset(&input, px, py, safe_index, pw);
#elif AXIS == 3
const uint index = *(__global const uint *)vector_offset(&indices, pw);
- __global const uchar *input_addr = tensor4D_offset(&input, px, py, pz, index);
+ const uint safe_index = select((uint)0, index, index < INDEX_LIMIT);
+ __global const uchar *input_addr = tensor4D_offset(&input, px, py, pz, safe_index);
#endif //AXIS
- *(__global DATA_TYPE *)output.ptr = *((__global const DATA_TYPE *)input_addr);
+ *(__global DATA_TYPE *)output.ptr = select((DATA_TYPE)0, *((__global const DATA_TYPE *)input_addr), (DATA_TYPE)(index < INDEX_LIMIT));
}
#endif //defined(DATA_TYPE) && defined(AXIS)
\ No newline at end of file
diff --git a/src/core/CL/kernels/CLGatherKernel.cpp b/src/core/CL/kernels/CLGatherKernel.cpp
index b49e6351a2..31a9a3bba4 100644
--- a/src/core/CL/kernels/CLGatherKernel.cpp
+++ b/src/core/CL/kernels/CLGatherKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -104,6 +104,7 @@ void CLGatherKernel::configure(const CLCompileContext &compile_context, const IC
build_opts.add_option("-DOUTPUT_DIM_Z=" + support::cpp11::to_string(output->info()->dimension(2)));
build_opts.add_option("-DINPUT_DIM_Z=" + support::cpp11::to_string(input->info()->dimension(2)));
build_opts.add_option("-DAXIS=" + support::cpp11::to_string(_axis));
+ build_opts.add_option("-DINDEX_LIMIT=" + support::cpp11::to_string(input->info()->tensor_shape()[_axis]));
// Create kernel
_kernel = create_kernel(compile_context, "gather", build_opts.options());