aboutsummaryrefslogtreecommitdiff
path: root/src/core/CL/cl_kernels/common/gather.cl
diff options
context:
space:
mode:
authorViet-Hoa Do <viet-hoa.do@arm.com>2023-03-15 16:52:05 +0000
committerViet-Hoa Do <viet-hoa.do@arm.com>2023-04-28 13:16:39 +0000
commita25582c4e7dddd26419e0a3316614e8309928934 (patch)
tree3e71a83870f561d2abb8df802c56009224628152 /src/core/CL/cl_kernels/common/gather.cl
parenteaae8999ac8027a5fb96162061ad8ccc490515cb (diff)
downloadComputeLibrary-a25582c4e7dddd26419e0a3316614e8309928934.tar.gz
Fix the gather layer indices check
* If the index is out-of-bound, both CPU and GPU implementations of the gather layer will output 0. Resolves: COMPMID-5964 Signed-off-by: Viet-Hoa Do <viet-hoa.do@arm.com> Change-Id: Ib029b3acfb31452f2097c8c75448fb2697cfa332 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9487 Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Pablo Marquez Tello <pablo.tello@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/CL/cl_kernels/common/gather.cl')
-rw-r--r--src/core/CL/cl_kernels/common/gather.cl16
1 files changed, 10 insertions, 6 deletions
diff --git a/src/core/CL/cl_kernels/common/gather.cl b/src/core/CL/cl_kernels/common/gather.cl
index 76eaefa92e..a47c8a7bb7 100644
--- a/src/core/CL/cl_kernels/common/gather.cl
+++ b/src/core/CL/cl_kernels/common/gather.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -73,19 +73,23 @@ __kernel void gather(
#if AXIS == 0
const uint index = *(__global const uint *)vector_offset(&indices, px);
- __global const uchar *input_addr = tensor4D_offset(&input, index, py, pz, pw);
+ const uint safe_index = select((uint)0, index, index < INDEX_LIMIT);
+ __global const uchar *input_addr = tensor4D_offset(&input, safe_index, py, pz, pw);
#elif AXIS == 1
const uint index = *(__global const uint *)vector_offset(&indices, py);
- __global const uchar *input_addr = tensor4D_offset(&input, px, index, pz, pw);
+ const uint safe_index = select((uint)0, index, index < INDEX_LIMIT);
+ __global const uchar *input_addr = tensor4D_offset(&input, px, safe_index, pz, pw);
#elif AXIS == 2
const uint index = *(__global const uint *)vector_offset(&indices, pz);
- __global const uchar *input_addr = tensor4D_offset(&input, px, py, index, pw);
+ const uint safe_index = select((uint)0, index, index < INDEX_LIMIT);
+ __global const uchar *input_addr = tensor4D_offset(&input, px, py, safe_index, pw);
#elif AXIS == 3
const uint index = *(__global const uint *)vector_offset(&indices, pw);
- __global const uchar *input_addr = tensor4D_offset(&input, px, py, pz, index);
+ const uint safe_index = select((uint)0, index, index < INDEX_LIMIT);
+ __global const uchar *input_addr = tensor4D_offset(&input, px, py, pz, safe_index);
#endif //AXIS
- *(__global DATA_TYPE *)output.ptr = *((__global const DATA_TYPE *)input_addr);
+ *(__global DATA_TYPE *)output.ptr = select((DATA_TYPE)0, *((__global const DATA_TYPE *)input_addr), (DATA_TYPE)(index < INDEX_LIMIT));
}
#endif //defined(DATA_TYPE) && defined(AXIS) \ No newline at end of file