diff options
author | Omar Al Khatib <omar.alkhatib@arm.com> | 2023-04-26 11:31:45 +0100 |
---|---|---|
committer | Omar Al Khatib <omar.alkhatib@arm.com> | 2023-05-03 13:22:48 +0000 |
commit | cdd1e039ad598aec10d8c1b81e08de9412324bf2 (patch) | |
tree | 344bfa6dc1e30604c6e67533eccb08a71e235fde /src/core/CL/cl_kernels | |
parent | 911d5728fccdabbdf41549c58f0266e49c2aeaf0 (diff) | |
download | ComputeLibrary-cdd1e039ad598aec10d8c1b81e08de9412324bf2.tar.gz |
Support multi-dimensional indices in the CL Gather Layer up to four-dimensional output tensors
Resolves [COMPMID-5775]
Signed-off-by: Omar Al Khatib <omar.alkhatib@arm.com>
Change-Id: I6f6c12ac08f0b0ad070ca5d715c531c2c3762c30
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9498
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/CL/cl_kernels')
-rw-r--r-- | src/core/CL/cl_kernels/common/gather.cl | 56 |
1 files changed, 46 insertions, 10 deletions
diff --git a/src/core/CL/cl_kernels/common/gather.cl b/src/core/CL/cl_kernels/common/gather.cl index a47c8a7bb7..5d180f3781 100644 --- a/src/core/CL/cl_kernels/common/gather.cl +++ b/src/core/CL/cl_kernels/common/gather.cl @@ -59,34 +59,70 @@ */ __kernel void gather( TENSOR4D_DECLARATION(input), - VECTOR_DECLARATION(indices), + TENSOR4D_DECLARATION(indices), TENSOR4D_DECLARATION(output)) { const int px = get_global_id(0); const int py = get_global_id(1); const int pz = get_global_id(2) % OUTPUT_DIM_Z; - const int pw = get_global_id(2) / OUTPUT_DIM_Z; + const int pw = (get_global_id(2) / OUTPUT_DIM_Z ); const Tensor4D input = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(input, INPUT_DIM_Z); - const Vector indices = CONVERT_TO_VECTOR_STRUCT_NO_STEP(indices); + const Tensor4D indices = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(indices, INDICES_DIM_Z); Tensor4D output = CONVERT_TO_TENSOR4D_STRUCT(output, OUTPUT_DIM_Z); #if AXIS == 0 - const uint index = *(__global const uint *)vector_offset(&indices, px); +#if INDICES_DIMS == 1 + const uint index = *(__global const uint *)tensor4D_offset(&indices, px, 0, 0, 0); const uint safe_index = select((uint)0, index, index < INDEX_LIMIT); __global const uchar *input_addr = tensor4D_offset(&input, safe_index, py, pz, pw); +#elif INDICES_DIMS == 2 + const uint index = *(__global const uint *)tensor4D_offset(&indices, px, py, 0, 0); + const uint safe_index = select((uint)0, index, index < INDEX_LIMIT); + __global const uchar *input_addr = tensor4D_offset(&input, safe_index, pz, pw, 0); +#elif INDICES_DIMS == 3 + const uint index = *(__global const uint *)tensor4D_offset(&indices, px, py, pz, 0); + const uint safe_index = select((uint)0, index, index < INDEX_LIMIT); + __global const uchar *input_addr = tensor4D_offset(&input, safe_index, pw, 0, 0); +#elif INDICES_DIMS == 4 + const uint index = *(__global const uint *)tensor4D_offset(&indices, px, py, pz, pw); + const uint safe_index = select((uint)0, index, index < INDEX_LIMIT); + __global const uchar *input_addr = tensor4D_offset(&input, safe_index, 0, 0, 0); +#endif //INDICES_DIMS + #elif AXIS == 1 - const uint index = *(__global const uint *)vector_offset(&indices, py); +#if INDICES_DIMS == 1 + const uint index = *(__global const uint *)tensor4D_offset(&indices, py, 0, 0, 0); + const uint safe_index = select((uint)0, index, index < INDEX_LIMIT); + __global const uchar *input_addr = tensor4D_offset(&input, px, safe_index, pz, pw); +#elif INDICES_DIMS == 2 + const uint index = *(__global const uint *)tensor4D_offset(&indices, py, pz, 0, 0); const uint safe_index = select((uint)0, index, index < INDEX_LIMIT); - __global const uchar *input_addr = tensor4D_offset(&input, px, safe_index, pz, pw); + __global const uchar *input_addr = tensor4D_offset(&input, px, safe_index, pw, 0); +#elif INDICES_DIMS == 3 + const uint index = *(__global const uint *)tensor4D_offset(&indices, py, pz, pw, 0); + const uint safe_index = select((uint)0, index, index < INDEX_LIMIT); + __global const uchar *input_addr = tensor4D_offset(&input, px, safe_index, 0, 0); +#endif //INDICES_DIMS + #elif AXIS == 2 - const uint index = *(__global const uint *)vector_offset(&indices, pz); +#if INDICES_DIMS == 1 + const uint index = *(__global const uint *)tensor4D_offset(&indices, pz, 0, 0, 0); + const uint safe_index = select((uint)0, index, index < INDEX_LIMIT); + __global const uchar *input_addr = tensor4D_offset(&input, px, py, safe_index, pw); +#elif INDICES_DIMS == 2 + const uint index = *(__global const uint *)tensor4D_offset(&indices, pz, pw, 0, 0); const uint safe_index = select((uint)0, index, index < INDEX_LIMIT); - __global const uchar *input_addr = tensor4D_offset(&input, px, py, safe_index, pw); + __global const uchar *input_addr = tensor4D_offset(&input, px, py, safe_index, 0); +#endif //INDICES_DIMS + #elif AXIS == 3 - const uint index = *(__global const uint *)vector_offset(&indices, pw); +#if INDICES_DIMS == 1 + const uint index = *(__global const uint *)tensor4D_offset(&indices, pw, 0, 0, 0); const uint safe_index = select((uint)0, index, index < INDEX_LIMIT); - __global const uchar *input_addr = tensor4D_offset(&input, px, py, pz, safe_index); + __global const uchar *input_addr = tensor4D_offset(&input, px, py, pz, safe_index); +#endif //INDICES_DIMS + #endif //AXIS *(__global DATA_TYPE *)output.ptr = select((DATA_TYPE)0, *((__global const DATA_TYPE *)input_addr), (DATA_TYPE)(index < INDEX_LIMIT)); |