diff options
author | Georgios Pinitas <georgios.pinitas@arm.com> | 2018-08-24 11:25:32 +0100 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-11-02 16:54:54 +0000 |
commit | c1a72451273ec019e3e74c4b53ea847afe8ddf7c (patch) | |
tree | b4bd62a7ccd22a2c60070d7fa23ceba794dcac5c /src/core/CL/cl_kernels | |
parent | 6a8d3b6db13042a859972c33cf40cfeb6d7cfcda (diff) | |
download | ComputeLibrary-c1a72451273ec019e3e74c4b53ea847afe8ddf7c.tar.gz |
COMPMID-1332: Implement Slice for CL
Change-Id: I0dbc4fd7f640d31daa1970eb3da0e941cb771f2b
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/146145
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Giorgio Arena <giorgio.arena@arm.com>
Reviewed-by: Michalis Spyrou <michalis.spyrou@arm.com>
Diffstat (limited to 'src/core/CL/cl_kernels')
-rw-r--r-- | src/core/CL/cl_kernels/slice_ops.cl (renamed from src/core/CL/cl_kernels/strided_slice.cl) | 46 |
1 files changed, 34 insertions, 12 deletions
diff --git a/src/core/CL/cl_kernels/strided_slice.cl b/src/core/CL/cl_kernels/slice_ops.cl index 7c68fb9a07..bc3df47345 100644 --- a/src/core/CL/cl_kernels/strided_slice.cl +++ b/src/core/CL/cl_kernels/slice_ops.cl @@ -61,25 +61,47 @@ __kernel void strided_slice( Tensor4D input = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(input, SRC_DEPTH); Tensor4D output = CONVERT_TO_TENSOR4D_STRUCT(output, DST_DEPTH); - int offset_0 = 0; - int offset_1 = 0; - int offset_2 = 0; - int offset_3 = 0; + int offset = 0; - // Calculate offset -#if defined(START_0) && defined(STRIDE_0) - offset_0 = (int)START_0 + (int)get_global_id(0) * (int)STRIDE_0; + // Offset X +#if defined(START_0) && defined(STRIDE_0) && defined(VEC_SIZE) && defined(LAST_ACCESSED_X) + // Check if access on width gets out of bounds + // If it does shift access vector to access elements within bounds + const int xi = (int)(get_global_id(0) * VEC_SIZE); + offset = (int)START_0 + min(xi, (int)LAST_ACCESSED_X); + input.ptr += offset * input_stride_x; + output.ptr -= max(xi - (int)LAST_ACCESSED_X, 0) * output_stride_x; +#elif defined(START_0) && defined(STRIDE_0) + offset = (int)START_0 + (int)get_global_id(0) * (int)STRIDE_0; + input.ptr += offset * input_stride_x; #endif // defined(START_0) && defined(STRIDE_0) + + // Offset Y #if defined(START_1) && defined(STRIDE_1) - offset_1 = (int)START_1 + (int)get_global_id(1) * (int)STRIDE_1; + offset = (int)START_1 + (int)get_global_id(1) * (int)STRIDE_1; + input.ptr += offset * input_stride_y; #endif // defined(START_1) && defined(STRIDE_1) + + // Offset Z #if defined(START_2) && defined(STRIDE_2) - offset_2 = (int)START_2 + ((int)get_global_id(2) % (int)DST_DEPTH) * (int)STRIDE_2; + offset = (int)START_2 + ((int)get_global_id(2) % (int)DST_DEPTH) * (int)STRIDE_2; + input.ptr += offset * input_stride_z; #endif // defined(START_2) && defined(STRIDE_2) + + // Offset depth #if defined(START_3) && defined(STRIDE_3) - offset_3 = (int)START_3 + ((int)get_global_id(2) / (int)DST_DEPTH) * (int)STRIDE_3; -#endif // defined(START_2) && defined(STRIDE_2) + offset = (int)START_3 + ((int)get_global_id(2) / (int)DST_DEPTH) * (int)STRIDE_3; + input.ptr += offset * input_stride_w; +#endif // defined(START_3) && defined(STRIDE_3) // Store result - *((__global DATA_TYPE *)(output.ptr)) = *((__global DATA_TYPE *)tensor4D_offset(&input, offset_0, offset_1, offset_2, offset_3)); +#if defined(VEC_SIZE) && defined(LAST_ACCESSED_X) + VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) + val = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)(input.ptr)); + + VSTORE(VEC_SIZE) + (val, 0, (__global DATA_TYPE *)(output.ptr)); +#else // defined(VEC_SIZE) && defined(LAST_ACCESSED_X) + *((__global DATA_TYPE *)(output.ptr)) = *((__global DATA_TYPE *)(input.ptr)); +#endif // defined(VEC_SIZE) && defined(LAST_ACCESSED_X) } |