diff options
Diffstat (limited to 'src/core/CL/cl_kernels')
-rw-r--r-- | src/core/CL/cl_kernels/slice_ops.cl (renamed from src/core/CL/cl_kernels/strided_slice.cl) | 46 |
1 files changed, 34 insertions, 12 deletions
diff --git a/src/core/CL/cl_kernels/strided_slice.cl b/src/core/CL/cl_kernels/slice_ops.cl index 7c68fb9a07..bc3df47345 100644 --- a/src/core/CL/cl_kernels/strided_slice.cl +++ b/src/core/CL/cl_kernels/slice_ops.cl @@ -61,25 +61,47 @@ __kernel void strided_slice( Tensor4D input = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(input, SRC_DEPTH); Tensor4D output = CONVERT_TO_TENSOR4D_STRUCT(output, DST_DEPTH); - int offset_0 = 0; - int offset_1 = 0; - int offset_2 = 0; - int offset_3 = 0; + int offset = 0; - // Calculate offset -#if defined(START_0) && defined(STRIDE_0) - offset_0 = (int)START_0 + (int)get_global_id(0) * (int)STRIDE_0; + // Offset X +#if defined(START_0) && defined(STRIDE_0) && defined(VEC_SIZE) && defined(LAST_ACCESSED_X) + // Check if access on width gets out of bounds + // If it does shift access vector to access elements within bounds + const int xi = (int)(get_global_id(0) * VEC_SIZE); + offset = (int)START_0 + min(xi, (int)LAST_ACCESSED_X); + input.ptr += offset * input_stride_x; + output.ptr -= max(xi - (int)LAST_ACCESSED_X, 0) * output_stride_x; +#elif defined(START_0) && defined(STRIDE_0) + offset = (int)START_0 + (int)get_global_id(0) * (int)STRIDE_0; + input.ptr += offset * input_stride_x; #endif // defined(START_0) && defined(STRIDE_0) + + // Offset Y #if defined(START_1) && defined(STRIDE_1) - offset_1 = (int)START_1 + (int)get_global_id(1) * (int)STRIDE_1; + offset = (int)START_1 + (int)get_global_id(1) * (int)STRIDE_1; + input.ptr += offset * input_stride_y; #endif // defined(START_1) && defined(STRIDE_1) + + // Offset Z #if defined(START_2) && defined(STRIDE_2) - offset_2 = (int)START_2 + ((int)get_global_id(2) % (int)DST_DEPTH) * (int)STRIDE_2; + offset = (int)START_2 + ((int)get_global_id(2) % (int)DST_DEPTH) * (int)STRIDE_2; + input.ptr += offset * input_stride_z; #endif // defined(START_2) && defined(STRIDE_2) + + // Offset depth #if defined(START_3) && defined(STRIDE_3) - offset_3 = (int)START_3 + ((int)get_global_id(2) / (int)DST_DEPTH) * (int)STRIDE_3; -#endif // defined(START_2) && defined(STRIDE_2) + offset = (int)START_3 + ((int)get_global_id(2) / (int)DST_DEPTH) * (int)STRIDE_3; + input.ptr += offset * input_stride_w; +#endif // defined(START_3) && defined(STRIDE_3) // Store result - *((__global DATA_TYPE *)(output.ptr)) = *((__global DATA_TYPE *)tensor4D_offset(&input, offset_0, offset_1, offset_2, offset_3)); +#if defined(VEC_SIZE) && defined(LAST_ACCESSED_X) + VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) + val = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)(input.ptr)); + + VSTORE(VEC_SIZE) + (val, 0, (__global DATA_TYPE *)(output.ptr)); +#else // defined(VEC_SIZE) && defined(LAST_ACCESSED_X) + *((__global DATA_TYPE *)(output.ptr)) = *((__global DATA_TYPE *)(input.ptr)); +#endif // defined(VEC_SIZE) && defined(LAST_ACCESSED_X) } |