aboutsummaryrefslogtreecommitdiff
path: root/src/core/CL/cl_kernels
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/CL/cl_kernels')
-rw-r--r--src/core/CL/cl_kernels/slice_ops.cl (renamed from src/core/CL/cl_kernels/strided_slice.cl)46
1 files changed, 34 insertions, 12 deletions
diff --git a/src/core/CL/cl_kernels/strided_slice.cl b/src/core/CL/cl_kernels/slice_ops.cl
index 7c68fb9a07..bc3df47345 100644
--- a/src/core/CL/cl_kernels/strided_slice.cl
+++ b/src/core/CL/cl_kernels/slice_ops.cl
@@ -61,25 +61,47 @@ __kernel void strided_slice(
Tensor4D input = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(input, SRC_DEPTH);
Tensor4D output = CONVERT_TO_TENSOR4D_STRUCT(output, DST_DEPTH);
- int offset_0 = 0;
- int offset_1 = 0;
- int offset_2 = 0;
- int offset_3 = 0;
+ int offset = 0;
- // Calculate offset
-#if defined(START_0) && defined(STRIDE_0)
- offset_0 = (int)START_0 + (int)get_global_id(0) * (int)STRIDE_0;
+ // Offset X
+#if defined(START_0) && defined(STRIDE_0) && defined(VEC_SIZE) && defined(LAST_ACCESSED_X)
+ // Check if access on width gets out of bounds
+ // If it does shift access vector to access elements within bounds
+ const int xi = (int)(get_global_id(0) * VEC_SIZE);
+ offset = (int)START_0 + min(xi, (int)LAST_ACCESSED_X);
+ input.ptr += offset * input_stride_x;
+ output.ptr -= max(xi - (int)LAST_ACCESSED_X, 0) * output_stride_x;
+#elif defined(START_0) && defined(STRIDE_0)
+ offset = (int)START_0 + (int)get_global_id(0) * (int)STRIDE_0;
+ input.ptr += offset * input_stride_x;
#endif // defined(START_0) && defined(STRIDE_0)
+
+ // Offset Y
#if defined(START_1) && defined(STRIDE_1)
- offset_1 = (int)START_1 + (int)get_global_id(1) * (int)STRIDE_1;
+ offset = (int)START_1 + (int)get_global_id(1) * (int)STRIDE_1;
+ input.ptr += offset * input_stride_y;
#endif // defined(START_1) && defined(STRIDE_1)
+
+ // Offset Z
#if defined(START_2) && defined(STRIDE_2)
- offset_2 = (int)START_2 + ((int)get_global_id(2) % (int)DST_DEPTH) * (int)STRIDE_2;
+ offset = (int)START_2 + ((int)get_global_id(2) % (int)DST_DEPTH) * (int)STRIDE_2;
+ input.ptr += offset * input_stride_z;
#endif // defined(START_2) && defined(STRIDE_2)
+
+ // Offset depth
#if defined(START_3) && defined(STRIDE_3)
- offset_3 = (int)START_3 + ((int)get_global_id(2) / (int)DST_DEPTH) * (int)STRIDE_3;
-#endif // defined(START_2) && defined(STRIDE_2)
+ offset = (int)START_3 + ((int)get_global_id(2) / (int)DST_DEPTH) * (int)STRIDE_3;
+ input.ptr += offset * input_stride_w;
+#endif // defined(START_3) && defined(STRIDE_3)
// Store result
- *((__global DATA_TYPE *)(output.ptr)) = *((__global DATA_TYPE *)tensor4D_offset(&input, offset_0, offset_1, offset_2, offset_3));
+#if defined(VEC_SIZE) && defined(LAST_ACCESSED_X)
+ VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
+ val = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)(input.ptr));
+
+ VSTORE(VEC_SIZE)
+ (val, 0, (__global DATA_TYPE *)(output.ptr));
+#else // defined(VEC_SIZE) && defined(LAST_ACCESSED_X)
+ *((__global DATA_TYPE *)(output.ptr)) = *((__global DATA_TYPE *)(input.ptr));
+#endif // defined(VEC_SIZE) && defined(LAST_ACCESSED_X)
}