diff options
Diffstat (limited to 'src/core/CL/cl_kernels/copy_tensor.cl')
-rw-r--r-- | src/core/CL/cl_kernels/copy_tensor.cl | 17 |
1 files changed, 16 insertions, 1 deletions
diff --git a/src/core/CL/cl_kernels/copy_tensor.cl b/src/core/CL/cl_kernels/copy_tensor.cl
index 4bbbf11bea..f4366b889a 100644
--- a/src/core/CL/cl_kernels/copy_tensor.cl
+++ b/src/core/CL/cl_kernels/copy_tensor.cl
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -77,6 +77,7 @@ __kernel void copy_pad_tensor(
 }
 #endif // Compile time constants
 
+#if defined(DATA_TYPE)
 /** Performs a copy of input tensor to the output tensor.
  *
  * @param[in] in_ptr Pointer to the source tensor. Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
@@ -103,6 +104,16 @@ __kernel void copy_tensor(
     Tensor3D in = CONVERT_TO_TENSOR3D_STRUCT(in);
     Tensor3D out = CONVERT_TO_TENSOR3D_STRUCT(out);
 
+#if defined(VEC_SIZE)
+
+#if defined(LAST_ACCESSED_X)
+    // Check if access on width gets out of bounds
+    // If it does then shift access vector to access elements within bounds
+    const int shift = max((int)(get_global_id(0) * VEC_SIZE) - (int)LAST_ACCESSED_X, 0);
+    in.ptr -= shift * in.stride_x;
+    out.ptr -= shift * out.stride_x;
+#endif // defined(LAST_ACCESSED_X)
+
     // Load data
     VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
     data = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)in.ptr);
@@ -110,4 +121,8 @@ __kernel void copy_tensor(
     // Store result
     VSTORE(VEC_SIZE)
     (data, 0, (__global DATA_TYPE *)out.ptr);
+#else // defined(VEC_SIZE)
+    *((__global DATA_TYPE *)(out.ptr)) = *((__global DATA_TYPE *)(in.ptr));
+#endif // defined(VEC_SIZE)
 }
+#endif // defined(DATA_TYPE)
\ No newline at end of file |