about summary refs log tree commit diff
path: root/src/core/CL/cl_kernels/copy_tensor.cl
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/CL/cl_kernels/copy_tensor.cl')
-rw-r--r-- src/core/CL/cl_kernels/copy_tensor.cl 17
1 files changed, 16 insertions, 1 deletions
diff --git a/src/core/CL/cl_kernels/copy_tensor.cl b/src/core/CL/cl_kernels/copy_tensor.cl
index 4bbbf11bea..f4366b889a 100644
--- a/src/core/CL/cl_kernels/copy_tensor.cl
+++ b/src/core/CL/cl_kernels/copy_tensor.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -77,6 +77,7 @@ __kernel void copy_pad_tensor(
}
#endif // Compile time constants
+#if defined(DATA_TYPE)
/** Performs a copy of input tensor to the output tensor.
*
* @param[in] in_ptr Pointer to the source tensor. Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
@@ -103,6 +104,16 @@ __kernel void copy_tensor(
Tensor3D in = CONVERT_TO_TENSOR3D_STRUCT(in);
Tensor3D out = CONVERT_TO_TENSOR3D_STRUCT(out);
+#if defined(VEC_SIZE)
+
+#if defined(LAST_ACCESSED_X)
+ // Check whether this work-item's vector access along the width would go out of bounds.
+ // If it would, shift the input and output pointers back so that all accessed elements stay within bounds.
+ const int shift = max((int)(get_global_id(0) * VEC_SIZE) - (int)LAST_ACCESSED_X, 0);
+ in.ptr -= shift * in.stride_x;
+ out.ptr -= shift * out.stride_x;
+#endif // defined(LAST_ACCESSED_X)
+
// Load data
VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
data = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)in.ptr);
@@ -110,4 +121,8 @@ __kernel void copy_tensor(
// Store result
VSTORE(VEC_SIZE)
(data, 0, (__global DATA_TYPE *)out.ptr);
+#else // defined(VEC_SIZE)
+ *((__global DATA_TYPE *)(out.ptr)) = *((__global DATA_TYPE *)(in.ptr));
+#endif // defined(VEC_SIZE)
}
+#endif // defined(DATA_TYPE)
\ No newline at end of file