about | summary | refs | log | tree | commit | diff
path: root/src/core/CL/cl_kernels/copy_tensor.cl
diff options
context:
space:
mode:
author George Wort <george.wort@arm.com> 2019-02-15 15:12:52 +0000
committer Manuel Bottini <manuel.bottini@arm.com> 2019-03-13 13:54:10 +0000
commit 894066de8cc26d1a3aca62dcaa6b30a2a1116028 (patch)
tree 9dcb227018ea69fcfb83f7b25be2009fdd16e18e /src/core/CL/cl_kernels/copy_tensor.cl
parent adfb2737046028c042f0aecaff87733a442da29f (diff)
download ComputeLibrary-894066de8cc26d1a3aca62dcaa6b30a2a1116028.tar.gz
COMPMID-1844: Implement CLCrop
Change-Id: I8822c37adc45960705dc3f32a53214795ba3cf39
Signed-off-by: George Wort <george.wort@arm.com>
Reviewed-on: https://review.mlplatform.org/c/789
Reviewed-by: Manuel Bottini <manuel.bottini@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Pablo Marquez <pablo.tello@arm.com>
Diffstat (limited to 'src/core/CL/cl_kernels/copy_tensor.cl')
-rw-r--r-- src/core/CL/cl_kernels/copy_tensor.cl 17
1 files changed, 16 insertions, 1 deletions
diff --git a/src/core/CL/cl_kernels/copy_tensor.cl b/src/core/CL/cl_kernels/copy_tensor.cl
index 4bbbf11bea..f4366b889a 100644
--- a/src/core/CL/cl_kernels/copy_tensor.cl
+++ b/src/core/CL/cl_kernels/copy_tensor.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -77,6 +77,7 @@ __kernel void copy_pad_tensor(
}
#endif // Compile time constants
+#if defined(DATA_TYPE)
/** Performs a copy of input tensor to the output tensor.
*
* @param[in] in_ptr Pointer to the source tensor. Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
@@ -103,6 +104,16 @@ __kernel void copy_tensor(
Tensor3D in = CONVERT_TO_TENSOR3D_STRUCT(in);
Tensor3D out = CONVERT_TO_TENSOR3D_STRUCT(out);
+#if defined(VEC_SIZE)
+
+#if defined(LAST_ACCESSED_X)
+ // Check if access on width gets out of bounds
+ // If it does then shift access vector to access elements within bounds
+ const int shift = max((int)(get_global_id(0) * VEC_SIZE) - (int)LAST_ACCESSED_X, 0);
+ in.ptr -= shift * in.stride_x;
+ out.ptr -= shift * out.stride_x;
+#endif // defined(LAST_ACCESSED_X)
+
// Load data
VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
data = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)in.ptr);
@@ -110,4 +121,8 @@ __kernel void copy_tensor(
// Store result
VSTORE(VEC_SIZE)
(data, 0, (__global DATA_TYPE *)out.ptr);
+#else // defined(VEC_SIZE)
+ *((__global DATA_TYPE *)(out.ptr)) = *((__global DATA_TYPE *)(in.ptr));
+#endif // defined(VEC_SIZE)
}
+#endif // defined(DATA_TYPE)
\ No newline at end of file