diff options
Diffstat (limited to 'src/core/CL/cl_kernels')
-rw-r--r-- | src/core/CL/cl_kernels/concatenate.cl | 37 |
1 files changed, 36 insertions, 1 deletions
diff --git a/src/core/CL/cl_kernels/concatenate.cl b/src/core/CL/cl_kernels/concatenate.cl index a92ab5bdad..f97ae13a9a 100644 --- a/src/core/CL/cl_kernels/concatenate.cl +++ b/src/core/CL/cl_kernels/concatenate.cl @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -23,6 +23,41 @@ */ #include "helpers.h" +/** This kernel concatenates the input tensor into the output tensor along the first dimension + * + * @param[in] src_ptr Pointer to the source tensor. Supported data types: QS8, QASYMM8, QS16, F16, F32 + * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor + * @param[out] dst_ptr Pointer to the destination tensor. Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor + * @param[in] offset The offset to the first valid element of the output tensor in bytes + */ +__kernel void concatenate_width( + TENSOR3D_DECLARATION(src), + TENSOR3D_DECLARATION(dst), + int offset) +{ + Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(src); + Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst); + + VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) + source_values = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)src.ptr); + + VSTORE(VEC_SIZE) + (source_values, 0, (__global DATA_TYPE *)(dst.ptr + offset)); +} + /** This kernel concatenates the input tensor into the output tensor along the third dimension * * @param[in] src_ptr Pointer to the source tensor. Supported data types: QS8, QS16, F16, F32 |