diff options
author | Michalis Spyrou <michalis.spyrou@arm.com> | 2018-05-09 09:59:23 +0100 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-11-02 16:52:35 +0000 |
commit | 55b3d1216b4011d86d5f06335e518dc924987ae5 (patch) | |
tree | a94ca420f4385ff301a770fbeb5b4f930ac60105 /src/core/CL/cl_kernels | |
parent | a8aef2916379402e241d9f2c5e0faf3f99c860f7 (diff) | |
download | ComputeLibrary-55b3d1216b4011d86d5f06335e518dc924987ae5.tar.gz |
COMPMID-1137 OpenCL concatenate width
Change-Id: I40faba421281b1cf080fa6a825d04a4366cdaeb0
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/130700
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Diffstat (limited to 'src/core/CL/cl_kernels')
-rw-r--r-- | src/core/CL/cl_kernels/concatenate.cl | 37 |
1 files changed, 36 insertions, 1 deletions
diff --git a/src/core/CL/cl_kernels/concatenate.cl b/src/core/CL/cl_kernels/concatenate.cl index a92ab5bdad..f97ae13a9a 100644 --- a/src/core/CL/cl_kernels/concatenate.cl +++ b/src/core/CL/cl_kernels/concatenate.cl @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -23,6 +23,41 @@ */ #include "helpers.h" +/** This kernel concatenates the input tensor into the output tensor along the first dimension + * + * @param[in] src_ptr Pointer to the source tensor. Supported data types: QS8, QASYMM8, QS16, F16, F32 + * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor + * @param[out] dst_ptr Pointer to the destination tensor. 
Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes) + * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor + * @param[in] offset The offset to the first valid element of the output tensor in bytes + */ +__kernel void concatenate_width( + TENSOR3D_DECLARATION(src), + TENSOR3D_DECLARATION(dst), + int offset) +{ + Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(src); + Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst); + + VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) + source_values = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)src.ptr); + + VSTORE(VEC_SIZE) + (source_values, 0, (__global DATA_TYPE *)(dst.ptr + offset)); +} + /** This kernel concatenates the input tensor into the output tensor along the third dimension * * @param[in] src_ptr Pointer to the source tensor. Supported data types: QS8, QS16, F16, F32 |