diff options
author | Gian Marco Iodice <gianmarco.iodice@arm.com> | 2018-08-10 09:34:11 +0100 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-11-02 16:54:54 +0000 |
commit | 1d1f32ce7ef6acea4afd4cf6a929436640b72ccd (patch) | |
tree | 16e6d0902b0663464f382bac86b0220f0f7c2a97 | |
parent | 9f5a64379ff30a39718ea2bd7e64c6473da866ce (diff) | |
download | ComputeLibrary-1d1f32ce7ef6acea4afd4cf6a929436640b72ccd.tar.gz |
COMPMID-1188 - Passed WIDTH_OFFSET at compile time in CLWidthConcatenateLayerKernel
Change-Id: Icab813cd432174608621ee6a87015aeb10ab822d
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/143570
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
-rw-r--r-- | src/core/CL/cl_kernels/concatenate.cl | 14 | ||||
-rw-r--r-- | src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp | 6 |
2 files changed, 11 insertions, 9 deletions
diff --git a/src/core/CL/cl_kernels/concatenate.cl b/src/core/CL/cl_kernels/concatenate.cl index 6ec8383c52..16c4363899 100644 --- a/src/core/CL/cl_kernels/concatenate.cl +++ b/src/core/CL/cl_kernels/concatenate.cl @@ -23,9 +23,14 @@ */ #include "helpers.h" +#if defined(DATA_TYPE) +#if defined(WIDTH_OFFSET) /** This kernel concatenates the input tensor into the output tensor along the first dimension * - * @param[in] src_ptr Pointer to the source tensor. Supported data types: QASYMM8, F16, F32 + * @note The data type has to be passed at compile time using -DDATA_TYPE. i.e. -DDATA_TYPE=float + * @note The offset for the first spatial dimension has to be passed at compile time using -DWIDTH_OFFSET. i.e. -DWIDTH_OFFSET=128 + * + * @param[in] src_ptr Pointer to the source tensor. Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/F32 * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes) @@ -45,8 +50,7 @@ */ __kernel void concatenate_width( TENSOR3D_DECLARATION(src), - TENSOR3D_DECLARATION(dst), - int offset) + TENSOR3D_DECLARATION(dst)) { Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(src); Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst); @@ -55,8 +59,9 @@ __kernel void concatenate_width( source_values = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)src.ptr); VSTORE(VEC_SIZE) - (source_values, 0, (__global DATA_TYPE *)(dst.ptr + offset)); + (source_values, 0, (__global DATA_TYPE *)(dst.ptr) + WIDTH_OFFSET); } +#endif // defined(WIDTH_OFFSET) /** This kernel concatenates the input tensor into the output tensor along the third dimension * @@ -92,3 +97,4 @@ __kernel void concatenate_depth( VSTORE(VEC_SIZE) (source_values, 0, (__global DATA_TYPE *)(dst.ptr + offsets.z)); } +#endif // defined(DATA_TYPE)
\ No newline at end of file diff --git a/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp index 587ba690c2..e2ca05a72a 100644 --- a/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp +++ b/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp @@ -102,15 +102,11 @@ void CLWidthConcatenateLayerKernel::configure(const ICLTensor *input, unsigned i CLBuildOptions build_opts; build_opts.add_option("-DDATA_TYPE=" + get_underlying_cl_type_from_data_type(input->info()->data_type())); build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)); + build_opts.add_option("-DWIDTH_OFFSET=" + support::cpp11::to_string(_width_offset)); // Create kernel _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("concatenate_width", build_opts.options())); - const int offset_to_first_elements_in_bytes = _width_offset * _output->info()->strides_in_bytes()[0]; - - unsigned int idx = 2 * num_arguments_per_3D_tensor(); // Skip the input and output parameters - _kernel.setArg<cl_int>(idx, offset_to_first_elements_in_bytes); - // Configure kernel window auto win_config = validate_and_configure_window(input->info(), width_offset, output->info()); ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config)); |