aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/core/CL/cl_kernels/concatenate.cl14
-rw-r--r--src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp6
2 files changed, 11 insertions, 9 deletions
diff --git a/src/core/CL/cl_kernels/concatenate.cl b/src/core/CL/cl_kernels/concatenate.cl
index 6ec8383c52..16c4363899 100644
--- a/src/core/CL/cl_kernels/concatenate.cl
+++ b/src/core/CL/cl_kernels/concatenate.cl
@@ -23,9 +23,14 @@
*/
#include "helpers.h"
+#if defined(DATA_TYPE)
+#if defined(WIDTH_OFFSET)
/** This kernel concatenates the input tensor into the output tensor along the first dimension
*
- * @param[in] src_ptr Pointer to the source tensor. Supported data types: QASYMM8, F16, F32
+ * @note The data type has to be passed at compile time using -DDATA_TYPE. i.e. -DDATA_TYPE=float
+ * @note The offset for the first spatial dimension has to be passed at compile time using -DWIDTH_OFFSET. i.e. -DWIDTH_OFFSET=128
+ *
+ * @param[in] src_ptr Pointer to the source tensor. Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/F32
* @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
@@ -45,8 +50,7 @@
*/
__kernel void concatenate_width(
TENSOR3D_DECLARATION(src),
- TENSOR3D_DECLARATION(dst),
- int offset)
+ TENSOR3D_DECLARATION(dst))
{
Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(src);
Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst);
@@ -55,8 +59,9 @@ __kernel void concatenate_width(
source_values = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)src.ptr);
VSTORE(VEC_SIZE)
- (source_values, 0, (__global DATA_TYPE *)(dst.ptr + offset));
+ (source_values, 0, (__global DATA_TYPE *)(dst.ptr) + WIDTH_OFFSET);
}
+#endif // defined(WIDTH_OFFSET)
/** This kernel concatenates the input tensor into the output tensor along the third dimension
*
@@ -92,3 +97,4 @@ __kernel void concatenate_depth(
VSTORE(VEC_SIZE)
(source_values, 0, (__global DATA_TYPE *)(dst.ptr + offsets.z));
}
+#endif // defined(DATA_TYPE) \ No newline at end of file
diff --git a/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp
index 587ba690c2..e2ca05a72a 100644
--- a/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp
+++ b/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp
@@ -102,15 +102,11 @@ void CLWidthConcatenateLayerKernel::configure(const ICLTensor *input, unsigned i
CLBuildOptions build_opts;
build_opts.add_option("-DDATA_TYPE=" + get_underlying_cl_type_from_data_type(input->info()->data_type()));
build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration));
+ build_opts.add_option("-DWIDTH_OFFSET=" + support::cpp11::to_string(_width_offset));
// Create kernel
_kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("concatenate_width", build_opts.options()));
- const int offset_to_first_elements_in_bytes = _width_offset * _output->info()->strides_in_bytes()[0];
-
- unsigned int idx = 2 * num_arguments_per_3D_tensor(); // Skip the input and output parameters
- _kernel.setArg<cl_int>(idx, offset_to_first_elements_in_bytes);
-
// Configure kernel window
auto win_config = validate_and_configure_window(input->info(), width_offset, output->info());
ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config));