author    Michele Di Giorgio <michele.digiorgio@arm.com>  2018-10-19 15:46:19 +0100
committer Anthony Barbier <anthony.barbier@arm.com>       2018-11-02 16:55:45 +0000
commit    e6dbde0128bf33b5d72a00c480bd92c290fd17b7 (patch)
tree      1e5f501a536b7b991ff54430f377b37e37d6d929
parent    bcf8a968da4b26926df8bb770df16d82146bcb54 (diff)
COMPMID-1667: Add 4D tensors support to CLWidthConcatenateLayerKernel
Change-Id: Ibc0b1242804c2fdb183825406e3c78bd0d1d3564
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/154368
Reviewed-by: Pablo Tello <pablo.tello@arm.com>
Tested-by: bsgcomp <bsgcomp@arm.com>
-rw-r--r--  src/core/CL/cl_kernels/concatenate.cl                   26
-rw-r--r--  src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp   21
-rw-r--r--  tests/datasets/ShapeDatasets.h                            3
-rw-r--r--  tests/validation/CL/WidthConcatenateLayer.cpp            15
-rw-r--r--  tests/validation/reference/WidthConcatenateLayer.cpp     20
5 files changed, 50 insertions, 35 deletions
diff --git a/src/core/CL/cl_kernels/concatenate.cl b/src/core/CL/cl_kernels/concatenate.cl
index 16c4363899..a232a94dfc 100644
--- a/src/core/CL/cl_kernels/concatenate.cl
+++ b/src/core/CL/cl_kernels/concatenate.cl
@@ -23,12 +23,15 @@
*/
#include "helpers.h"
-#if defined(DATA_TYPE)
-#if defined(WIDTH_OFFSET)
+#if defined(DATA_TYPE) && defined(VEC_SIZE)
+
+#if defined(WIDTH_OFFSET) && defined(DEPTH)
/** This kernel concatenates the input tensor into the output tensor along the first dimension
*
* @note The data type has to be passed at compile time using -DDATA_TYPE. i.e. -DDATA_TYPE=float
+ * @note Vector size has to be passed at compile time using -DVEC_SIZE. i.e. -DVEC_SIZE=16
* @note The offset for the first spatial dimension has to be passed at compile time using -DWIDTH_OFFSET. i.e. -DWIDTH_OFFSET=128
+ * @note Tensor depth should be given as a preprocessor argument using -DDEPTH=size. e.g. -DDEPTH=16
*
* @param[in] src_ptr Pointer to the source tensor. Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/F32
* @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
@@ -37,6 +40,8 @@
* @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
* @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes)
* @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes)
+ * @param[in] src_stride_w Stride of the source tensor in W dimension (in bytes)
+ * @param[in] src_step_w src_stride_w * number of elements along W processed per workitem(in bytes)
* @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor
* @param[out] dst_ptr Pointer to the destination tensor. Supported data types: same as @p src_ptr
* @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes)
@@ -45,15 +50,17 @@
* @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)
* @param[in] dst_stride_z Stride of the source tensor in Z dimension (in bytes)
* @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes)
+ * @param[in] dst_stride_w Stride of the destination tensor in W dimension (in bytes)
+ * @param[in] dst_step_w dst_stride_w * number of elements along W processed per workitem(in bytes)
* @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor
* @param[in] offset The offset to the first valid element of the output tensor in bytes
*/
__kernel void concatenate_width(
- TENSOR3D_DECLARATION(src),
- TENSOR3D_DECLARATION(dst))
+ TENSOR4D_DECLARATION(src),
+ TENSOR4D_DECLARATION(dst))
{
- Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(src);
- Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst);
+ Tensor4D src = CONVERT_TO_TENSOR4D_STRUCT(src, DEPTH);
+ Tensor4D dst = CONVERT_TO_TENSOR4D_STRUCT(dst, DEPTH);
VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
source_values = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)src.ptr);
@@ -61,10 +68,13 @@ __kernel void concatenate_width(
VSTORE(VEC_SIZE)
(source_values, 0, (__global DATA_TYPE *)(dst.ptr) + WIDTH_OFFSET);
}
-#endif // defined(WIDTH_OFFSET)
+#endif /* defined(WIDTH_OFFSET) && defined(DEPTH) */
/** This kernel concatenates the input tensor into the output tensor along the third dimension
*
+ * @note The data type has to be passed at compile time using -DDATA_TYPE. i.e. -DDATA_TYPE=float
+ * @note Vector size has to be passed at compile time using -DVEC_SIZE. i.e. -DVEC_SIZE=16
+ *
* @param[in] src_ptr Pointer to the source tensor. Supported data types: F16, F32
* @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
@@ -97,4 +107,4 @@ __kernel void concatenate_depth(
VSTORE(VEC_SIZE)
(source_values, 0, (__global DATA_TYPE *)(dst.ptr + offsets.z));
}
-#endif // defined(DATA_TYPE)
\ No newline at end of file
+#endif /* defined(DATA_TYPE) && defined(VEC_SIZE) */
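The new DEPTH define exists so the kernel can undo the window collapse performed on the host side (shown in the next file): the third global id now spans depth x batches, and the kernel splits it back into a plane index and a batch index before computing the byte offset. Below is a minimal C++ sketch of that address computation; the helper and its names are illustrative assumptions, not the library's CONVERT_TO_TENSOR4D_STRUCT macro itself.

// Illustrative sketch (assumed semantics, not library code): how a work item of
// concatenate_width can locate its element once Z and W share get_global_id(2).
#include <cstddef>
#include <cstdint>

struct Steps4D
{
    std::size_t x, y, z, w; // per-work-item steps in bytes, as passed via TENSOR4D_DECLARATION
};

// gid0/gid1/gid2 stand in for get_global_id(0/1/2); depth is the -DDEPTH value.
inline const std::uint8_t *element_ptr(const std::uint8_t *base,
                                       std::size_t offset_first_element_in_bytes,
                                       const Steps4D &step,
                                       std::size_t gid0, std::size_t gid1, std::size_t gid2,
                                       std::size_t depth)
{
    const std::size_t z = gid2 % depth; // plane index inside one batch
    const std::size_t w = gid2 / depth; // batch index
    return base + offset_first_element_in_bytes
           + gid0 * step.x + gid1 * step.y + z * step.z + w * step.w;
}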
diff --git a/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp
index e5ab8d2304..c51c5796d1 100644
--- a/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp
+++ b/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp
@@ -53,8 +53,10 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, unsi
AccessWindowHorizontal output_access(output, width_offset, num_elems_processed_per_iteration);
bool window_changed = update_window_and_padding(win, input_access, output_access);
+ Window win_collapsed = win.collapse(win, Window::DimZ);
+
Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
- return std::make_pair(err, win);
+ return std::make_pair(err, win_collapsed);
}
Status validate_arguments(const ITensorInfo *input, unsigned int width_offset, const ITensorInfo *output)
{
@@ -69,7 +71,7 @@ Status validate_arguments(const ITensorInfo *input, unsigned int width_offset, c
{
ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(i) != output->dimension(i));
}
- ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 3);
+ ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 4);
return Status{};
}
@@ -103,6 +105,7 @@ void CLWidthConcatenateLayerKernel::configure(const ICLTensor *input, unsigned i
build_opts.add_option("-DDATA_TYPE=" + get_underlying_cl_type_from_data_type(input->info()->data_type()));
build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration));
build_opts.add_option("-DWIDTH_OFFSET=" + support::cpp11::to_string(_width_offset));
+ build_opts.add_option("-DDEPTH=" + support::cpp11::to_string(input->info()->dimension(2)));
// Create kernel
_kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("concatenate_width", build_opts.options()));
@@ -119,14 +122,8 @@ void CLWidthConcatenateLayerKernel::run(const Window &window, cl::CommandQueue &
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
- Window slice = window.first_slice_window_3D();
-
- do
- {
- unsigned int idx = 0;
- add_3D_tensor_argument(idx, _input, slice);
- add_3D_tensor_argument(idx, _output, slice);
- enqueue(queue, *this, slice);
- }
- while(window.slide_window_slice_3D(slice));
+ unsigned int idx = 0;
+ add_4D_tensor_argument(idx, _input, window);
+ add_4D_tensor_argument(idx, _output, window);
+ enqueue(queue, *this, window);
}
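On the host side, the per-slice loop in run() is replaced by a single enqueue over a window collapsed from DimZ onwards, so the third launch dimension covers depth x batches and a single add_4D_tensor_argument/enqueue call handles every batch. A hedged sketch of that idea follows; the real code simply calls win.collapse(win, Window::DimZ), and the Range type and helper below are illustrative only.

// Illustrative sketch only: merging the Z and W iteration ranges into one
// launch dimension, which the kernel later splits again using -DDEPTH.
#include <cstddef>

struct Range
{
    std::size_t start, end, step;
};

inline Range collapse_z_and_w(const Range &z, const Range &w)
{
    const std::size_t depth   = (z.end - z.start) / z.step;
    const std::size_t batches = (w.end - w.start) / w.step;
    return Range{ 0, depth * batches, 1 }; // one work item per (z, w) pair along dimension 2
}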
diff --git a/tests/datasets/ShapeDatasets.h b/tests/datasets/ShapeDatasets.h
index 483fada5d8..ffacf34620 100644
--- a/tests/datasets/ShapeDatasets.h
+++ b/tests/datasets/ShapeDatasets.h
@@ -730,7 +730,8 @@ public:
{
TensorShape{ 232U, 65U, 3U },
TensorShape{ 432U, 65U, 3U },
- TensorShape{ 124U, 65U, 3U }
+ TensorShape{ 124U, 65U, 3U },
+ TensorShape{ 124U, 65U, 3U, 4U }
})
{
}
diff --git a/tests/validation/CL/WidthConcatenateLayer.cpp b/tests/validation/CL/WidthConcatenateLayer.cpp
index 6af3c64f73..6ff1dfca54 100644
--- a/tests/validation/CL/WidthConcatenateLayer.cpp
+++ b/tests/validation/CL/WidthConcatenateLayer.cpp
@@ -103,14 +103,16 @@ using CLWidthConcatenateLayerFixture = WidthConcatenateLayerValidationFixture<CL
TEST_SUITE(Float)
TEST_SUITE(FP16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLWidthConcatenateLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(datasets::Small2DShapes(), framework::dataset::make("DataType",
- DataType::F16)))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLWidthConcatenateLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(concat(datasets::Small2DShapes(), datasets::Tiny4DShapes()),
+ framework::dataset::make("DataType",
+ DataType::F16)))
{
// Validate output
validate(CLAccessor(_target), _reference);
}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLWidthConcatenateLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), framework::dataset::make("DataType",
- DataType::F16)))
+FIXTURE_DATA_TEST_CASE(RunLarge, CLWidthConcatenateLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(concat(datasets::Large2DShapes(), datasets::Small4DShapes()),
+ framework::dataset::make("DataType",
+ DataType::F16)))
{
// Validate output
validate(CLAccessor(_target), _reference);
@@ -118,8 +120,9 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLWidthConcatenateLayerFixture<half>, framework
TEST_SUITE_END()
TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLWidthConcatenateLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(datasets::Small2DShapes(), framework::dataset::make("DataType",
- DataType::F32)))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLWidthConcatenateLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(concat(datasets::Small2DShapes(), datasets::Tiny4DShapes()),
+ framework::dataset::make("DataType",
+ DataType::F32)))
{
// Validate output
validate(CLAccessor(_target), _reference);
diff --git a/tests/validation/reference/WidthConcatenateLayer.cpp b/tests/validation/reference/WidthConcatenateLayer.cpp
index 8662199306..6be171b64d 100644
--- a/tests/validation/reference/WidthConcatenateLayer.cpp
+++ b/tests/validation/reference/WidthConcatenateLayer.cpp
@@ -59,20 +59,24 @@ SimpleTensor<T> widthconcatenate_layer(const std::vector<SimpleTensor<T>> &srcs)
{
ARM_COMPUTE_ERROR_ON(width_offset >= width_out);
- const int width = src.shape().x();
- const int height = src.shape().y();
- const int depth = src.shape().z();
+ const int width = src.shape().x();
+ const int height = src.shape().y();
+ const int depth = src.shape().z();
+ const int upper_dims = src.shape().total_size() / (width * height * depth);
const T *src_ptr = src.data();
T *dst_ptr = dst.data();
- for(int d = 0; d < depth; ++d)
+ for(int u = 0; u < upper_dims; ++u)
{
- for(int r = 0; r < height; ++r)
+ for(int d = 0; d < depth; ++d)
{
- int offset = d * height + r;
- std::copy(src_ptr, src_ptr + width, dst_ptr + width_offset + offset * width_out);
- src_ptr += width;
+ for(int r = 0; r < height; ++r)
+ {
+ const int offset = u * height * depth + d * height + r;
+ std::copy(src_ptr, src_ptr + width, dst_ptr + width_offset + offset * width_out);
+ src_ptr += width;
+ }
}
}
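The reference implementation now adds an upper_dims loop over everything above the third dimension, so the destination row index becomes u * height * depth + d * height + r, and each source row of width elements is copied into the output starting at column width_offset. A small worked example of that indexing, with arbitrarily chosen sizes (not taken from the test datasets):

// Worked example of the reference indexing above, with made-up sizes:
// src shape = (width 4, height 2, depth 3, batches 2), width_out = 10, width_offset = 6.
#include <cassert>

int main()
{
    const int height = 2, depth = 3, batches = 2;
    const int width_out = 10, width_offset = 6;

    // Destination element index of the first element of the last source row:
    const int u = batches - 1, d = depth - 1, r = height - 1;
    const int offset = u * height * depth + d * height + r; // 1*6 + 2*2 + 1 = 11
    assert(offset * width_out + width_offset == 116);       // row 11 of dst, starting at column 6
    return 0;
}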