From c2a60593436387d20ff142a619f4c3955a5cd41b Mon Sep 17 00:00:00 2001 From: Matthew Jackson Date: Fri, 30 Aug 2019 15:19:42 +0100 Subject: COMPMID-2639: CLPadLayer support for 4D padding Add support for 4D padding to CLPadLayerKernel. Add validation tests with 4D padding. Change-Id: I5579cc441a155c03fa1d14c6e77ba8ec693a806d Signed-off-by: Matthew Jackson Reviewed-on: https://review.mlplatform.org/c/1847 Comments-Addressed: Arm Jenkins Reviewed-by: Giorgio Arena Tested-by: Arm Jenkins --- src/core/CL/cl_kernels/pad_layer.cl | 32 ++++++++++++++------ src/core/CL/kernels/CLPadLayerKernel.cpp | 8 +++++ src/runtime/CL/functions/CLPadLayer.cpp | 2 +- tests/validation/CL/PadLayer.cpp | 52 ++++++++++++++++++++++++-------- 4 files changed, 71 insertions(+), 23 deletions(-) diff --git a/src/core/CL/cl_kernels/pad_layer.cl b/src/core/CL/cl_kernels/pad_layer.cl index ace2f0d3a0..fac97d25d9 100644 --- a/src/core/CL/cl_kernels/pad_layer.cl +++ b/src/core/CL/cl_kernels/pad_layer.cl @@ -53,12 +53,15 @@ * @note Data type to use for the select instruction must be passed using the -DSELECT_DT compile flag, e.g. -DSELECT_DT=float * @note In case pad left is more than the vector size, the number of threads to skip along the X axis must be passed using the * -DTHREADS_TO_SKIP_X compile flag, e.g. -DTHREADS_TO_SKIP_X=1. This is defined as (PAD_LEFT / VEC_SIZE) - * @note In pad also needs to be added to the top of the tensor, the following compile flags must be passed at compile time: + * @note If pad also needs to be added to the top of the tensor, the following compile flags must be passed at compile time: * -# -DPAD_TOP: Pad to add to the top of the input tensor (e.g. -DPAD_TOP=3) * -# -DSRC_HEIGHT: Input tensor's height (e.g. 
-DSRC_HEIGHT=127) - * @note In pad also needs to be added to the depth of the tensor, the following compile flags must be passed at compile time: + * @note If pad also needs to be added to the depth of the tensor, the following compile flags must be passed at compile time: * -# -DPAD_NEAR: Pad to add before the first plane of the input tensor (e.g. -DPAD_NEAR=3) * -# -DSRC_DEPTH: Input tensor's depth (e.g. -DSRC_DEPTH=32) + * @note If pad also needs to be added to the batch of the tensor, the following compile flags must be passed at compile time: + * -# -DPAD_BTOP: Pad to add before the first batch of the input tensor (e.g. -DPAD_BTOP=3) + * -# -DSRC_BATCH: Input tensor's batch size (e.g. -DSRC_BATCH=4) * * @param[in] src_ptr Pointer to the source image. Supported data types: U8, S8, QASYMM8, U16, S16, U32, S32, F16, F32 * @param[in] src_stride_x Stride of the source image in X dimension (in bytes) @@ -78,22 +81,30 @@ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image */ __kernel void pad_layer(TENSOR3D_DECLARATION(src), - TENSOR3D_DECLARATION(dst)) + TENSOR3D_DECLARATION(dst), + uint batch) { const int x = get_global_id(0); const int y = get_global_id(1); const int z = get_global_id(2); +#if defined(PAD_BTOP) || defined(PAD_NEAR) + uint cond = 0; +#if defined(PAD_BTOP) + cond |= batch < PAD_BTOP || batch >= (PAD_BTOP + SRC_BATCH); +#endif // defined(PAD_BTOP) + #if defined(PAD_NEAR) - if(z < PAD_NEAR || z >= (PAD_NEAR + SRC_DEPTH)) + cond |= z < PAD_NEAR || z >= (PAD_NEAR + SRC_DEPTH); +#endif // defined(PAD_NEAR) + if(cond) { Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst); - VSTORE(VEC_SIZE) - ((VEC_TYPE)CONST_VAL, 0, (__global DATA_TYPE *)dst.ptr); + VSTORE(VEC_SIZE)((VEC_TYPE)CONST_VAL, 0, (__global DATA_TYPE *)dst.ptr); } else { -#endif // defined(PAD_NEAR) +#endif // defined(PAD_BTOP) || defined(PAD_NEAR) Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(src); Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst); @@ 
-115,6 +126,9 @@ __kernel void pad_layer(TENSOR3D_DECLARATION(src), #if defined(PAD_NEAR) src.ptr -= PAD_NEAR * src_step_z; #endif // defined(PAD_NEAR) +#if defined(PAD_BTOP) + src.ptr -= PAD_BTOP * SRC_DEPTH * src_step_z; +#endif // defined(PAD_BTOP) VEC_TYPE src_vals = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)src.ptr); @@ -125,8 +139,8 @@ __kernel void pad_layer(TENSOR3D_DECLARATION(src), #endif // defined(PAD_TOP) VSTORE(VEC_SIZE) (select(src_vals, (VEC_TYPE)CONST_VAL, CONVERT_SELECT(cond)), 0, (__global DATA_TYPE *)dst.ptr); -#if defined(PAD_NEAR) +#if defined(PAD_NEAR) || defined(PAD_BTOP) } -#endif // defined(PAD_NEAR) +#endif // defined(PAD_NEAR) || defined(PAD_BTOP) } #endif // defined(DATA_TYPE) && defined(VEC_SIZE) && defined(CONST_VAL) diff --git a/src/core/CL/kernels/CLPadLayerKernel.cpp b/src/core/CL/kernels/CLPadLayerKernel.cpp index 5270e31bbe..52b65c39b1 100644 --- a/src/core/CL/kernels/CLPadLayerKernel.cpp +++ b/src/core/CL/kernels/CLPadLayerKernel.cpp @@ -99,6 +99,12 @@ void CLPadLayerKernel::configure(const ICLTensor *input, ICLTensor *output, cons { build_opts.add_option("-DPAD_NEAR=" + support::cpp11::to_string(padding.at(2).first)); build_opts.add_option("-DSRC_DEPTH=" + support::cpp11::to_string(input->info()->dimension(2))); + + if(padding.size() > 3) + { + build_opts.add_option("-DPAD_BTOP=" + support::cpp11::to_string(padding.at(3).first)); + build_opts.add_option("-DSRC_BATCH=" + support::cpp11::to_string(input->info()->dimension(3))); + } } } @@ -130,11 +136,13 @@ void CLPadLayerKernel::run(const Window &window, cl::CommandQueue &queue) Window slice_out = window.first_slice_window_3D(); Window slice_in = win_in.first_slice_window_3D(); + unsigned int batch = 0; do { unsigned int idx = 0; add_3D_tensor_argument(idx, _input, slice_in); add_3D_tensor_argument(idx, _output, slice_out); + add_argument(idx, batch++); enqueue(queue, *this, slice_out, lws_hint()); } diff --git a/src/runtime/CL/functions/CLPadLayer.cpp 
b/src/runtime/CL/functions/CLPadLayer.cpp index dcd0a5b1ad..88b1b77a0d 100644 --- a/src/runtime/CL/functions/CLPadLayer.cpp +++ b/src/runtime/CL/functions/CLPadLayer.cpp @@ -183,7 +183,6 @@ void CLPadLayer::configure(ICLTensor *input, ICLTensor *output, const PaddingLis Status CLPadLayer::validate(const ITensorInfo *input, const ITensorInfo *output, const PaddingList &padding, PixelValue constant_value, PaddingMode mode) { ARM_COMPUTE_RETURN_ERROR_ON(padding.size() > input->num_dimensions()); - TensorShape padded_shape = misc::shape_calculator::compute_padded_shape(input->tensor_shape(), padding); // Use CLCopyKernel and CLMemsetKernel to validate all padding modes as this includes all of the shape and info validation. @@ -213,6 +212,7 @@ Status CLPadLayer::validate(const ITensorInfo *input, const ITensorInfo *output, { case PaddingMode::CONSTANT: { + ARM_COMPUTE_RETURN_ERROR_ON(padding.size() > 4); ARM_COMPUTE_RETURN_ON_ERROR(CLPadLayerKernel::validate(input, output, padding, constant_value, mode)); break; } diff --git a/tests/validation/CL/PadLayer.cpp b/tests/validation/CL/PadLayer.cpp index 518f541b8b..431b7ebd19 100644 --- a/tests/validation/CL/PadLayer.cpp +++ b/tests/validation/CL/PadLayer.cpp @@ -40,7 +40,8 @@ namespace validation { namespace { -const auto PaddingSizesDataset = framework::dataset::make("PaddingSize", { PaddingList{ { 0, 0 } }, +const auto PaddingSizesDataset3D = framework::dataset::make("PaddingSize", +{ PaddingList{ { 0, 0 } }, PaddingList{ { 1, 1 } }, PaddingList{ { 33, 33 } }, PaddingList{ { 1, 1 }, { 5, 5 } }, @@ -48,6 +49,12 @@ const auto PaddingSizesDataset = framework::dataset::make("PaddingSize", { Paddi PaddingList{ { 0, 0 }, { 1, 0 }, { 0, 1 } }, PaddingList{ { 0, 0 }, { 0, 0 }, { 0, 0 } } }); +const auto PaddingSizesDataset4D = framework::dataset::make("PaddingSize", +{ PaddingList{ { 1, 1 }, { 1, 0 }, { 1, 1 }, { 0, 0 } }, + PaddingList{ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 1, 1 } }, + PaddingList{ { 0, 1 }, { 1, 0 }, { 2, 2 }, { 1, 
0 } }, + PaddingList{ { 1, 1 }, { 1, 1 }, { 1, 1 }, { 3, 3 } } +}); } // namespace TEST_SUITE(CL) @@ -57,9 +64,10 @@ TEST_SUITE(PadLayer) // clang-format off DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip( - framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Mismatching data type input/output + framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Mismatching data type input/output TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Mismatching shapes with padding TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid number of pad dimensions TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Mismatching shapes dimension TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U), 1, DataType::F32) // Invalid padding list @@ -68,29 +76,33 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip( TensorInfo(TensorShape(28U, 11U, 2U), 1, DataType::F32), TensorInfo(TensorShape(29U, 17U, 2U), 1, DataType::F32), TensorInfo(TensorShape(29U, 15U, 4U, 3U), 1, DataType::F32), + TensorInfo(TensorShape(29U, 15U, 4U, 3U), 1, DataType::F32), TensorInfo(TensorShape(29U, 17U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U), 1, DataType::F32) })), framework::dataset::make("PaddingSize", { PaddingList{{0, 0}}, PaddingList{{1, 1}}, PaddingList{{1, 1}, {2, 2}}, + PaddingList{{1,1}, {1,1}, {1,1}, {1,1}}, PaddingList{{1,1}, {1,1}, {1,1}}, PaddingList{{1, 1}, {2, 2}}, PaddingList{{0,0}, {0,0}, {1,1}} })), framework::dataset::make("PaddingMode", { PaddingMode::CONSTANT, + PaddingMode::CONSTANT, PaddingMode::CONSTANT, PaddingMode::CONSTANT, PaddingMode::SYMMETRIC, PaddingMode::REFLECT, PaddingMode::REFLECT -})), + })), framework::dataset::make("Expected", { false, - false, - true, - false, - true, - false })), + false, + true, + 
false, + false, + true, + false })), input_info, output_info, padding, mode, expected) { ARM_COMPUTE_EXPECT(bool(CLPadLayer::validate(&input_info.clone()->set_is_resizable(true), &output_info.clone()->set_is_resizable(true), padding, PixelValue(), mode)) == expected, framework::LogLevel::ERRORS); @@ -106,14 +118,21 @@ TEST_SUITE(Float) TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, CLPaddingFixture, framework::DatasetMode::ALL, - combine(combine(combine(datasets::Small3DShapes(), framework::dataset::make("DataType", { DataType::F32 })), PaddingSizesDataset), + combine(combine(combine(datasets::Small3DShapes(), framework::dataset::make("DataType", { DataType::F32 })), PaddingSizesDataset3D), framework::dataset::make("PaddingMode", { PaddingMode::CONSTANT, PaddingMode::REFLECT, PaddingMode::SYMMETRIC }))) { // Validate output validate(CLAccessor(_target), _reference); } +FIXTURE_DATA_TEST_CASE(RunSmall4D, CLPaddingFixture, framework::DatasetMode::ALL, + combine(combine(combine(datasets::Small4DShapes(), framework::dataset::make("DataType", { DataType::F32 })), PaddingSizesDataset4D), + framework::dataset::make("PaddingMode", { PaddingMode::CONSTANT }))) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} FIXTURE_DATA_TEST_CASE(RunLarge, CLPaddingFixture, framework::DatasetMode::NIGHTLY, - combine(combine(combine(datasets::Large3DShapes(), framework::dataset::make("DataType", { DataType::F32 })), PaddingSizesDataset), + combine(combine(combine(datasets::Large3DShapes(), framework::dataset::make("DataType", { DataType::F32 })), PaddingSizesDataset3D), framework::dataset::make("PaddingMode", { PaddingMode::CONSTANT, PaddingMode::REFLECT, PaddingMode::SYMMETRIC }))) { // Validate output @@ -123,7 +142,7 @@ TEST_SUITE_END() // FP32 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunLarge, CLPaddingFixture, framework::DatasetMode::NIGHTLY, - combine(combine(combine(datasets::Large3DShapes(), framework::dataset::make("DataType", { DataType::F16 })), 
PaddingSizesDataset), + combine(combine(combine(datasets::Large3DShapes(), framework::dataset::make("DataType", { DataType::F16 })), PaddingSizesDataset3D), framework::dataset::make("PaddingMode", { PaddingMode::CONSTANT, PaddingMode::REFLECT }))) { // Validate output @@ -135,14 +154,21 @@ TEST_SUITE_END() // Float TEST_SUITE(Quantized) TEST_SUITE(QASYMM8) FIXTURE_DATA_TEST_CASE(RunSmall, CLPaddingFixture, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(datasets::Small3DShapes(), framework::dataset::make("DataType", { DataType::QASYMM8 })), PaddingSizesDataset), + combine(combine(combine(datasets::Small3DShapes(), framework::dataset::make("DataType", { DataType::QASYMM8 })), PaddingSizesDataset3D), framework::dataset::make("PaddingMode", { PaddingMode::CONSTANT, PaddingMode::REFLECT }))) { // Validate output validate(CLAccessor(_target), _reference); } +FIXTURE_DATA_TEST_CASE(RunSmall4D, CLPaddingFixture, framework::DatasetMode::PRECOMMIT, + combine(combine(combine(datasets::Small4DShapes(), framework::dataset::make("DataType", { DataType::QASYMM8 })), PaddingSizesDataset4D), + framework::dataset::make("PaddingMode", { PaddingMode::CONSTANT }))) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} FIXTURE_DATA_TEST_CASE(RunLarge, CLPaddingFixture, framework::DatasetMode::NIGHTLY, - combine(combine(combine(datasets::Large3DShapes(), framework::dataset::make("DataType", { DataType::QASYMM8 })), PaddingSizesDataset), + combine(combine(combine(datasets::Large3DShapes(), framework::dataset::make("DataType", { DataType::QASYMM8 })), PaddingSizesDataset3D), framework::dataset::make("PaddingMode", { PaddingMode::CONSTANT, PaddingMode::REFLECT }))) { // Validate output -- cgit v1.2.1