From 0b72aa4b2abdba7ab48aaa8a45c624ba1e27a411 Mon Sep 17 00:00:00 2001 From: Gunes Bayir Date: Sat, 7 Oct 2023 23:52:48 +0100 Subject: Optimize NEStackLayer Optimize the stack operation in Cpu by leveraging block memcpy. Resolves: COMPMID-6498 Change-Id: I49d79d179f0375a73d654edd59fb33072112569b Signed-off-by: Gunes Bayir Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10451 Reviewed-by: SiCong Li Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins Benchmark: Arm Jenkins --- arm_compute/runtime/NEON/functions/NEStackLayer.h | 13 +- docs/user_guide/release_version_and_change_log.dox | 1 + src/core/NEON/kernels/NEStackLayerKernel.cpp | 196 +++++++++++++------ src/core/NEON/kernels/NEStackLayerKernel.h | 62 +++--- src/runtime/NEON/functions/NEStackLayer.cpp | 31 +-- tests/validation/NEON/StackLayer.cpp | 211 ++++++++++++--------- tests/validation/fixtures/StackLayerFixture.h | 34 +++- 7 files changed, 335 insertions(+), 213 deletions(-) diff --git a/arm_compute/runtime/NEON/functions/NEStackLayer.h b/arm_compute/runtime/NEON/functions/NEStackLayer.h index ae4e468f21..98dacde0c1 100644 --- a/arm_compute/runtime/NEON/functions/NEStackLayer.h +++ b/arm_compute/runtime/NEON/functions/NEStackLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_NESTACKLAYER_H -#define ARM_COMPUTE_NESTACKLAYER_H +#ifndef ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NESTACKLAYER_H +#define ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NESTACKLAYER_H #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" @@ -91,9 +91,8 @@ public: void run() override; private: - std::vector _input; - std::vector> _stack_kernels; - unsigned int _num_inputs; + std::unique_ptr _stack_kernel; + bool _is_prepared; }; } // namespace arm_compute -#endif /* ARM_COMPUTE_NESTACKLAYER_H */ +#endif // ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NESTACKLAYER_H diff --git a/docs/user_guide/release_version_and_change_log.dox b/docs/user_guide/release_version_and_change_log.dox index 882244d2f2..d1429b61d7 100644 --- a/docs/user_guide/release_version_and_change_log.dox +++ b/docs/user_guide/release_version_and_change_log.dox @@ -55,6 +55,7 @@ v23.11 Public major release - Performance optimizations: - Optimize @ref cpu::CpuReshape - Optimize @ref opencl::ClTranspose + - Optimize @ref NEStackLayer - Add new OpenCL™ kernels: - @ref opencl::kernels::ClMatMulLowpNativeMMULKernel support for QASYMM8 and QASYMM8_SIGNED, with batch support - Deprecate support for Bfloat16 in @ref cpu::CpuCast. diff --git a/src/core/NEON/kernels/NEStackLayerKernel.cpp b/src/core/NEON/kernels/NEStackLayerKernel.cpp index e23b40a9aa..225e4fcfd2 100644 --- a/src/core/NEON/kernels/NEStackLayerKernel.cpp +++ b/src/core/NEON/kernels/NEStackLayerKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2021, 2023 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -33,6 +33,7 @@ #include "arm_compute/core/Window.h" #include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/Utils.h" #include "src/core/helpers/WindowHelpers.h" namespace arm_compute @@ -42,9 +43,10 @@ using namespace arm_compute::misc::shape_calculator; namespace { Status validate_arguments(const ITensorInfo *input, - unsigned int axis, - unsigned int idx_input, - unsigned int num_tensors, + uint32_t axis, + uint32_t idx_input, + uint32_t num_tensors, + uint32_t rank, const ITensorInfo *output) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); @@ -53,6 +55,7 @@ Status validate_arguments(const ITensorInfo *input, ARM_COMPUTE_RETURN_ERROR_ON(idx_input >= num_tensors); ARM_COMPUTE_RETURN_ERROR_ON(axis > input->num_dimensions()); ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 4); + ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() != rank); if (output->total_size() != 0) { @@ -65,93 +68,162 @@ Status validate_arguments(const ITensorInfo *input, return Status{}; } -std::pair -validate_and_configure_window(ITensorInfo *input, unsigned int axis, unsigned int num_tensors, ITensorInfo *output) +inline Coordinates +shift_from_axis_and_replace_coordinate(const Coordinates &id, uint32_t axis, uint32_t idx_input, uint32_t num_dims) { - // Output auto inizialitation if not yet initialized - auto_init_if_empty(*output, input->clone()->set_tensor_shape(compute_stack_shape(*input, axis, num_tensors))); + Coordinates id_out = id; + for (uint32_t i = num_dims; i > axis; --i) + { + id_out.set(i, id[i - 1]); + } + id_out.set(axis, idx_input); + return id_out; +} - // Configure kernel window - Window win = calculate_max_window(*input); +void elementwise_stack(const std::vector &input, ITensor *output, uint32_t axis, const Window &window) +{ + Window window_out; + window_out.use_tensor_dimensions(output->info()->tensor_shape()); + + const int32_t num_tensors = input.size(); + const size_t element_size = 
input[0]->info()->element_size(); + const uint32_t num_dims = static_cast(input[0]->info()->num_dimensions()); - return std::make_pair(Status{}, win); + for (int32_t idx_input = 0; idx_input < num_tensors; ++idx_input) + { + Iterator input_it(input[idx_input], window); + + execute_window_loop( + window, + [&](const Coordinates &id) + { + Coordinates id_out = shift_from_axis_and_replace_coordinate(id, axis, idx_input, num_dims); + std::memcpy(output->ptr_to_element(id_out), input_it.ptr(), element_size); + }, + input_it); + } } -inline Coordinates -shift_from_axis_and_replace_coordinate(const Coordinates &id, unsigned int axis, unsigned int idx_input) +void memcpy_stack(const std::vector &input, ITensor *output, uint32_t axis, const Window &window) { - constexpr int max_out_coord = 5; // Input shape is max a 4D shape, output is max 5D - Coordinates id_out = id; - for (unsigned int i = max_out_coord - 1; i > axis; --i) + const int32_t element_size = input[0]->info()->element_size(); + const int32_t chunk_size = input[0]->info()->tensor_shape().total_size_lower(axis) * element_size; + const int32_t num_tensors = input.size(); + const int32_t out_chunk_step = chunk_size * num_tensors; + + const int32_t start_x = window.x().start(); + const int32_t end_x = window.x().end(); + const int32_t start_y = window.y().start(); + const int32_t end_y = window.y().end(); + + uint8_t *out_ptr_base = output->buffer() + output->info()->offset_first_element_in_bytes() + start_x * chunk_size; + + for (int32_t x = start_x; x < end_x; ++x) { - id_out.set(i, id[i - 1]); + const uint8_t *in_ptr = + input[x]->buffer() + input[x]->info()->offset_first_element_in_bytes() + start_y * chunk_size; + uint8_t *out_ptr = out_ptr_base + start_y * out_chunk_step; + + for (int32_t y = start_y; y < end_y; ++y) + { + std::memcpy(out_ptr, in_ptr, chunk_size); + + in_ptr += chunk_size; + out_ptr += out_chunk_step; + } + + out_ptr_base += chunk_size; } - id_out.set(axis, idx_input); - return id_out; } + } 
// namespace -NEStackLayerKernel::NEStackLayerKernel() : _input(nullptr), _output(nullptr), _axis(), _idx_input() +NEStackLayerKernel::NEStackLayerKernel() : _input(), _output(nullptr), _axis(), _split_dimension(Window::DimY) { } -void NEStackLayerKernel::configure( - const ITensor *input, unsigned int axis, unsigned int idx_input, unsigned int num_tensors, ITensor *output) +void NEStackLayerKernel::configure(const std::vector &input, uint32_t axis, ITensor *output) { - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), axis, idx_input, num_tensors, output->info())); + ARM_COMPUTE_ERROR_ON_NULLPTR(output); - _input = input; - _output = output; - _axis = axis; - _idx_input = idx_input; + const int32_t num_tensors = input.size(); + ARM_COMPUTE_ERROR_ON(num_tensors == 0); - // Configure kernel window - auto win_config = validate_and_configure_window(input->info(), axis, num_tensors, output->info()); + const uint32_t rank = input[0]->info()->num_dimensions(); + ARM_COMPUTE_UNUSED(rank); - ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - INEKernel::configure(win_config.second); + for (int32_t i = 0; i < num_tensors; ++i) + { + ARM_COMPUTE_ERROR_ON_NULLPTR(input[i]); + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input[i]->info(), axis, i, num_tensors, rank, output->info())); + } + + auto_init_if_empty(*output->info(), input[0]->info()->clone()->set_tensor_shape( + compute_stack_shape(*input[0]->info(), axis, num_tensors))); + + _input = input; + _output = output; + _axis = axis; } -Status NEStackLayerKernel::validate(const ITensorInfo *input, - unsigned int axis, - unsigned int idx_input, - unsigned int num_tensors, - const ITensorInfo *output) +Status NEStackLayerKernel::validate(const std::vector &input, uint32_t axis, const ITensorInfo *output) { - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, axis, idx_input, num_tensors, output)); - ARM_COMPUTE_RETURN_ON_ERROR( - 
validate_and_configure_window(input->clone().get(), axis, num_tensors, output->clone().get()).first); + const int32_t num_tensors = input.size(); + const size_t rank = input[0]->num_dimensions(); + + for (int32_t i = 0; i < num_tensors; ++i) + { + ARM_COMPUTE_ERROR_ON_NULLPTR(input[i]); + ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input[i], axis, i, num_tensors, rank, output)); + } + return Status{}; } +void NEStackLayerKernel::prepare() +{ + // Prepare calculates the window at runtime, in case there is padding being added after configure() + const ITensorInfo *input_info = _input[0]->info(); + const int32_t num_dims = input_info->num_dimensions(); + const int32_t num_tensors = _input.size(); + + // Check if there are any paddings in the input tensors + bool has_padding = false; + for (const ITensor *in : _input) + { + if (has_holes(*in->info(), num_dims - 1)) + { + has_padding = true; + break; + } + } + + has_padding = has_padding || has_holes(*_output->info(), num_dims); + + Window win; + if (!has_padding) + { + _stack_fn = memcpy_stack; + + // 2D execution window (X,Y): [Num_tensors, Dimensions >= axis] + win.set(Window::DimX, Window::Dimension(0, num_tensors, 1)); + win.set(Window::DimY, Window::Dimension(0, input_info->tensor_shape().total_size_upper(_axis), 1)); + } + else + { + _stack_fn = elementwise_stack; + win = calculate_max_window(*input_info); + } + + INEKernel::configure(win); +} + void NEStackLayerKernel::run(const Window &window, const ThreadInfo &info) { ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); - Window window_out; - window_out.use_tensor_dimensions(_output->info()->tensor_shape()); - - Iterator input(_input, window); - Iterator output(_output, window_out); - - const int stride_x = _output->info()->strides_in_bytes()[0]; - const int stride_y = _output->info()->num_dimensions() >= 1 ? 
_output->info()->strides_in_bytes()[1] : 0; - const int stride_z = _output->info()->num_dimensions() >= 2 ? _output->info()->strides_in_bytes()[2] : 0; - const int stride_w = _output->info()->num_dimensions() >= 3 ? _output->info()->strides_in_bytes()[3] : 0; - const int stride_k = _output->info()->num_dimensions() >= 4 ? _output->info()->strides_in_bytes()[4] : 0; - - execute_window_loop( - window, - [&](const Coordinates &id) - { - Coordinates id_out = shift_from_axis_and_replace_coordinate(id, _axis, _idx_input); - const int idx = id_out[0] * stride_x + id_out[1] * stride_y + id_out[2] * stride_z + id_out[3] * stride_w + - id_out[4] * stride_k; - std::memcpy(output.ptr() + idx, input.ptr(), _input->info()->element_size()); - }, - input); + _stack_fn(_input, _output, _axis, window); } } // namespace arm_compute diff --git a/src/core/NEON/kernels/NEStackLayerKernel.h b/src/core/NEON/kernels/NEStackLayerKernel.h index 685812b56d..02ee776ea4 100644 --- a/src/core/NEON/kernels/NEStackLayerKernel.h +++ b/src/core/NEON/kernels/NEStackLayerKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,13 +22,16 @@ * SOFTWARE. */ -#ifndef ARM_COMPUTE_NESTACKLAYERKERNEL_H -#define ARM_COMPUTE_NESTACKLAYERKERNEL_H +#ifndef ACL_SRC_CORE_NEON_KERNELS_NESTACKLAYERKERNEL_H +#define ACL_SRC_CORE_NEON_KERNELS_NESTACKLAYERKERNEL_H #include "arm_compute/core/Types.h" #include "src/core/NEON/INEKernel.h" +#include +#include + namespace arm_compute { class ITensor; @@ -57,43 +60,48 @@ public: * * @note Supported input tensor rank: up to 4 * - * @param[in] input Input tensor. Data types supported: All - * @param[in] axis The dimension to stack the tensors along. It must be smaller than the number of input dimensions. - * @param[in] idx_input Index of the input tensor in the list of tensors to stack. 
- * All tensors in the list must have the same shape - * @param[in] num_tensors Number of tensors to stack - * @param[out] output Output tensor. Data types supported: Same as @p input. + * @param[in] input Input tensors. Data types supported: All + * @param[in] axis The dimension to stack the tensors along. It must be smaller than the number of input dimensions. + * @param[out] output Output tensor. Data types supported: Same as @p input. * */ - void configure( - const ITensor *input, unsigned int axis, unsigned int idx_input, unsigned int num_tensors, ITensor *output); + void configure(const std::vector &input, uint32_t axis, ITensor *output); /** Static function to check if given info will lead to a valid configuration of @ref NEStackLayerKernel * * @note Supported input tensor rank: up to 4 * - * @param[in] input Input tensor info. Data types supported: All - * @param[in] axis The dimension to stack the tensors along. It must be smaller than the number of input dimensions. - * @param[in] idx_input Index of the input tensor in the list of tensors to stack - * All tensors in the list must have the same shape - * @param[in] num_tensors Number of tensors to stack - * @param[in] output Output tensor info. Data types supported: Same as @p input. + * @param[in] input Input tensor infos. Data types supported: All + * @param[in] axis The dimension to stack the tensors along. It must be smaller than the number of input dimensions. + * @param[in] output Output tensor info. Data types supported: Same as @p input. * * @return a status */ - static Status validate(const ITensorInfo *input, - unsigned int axis, - unsigned int idx_input, - unsigned int num_tensors, - const ITensorInfo *output); + static Status validate(const std::vector &input, uint32_t axis, const ITensorInfo *output); + + /** Prepare the reshape kernel for execution (Only executed once) for + * choosing the window and the algorithm. 
+ */ + void prepare(); // Inherited methods overridden void run(const Window &window, const ThreadInfo &info) override; + /** Get the dimension to split the kernel workload + * + * @return the split dimension + */ + uint32_t get_split_dimension() const + { + return _split_dimension; + } + private: - const ITensor *_input; - ITensor *_output; - unsigned int _axis; - unsigned int _idx_input; + std::vector _input; + ITensor *_output; + uint32_t _axis; + uint32_t _split_dimension; + + std::function &, ITensor *, uint32_t, const Window &)> _stack_fn{}; }; } // namespace arm_compute -#endif /* ARM_COMPUTE_NESTACKLAYERKERNEL_H */ +#endif // ACL_SRC_CORE_NEON_KERNELS_NESTACKLAYERKERNEL_H diff --git a/src/runtime/NEON/functions/NEStackLayer.cpp b/src/runtime/NEON/functions/NEStackLayer.cpp index 03e7026691..2f88ffca2a 100644 --- a/src/runtime/NEON/functions/NEStackLayer.cpp +++ b/src/runtime/NEON/functions/NEStackLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2021, 2023 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -39,7 +39,7 @@ namespace arm_compute NEStackLayer::~NEStackLayer() = default; NEStackLayer::NEStackLayer() // NOLINT - : _input(), _stack_kernels(), _num_inputs(0) + : _stack_kernel(std::make_unique()), _is_prepared(false) { } @@ -47,17 +47,10 @@ void NEStackLayer::configure(const std::vector &input, int axis, ITen { ARM_COMPUTE_LOG_PARAMS(input, axis, output); - _num_inputs = input.size(); - _stack_kernels.resize(_num_inputs); - // Wrap around negative values const unsigned int axis_u = wrap_around(axis, static_cast(input[0]->info()->num_dimensions() + 1)); - for (unsigned int i = 0; i < _num_inputs; i++) - { - _stack_kernels[i] = std::make_unique(); - _stack_kernels[i]->configure(input[i], axis_u, i, _num_inputs, output); - } + _stack_kernel->configure(input, axis_u, output); } Status NEStackLayer::validate(const std::vector &input, int axis, const ITensorInfo *output) @@ -69,24 +62,20 @@ Status NEStackLayer::validate(const std::vector &input, int axis, const size_t rank = input[0]->num_dimensions(); const unsigned int axis_u = wrap_around(axis, static_cast(rank + 1)); - const unsigned int num_inputs = input.size(); - - for (unsigned int i = 0; i < num_inputs; i++) - { - // All the tensors must have the same rank - ARM_COMPUTE_RETURN_ERROR_ON(input[i]->num_dimensions() != rank); - // Validate Kernel - ARM_COMPUTE_RETURN_ON_ERROR(NEStackLayerKernel::validate(input[i], axis_u, i, num_inputs, output)); - } + // Validate Kernel + ARM_COMPUTE_RETURN_ON_ERROR(NEStackLayerKernel::validate(input, axis_u, output)); return Status{}; } void NEStackLayer::run() { - for (unsigned i = 0; i < _num_inputs; i++) + if (!_is_prepared) { - NEScheduler::get().schedule(_stack_kernels[i].get(), Window::DimY); + _stack_kernel->prepare(); + _is_prepared = true; } + + NEScheduler::get().schedule(_stack_kernel.get(), _stack_kernel->get_split_dimension()); } } // namespace arm_compute diff --git a/tests/validation/NEON/StackLayer.cpp 
b/tests/validation/NEON/StackLayer.cpp index d88f713ccd..3828010c7b 100644 --- a/tests/validation/NEON/StackLayer.cpp +++ b/tests/validation/NEON/StackLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -44,69 +44,74 @@ namespace test { namespace validation { + +using framework::dataset::make; namespace { // *INDENT-OFF* // clang-format off /** Data types */ -const auto data_types = framework::dataset::make("DataType", { DataType::QASYMM8, DataType::F16, DataType::F32 }); +const auto data_types = make("DataType", { DataType::QASYMM8, DataType::F16, DataType::F32 }); /** Num tensors values to test */ -const auto n_values = framework::dataset::make("NumTensors", { 3, 4 }); +const auto n_values = make("NumTensors", { 3, 4 }); /** Shapes 1D to test */ -const auto shapes_1d_small = combine(datasets::Small1DShapes(), framework::dataset::make("Axis", -1, 2)); +const auto shapes_1d_small = combine(datasets::Small1DShapes(), make("Axis", -1, 2)); /** Shapes 2D to test */ -const auto shapes_2d_small = combine(datasets::Small2DShapes(), framework::dataset::make("Axis", -2, 3)); +const auto shapes_2d_small = combine(datasets::Small2DShapes(), make("Axis", -2, 3)); /** Shapes 3D to test */ -const auto shapes_3d_small = combine(datasets::Small3DShapes(), framework::dataset::make("Axis", -3, 4)); +const auto shapes_3d_small = combine(datasets::Small3DShapes(), make("Axis", -3, 4)); /** Shapes 4D to test */ -const auto shapes_4d_small = combine(datasets::Small4DShapes(), framework::dataset::make("Axis", -4, 5)); +const auto shapes_4d_small = combine(datasets::Small4DShapes(), make("Axis", -4, 5)); /** Shapes 1D to test */ -const auto shapes_1d_large = combine(datasets::Large1DShapes(), framework::dataset::make("Axis", -1, 2)); +const auto shapes_1d_large = combine(datasets::Large1DShapes(), make("Axis", -1, 2)); /** Shapes 2D to test */ -const auto shapes_2d_large = 
combine(datasets::Medium2DShapes(), framework::dataset::make("Axis", -2, 3)); +const auto shapes_2d_large = combine(datasets::Medium2DShapes(), make("Axis", -2, 3)); /** Shapes 3D to test */ -const auto shapes_3d_large = combine(datasets::Medium3DShapes(), framework::dataset::make("Axis", -3, 4)); +const auto shapes_3d_large = combine(datasets::Medium3DShapes(), make("Axis", -3, 4)); /** Shapes 4D to test */ -const auto shapes_4d_large = combine(datasets::Medium4DShapes(), framework::dataset::make("Axis", -4, 5)); +const auto shapes_4d_large = combine(datasets::Medium4DShapes(), make("Axis", -4, 5)); } // namespace /** Fixture to use */ template using NEStackLayerFixture = StackLayerValidationFixture; +template +using NEStackLayerWithPaddingFixture = StackLayerWithPaddingValidationFixture; + using namespace arm_compute::misc::shape_calculator; TEST_SUITE(NEON) TEST_SUITE(StackLayer) -DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( - framework::dataset::make("InputInfo", +DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip( +make("InputInfo", { std::vector{ TensorInfo(TensorShape(9U, 8U), 1, DataType::U8) }, - std::vector{ TensorInfo(TensorShape(1U, 2U), 1, DataType::U8) , TensorInfo(TensorShape(1U, 2U), 1, DataType::U8), TensorInfo(TensorShape(1U, 2U), 1, DataType::U8)}, + std::vector{ TensorInfo(TensorShape(1U, 2U), 1, DataType::U8) , TensorInfo(TensorShape(1U, 2U), 1, DataType::U8), TensorInfo(TensorShape(1U, 2U), 1, DataType::U8)}, std::vector{ TensorInfo(TensorShape(2U, 3U), 1, DataType::S32) }, - std::vector{ TensorInfo(TensorShape(7U, 5U, 3U, 8U, 2U), 1, DataType::S32), TensorInfo(TensorShape(7U, 5U, 3U, 8U, 2U), 1, DataType::S32)}, + std::vector{ TensorInfo(TensorShape(7U, 5U, 3U, 8U, 2U), 1, DataType::S32), TensorInfo(TensorShape(7U, 5U, 3U, 8U, 2U), 1, DataType::S32)}, std::vector{ TensorInfo(TensorShape(9U, 8U), 1, DataType::S32) }, }), -framework::dataset::make("OutputInfo", +make("OutputInfo", { TensorInfo(TensorShape(1U, 9U, 
8U), 1, DataType::U8), // Passes, stack 1 tensor on x axis TensorInfo(TensorShape(1U, 3U, 2U), 1, DataType::U8), // Passes, stack 3 tensors on y axis TensorInfo(TensorShape(1U, 2U, 3U), 1, DataType::S32), // fails axis < (- input's rank) TensorInfo(TensorShape(3U, 7U, 5U), 1, DataType::S32), // fails, input dimensions > 4 TensorInfo(TensorShape(1U, 2U, 3U), 1, DataType::U8), // fails mismatching data types -})), -framework::dataset::make("Axis", { -3, 1, -4, -3, 1 })), -framework::dataset::make("Expected", { true, true, false, false, false })), +}), +make("Axis", { -3, 1, -4, -3, 1 }), +make("Expected", { true, true, false, false, false })), input_info, output_info, axis, expected) { std::vector ti(input_info); @@ -121,18 +126,18 @@ input_info, output_info, axis, expected) TEST_SUITE(Shapes1D) TEST_SUITE(S32) FIXTURE_DATA_TEST_CASE(RunSmall, NEStackLayerFixture, framework::DatasetMode::ALL, - combine(combine(shapes_1d_small, - framework::dataset::make("DataType", { DataType::S32 })), - n_values)) + combine(shapes_1d_small, + make("DataType", { DataType::S32 }), + n_values)) { // Validate output validate(Accessor(_target), _reference); } FIXTURE_DATA_TEST_CASE(RunLarge, NEStackLayerFixture, framework::DatasetMode::NIGHTLY, - combine(combine(shapes_1d_large, - framework::dataset::make("DataType", { DataType::S32 })), - n_values)) + combine(shapes_1d_large, + make("DataType", { DataType::S32 }), + n_values)) { // Validate output validate(Accessor(_target), _reference); @@ -141,18 +146,18 @@ TEST_SUITE_END() // S32 TEST_SUITE(S16) FIXTURE_DATA_TEST_CASE(RunSmall, NEStackLayerFixture, framework::DatasetMode::ALL, - combine(combine(shapes_1d_small, - framework::dataset::make("DataType", { DataType::S16 })), - n_values)) + combine(shapes_1d_small, + make("DataType", { DataType::S16 }), + n_values)) { // Validate output validate(Accessor(_target), _reference); } FIXTURE_DATA_TEST_CASE(RunLarge, NEStackLayerFixture, framework::DatasetMode::NIGHTLY, - 
combine(combine(shapes_1d_large, - framework::dataset::make("DataType", { DataType::S16 })), - n_values)) + combine(shapes_1d_large, + make("DataType", { DataType::S16 }), + n_values)) { // Validate output validate(Accessor(_target), _reference); @@ -161,18 +166,18 @@ TEST_SUITE_END() // S16 TEST_SUITE(S8) FIXTURE_DATA_TEST_CASE(RunSmall, NEStackLayerFixture, framework::DatasetMode::ALL, - combine(combine(shapes_1d_small, - framework::dataset::make("DataType", { DataType::S8 })), - n_values)) + combine(shapes_1d_small, + make("DataType", { DataType::S8 }), + n_values)) { // Validate output validate(Accessor(_target), _reference); } FIXTURE_DATA_TEST_CASE(RunLarge, NEStackLayerFixture, framework::DatasetMode::NIGHTLY, - combine(combine(shapes_1d_large, - framework::dataset::make("DataType", { DataType::S8 })), - n_values)) + combine(shapes_1d_large, + make("DataType", { DataType::S8 }), + n_values)) { // Validate output validate(Accessor(_target), _reference); @@ -183,18 +188,18 @@ TEST_SUITE_END() // Shapes1D TEST_SUITE(Shapes2D) TEST_SUITE(S32) FIXTURE_DATA_TEST_CASE(RunSmall, NEStackLayerFixture, framework::DatasetMode::ALL, - combine(combine(shapes_2d_small, - framework::dataset::make("DataType", { DataType::S32 })), - n_values)) + combine(shapes_2d_small, + make("DataType", { DataType::S32 }), + n_values)) { // Validate output validate(Accessor(_target), _reference); } FIXTURE_DATA_TEST_CASE(RunLarge, NEStackLayerFixture, framework::DatasetMode::NIGHTLY, - combine(combine(shapes_2d_large, - framework::dataset::make("DataType", { DataType::S32 })), - n_values)) + combine(shapes_2d_large, + make("DataType", { DataType::S32 }), + n_values)) { // Validate output validate(Accessor(_target), _reference); @@ -203,18 +208,18 @@ TEST_SUITE_END() // S32 TEST_SUITE(S16) FIXTURE_DATA_TEST_CASE(RunSmall, NEStackLayerFixture, framework::DatasetMode::ALL, - combine(combine(shapes_2d_small, - framework::dataset::make("DataType", { DataType::S16 })), - n_values)) + 
combine(shapes_2d_small, + make("DataType", { DataType::S16 }), + n_values)) { // Validate output validate(Accessor(_target), _reference); } FIXTURE_DATA_TEST_CASE(RunLarge, NEStackLayerFixture, framework::DatasetMode::NIGHTLY, - combine(combine(shapes_2d_large, - framework::dataset::make("DataType", { DataType::S16 })), - n_values)) + combine(shapes_2d_large, + make("DataType", { DataType::S16 }), + n_values)) { // Validate output validate(Accessor(_target), _reference); @@ -223,18 +228,18 @@ TEST_SUITE_END() // S16 TEST_SUITE(S8) FIXTURE_DATA_TEST_CASE(RunSmall, NEStackLayerFixture, framework::DatasetMode::ALL, - combine(combine(shapes_2d_small, - framework::dataset::make("DataType", { DataType::S8 })), - n_values)) + combine(shapes_2d_small, + make("DataType", { DataType::S8 }), + n_values)) { // Validate output validate(Accessor(_target), _reference); } FIXTURE_DATA_TEST_CASE(RunLarge, NEStackLayerFixture, framework::DatasetMode::NIGHTLY, - combine(combine(shapes_2d_large, - framework::dataset::make("DataType", { DataType::S8 })), - n_values)) + combine(shapes_2d_large, + make("DataType", { DataType::S8 }), + n_values)) { // Validate output validate(Accessor(_target), _reference); @@ -245,18 +250,18 @@ TEST_SUITE_END() // Shapes2D TEST_SUITE(Shapes3D) TEST_SUITE(S32) FIXTURE_DATA_TEST_CASE(RunSmall, NEStackLayerFixture, framework::DatasetMode::ALL, - combine(combine(shapes_3d_small, - framework::dataset::make("DataType", { DataType::S32 })), - n_values)) + combine(shapes_3d_small, + make("DataType", { DataType::S32 }), + n_values)) { // Validate output validate(Accessor(_target), _reference); } FIXTURE_DATA_TEST_CASE(RunLarge, NEStackLayerFixture, framework::DatasetMode::NIGHTLY, - combine(combine(shapes_3d_large, - framework::dataset::make("DataType", { DataType::S32 })), - n_values)) + combine(shapes_3d_large, + make("DataType", { DataType::S32 }), + n_values)) { // Validate output validate(Accessor(_target), _reference); @@ -265,18 +270,18 @@ 
TEST_SUITE_END() // S32 TEST_SUITE(S16) FIXTURE_DATA_TEST_CASE(RunSmall, NEStackLayerFixture, framework::DatasetMode::ALL, - combine(combine(shapes_3d_small, - framework::dataset::make("DataType", { DataType::S16 })), - n_values)) + combine(shapes_3d_small, + make("DataType", { DataType::S16 }), + n_values)) { // Validate output validate(Accessor(_target), _reference); } FIXTURE_DATA_TEST_CASE(RunLarge, NEStackLayerFixture, framework::DatasetMode::NIGHTLY, - combine(combine(shapes_3d_large, - framework::dataset::make("DataType", { DataType::S16 })), - n_values)) + combine(shapes_3d_large, + make("DataType", { DataType::S16 }), + n_values)) { // Validate output validate(Accessor(_target), _reference); @@ -285,18 +290,18 @@ TEST_SUITE_END() // S16 TEST_SUITE(S8) FIXTURE_DATA_TEST_CASE(RunSmall, NEStackLayerFixture, framework::DatasetMode::ALL, - combine(combine(shapes_3d_small, - framework::dataset::make("DataType", { DataType::S8 })), - n_values)) + combine(shapes_3d_small, + make("DataType", { DataType::S8 }), + n_values)) { // Validate output validate(Accessor(_target), _reference); } FIXTURE_DATA_TEST_CASE(RunLarge, NEStackLayerFixture, framework::DatasetMode::NIGHTLY, - combine(combine(shapes_3d_large, - framework::dataset::make("DataType", { DataType::S8 })), - n_values)) + combine(shapes_3d_large, + make("DataType", { DataType::S8 }), + n_values)) { // Validate output validate(Accessor(_target), _reference); @@ -307,18 +312,29 @@ TEST_SUITE_END() // Shapes3D TEST_SUITE(Shapes4D) TEST_SUITE(S32) FIXTURE_DATA_TEST_CASE(RunSmall, NEStackLayerFixture, framework::DatasetMode::ALL, - combine(combine(shapes_4d_small, - framework::dataset::make("DataType", { DataType::S32 })), - n_values)) + combine(shapes_4d_small, + make("DataType", { DataType::S32 }), + n_values)) +{ + // Validate output + validate(Accessor(_target), _reference); +} + +// Testing the case with padding for only 4d shapes and for one data type. 
This is because the underlying code +// path depends only on the padding, which isn't affected by the shapes or data types. +FIXTURE_DATA_TEST_CASE(RunSmallWithPadding, NEStackLayerWithPaddingFixture, framework::DatasetMode::ALL, + combine(shapes_4d_small, + make("DataType", { DataType::S32 }), + n_values)) { // Validate output validate(Accessor(_target), _reference); } FIXTURE_DATA_TEST_CASE(RunLarge, NEStackLayerFixture, framework::DatasetMode::NIGHTLY, - combine(combine(shapes_4d_large, - framework::dataset::make("DataType", { DataType::S32 })), - n_values)) + combine(shapes_4d_large, + make("DataType", { DataType::S32 }), + n_values)) { // Validate output validate(Accessor(_target), _reference); @@ -327,18 +343,18 @@ TEST_SUITE_END() // S32 TEST_SUITE(S16) FIXTURE_DATA_TEST_CASE(RunSmall, NEStackLayerFixture, framework::DatasetMode::ALL, - combine(combine(shapes_4d_small, - framework::dataset::make("DataType", { DataType::S16 })), - n_values)) + combine(shapes_4d_small, + make("DataType", { DataType::S16 }), + n_values)) { // Validate output validate(Accessor(_target), _reference); } FIXTURE_DATA_TEST_CASE(RunLarge, NEStackLayerFixture, framework::DatasetMode::NIGHTLY, - combine(combine(shapes_4d_large, - framework::dataset::make("DataType", { DataType::S16 })), - n_values)) + combine(shapes_4d_large, + make("DataType", { DataType::S16 }), + n_values)) { // Validate output validate(Accessor(_target), _reference); @@ -347,24 +363,37 @@ TEST_SUITE_END() // S16 TEST_SUITE(S8) FIXTURE_DATA_TEST_CASE(RunSmall, NEStackLayerFixture, framework::DatasetMode::ALL, - combine(combine(shapes_4d_small, - framework::dataset::make("DataType", { DataType::S8 })), - n_values)) + combine(shapes_4d_small, + make("DataType", { DataType::S8 }), + n_values)) { // Validate output validate(Accessor(_target), _reference); } FIXTURE_DATA_TEST_CASE(RunLarge, NEStackLayerFixture, framework::DatasetMode::NIGHTLY, - combine(combine(shapes_4d_large, - framework::dataset::make("DataType", { 
DataType::S8 })), - n_values)) + combine(shapes_4d_large, + make("DataType", { DataType::S8 }), + n_values)) { // Validate output validate(Accessor(_target), _reference); } TEST_SUITE_END() // S8 TEST_SUITE_END() // Shapes4D + +TEST_SUITE(HighDimensional) +// The Cpu implementation supports tensors of size 4D+, but reference implementation does not. +FIXTURE_DATA_TEST_CASE(RunHighDimensional, NEStackLayerFixture, framework::DatasetMode::DISABLED, + combine(make("Shape", { TensorShape{2U, 3U, 4U, 5U, 3U} }), + make("Axis", { 5, 0, -3, 2 }), + make("DataType", { DataType::S8 }), + make("NumTensors", { 3 }))) +{ + // Validate output + validate(Accessor(_target), _reference); +} +TEST_SUITE_END() // HighDimensional TEST_SUITE_END() // StackLayer TEST_SUITE_END() // Neon } // namespace validation diff --git a/tests/validation/fixtures/StackLayerFixture.h b/tests/validation/fixtures/StackLayerFixture.h index 7320a032bd..7dd8fe47dc 100644 --- a/tests/validation/fixtures/StackLayerFixture.h +++ b/tests/validation/fixtures/StackLayerFixture.h @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_TEST_STACK_LAYER_FIXTURE -#define ARM_COMPUTE_TEST_STACK_LAYER_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_STACKLAYERFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_STACKLAYERFIXTURE_H #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorShape.h" @@ -54,7 +54,7 @@ class StackLayerValidationFixture : public framework::Fixture public: void setup(TensorShape shape_src, int axis, DataType data_type, int num_tensors) { - _target = compute_target(shape_src, axis, data_type, num_tensors); + _target = compute_target(shape_src, axis, data_type, num_tensors, false /* add_x_padding */); _reference = compute_reference(shape_src, axis, data_type, num_tensors); } @@ -65,7 +65,7 @@ protected: library->fill_tensor_uniform(tensor, i); } - TensorType compute_target(TensorShape shape_src, int axis, DataType data_type, int num_tensors) + TensorType compute_target(TensorShape shape_src, int axis, DataType data_type, int num_tensors, bool add_x_padding) { std::vector tensors(num_tensors); std::vector src(num_tensors); @@ -90,6 +90,11 @@ protected: // Allocate and fill the input tensors for(int i = 0; i < num_tensors; ++i) { + if(add_x_padding) + { + add_padding_x({&tensors[i]}, DataLayout::NHWC); + } + ARM_COMPUTE_ASSERT(tensors[i].info()->is_resizable()); tensors[i].allocator()->allocate(); ARM_COMPUTE_ASSERT(!tensors[i].info()->is_resizable()); @@ -98,6 +103,11 @@ protected: fill(AccessorType(tensors[i]), i); } + if(add_x_padding) + { + add_padding_x({&dst}, DataLayout::NHWC); + } + // Allocate output tensor dst.allocator()->allocate(); @@ -131,7 +141,21 @@ protected: TensorType _target{}; SimpleTensor _reference{}; }; + +template +class StackLayerWithPaddingValidationFixture : + public StackLayerValidationFixture +{ +public: + using Parent = StackLayerValidationFixture; + + void setup(TensorShape shape_src, int axis, DataType data_type, int num_tensors) + { + Parent::_target = Parent::compute_target(shape_src, axis, data_type, 
num_tensors, true /* add_x_padding */); + Parent::_reference = Parent::compute_reference(shape_src, axis, data_type, num_tensors); + } +}; } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_STACK_LAYER_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_STACKLAYERFIXTURE_H -- cgit v1.2.1