From 0b72aa4b2abdba7ab48aaa8a45c624ba1e27a411 Mon Sep 17 00:00:00 2001 From: Gunes Bayir Date: Sat, 7 Oct 2023 23:52:48 +0100 Subject: Optimize NEStackLayer Optimize the stack operation in Cpu by leveraging block memcpy. Resolves: COMPMID-6498 Change-Id: I49d79d179f0375a73d654edd59fb33072112569b Signed-off-by: Gunes Bayir Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10451 Reviewed-by: SiCong Li Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins Benchmark: Arm Jenkins --- tests/validation/NEON/StackLayer.cpp | 211 +++++++++++++++----------- tests/validation/fixtures/StackLayerFixture.h | 34 ++++- 2 files changed, 149 insertions(+), 96 deletions(-) (limited to 'tests/validation') diff --git a/tests/validation/NEON/StackLayer.cpp b/tests/validation/NEON/StackLayer.cpp index d88f713ccd..3828010c7b 100644 --- a/tests/validation/NEON/StackLayer.cpp +++ b/tests/validation/NEON/StackLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -44,69 +44,74 @@ namespace test { namespace validation { + +using framework::dataset::make; namespace { // *INDENT-OFF* // clang-format off /** Data types */ -const auto data_types = framework::dataset::make("DataType", { DataType::QASYMM8, DataType::F16, DataType::F32 }); +const auto data_types = make("DataType", { DataType::QASYMM8, DataType::F16, DataType::F32 }); /** Num tensors values to test */ -const auto n_values = framework::dataset::make("NumTensors", { 3, 4 }); +const auto n_values = make("NumTensors", { 3, 4 }); /** Shapes 1D to test */ -const auto shapes_1d_small = combine(datasets::Small1DShapes(), framework::dataset::make("Axis", -1, 2)); +const auto shapes_1d_small = combine(datasets::Small1DShapes(), make("Axis", -1, 2)); /** Shapes 2D to test */ -const auto shapes_2d_small = combine(datasets::Small2DShapes(), framework::dataset::make("Axis", -2, 3)); +const auto shapes_2d_small = combine(datasets::Small2DShapes(), make("Axis", -2, 3)); /** Shapes 3D to test */ -const auto shapes_3d_small = combine(datasets::Small3DShapes(), framework::dataset::make("Axis", -3, 4)); +const auto shapes_3d_small = combine(datasets::Small3DShapes(), make("Axis", -3, 4)); /** Shapes 4D to test */ -const auto shapes_4d_small = combine(datasets::Small4DShapes(), framework::dataset::make("Axis", -4, 5)); +const auto shapes_4d_small = combine(datasets::Small4DShapes(), make("Axis", -4, 5)); /** Shapes 1D to test */ -const auto shapes_1d_large = combine(datasets::Large1DShapes(), framework::dataset::make("Axis", -1, 2)); +const auto shapes_1d_large = combine(datasets::Large1DShapes(), make("Axis", -1, 2)); /** Shapes 2D to test */ -const auto shapes_2d_large = combine(datasets::Medium2DShapes(), framework::dataset::make("Axis", -2, 3)); +const auto shapes_2d_large = combine(datasets::Medium2DShapes(), make("Axis", -2, 3)); /** Shapes 3D to test */ -const auto shapes_3d_large = combine(datasets::Medium3DShapes(), framework::dataset::make("Axis", -3, 4)); +const auto shapes_3d_large = combine(datasets::Medium3DShapes(), make("Axis", -3, 4)); /** Shapes 4D to test */ -const auto shapes_4d_large = combine(datasets::Medium4DShapes(), framework::dataset::make("Axis", -4, 5)); +const auto shapes_4d_large = combine(datasets::Medium4DShapes(), make("Axis", -4, 5)); } // namespace /** Fixture to use */ template using NEStackLayerFixture = StackLayerValidationFixture; +template +using NEStackLayerWithPaddingFixture = StackLayerWithPaddingValidationFixture; + using namespace arm_compute::misc::shape_calculator; TEST_SUITE(NEON) TEST_SUITE(StackLayer) -DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( - framework::dataset::make("InputInfo", +DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip( +make("InputInfo", { std::vector{ TensorInfo(TensorShape(9U, 8U), 1, DataType::U8) }, - std::vector{ TensorInfo(TensorShape(1U, 2U), 1, DataType::U8) , TensorInfo(TensorShape(1U, 2U), 1, DataType::U8), TensorInfo(TensorShape(1U, 2U), 1, DataType::U8)}, + std::vector{ TensorInfo(TensorShape(1U, 2U), 1, DataType::U8) , TensorInfo(TensorShape(1U, 2U), 1, DataType::U8), TensorInfo(TensorShape(1U, 2U), 1, DataType::U8)}, std::vector{ TensorInfo(TensorShape(2U, 3U), 1, DataType::S32) }, - std::vector{ TensorInfo(TensorShape(7U, 5U, 3U, 8U, 2U), 1, DataType::S32), TensorInfo(TensorShape(7U, 5U, 3U, 8U, 2U), 1, DataType::S32)}, + std::vector{ TensorInfo(TensorShape(7U, 5U, 3U, 8U, 2U), 1, DataType::S32), TensorInfo(TensorShape(7U, 5U, 3U, 8U, 2U), 1, DataType::S32)}, std::vector{ TensorInfo(TensorShape(9U, 8U), 1, DataType::S32) }, }), -framework::dataset::make("OutputInfo", +make("OutputInfo", { TensorInfo(TensorShape(1U, 9U, 8U), 1, DataType::U8), // Passes, stack 1 tensor on x axis TensorInfo(TensorShape(1U, 3U, 2U), 1, DataType::U8), // Passes, stack 3 tensors on y axis TensorInfo(TensorShape(1U, 2U, 3U), 1, DataType::S32), // fails axis < (- input's rank) TensorInfo(TensorShape(3U, 7U, 5U), 1, DataType::S32), // fails, input dimensions > 4 TensorInfo(TensorShape(1U, 2U, 3U), 1, DataType::U8), // fails mismatching data types -})), -framework::dataset::make("Axis", { -3, 1, -4, -3, 1 })), -framework::dataset::make("Expected", { true, true, false, false, false })), +}), +make("Axis", { -3, 1, -4, -3, 1 }), +make("Expected", { true, true, false, false, false })), input_info, output_info, axis, expected) { std::vector ti(input_info); @@ -121,18 +126,18 @@ input_info, output_info, axis, expected) TEST_SUITE(Shapes1D) TEST_SUITE(S32) FIXTURE_DATA_TEST_CASE(RunSmall, NEStackLayerFixture, framework::DatasetMode::ALL, - combine(combine(shapes_1d_small, - framework::dataset::make("DataType", { DataType::S32 })), - n_values)) + combine(shapes_1d_small, + make("DataType", { DataType::S32 }), + n_values)) { // Validate output validate(Accessor(_target), _reference); } FIXTURE_DATA_TEST_CASE(RunLarge, NEStackLayerFixture, framework::DatasetMode::NIGHTLY, - combine(combine(shapes_1d_large, - framework::dataset::make("DataType", { DataType::S32 })), - n_values)) + combine(shapes_1d_large, + make("DataType", { DataType::S32 }), + n_values)) { // Validate output validate(Accessor(_target), _reference); @@ -141,18 +146,18 @@ TEST_SUITE_END() // S32 TEST_SUITE(S16) FIXTURE_DATA_TEST_CASE(RunSmall, NEStackLayerFixture, framework::DatasetMode::ALL, - combine(combine(shapes_1d_small, - framework::dataset::make("DataType", { DataType::S16 })), - n_values)) + combine(shapes_1d_small, + make("DataType", { DataType::S16 }), + n_values)) { // Validate output validate(Accessor(_target), _reference); } FIXTURE_DATA_TEST_CASE(RunLarge, NEStackLayerFixture, framework::DatasetMode::NIGHTLY, - combine(combine(shapes_1d_large, - framework::dataset::make("DataType", { DataType::S16 })), - n_values)) + combine(shapes_1d_large, + make("DataType", { DataType::S16 }), + n_values)) { // Validate output validate(Accessor(_target), _reference); @@ -161,18 +166,18 @@ TEST_SUITE_END() // S16 TEST_SUITE(S8) FIXTURE_DATA_TEST_CASE(RunSmall, NEStackLayerFixture, framework::DatasetMode::ALL, - combine(combine(shapes_1d_small, - framework::dataset::make("DataType", { DataType::S8 })), - n_values)) + combine(shapes_1d_small, + make("DataType", { DataType::S8 }), + n_values)) { // Validate output validate(Accessor(_target), _reference); } FIXTURE_DATA_TEST_CASE(RunLarge, NEStackLayerFixture, framework::DatasetMode::NIGHTLY, - combine(combine(shapes_1d_large, - framework::dataset::make("DataType", { DataType::S8 })), - n_values)) + combine(shapes_1d_large, + make("DataType", { DataType::S8 }), + n_values)) { // Validate output validate(Accessor(_target), _reference); @@ -183,18 +188,18 @@ TEST_SUITE_END() // Shapes1D TEST_SUITE(Shapes2D) TEST_SUITE(S32) FIXTURE_DATA_TEST_CASE(RunSmall, NEStackLayerFixture, framework::DatasetMode::ALL, - combine(combine(shapes_2d_small, - framework::dataset::make("DataType", { DataType::S32 })), - n_values)) + combine(shapes_2d_small, + make("DataType", { DataType::S32 }), + n_values)) { // Validate output validate(Accessor(_target), _reference); } FIXTURE_DATA_TEST_CASE(RunLarge, NEStackLayerFixture, framework::DatasetMode::NIGHTLY, - combine(combine(shapes_2d_large, - framework::dataset::make("DataType", { DataType::S32 })), - n_values)) + combine(shapes_2d_large, + make("DataType", { DataType::S32 }), + n_values)) { // Validate output validate(Accessor(_target), _reference); @@ -203,18 +208,18 @@ TEST_SUITE_END() // S32 TEST_SUITE(S16) FIXTURE_DATA_TEST_CASE(RunSmall, NEStackLayerFixture, framework::DatasetMode::ALL, - combine(combine(shapes_2d_small, - framework::dataset::make("DataType", { DataType::S16 })), - n_values)) + combine(shapes_2d_small, + make("DataType", { DataType::S16 }), + n_values)) { // Validate output validate(Accessor(_target), _reference); } FIXTURE_DATA_TEST_CASE(RunLarge, NEStackLayerFixture, framework::DatasetMode::NIGHTLY, - combine(combine(shapes_2d_large, - framework::dataset::make("DataType", { DataType::S16 })), - n_values)) + combine(shapes_2d_large, + make("DataType", { DataType::S16 }), + n_values)) { // Validate output validate(Accessor(_target), _reference); @@ -223,18 +228,18 @@ TEST_SUITE_END() // S16 TEST_SUITE(S8) FIXTURE_DATA_TEST_CASE(RunSmall, NEStackLayerFixture, framework::DatasetMode::ALL, - combine(combine(shapes_2d_small, - framework::dataset::make("DataType", { DataType::S8 })), - n_values)) + combine(shapes_2d_small, + make("DataType", { DataType::S8 }), + n_values)) { // Validate output validate(Accessor(_target), _reference); } FIXTURE_DATA_TEST_CASE(RunLarge, NEStackLayerFixture, framework::DatasetMode::NIGHTLY, - combine(combine(shapes_2d_large, - framework::dataset::make("DataType", { DataType::S8 })), - n_values)) + combine(shapes_2d_large, + make("DataType", { DataType::S8 }), + n_values)) { // Validate output validate(Accessor(_target), _reference); @@ -245,18 +250,18 @@ TEST_SUITE_END() // Shapes2D TEST_SUITE(Shapes3D) TEST_SUITE(S32) FIXTURE_DATA_TEST_CASE(RunSmall, NEStackLayerFixture, framework::DatasetMode::ALL, - combine(combine(shapes_3d_small, - framework::dataset::make("DataType", { DataType::S32 })), - n_values)) + combine(shapes_3d_small, + make("DataType", { DataType::S32 }), + n_values)) { // Validate output validate(Accessor(_target), _reference); } FIXTURE_DATA_TEST_CASE(RunLarge, NEStackLayerFixture, framework::DatasetMode::NIGHTLY, - combine(combine(shapes_3d_large, - framework::dataset::make("DataType", { DataType::S32 })), - n_values)) + combine(shapes_3d_large, + make("DataType", { DataType::S32 }), + n_values)) { // Validate output validate(Accessor(_target), _reference); @@ -265,18 +270,18 @@ TEST_SUITE_END() // S32 TEST_SUITE(S16) FIXTURE_DATA_TEST_CASE(RunSmall, NEStackLayerFixture, framework::DatasetMode::ALL, - combine(combine(shapes_3d_small, - framework::dataset::make("DataType", { DataType::S16 })), - n_values)) + combine(shapes_3d_small, + make("DataType", { DataType::S16 }), + n_values)) { // Validate output validate(Accessor(_target), _reference); } FIXTURE_DATA_TEST_CASE(RunLarge, NEStackLayerFixture, framework::DatasetMode::NIGHTLY, - combine(combine(shapes_3d_large, - framework::dataset::make("DataType", { DataType::S16 })), - n_values)) + combine(shapes_3d_large, + make("DataType", { DataType::S16 }), + n_values)) { // Validate output validate(Accessor(_target), _reference); @@ -285,18 +290,18 @@ TEST_SUITE_END() // S16 TEST_SUITE(S8) FIXTURE_DATA_TEST_CASE(RunSmall, NEStackLayerFixture, framework::DatasetMode::ALL, - combine(combine(shapes_3d_small, - framework::dataset::make("DataType", { DataType::S8 })), - n_values)) + combine(shapes_3d_small, + make("DataType", { DataType::S8 }), + n_values)) { // Validate output validate(Accessor(_target), _reference); } FIXTURE_DATA_TEST_CASE(RunLarge, NEStackLayerFixture, framework::DatasetMode::NIGHTLY, - combine(combine(shapes_3d_large, - framework::dataset::make("DataType", { DataType::S8 })), - n_values)) + combine(shapes_3d_large, + make("DataType", { DataType::S8 }), + n_values)) { // Validate output validate(Accessor(_target), _reference); @@ -307,18 +312,29 @@ TEST_SUITE_END() // Shapes3D TEST_SUITE(Shapes4D) TEST_SUITE(S32) FIXTURE_DATA_TEST_CASE(RunSmall, NEStackLayerFixture, framework::DatasetMode::ALL, - combine(combine(shapes_4d_small, - framework::dataset::make("DataType", { DataType::S32 })), - n_values)) + combine(shapes_4d_small, + make("DataType", { DataType::S32 }), + n_values)) +{ + // Validate output + validate(Accessor(_target), _reference); +} + +// Testing the case with padding for only 4d shapes and for one data type. This is because the underlying code +// path depends only on the padding, which isn't affected by the shapes or data types. +FIXTURE_DATA_TEST_CASE(RunSmallWithPadding, NEStackLayerWithPaddingFixture, framework::DatasetMode::ALL, + combine(shapes_4d_small, + make("DataType", { DataType::S32 }), + n_values)) { // Validate output validate(Accessor(_target), _reference); } FIXTURE_DATA_TEST_CASE(RunLarge, NEStackLayerFixture, framework::DatasetMode::NIGHTLY, - combine(combine(shapes_4d_large, - framework::dataset::make("DataType", { DataType::S32 })), - n_values)) + combine(shapes_4d_large, + make("DataType", { DataType::S32 }), + n_values)) { // Validate output validate(Accessor(_target), _reference); @@ -327,18 +343,18 @@ TEST_SUITE_END() // S32 TEST_SUITE(S16) FIXTURE_DATA_TEST_CASE(RunSmall, NEStackLayerFixture, framework::DatasetMode::ALL, - combine(combine(shapes_4d_small, - framework::dataset::make("DataType", { DataType::S16 })), - n_values)) + combine(shapes_4d_small, + make("DataType", { DataType::S16 }), + n_values)) { // Validate output validate(Accessor(_target), _reference); } FIXTURE_DATA_TEST_CASE(RunLarge, NEStackLayerFixture, framework::DatasetMode::NIGHTLY, - combine(combine(shapes_4d_large, - framework::dataset::make("DataType", { DataType::S16 })), - n_values)) + combine(shapes_4d_large, + make("DataType", { DataType::S16 }), + n_values)) { // Validate output validate(Accessor(_target), _reference); @@ -347,24 +363,37 @@ TEST_SUITE_END() // S16 TEST_SUITE(S8) FIXTURE_DATA_TEST_CASE(RunSmall, NEStackLayerFixture, framework::DatasetMode::ALL, - combine(combine(shapes_4d_small, - framework::dataset::make("DataType", { DataType::S8 })), - n_values)) + combine(shapes_4d_small, + make("DataType", { DataType::S8 }), + n_values)) { // Validate output validate(Accessor(_target), _reference); } FIXTURE_DATA_TEST_CASE(RunLarge, NEStackLayerFixture, framework::DatasetMode::NIGHTLY, - combine(combine(shapes_4d_large, - framework::dataset::make("DataType", { DataType::S8 })), - n_values)) + combine(shapes_4d_large, + make("DataType", { DataType::S8 }), + n_values)) { // Validate output validate(Accessor(_target), _reference); } TEST_SUITE_END() // S8 TEST_SUITE_END() // Shapes4D + +TEST_SUITE(HighDimensional) +// The Cpu implementation supports tensors of size 4D+, but reference implementation does not. +FIXTURE_DATA_TEST_CASE(RunHighDimensional, NEStackLayerFixture, framework::DatasetMode::DISABLED, + combine(make("Shape", { TensorShape{2U, 3U, 4U, 5U, 3U} }), + make("Axis", { 5, 0, -3, 2 }), + make("DataType", { DataType::S8 }), + make("NumTensors", { 3 }))) +{ + // Validate output + validate(Accessor(_target), _reference); +} +TEST_SUITE_END() // HighDimensional TEST_SUITE_END() // StackLayer TEST_SUITE_END() // Neon } // namespace validation diff --git a/tests/validation/fixtures/StackLayerFixture.h b/tests/validation/fixtures/StackLayerFixture.h index 7320a032bd..7dd8fe47dc 100644 --- a/tests/validation/fixtures/StackLayerFixture.h +++ b/tests/validation/fixtures/StackLayerFixture.h @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_STACK_LAYER_FIXTURE -#define ARM_COMPUTE_TEST_STACK_LAYER_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_STACKLAYERFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_STACKLAYERFIXTURE_H #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorShape.h" @@ -54,7 +54,7 @@ class StackLayerValidationFixture : public framework::Fixture public: void setup(TensorShape shape_src, int axis, DataType data_type, int num_tensors) { - _target = compute_target(shape_src, axis, data_type, num_tensors); + _target = compute_target(shape_src, axis, data_type, num_tensors, false /* add_x_padding */); _reference = compute_reference(shape_src, axis, data_type, num_tensors); } @@ -65,7 +65,7 @@ protected: library->fill_tensor_uniform(tensor, i); } - TensorType compute_target(TensorShape shape_src, int axis, DataType data_type, int num_tensors) + TensorType compute_target(TensorShape shape_src, int axis, DataType data_type, int num_tensors, bool add_x_padding) { std::vector tensors(num_tensors); std::vector src(num_tensors); @@ -90,6 +90,11 @@ protected: // Allocate and fill the input tensors for(int i = 0; i < num_tensors; ++i) { + if(add_x_padding) + { + add_padding_x({&tensors[i]}, DataLayout::NHWC); + } + ARM_COMPUTE_ASSERT(tensors[i].info()->is_resizable()); tensors[i].allocator()->allocate(); ARM_COMPUTE_ASSERT(!tensors[i].info()->is_resizable()); @@ -98,6 +103,11 @@ protected: fill(AccessorType(tensors[i]), i); } + if(add_x_padding) + { + add_padding_x({&dst}, DataLayout::NHWC); + } + // Allocate output tensor dst.allocator()->allocate(); @@ -131,7 +141,21 @@ protected: TensorType _target{}; SimpleTensor _reference{}; }; + +template +class StackLayerWithPaddingValidationFixture : + public StackLayerValidationFixture +{ +public: + using Parent = StackLayerValidationFixture; + + void setup(TensorShape shape_src, int axis, DataType data_type, int num_tensors) + { + Parent::_target = Parent::compute_target(shape_src, axis, data_type, num_tensors, true /* add_x_padding */); + Parent::_reference = Parent::compute_reference(shape_src, axis, data_type, num_tensors); + } +}; } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_STACK_LAYER_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_STACKLAYERFIXTURE_H -- cgit v1.2.1