diff options
-rw-r--r-- | docs/user_guide/release_version_and_change_log.dox | 1
-rw-r--r-- | src/cpu/operators/CpuConv2d.cpp | 22
-rw-r--r-- | tests/datasets/LargeConvolutionLayerDataset.h | 12
-rw-r--r-- | tests/validation/NEON/ConvolutionLayer.cpp | 19
4 files changed, 48 insertions, 6 deletions
diff --git a/docs/user_guide/release_version_and_change_log.dox b/docs/user_guide/release_version_and_change_log.dox index b788957dda..21a5a368ad 100644 --- a/docs/user_guide/release_version_and_change_log.dox +++ b/docs/user_guide/release_version_and_change_log.dox @@ -43,6 +43,7 @@ If there is more than one release in a month then an extra sequential number is v24.04 Public major release - Optimize start-up time of @ref NEConvolutionLayer for some input configurations where GeMM is selected as the convolution algorithm + - Optimize @ref NEConvolutionLayer for input tensor size > 1e7 bytes and weight tensor height > 7 v24.02 Public major release - Replace template writer with compute kernel writer in dynamic fusion. diff --git a/src/cpu/operators/CpuConv2d.cpp b/src/cpu/operators/CpuConv2d.cpp index 19311733db..26ca2ee783 100644 --- a/src/cpu/operators/CpuConv2d.cpp +++ b/src/cpu/operators/CpuConv2d.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021, 2023 Arm Limited. + * Copyright (c) 2017-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -209,12 +209,24 @@ ConvolutionMethod CpuConv2d::get_convolution_method(const ITensorInfo *i } else { + const bool gemmDirectConv2d_validates = + bool(CpuGemmDirectConv2d::validate(input, weights, nullptr, output, info)); + // SRGAN // Output might not be initialized when it is an internal tensor of the layer using the convolution - if (input->total_size() > 1e7 && (weights->dimension(idx_h) > 7) && - (CpuDirectConv2d::validate(input, weights, nullptr, output, conv_info, act_info))) + if (input->total_size() > 1e7 && weights->dimension(idx_h) > 7) { - return ConvolutionMethod::DIRECT; + // This configuration is memory demanding for GEMM method. GEMM_CONV2D which uses indirect convolution + // kernels underneath is the best option. 
+ if (gemmDirectConv2d_validates) + { + return ConvolutionMethod::GEMM_CONV2D; + } + else if (bool(CpuDirectConv2d::validate(input, weights, nullptr, output, conv_info, act_info))) + { + // NCHW data layout is not supported by GEMM_CONV2D + return ConvolutionMethod::DIRECT; + } } if (input->dimension(idx_c) < 16) { @@ -270,7 +282,7 @@ ConvolutionMethod CpuConv2d::get_convolution_method(const ITensorInfo *i { return ConvolutionMethod::WINOGRAD; } - if (bool(CpuGemmDirectConv2d::validate(input, weights, nullptr, output, info))) + if (gemmDirectConv2d_validates) { return ConvolutionMethod::GEMM_CONV2D; } diff --git a/tests/datasets/LargeConvolutionLayerDataset.h b/tests/datasets/LargeConvolutionLayerDataset.h index 72f73ba6d9..c299f2460b 100644 --- a/tests/datasets/LargeConvolutionLayerDataset.h +++ b/tests/datasets/LargeConvolutionLayerDataset.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020, 2023 Arm Limited. + * Copyright (c) 2017-2020, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -294,6 +294,16 @@ public: } }; +class VeryLargeConvolutionLayerDataset final : public ConvolutionLayerDataset +{ +public: + VeryLargeConvolutionLayerDataset() + { + // Tensor size > 1e7 bytes && weight dimensions > 7 + add_config(TensorShape(336U, 336U, 32U), TensorShape(9U, 9U, 32U, 64U), TensorShape(64U), TensorShape(168U, 168U, 64U), PadStrideInfo(2, 2, 4, 4)); + } +}; + class LargeGroupedConvolutionLayerDataset final : public ConvolutionLayerDataset { public: diff --git a/tests/validation/NEON/ConvolutionLayer.cpp b/tests/validation/NEON/ConvolutionLayer.cpp index 62690c053e..7a9230d37a 100644 --- a/tests/validation/NEON/ConvolutionLayer.cpp +++ b/tests/validation/NEON/ConvolutionLayer.cpp @@ -109,6 +109,11 @@ const auto ActivationFunctionsDataset = make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.5f) }); +const auto NoActivation = make("ActivationInfo", +{ + ActivationLayerInfo(), +}); + const auto 
ActivationFunctionsDatasetNightly = make("ActivationInfo", { ActivationLayerInfo(), @@ -1201,6 +1206,20 @@ FIXTURE_DATA_TEST_CASE(RunPaddedWeights, NEGEMMConvolutionLayerPaddedWeightsFixt // Validate output validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32)); } + +// This very large shape test is required to test heuristic paths where the tensor size is > 1e7 bytes +// and weight dimensions larger than 7 +FIXTURE_DATA_TEST_CASE(RunVeryLarge, NEGEMMConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, + combine(datasets::VeryLargeConvolutionLayerDataset(), + framework::dataset::make("ReshapeWeights", { true }), + framework::dataset::make("DataType", DataType::F32), + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }), + NoActivation)) +{ + // Validate output + validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32)); +} + TEST_SUITE_END() // FP32 TEST_SUITE_END() // Float |