 docs/user_guide/release_version_and_change_log.dox |  1
 src/cpu/operators/CpuConv2d.cpp                     | 22
 tests/datasets/LargeConvolutionLayerDataset.h       | 12
 tests/validation/NEON/ConvolutionLayer.cpp          | 19
 4 files changed, 48 insertions(+), 6 deletions(-)
diff --git a/docs/user_guide/release_version_and_change_log.dox b/docs/user_guide/release_version_and_change_log.dox
index b788957dda..21a5a368ad 100644
--- a/docs/user_guide/release_version_and_change_log.dox
+++ b/docs/user_guide/release_version_and_change_log.dox
@@ -43,6 +43,7 @@ If there is more than one release in a month then an extra sequential number is
 v24.04 Public major release
  - Optimize start-up time of @ref NEConvolutionLayer for some input configurations where GeMM is selected as the convolution algorithm
+ - Optimize @ref NEConvolutionLayer for input tensor size > 1e7 bytes and weight tensor height > 7
 v24.02 Public major release
  - Replace template writer with compute kernel writer in dynamic fusion.
diff --git a/src/cpu/operators/CpuConv2d.cpp b/src/cpu/operators/CpuConv2d.cpp
index 19311733db..26ca2ee783 100644
--- a/src/cpu/operators/CpuConv2d.cpp
+++ b/src/cpu/operators/CpuConv2d.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2021, 2023 Arm Limited.
+ * Copyright (c) 2017-2021, 2023-2024 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -209,12 +209,24 @@ ConvolutionMethod CpuConv2d::get_convolution_method(const ITensorInfo *i
     }
     else
     {
+        const bool gemmDirectConv2d_validates =
+            bool(CpuGemmDirectConv2d::validate(input, weights, nullptr, output, info));
+
         // SRGAN
         // Output might not be initialized when it is an internal tensor of the layer using the convolution
-        if (input->total_size() > 1e7 && (weights->dimension(idx_h) > 7) &&
-            (CpuDirectConv2d::validate(input, weights, nullptr, output, conv_info, act_info)))
+        if (input->total_size() > 1e7 && weights->dimension(idx_h) > 7)
         {
-            return ConvolutionMethod::DIRECT;
+            // This configuration is memory demanding for the GEMM method. GEMM_CONV2D, which uses indirect
+            // convolution kernels underneath, is the best option.
+            if (gemmDirectConv2d_validates)
+            {
+                return ConvolutionMethod::GEMM_CONV2D;
+            }
+            else if (bool(CpuDirectConv2d::validate(input, weights, nullptr, output, conv_info, act_info)))
+            {
+                // NCHW data layout is not supported by GEMM_CONV2D
+                return ConvolutionMethod::DIRECT;
+            }
         }
         if (input->dimension(idx_c) < 16)
         {
@@ -270,7 +282,7 @@ ConvolutionMethod CpuConv2d::get_convolution_method(const ITensorInfo *i
         {
             return ConvolutionMethod::WINOGRAD;
         }
-        if (bool(CpuGemmDirectConv2d::validate(input, weights, nullptr, output, info)))
+        if (gemmDirectConv2d_validates)
         {
             return ConvolutionMethod::GEMM_CONV2D;
         }
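
For reference, the method chosen by the heuristic above can be queried without running the operator, via the public NEConvolutionLayer::get_convolution_method() helper (using its default weights_info, dilation and activation arguments). The sketch below is illustrative only and is not part of the patch: the shapes are the new VeryLargeConvolutionLayerDataset entry permuted into Compute Library's NHWC dimension order (C, W, H), and the method actually returned still depends on the target CPU and on which backends validate.

// Illustrative sketch (not part of the patch): query the convolution method
// selected for a large NHWC F32 workload that crosses the new thresholds.
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h"

using namespace arm_compute;

int main()
{
    // NHWC shapes are expressed as (C, W, H[, N]) in Compute Library.
    TensorInfo src(TensorShape(32U, 336U, 336U), 1, DataType::F32);  // 14,450,688 bytes > 1e7
    TensorInfo wei(TensorShape(32U, 9U, 9U, 64U), 1, DataType::F32); // kernel height 9 > 7
    TensorInfo dst(TensorShape(64U, 168U, 168U), 1, DataType::F32);
    src.set_data_layout(DataLayout::NHWC);
    wei.set_data_layout(DataLayout::NHWC);
    dst.set_data_layout(DataLayout::NHWC);

    const ConvolutionMethod method =
        NEConvolutionLayer::get_convolution_method(&src, &wei, &dst, PadStrideInfo(2, 2, 4, 4));

    // With this change, GEMM_CONV2D is expected when the indirect-GEMM backend
    // validates; otherwise DIRECT remains the fallback (e.g. for NCHW).
    return method == ConvolutionMethod::GEMM_CONV2D ? 0 : 1;
}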
diff --git a/tests/datasets/LargeConvolutionLayerDataset.h b/tests/datasets/LargeConvolutionLayerDataset.h
index 72f73ba6d9..c299f2460b 100644
--- a/tests/datasets/LargeConvolutionLayerDataset.h
+++ b/tests/datasets/LargeConvolutionLayerDataset.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020, 2023 Arm Limited.
+ * Copyright (c) 2017-2020, 2023-2024 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -294,6 +294,16 @@ public:
     }
 };
 
+class VeryLargeConvolutionLayerDataset final : public ConvolutionLayerDataset
+{
+public:
+    VeryLargeConvolutionLayerDataset()
+    {
+        // Tensor size > 1e7 bytes && weight height > 7
+        add_config(TensorShape(336U, 336U, 32U), TensorShape(9U, 9U, 32U, 64U), TensorShape(64U), TensorShape(168U, 168U, 64U), PadStrideInfo(2, 2, 4, 4));
+    }
+};
+
 class LargeGroupedConvolutionLayerDataset final : public ConvolutionLayerDataset
 {
 public:
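
For reference (not part of the patch), the numbers behind the new entry: 336 * 336 * 32 F32 elements are 14,450,688 bytes, which clears the 1e7-byte input threshold; the 9x9 kernel clears the height > 7 condition; and the 168x168 output extent follows from floor((336 + 2*4 - 9) / 2) + 1. A minimal compile-time check of that arithmetic:

// Sketch only: sanity-check the VeryLargeConvolutionLayerDataset entry against
// the thresholds used by CpuConv2d::get_convolution_method().
#include <cstddef>

constexpr std::size_t input_bytes   = 336u * 336u * 32u * sizeof(float); // 14,450,688
constexpr unsigned    kernel_height = 9u;
constexpr unsigned    out_extent    = (336u + 2u * 4u - 9u) / 2u + 1u;   // 168

static_assert(input_bytes > 10000000u, "input tensor is larger than 1e7 bytes");
static_assert(kernel_height > 7u, "kernel height is larger than 7");
static_assert(out_extent == 168u, "matches the 168x168x64 destination shape");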
diff --git a/tests/validation/NEON/ConvolutionLayer.cpp b/tests/validation/NEON/ConvolutionLayer.cpp
index 62690c053e..7a9230d37a 100644
--- a/tests/validation/NEON/ConvolutionLayer.cpp
+++ b/tests/validation/NEON/ConvolutionLayer.cpp
@@ -109,6 +109,11 @@ const auto ActivationFunctionsDataset = make("ActivationInfo",
     ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.5f)
 });
 
+const auto NoActivation = make("ActivationInfo",
+{
+    ActivationLayerInfo(),
+});
+
 const auto ActivationFunctionsDatasetNightly = make("ActivationInfo",
 {
     ActivationLayerInfo(),
@@ -1201,6 +1206,20 @@ FIXTURE_DATA_TEST_CASE(RunPaddedWeights, NEGEMMConvolutionLayerPaddedWeightsFixt
     // Validate output
     validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32));
 }
+
+// This very large shape test is required to exercise the heuristic path taken when the tensor size is > 1e7 bytes
+// and the weight height is larger than 7
+FIXTURE_DATA_TEST_CASE(RunVeryLarge, NEGEMMConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
+                       combine(datasets::VeryLargeConvolutionLayerDataset(),
+                               framework::dataset::make("ReshapeWeights", { true }),
+                               framework::dataset::make("DataType", DataType::F32),
+                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }),
+                               NoActivation))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32));
+}
+
 TEST_SUITE_END() // FP32
 TEST_SUITE_END() // Float
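
For context on what the RunVeryLarge fixture drives end to end, a hedged usage sketch with the same shapes follows (default NCHW layout here; the nightly test additionally covers NHWC, which is where GEMM_CONV2D can be selected). The tensor names and the omitted fill step are placeholders rather than code taken from the test fixture.

// Sketch (assumed shapes from VeryLargeConvolutionLayerDataset): configure and
// run NEConvolutionLayer on the very large configuration exercised by the test.
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    Tensor src, wei, bia, dst;
    src.allocator()->init(TensorInfo(TensorShape(336U, 336U, 32U), 1, DataType::F32));
    wei.allocator()->init(TensorInfo(TensorShape(9U, 9U, 32U, 64U), 1, DataType::F32));
    bia.allocator()->init(TensorInfo(TensorShape(64U), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(168U, 168U, 64U), 1, DataType::F32));

    NEConvolutionLayer conv;
    conv.configure(&src, &wei, &bia, &dst, PadStrideInfo(2, 2, 4, 4));

    src.allocator()->allocate();
    wei.allocator()->allocate();
    bia.allocator()->allocate();
    dst.allocator()->allocate();

    // Fill src/wei/bia with real data here; omitted in this sketch.
    conv.run();
    return 0;
}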