From 9167c9cd1c684218f76a3c0ec97574dd6f381b98 Mon Sep 17 00:00:00 2001
From: Gunes Bayir
Date: Wed, 6 Mar 2024 09:58:40 +0000
Subject: Prefer indirect Gemm vs. Direct convolution if supported

Indirect GEMM uses an optimized assembly path, while Direct Conv uses the
fallback Acl kernel for convolution. In certain cases where the input
tensor is large and the filter size is greater than 7 (e.g. 9x9 filters),
the heuristics fall back to the Direct Conv algorithm even though the
assembly path could still be preferred when the data layout is NHWC. This
is more important when SME2 kernels are present.

Resolves: COMPMID-6900
Change-Id: Ia611c975eee0423615113fcaeaa8f9eef0421456
Signed-off-by: Gunes Bayir
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/11254
Benchmark: Arm Jenkins
Tested-by: Arm Jenkins
Reviewed-by: Anitha Raj
Comments-Addressed: Arm Jenkins
---
 tests/datasets/LargeConvolutionLayerDataset.h | 12 +++++++++++-
 tests/validation/NEON/ConvolutionLayer.cpp    | 19 +++++++++++++++++++
 2 files changed, 30 insertions(+), 1 deletion(-)

(limited to 'tests')

diff --git a/tests/datasets/LargeConvolutionLayerDataset.h b/tests/datasets/LargeConvolutionLayerDataset.h
index 72f73ba6d9..c299f2460b 100644
--- a/tests/datasets/LargeConvolutionLayerDataset.h
+++ b/tests/datasets/LargeConvolutionLayerDataset.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020, 2023 Arm Limited.
+ * Copyright (c) 2017-2020, 2023-2024 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -294,6 +294,16 @@ public:
     }
 };
 
+class VeryLargeConvolutionLayerDataset final : public ConvolutionLayerDataset
+{
+public:
+    VeryLargeConvolutionLayerDataset()
+    {
+        // Tensor size > 1e7 bytes && weight dimensions > 7
+        add_config(TensorShape(336U, 336U, 32U), TensorShape(9U, 9U, 32U, 64U), TensorShape(64U), TensorShape(168U, 168U, 64U), PadStrideInfo(2, 2, 4, 4));
+    }
+};
+
 class LargeGroupedConvolutionLayerDataset final : public ConvolutionLayerDataset
 {
 public:
diff --git a/tests/validation/NEON/ConvolutionLayer.cpp b/tests/validation/NEON/ConvolutionLayer.cpp
index 62690c053e..7a9230d37a 100644
--- a/tests/validation/NEON/ConvolutionLayer.cpp
+++ b/tests/validation/NEON/ConvolutionLayer.cpp
@@ -109,6 +109,11 @@ const auto ActivationFunctionsDataset = make("ActivationInfo",
     ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.5f)
 });
 
+const auto NoActivation = make("ActivationInfo",
+{
+    ActivationLayerInfo(),
+});
+
 const auto ActivationFunctionsDatasetNightly = make("ActivationInfo",
 {
     ActivationLayerInfo(),
@@ -1201,6 +1206,20 @@ FIXTURE_DATA_TEST_CASE(RunPaddedWeights, NEGEMMConvolutionLayerPaddedWeightsFixt
     // Validate output
     validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32));
 }
+
+// This very large shape test is required to test heuristic paths where the tensor size is > 1e7 bytes
+// and weight dimensions larger than 7
+FIXTURE_DATA_TEST_CASE(RunVeryLarge, NEGEMMConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
+                       combine(datasets::VeryLargeConvolutionLayerDataset(),
+                               framework::dataset::make("ReshapeWeights", { true }),
+                               framework::dataset::make("DataType", DataType::F32),
+                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }),
+                               NoActivation))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32));
+}
+
 TEST_SUITE_END() // FP32
 TEST_SUITE_END() // Float
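
Background for the heuristic this patch exercises: the selection logic described
in the commit message can be pictured with the minimal C++ sketch below. This is
a hypothetical illustration, not ComputeLibrary's actual implementation; the
names (ConvConfig, ConvMethod, choose_method) and the exact shape of the check
are assumptions, with only the > 1e7-byte tensor size, the filter-size > 7
condition, and the NHWC/assembly preference taken from the commit message and
the new test's comment.

#include <cstddef>
#include <iostream>

// Hypothetical summary of a convolution configuration (illustrative only,
// not Compute Library's real types).
struct ConvConfig
{
    std::size_t input_bytes;   // total input tensor size in bytes
    std::size_t filter_size;   // spatial filter dimension, e.g. 9 for 9x9
    bool        is_nhwc;       // data layout is NHWC
    bool        asm_supported; // an indirect-GEMM assembly kernel exists
};

enum class ConvMethod
{
    IndirectGemm, // optimized assembly path
    DirectConv    // fallback kernel
};

// Sketch of the decision described in the commit message: very large inputs
// with filters wider than 7 used to fall back to Direct Conv unconditionally;
// the fix keeps the assembly path when NHWC layout and assembly support hold.
ConvMethod choose_method(const ConvConfig &cfg)
{
    const bool very_large = cfg.input_bytes > 10000000u && cfg.filter_size > 7u;
    if (very_large)
    {
        if (cfg.is_nhwc && cfg.asm_supported)
        {
            return ConvMethod::IndirectGemm; // new behaviour: prefer assembly
        }
        return ConvMethod::DirectConv; // old unconditional fallback
    }
    return ConvMethod::IndirectGemm;
}

int main()
{
    // Mirrors the new test shape: 336x336x32 F32 input (~14.5 MB), 9x9 filters.
    const ConvConfig cfg{336u * 336u * 32u * 4u, 9u, true, true};
    std::cout << (choose_method(cfg) == ConvMethod::IndirectGemm ? "indirect GEMM"
                                                                 : "direct conv")
              << "\n"; // prints "indirect GEMM"
    return 0;
}

With the new dataset's shape (a 336x336x32 F32 input of roughly 1.4e7 bytes and
9x9 filters), the sketch selects the indirect-GEMM path when the layout is NHWC,
which is the behaviour the nightly RunVeryLarge test is meant to pin down.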