aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorGunes Bayir <gunes.bayir@arm.com>2024-03-06 09:58:40 +0000
committerGunes Bayir <gunes.bayir@arm.com>2024-03-11 10:02:41 +0000
commit9167c9cd1c684218f76a3c0ec97574dd6f381b98 (patch)
tree7a9608f1f6861ad164697a0bbdc784be92a8d3e5 /src
parente77736fe4150648d2fd0649cf61c1bade928d69d (diff)
downloadComputeLibrary-9167c9cd1c684218f76a3c0ec97574dd6f381b98.tar.gz
Prefer indirect GEMM over direct convolution if supported
Indirect GEMM uses the optimized assembly path, while Direct Conv uses the fallback ACL kernel for convolution. In certain cases, where the input tensor is large and the filter size is greater than 7 (e.g. 9x9 filters), the heuristics fall back to the Direct Conv algorithm even though they could still prefer the assembly path when the data layout is NHWC. This is more important when SME2 kernels are present. Resolves: COMPMID-6900 Change-Id: Ia611c975eee0423615113fcaeaa8f9eef0421456 Signed-off-by: Gunes Bayir <gunes.bayir@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/11254 Benchmark: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Anitha Raj <Anitha.Raj@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src')
-rw-r--r--src/cpu/operators/CpuConv2d.cpp22
1 files changed, 17 insertions, 5 deletions
diff --git a/src/cpu/operators/CpuConv2d.cpp b/src/cpu/operators/CpuConv2d.cpp
index 19311733db..26ca2ee783 100644
--- a/src/cpu/operators/CpuConv2d.cpp
+++ b/src/cpu/operators/CpuConv2d.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021, 2023 Arm Limited.
+ * Copyright (c) 2017-2021, 2023-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -209,12 +209,24 @@ ConvolutionMethod CpuConv2d::get_convolution_method(const ITensorInfo *i
}
else
{
+ const bool gemmDirectConv2d_validates =
+ bool(CpuGemmDirectConv2d::validate(input, weights, nullptr, output, info));
+
// SRGAN
// Output might not be initialized when it is an internal tensor of the layer using the convolution
- if (input->total_size() > 1e7 && (weights->dimension(idx_h) > 7) &&
- (CpuDirectConv2d::validate(input, weights, nullptr, output, conv_info, act_info)))
+ if (input->total_size() > 1e7 && weights->dimension(idx_h) > 7)
{
- return ConvolutionMethod::DIRECT;
+ // This configuration is memory demanding for GEMM method. GEMM_CONV2D which uses indirect convolution
+ // kernels underneath is the best option.
+ if (gemmDirectConv2d_validates)
+ {
+ return ConvolutionMethod::GEMM_CONV2D;
+ }
+ else if (bool(CpuDirectConv2d::validate(input, weights, nullptr, output, conv_info, act_info)))
+ {
+ // NCHW data layout is not supported by GEMM_CONV2D
+ return ConvolutionMethod::DIRECT;
+ }
}
if (input->dimension(idx_c) < 16)
{
@@ -270,7 +282,7 @@ ConvolutionMethod CpuConv2d::get_convolution_method(const ITensorInfo *i
{
return ConvolutionMethod::WINOGRAD;
}
- if (bool(CpuGemmDirectConv2d::validate(input, weights, nullptr, output, info)))
+ if (gemmDirectConv2d_validates)
{
return ConvolutionMethod::GEMM_CONV2D;
}