aboutsummaryrefslogtreecommitdiff
path: root/src/runtime/CL/functions/CLConvolutionLayer.cpp
diff options
context:
space:
mode:
authorGian Marco Iodice <gianmarco.iodice@arm.com>2021-01-02 09:58:51 +0000
committerGeorgios Pinitas <georgios.pinitas@arm.com>2021-01-19 13:43:52 +0000
commitff1fe3e32e25069fed750cdfe3046b7d8d5a2628 (patch)
tree9c01379de63f6ab218c7890dc91b10ac8faac157 /src/runtime/CL/functions/CLConvolutionLayer.cpp
parent6124390be4690ba06c404d56449f7e5d390cef53 (diff)
downloadComputeLibrary-ff1fe3e32e25069fed750cdfe3046b7d8d5a2628.tar.gz
Remove padding from direct convolution - OpenCL
- Refactor direct convolution for NHWC - Remove old kernels for NHWC - Change the heuristic in CLConvolutionLayer.cpp. The new direct convolution implementation is faster than FFT Resolves COMPMID-3908 Change-Id: Iee15ce7b04e21847b6eaae5c6d3c1b18180e7efc Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4876 Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Diffstat (limited to 'src/runtime/CL/functions/CLConvolutionLayer.cpp')
-rw-r--r--src/runtime/CL/functions/CLConvolutionLayer.cpp45
1 files changed, 33 insertions, 12 deletions
diff --git a/src/runtime/CL/functions/CLConvolutionLayer.cpp b/src/runtime/CL/functions/CLConvolutionLayer.cpp
index 5bfbc7ce57..b1351f6747 100644
--- a/src/runtime/CL/functions/CLConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLConvolutionLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -198,21 +198,42 @@ ConvolutionMethod CLConvolutionLayer::get_convolution_method(const ITensorInfo *
}
else
{
- // SRGAN
- if((input->dimension(idx_h) > 720U) && (output->dimension(idx_h) > 720U) && (weights->dimension(idx_h) == 9) && (conv_info.pad_top() < 3)
- && (CLDirectConvolutionLayer::validate(input, weights, nullptr, output, conv_info, act_info)))
+ if(input->data_layout() == DataLayout::NCHW)
{
- return ConvolutionMethod::DIRECT;
+ // SRGAN
+ if((input->dimension(idx_h) > 720U) && (output->dimension(idx_h) > 720U) && (weights->dimension(idx_h) == 9) && (conv_info.pad_top() < 3)
+ && (CLDirectConvolutionLayer::validate(input, weights, nullptr, output, conv_info, act_info)))
+ {
+ return ConvolutionMethod::DIRECT;
+ }
+ if((weights->dimension(idx_h) > 5) && (input->dimension(idx_c) > output->dimension(idx_c)) && (CLFFTConvolutionLayer::validate(input, weights, nullptr, output, conv_info, act_info, enable_fast_math)))
+ {
+ return ConvolutionMethod::FFT;
+ }
+ if(input->dimension(idx_c) < 16)
+ {
+ return ConvolutionMethod::GEMM;
+ }
+ return bool(CLWinogradConvolutionLayer::validate(input, weights, nullptr, output, conv_info, act_info, enable_fast_math)) ? ConvolutionMethod::WINOGRAD : ConvolutionMethod::GEMM;
}
- if((weights->dimension(idx_h) > 7) && (input->dimension(idx_c) > output->dimension(idx_c)) && (CLFFTConvolutionLayer::validate(input, weights, nullptr, output, conv_info, act_info, enable_fast_math)))
+ else
{
- return ConvolutionMethod::FFT;
+ // SRGAN
+ if((input->dimension(idx_h) > 720U) && (output->dimension(idx_h) > 720U) && (weights->dimension(idx_h) == 9) && (conv_info.pad_top() < 3)
+ && (CLDirectConvolutionLayer::validate(input, weights, nullptr, output, conv_info, act_info)))
+ {
+ return ConvolutionMethod::DIRECT;
+ }
+ if((weights->dimension(idx_h) > 7) && (input->dimension(idx_c) > output->dimension(idx_c)) && (CLDirectConvolutionLayer::validate(input, weights, nullptr, output, conv_info, act_info)))
+ {
+ return ConvolutionMethod::DIRECT;
+ }
+ if(input->dimension(idx_c) < 16)
+ {
+ return ConvolutionMethod::GEMM;
+ }
+ return bool(CLWinogradConvolutionLayer::validate(input, weights, nullptr, output, conv_info, act_info, enable_fast_math)) ? ConvolutionMethod::WINOGRAD : ConvolutionMethod::GEMM;
}
- if(input->dimension(idx_c) < 16)
- {
- return ConvolutionMethod::GEMM;
- }
- return bool(CLWinogradConvolutionLayer::validate(input, weights, nullptr, output, conv_info, act_info, enable_fast_math)) ? ConvolutionMethod::WINOGRAD : ConvolutionMethod::GEMM;
}
}