diff options
author | Gian Marco Iodice <gianmarco.iodice@arm.com> | 2021-01-02 09:58:51 +0000 |
---|---|---|
committer | Georgios Pinitas <georgios.pinitas@arm.com> | 2021-01-19 13:43:52 +0000 |
commit | ff1fe3e32e25069fed750cdfe3046b7d8d5a2628 (patch) | |
tree | 9c01379de63f6ab218c7890dc91b10ac8faac157 /src/runtime/CL | |
parent | 6124390be4690ba06c404d56449f7e5d390cef53 (diff) | |
download | ComputeLibrary-ff1fe3e32e25069fed750cdfe3046b7d8d5a2628.tar.gz |
Remove padding from direct convolution - OpenCL
- Refactor direct convolution for NHWC
- Remove old kernels for NHWC
- Change the heuristic in CLConvolutionLayer.cpp. The new direct
convolution implementation is faster than FFT
Resolves COMPMID-3908
Change-Id: Iee15ce7b04e21847b6eaae5c6d3c1b18180e7efc
Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4876
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Diffstat (limited to 'src/runtime/CL')
-rw-r--r-- | src/runtime/CL/functions/CLConvolutionLayer.cpp | 45 |
1 files changed, 33 insertions, 12 deletions
diff --git a/src/runtime/CL/functions/CLConvolutionLayer.cpp b/src/runtime/CL/functions/CLConvolutionLayer.cpp index 5bfbc7ce57..b1351f6747 100644 --- a/src/runtime/CL/functions/CLConvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLConvolutionLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -198,21 +198,42 @@ ConvolutionMethod CLConvolutionLayer::get_convolution_method(const ITensorInfo * } else { - // SRGAN - if((input->dimension(idx_h) > 720U) && (output->dimension(idx_h) > 720U) && (weights->dimension(idx_h) == 9) && (conv_info.pad_top() < 3) - && (CLDirectConvolutionLayer::validate(input, weights, nullptr, output, conv_info, act_info))) + if(input->data_layout() == DataLayout::NCHW) { - return ConvolutionMethod::DIRECT; + // SRGAN + if((input->dimension(idx_h) > 720U) && (output->dimension(idx_h) > 720U) && (weights->dimension(idx_h) == 9) && (conv_info.pad_top() < 3) + && (CLDirectConvolutionLayer::validate(input, weights, nullptr, output, conv_info, act_info))) + { + return ConvolutionMethod::DIRECT; + } + if((weights->dimension(idx_h) > 5) && (input->dimension(idx_c) > output->dimension(idx_c)) && (CLFFTConvolutionLayer::validate(input, weights, nullptr, output, conv_info, act_info, enable_fast_math))) + { + return ConvolutionMethod::FFT; + } + if(input->dimension(idx_c) < 16) + { + return ConvolutionMethod::GEMM; + } + return bool(CLWinogradConvolutionLayer::validate(input, weights, nullptr, output, conv_info, act_info, enable_fast_math)) ? ConvolutionMethod::WINOGRAD : ConvolutionMethod::GEMM; } - if((weights->dimension(idx_h) > 7) && (input->dimension(idx_c) > output->dimension(idx_c)) && (CLFFTConvolutionLayer::validate(input, weights, nullptr, output, conv_info, act_info, enable_fast_math))) + else { - return ConvolutionMethod::FFT; + // SRGAN + if((input->dimension(idx_h) > 720U) && (output->dimension(idx_h) > 720U) && (weights->dimension(idx_h) == 9) && (conv_info.pad_top() < 3) + && (CLDirectConvolutionLayer::validate(input, weights, nullptr, output, conv_info, act_info))) + { + return ConvolutionMethod::DIRECT; + } + if((weights->dimension(idx_h) > 7) && (input->dimension(idx_c) > output->dimension(idx_c)) && (CLDirectConvolutionLayer::validate(input, weights, nullptr, output, conv_info, act_info))) + { + return ConvolutionMethod::DIRECT; + } + if(input->dimension(idx_c) < 16) + { + return ConvolutionMethod::GEMM; + } + return bool(CLWinogradConvolutionLayer::validate(input, weights, nullptr, output, conv_info, act_info, enable_fast_math)) ? ConvolutionMethod::WINOGRAD : ConvolutionMethod::GEMM; } - if(input->dimension(idx_c) < 16) - { - return ConvolutionMethod::GEMM; - } - return bool(CLWinogradConvolutionLayer::validate(input, weights, nullptr, output, conv_info, act_info, enable_fast_math)) ? ConvolutionMethod::WINOGRAD : ConvolutionMethod::GEMM; } } |