diff options
author | Pablo Tello <pablo.tello@arm.com> | 2019-07-19 17:46:12 +0100 |
---|---|---|
committer | Pablo Marquez <pablo.tello@arm.com> | 2019-07-25 13:14:46 +0000 |
commit | 8f309abb06358e3f8b32655dd5a01837874ad6ca (patch) | |
tree | 75225ff6883077d8356b2869b9c18c4d6773518c | |
parent | b5e75dbc1543ad7f549bf3351ac753614fca25e4 (diff) | |
download | ComputeLibrary-8f309abb06358e3f8b32655dd5a01837874ad6ca.tar.gz |
COMPMID-2466: Improved ConvLayer documentation.
Added two tables in NEConvolutionLayer and CLConvolutionLayer
showing ACL's criteria to choose the different algorithms.
Change-Id: Ie6e44e72381ab8232c050688c86d746eb8d2fb36
Signed-off-by: Pablo Tello <pablo.tello@arm.com>
Reviewed-on: https://review.mlplatform.org/c/1577
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
-rw-r--r-- | arm_compute/runtime/CL/functions/CLConvolutionLayer.h | 26 | ||||
-rw-r--r-- | arm_compute/runtime/NEON/functions/NEConvolutionLayer.h | 26 |
2 files changed, 52 insertions, 0 deletions
diff --git a/arm_compute/runtime/CL/functions/CLConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLConvolutionLayer.h
index 96f69f21d5..04ce1cf635 100644
--- a/arm_compute/runtime/CL/functions/CLConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLConvolutionLayer.h
@@ -41,6 +41,32 @@ namespace arm_compute
  * -# @ref CLWinogradConvolutionLayer
  * -# @ref CLDirectConvolutionLayer
  * -# @ref CLFFTConvolutionLayer
+ *
+ * The function selects one of the algorithms mentioned above based on:
+ * - The size of the kernel
+ * - Number of input/output feature maps
+ * - Amount of memory needed
+ *
+ * Generally GEMM-based convolution is executed when neither Winograd nor FFT nor Direct convolution can be performed.
+ *
+ * FP32 Algorithm| Filter Size | Input/Output feature maps |
+ * --------------|-------------------------------------------------------------|-------------------------------------------|
+ * Winograd | 3x3 1x3 3x1 5x1 1x5 5x5(fast maths) 7x1 1x7 | Input channels is greater than 3 |
+ * FFT | Squared kernels and greater than 9x9 | Input feature maps > Output feature maps |
+ * DirectConv | 9x9 | |
+ * GEMM | Any size | |
+ *
+ * Winograd 5x5 requires fast maths enabled.
+ *
+ * FP16 Algorithm| Filter Size | Input/Output feature maps |
+ * --------------|----------------------------|-------------------------------------------|
+ * Winograd | 3x3 1x3 3x1 5x1 1x5 5x5 | Input channels is greater than 3 |
+ * FFT | Not supported | |
+ * DirectConv | 9x9 | |
+ * GEMM | Any size | |
+ *
+ * Winograd FP16 requires fast maths enabled.
+ *
  */
 class CLConvolutionLayer : public IFunction
 {
diff --git a/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h
index 3c1d20aeb8..4310ab4b41 100644
--- a/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h
@@ -43,6 +43,32 @@ class ITensor;
  * -# @ref NEWinogradConvolutionLayer (executed only in case Winograd is required for the operation)
  * -# @ref NEDirectConvolutionLayer (executed only in case Direct Convolution is required for the operation)
  * -# @ref NEFFTConvolutionLayer (executed only in case FFT is required for the operation)
+ *
+ *
+ * The function selects one of the algorithms mentioned above based on:
+ * - The size of the kernel
+ * - Number of input/output feature maps
+ * - Amount of memory needed
+ *
+ * Generally GEMM-based convolution is executed when neither Winograd nor FFT nor Direct convolution can be performed.
+ *
+ * FP32 Algorithm| Filter Size | Input/Output feature maps |
+ * --------------|----------------------------------------------------|-------------------------------------------|
+ * Winograd | 3x3 1x3 3x1 5x1 1x5 5x5(fast maths) 7x1 1x7 | Input channels is greater than 3 |
+ * FFT | Squared kernels and greater than 9x9 | Input feature maps > Output feature maps |
+ * DirectConv | 9x9 | |
+ * GEMM | Any size | |
+ *
+ * Winograd 5x5 requires fast maths enabled.
+ *
+ * FP16 Algorithm| Filter Size |
+ * --------------|------------------|
+ * Winograd | Not supported |
+ * FFT | Not supported |
+ * DirectConv | 9x9 |
+ * GEMM | Any size |
+ *
+ *
  */
 class NEConvolutionLayer : public IFunction
 {