From 8f309abb06358e3f8b32655dd5a01837874ad6ca Mon Sep 17 00:00:00 2001 From: Pablo Tello Date: Fri, 19 Jul 2019 17:46:12 +0100 Subject: COMPMID-2466: Improved ConvLayer documentation. Added two tables in NEConvolutionLayer and CLConvolutionLayer showing ACL's criteria to choose the different algorithms. Change-Id: Ie6e44e72381ab8232c050688c86d746eb8d2fb36 Signed-off-by: Pablo Tello Reviewed-on: https://review.mlplatform.org/c/1577 Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins Reviewed-by: Georgios Pinitas --- .../runtime/CL/functions/CLConvolutionLayer.h | 26 ++++++++++++++++++++++ .../runtime/NEON/functions/NEConvolutionLayer.h | 26 ++++++++++++++++++++++ 2 files changed, 52 insertions(+) diff --git a/arm_compute/runtime/CL/functions/CLConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLConvolutionLayer.h index 96f69f21d5..04ce1cf635 100644 --- a/arm_compute/runtime/CL/functions/CLConvolutionLayer.h +++ b/arm_compute/runtime/CL/functions/CLConvolutionLayer.h @@ -41,6 +41,32 @@ namespace arm_compute * -# @ref CLWinogradConvolutionLayer * -# @ref CLDirectConvolutionLayer * -# @ref CLFFTConvolutionLayer + * + * The function selects one of the algorithms mentioned above based on: + * - The size of the kernel + * - Number of input/output feature maps + * - Amount of memory needed + * + * Generally, GEMM-based convolution is executed when neither Winograd nor FFT nor Direct convolution can be performed. + * + * FP32 Algorithm| Filter Size | Input/Output feature maps | + * --------------|-------------------------------------------------------------|-------------------------------------------| + * Winograd | 3x3 1x3 3x1 5x1 1x5 5x5(fast maths) 7x1 1x7 | Number of input channels is greater than 3 | + * FFT | Square kernels larger than 9x9 | Input feature maps > Output feature maps | + * DirectConv | 9x9 | | + * GEMM | Any size | | + * + * Winograd 5x5 requires fast maths to be enabled. 
+ * + * FP16 Algorithm| Filter Size | Input/Output feature maps | + * --------------|----------------------------|-------------------------------------------| + * Winograd | 3x3 1x3 3x1 5x1 1x5 5x5 | Number of input channels is greater than 3 | + * FFT | Not supported | | + * DirectConv | 9x9 | | + * GEMM | Any size | | + * + * Winograd FP16 requires fast maths to be enabled. + * */ class CLConvolutionLayer : public IFunction { diff --git a/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h index 3c1d20aeb8..4310ab4b41 100644 --- a/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h @@ -43,6 +43,32 @@ class ITensor; * -# @ref NEWinogradConvolutionLayer (executed only in case Winograd is required for the operation) * -# @ref NEDirectConvolutionLayer (executed only in case Direct Convolution is required for the operation) * -# @ref NEFFTConvolutionLayer (executed only in case FFT is required for the operation) + * + * + * The function selects one of the algorithms mentioned above based on: + * - The size of the kernel + * - Number of input/output feature maps + * - Amount of memory needed + * + * Generally, GEMM-based convolution is executed when neither Winograd nor FFT nor Direct convolution can be performed. + * + * FP32 Algorithm| Filter Size | Input/Output feature maps | + * --------------|----------------------------------------------------|-------------------------------------------| + * Winograd | 3x3 1x3 3x1 5x1 1x5 5x5(fast maths) 7x1 1x7 | Number of input channels is greater than 3 | + * FFT | Square kernels larger than 9x9 | Input feature maps > Output feature maps | + * DirectConv | 9x9 | | + * GEMM | Any size | | + * + * Winograd 5x5 requires fast maths to be enabled. 
+ * + * FP16 Algorithm| Filter Size | + * --------------|------------------| + * Winograd | Not supported | + * FFT | Not supported | + * DirectConv | 9x9 | + * GEMM | Any size | + * + * */ class NEConvolutionLayer : public IFunction { -- cgit v1.2.1