COMPMID-2466: Improved ConvLayer documentation.

Added two tables in NEConvolutionLayer and CLConvolutionLayer showing ACL's criteria for choosing between the different algorithms.

Change-Id: Ie6e44e72381ab8232c050688c86d746eb8d2fb36
Signed-off-by: Pablo Tello <pablo.tello@arm.com>
Reviewed-on: https://review.mlplatform.org/c/1577
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
author: Pablo Tello <pablo.tello@arm.com> 2019-07-19 17:46:12 +0100
committer: Pablo Marquez <pablo.tello@arm.com> 2019-07-25 13:14:46 +0000
commit: 8f309abb06358e3f8b32655dd5a01837874ad6ca (patch)
tree: 75225ff6883077d8356b2869b9c18c4d6773518c
parent: b5e75dbc1543ad7f549bf3351ac753614fca25e4 (diff)
download: ComputeLibrary-8f309abb06358e3f8b32655dd5a01837874ad6ca.tar.gz
2 files changed, 52 insertions, 0 deletions
diff --git a/arm_compute/runtime/CL/functions/CLConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLConvolutionLayer.h
index 96f69f21d5..04ce1cf635 100644
--- a/arm_compute/runtime/CL/functions/CLConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLConvolutionLayer.h
@@ -41,6 +41,32 @@ namespace arm_compute
  * -# @ref CLWinogradConvolutionLayer
  * -# @ref CLDirectConvolutionLayer
  * -# @ref CLFFTConvolutionLayer
+ *
+ * The function selects one of the algorithms mentioned above based on:
+ *      - The size of the kernel
+ *      - Number of input/output feature maps
+ *      - Amount of memory needed
+ *
+ * Generally, GEMM-based convolution is executed when none of Winograd, FFT or Direct convolution can be performed.
+ *
+ * FP32 Algorithm| Filter Size                                                 |   Input/Output feature maps               |
+ * --------------|-------------------------------------------------------------|-------------------------------------------|
+ * Winograd      | 3x3 1x3 3x1 5x1 1x5 5x5(fast maths) 7x1 1x7                 |  More than 3 input channels               |
+ * FFT           | Square kernels larger than 9x9                              |  Input feature maps > Output feature maps |
+ * DirectConv    | 9x9                                                         |                                           |
+ * GEMM          | Any size                                                    |                                           |
+ *
+ * Winograd 5x5 requires fast maths to be enabled.
+ *
+ * FP16 Algorithm| Filter Size                |   Input/Output feature maps               |
+ * --------------|----------------------------|-------------------------------------------|
+ * Winograd      | 3x3 1x3 3x1 5x1 1x5 5x5    |  More than 3 input channels               |
+ * FFT           | Not supported              |                                           |
+ * DirectConv    | 9x9                        |                                           |
+ * GEMM          | Any size                   |                                           |
+ *
+ * Winograd FP16 requires fast maths to be enabled.
+ *
  */
 class CLConvolutionLayer : public IFunction
 {
diff --git a/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h
index 3c1d20aeb8..4310ab4b41 100644
--- a/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h
@@ -43,6 +43,32 @@ class ITensor;
  * -# @ref NEWinogradConvolutionLayer (executed only in case Winograd is required for the operation)
  * -# @ref NEDirectConvolutionLayer   (executed only in case Direct Convolution is required for the operation)
  * -# @ref NEFFTConvolutionLayer      (executed only in case FFT is required for the operation)
+ *
+ *
+ * The function selects one of the algorithms mentioned above based on:
+ *      - The size of the kernel
+ *      - Number of input/output feature maps
+ *      - Amount of memory needed
+ *
+ * Generally, GEMM-based convolution is executed when none of Winograd, FFT or Direct convolution can be performed.
+ *
+ * FP32 Algorithm| Filter Size                                        |   Input/Output feature maps               |
+ * --------------|----------------------------------------------------|-------------------------------------------|
+ * Winograd      | 3x3 1x3 3x1 5x1 1x5 5x5(fast maths) 7x1 1x7        |  More than 3 input channels               |
+ * FFT           | Square kernels larger than 9x9                     |  Input feature maps > Output feature maps |
+ * DirectConv    | 9x9                                                |                                           |
+ * GEMM          | Any size                                           |                                           |
+ *
+ * Winograd 5x5 requires fast maths to be enabled.
+ *
+ * FP16 Algorithm| Filter Size      |
+ * --------------|------------------|
+ * Winograd      | Not supported    |
+ * FFT           | Not supported    |
+ * DirectConv    | 9x9              |
+ * GEMM          | Any size         |
+ *
+ *
  */
 class NEConvolutionLayer : public IFunction
 {
author	Pablo Tello <pablo.tello@arm.com>	2019-07-19 17:46:12 +0100
committer	Pablo Marquez <pablo.tello@arm.com>	2019-07-25 13:14:46 +0000
commit	8f309abb06358e3f8b32655dd5a01837874ad6ca (patch)
tree	75225ff6883077d8356b2869b9c18c4d6773518c
parent	b5e75dbc1543ad7f549bf3351ac753614fca25e4 (diff)
download	ComputeLibrary-8f309abb06358e3f8b32655dd5a01837874ad6ca.tar.gz