From 8f309abb06358e3f8b32655dd5a01837874ad6ca Mon Sep 17 00:00:00 2001
From: Pablo Tello <pablo.tello@arm.com>
Date: Fri, 19 Jul 2019 17:46:12 +0100
Subject: COMPMID-2466: Improved ConvLayer documentation.

Added two tables in NEConvolutionLayer and CLConvolutionLayer
showing ACL's criteria to choose the different algorithms.

Change-Id: Ie6e44e72381ab8232c050688c86d746eb8d2fb36
Signed-off-by: Pablo Tello <pablo.tello@arm.com>
Reviewed-on: https://review.mlplatform.org/c/1577
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
---
 .../runtime/CL/functions/CLConvolutionLayer.h      | 26 ++++++++++++++++++++++
 .../runtime/NEON/functions/NEConvolutionLayer.h    | 26 ++++++++++++++++++++++
 2 files changed, 52 insertions(+)

diff --git a/arm_compute/runtime/CL/functions/CLConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLConvolutionLayer.h
index 96f69f21d5..04ce1cf635 100644
--- a/arm_compute/runtime/CL/functions/CLConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLConvolutionLayer.h
@@ -41,6 +41,32 @@ namespace arm_compute
  * -# @ref CLWinogradConvolutionLayer
  * -# @ref CLDirectConvolutionLayer
  * -# @ref CLFFTConvolutionLayer
+ *
+ * The function selects one of the algorithms mentioned above based on:
+ *      - The size of the kernel
+ *      - Number of input/output feature maps
+ *      - Amount of memory needed
+ *
+ * Generally GEMM-based convolution is executed when neither Winograd nor FFT nor Direct convolution can be performed.
+ *
+ * FP32 Algorithm| Filter Size                                                 |   Input/Output feature maps               |
+ * --------------|-------------------------------------------------------------|-------------------------------------------|
+ * Winograd      | 3x3 1x3 3x1 5x1 1x5 5x5(fast maths) 7x1 1x7                 |  More than 3 input channels               |
+ * FFT           | Square kernels larger than 9x9                              |  Input feature maps > Output feature maps |
+ * DirectConv    | 9x9                                                         |                                           |
+ * GEMM          | Any size                                                    |                                           |
+ *
+ * Winograd 5x5 requires fast maths to be enabled.
+ *
+ * FP16 Algorithm| Filter Size                |   Input/Output feature maps               |
+ * --------------|----------------------------|-------------------------------------------|
+ * Winograd      | 3x3 1x3 3x1 5x1 1x5 5x5    |  More than 3 input channels               |
+ * FFT           | Not supported              |                                           |
+ * DirectConv    | 9x9                        |                                           |
+ * GEMM          | Any size                   |                                           |
+ *
+ * Winograd FP16 requires fast maths to be enabled.
+ *
  */
 class CLConvolutionLayer : public IFunction
 {
diff --git a/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h
index 3c1d20aeb8..4310ab4b41 100644
--- a/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h
@@ -43,6 +43,32 @@ class ITensor;
  * -# @ref NEWinogradConvolutionLayer (executed only in case Winograd is required for the operation)
  * -# @ref NEDirectConvolutionLayer   (executed only in case Direct Convolution is required for the operation)
  * -# @ref NEFFTConvolutionLayer      (executed only in case FFT is required for the operation)
+ *
+ *
+ * The function selects one of the algorithms mentioned above based on:
+ *      - The size of the kernel
+ *      - Number of input/output feature maps
+ *      - Amount of memory needed
+ *
+ * Generally GEMM-based convolution is executed when neither Winograd nor FFT nor Direct convolution can be performed.
+ *
+ * FP32 Algorithm| Filter Size                                        |   Input/Output feature maps               |
+ * --------------|----------------------------------------------------|-------------------------------------------|
+ * Winograd      | 3x3 1x3 3x1 5x1 1x5 5x5(fast maths) 7x1 1x7        |  More than 3 input channels               |
+ * FFT           | Square kernels larger than 9x9                     |  Input feature maps > Output feature maps |
+ * DirectConv    | 9x9                                                |                                           |
+ * GEMM          | Any size                                           |                                           |
+ *
+ * Winograd 5x5 requires fast maths to be enabled.
+ *
+ * FP16 Algorithm| Filter Size      |
+ * --------------|------------------|
+ * Winograd      | Not supported    |
+ * FFT           | Not supported    |
+ * DirectConv    | 9x9              |
+ * GEMM          | Any size         |
+ *
+ *
  */
 class NEConvolutionLayer : public IFunction
 {
-- 
cgit v1.2.1