From 8ec0bb6d9027bb7505d6fa0eada42a52c6e1073b Mon Sep 17 00:00:00 2001
From: Vidhya Sudhan Loganathan <vidhyasudhan.loganathan@arm.com>
Date: Tue, 23 Apr 2019 10:40:44 +0100
Subject: COMPMID-2117 : Use FFT convolution if output feature map depth is
 less than input

Change-Id: I588f467258aaa86a9fab8eb96603e02f24362d05
Signed-off-by: Vidhya Sudhan Loganathan <vidhyasudhan.loganathan@arm.com>
Reviewed-on: https://review.mlplatform.org/c/1010
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
---
 arm_compute/core/Types.h                           |  7 +++---
 .../runtime/CL/functions/CLConvolutionLayer.h      |  4 +++-
 src/runtime/CL/functions/CLConvolutionLayer.cpp    | 25 ++++++++++++++++++++--
 src/runtime/CL/functions/CLFFTConvolutionLayer.cpp |  2 +-
 4 files changed, 31 insertions(+), 7 deletions(-)

diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h
index 5a469a6c9a..24e91bd3c5 100644
--- a/arm_compute/core/Types.h
+++ b/arm_compute/core/Types.h
@@ -128,9 +128,10 @@ enum class DataLayoutDimension
 /** Available ConvolutionMethod*/
 enum class ConvolutionMethod
 {
-    GEMM,    /**< Convolution using GEMM */
-    DIRECT,  /**< Direct convolution */
-    WINOGRAD /**< Convolution using Winograd */
+    GEMM,     /**< Convolution using GEMM */
+    DIRECT,   /**< Direct convolution */
+    WINOGRAD, /**< Convolution using Winograd */
+    FFT       /**< Convolution using FFT */
 };
 
 /** Padding mode to use for PadLayer */
diff --git a/arm_compute/runtime/CL/functions/CLConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLConvolutionLayer.h
index 8270e9723e..96f69f21d5 100644
--- a/arm_compute/runtime/CL/functions/CLConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLConvolutionLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -25,6 +25,7 @@
 #define __ARM_COMPUTE_CLCONVOLUTIONLAYER_H__
 
 #include "arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h"
+#include "arm_compute/runtime/CL/functions/CLFFTConvolutionLayer.h"
 #include "arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h"
 #include "arm_compute/runtime/CL/functions/CLWinogradConvolutionLayer.h"
 #include "arm_compute/runtime/IFunction.h"
@@ -39,6 +40,7 @@ namespace arm_compute
  * -# @ref CLGEMMConvolutionLayer
  * -# @ref CLWinogradConvolutionLayer
  * -# @ref CLDirectConvolutionLayer
+ * -# @ref CLFFTConvolutionLayer
  */
 class CLConvolutionLayer : public IFunction
 {
diff --git a/src/runtime/CL/functions/CLConvolutionLayer.cpp b/src/runtime/CL/functions/CLConvolutionLayer.cpp
index 0014e71734..165d523100 100644
--- a/src/runtime/CL/functions/CLConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLConvolutionLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -75,6 +75,13 @@ void CLConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, c
             _function = std::move(f);
             break;
         }
+        case ConvolutionMethod::FFT:
+        {
+            auto f = arm_compute::support::cpp14::make_unique<CLFFTConvolutionLayer>(_memory_manager);
+            f->configure(input, weights, biases, output, conv_info, act_info);
+            _function = std::move(f);
+            break;
+        }
         default:
             ARM_COMPUTE_ERROR("Not supported.");
             break;
@@ -111,6 +118,12 @@ Status CLConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo
             ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMConvolutionLayer::validate(input, weights, biases, output, conv_info, weights_info, dilation, act_info, num_groups));
             break;
         }
+        case ConvolutionMethod::FFT:
+        {
+            // Validate FFT-based convolution layer (biases included, matching what configure() passes on)
+            ARM_COMPUTE_RETURN_ON_ERROR(CLFFTConvolutionLayer::validate(input, weights, biases, output, conv_info, act_info));
+            break;
+        }
         default:
             ARM_COMPUTE_ERROR("Not supported.");
             break;
@@ -169,12 +182,20 @@ ConvolutionMethod CLConvolutionLayer::get_convolution_method(const ITensorInfo *
         return (*found).second;
     }
 
-    if(dilation != Size2D(1U, 1U) || (input->dimension(idx_c) < 16))
+    if(dilation != Size2D(1U, 1U))
     {
         return ConvolutionMethod::GEMM;
     }
     else
     {
+        if((weights->dimension(idx_h) > 7) && (input->dimension(idx_c) > output->dimension(idx_c)) && bool(CLFFTConvolutionLayer::validate(input, weights, nullptr, output, conv_info, act_info)))
+        {
+            return ConvolutionMethod::FFT;
+        }
+        if(input->dimension(idx_c) < 16)
+        {
+            return ConvolutionMethod::GEMM;
+        }
         return bool(CLWinogradConvolutionLayer::validate(input, weights, nullptr, output, conv_info, act_info, enable_fast_math)) ? ConvolutionMethod::WINOGRAD : ConvolutionMethod::GEMM;
     }
 }
diff --git a/src/runtime/CL/functions/CLFFTConvolutionLayer.cpp b/src/runtime/CL/functions/CLFFTConvolutionLayer.cpp
index 6a89b9d304..77097d2b63 100644
--- a/src/runtime/CL/functions/CLFFTConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLFFTConvolutionLayer.cpp
@@ -279,7 +279,7 @@ Status CLFFTConvolutionLayer::validate(const ITensorInfo *input, const ITensorIn
     if((output != nullptr) && (output->total_size() != 0))
     {
         ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
-        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
+        ARM_COMPUTE_RETURN_ERROR_ON((input->tensor_shape()[idx_height] != output->tensor_shape()[idx_height]) || (input->tensor_shape()[idx_width] != output->tensor_shape()[idx_width]));
 
         // Validate Activation Layer
         if(act_info.enabled())
-- 
cgit v1.2.1