From f6c572ce404c8ac99b0b00c65b757fbadab33dc1 Mon Sep 17 00:00:00 2001
From: Pablo Tello <pablo.tello@arm.com>
Date: Wed, 14 Feb 2018 12:47:30 +0000
Subject: COMPMID-784: Productise Winograd.

a) Added support for kernel size 5.
b) Templatised the data type for the transform and batched GEMM kernels.

Change-Id: Idb83dda7a5eec19e015888ab31902bd791913297
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/120540
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Tested-by: Jenkins <bsgcomp@arm.com>
---
 arm_compute/runtime/NEON/functions/NEWinogradLayer.h | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

(limited to 'arm_compute/runtime/NEON')
diff --git a/arm_compute/runtime/NEON/functions/NEWinogradLayer.h b/arm_compute/runtime/NEON/functions/NEWinogradLayer.h
index 63cac3a3b4..f57be697b5 100644
--- a/arm_compute/runtime/NEON/functions/NEWinogradLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEWinogradLayer.h
@@ -26,7 +26,7 @@
 
 #include "arm_compute/runtime/IFunction.h"
 
-#include "arm_compute/core/NEON/kernels/NEWinogradLayerKernel.h"
+#include "arm_compute/core/NEON/INEKernel.h"
 #include "arm_compute/core/Types.h"
 #include "arm_compute/runtime/CPP/functions/CPPPermute.h"
 #include "arm_compute/runtime/MemoryGroup.h"
@@ -38,6 +38,11 @@ namespace arm_compute
 {
 class ITensor;
 /** Basic function to simulate a convolution layer. This function calls the following NEON kernels:
+ * -# @ref NEWinogradLayerTransformWeightsKernel (executed only once in the first call to the run() method)
+ * -# @ref NEWinogradLayerTransformInputKernel
+ * -# @ref NEWinogradLayerTransformOutputKernel
+ * -# @ref NEWinogradLayerBatchedGEMMKernel
+ * -# @ref CPPPermute (three times: weights, input and output)
  */
 class NEWinogradLayer : public IFunction
 {
@@ -68,11 +73,12 @@ public:
     NEWinogradLayer &operator=(const NEWinogradLayer &) = delete;
 
 private:
-    MemoryGroup _memory_group;
-    NEWinogradLayerKernel<2, 2, 3, 3>                 _winograd_kernel;
-    NEWinogradLayerTransformInputKernel<2, 2, 3, 3>   _transform_input_kernel;
-    NEWinogradLayerTransformOutputKernel<2, 2, 3, 3>  _transform_output_kernel;
-    NEWinogradLayerTransformWeightsKernel<2, 2, 3, 3> _transform_weights_kernel;
+    MemoryGroup                _memory_group;
+    std::unique_ptr<INEKernel> _batched_gemm_kernel;
+    std::unique_ptr<INEKernel> _transform_input_kernel;
+    std::unique_ptr<INEKernel> _transform_output_kernel;
+    std::unique_ptr<INEKernel> _transform_weights_kernel;
+
     CPPPermute     _permute_input;
     CPPPermute     _permute_weights;
     CPPPermute     _permute_output;
-- 
cgit v1.2.1