COMPMID-920: Introduce prepare() stage

Change-Id: I08ddb7f6e061178e7566518b48e4e18f8f078596
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/129825
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
author: Georgios Pinitas <georgios.pinitas@arm.com> 2018-05-02 14:07:55 +0100
committer: Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:51:17 +0000
commit: e043767d068da389308507011d944e6db9e4d676 (patch)
tree: 30c8965d8d03d141c7630420c6e945f78485efc7 /src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp
parent: 019634f8befde24b19bae9b749e75a9f3ae44801 (diff)
download: ComputeLibrary-e043767d068da389308507011d944e6db9e4d676.tar.gz
1 file changed, 25 insertions, 15 deletions
diff --git a/src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp b/src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp
index 5ff4fbceee..025a16b4fb 100644
--- a/src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp
@@ -69,7 +69,7 @@ bool check_support_fast_math(const Size2D &output_tile, const Size2D &kernel_siz
 
 CLWinogradConvolutionLayer::CLWinogradConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager)
     : _memory_group(memory_manager), _batched_mm(memory_manager), _input_transform(), _filter_transform(), _output_transform(), _activationlayer_function(), _input0(), _input1(), _batched_mm_output(),
-      _is_first_run(true), _is_activationlayer_enabled(false)
+      _original_weights(nullptr), _is_prepared(false), _is_activationlayer_enabled(false) // _original_weights and _is_prepared are (re)assigned by configure()
 {
 }
 
@@ -97,6 +97,9 @@ void CLWinogradConvolutionLayer::configure(ICLTensor *input, const ICLTensor *we
                                                     conv_info,
                                                     input->info()->data_layout());
 
+    _is_prepared      = false;
+    _original_weights = weights;
+
     // Manage intermediate tensors
     _memory_group.manage(&_input0);
     _memory_group.manage(&_batched_mm_output);
@@ -124,7 +127,6 @@ void CLWinogradConvolutionLayer::configure(ICLTensor *input, const ICLTensor *we
 
     // Allocate temporary tensors
     _input0.allocator()->allocate();
-    _input1.allocator()->allocate();
     _batched_mm_output.allocator()->allocate();
 }
 
@@ -182,11 +184,7 @@ Status CLWinogradConvolutionLayer::validate(const ITensorInfo *input, const ITen
 
 void CLWinogradConvolutionLayer::run()
 {
-    if(_is_first_run)
-    {
-        // Run filter transform
-        CLScheduler::get().enqueue(_filter_transform, false);
-    }
+    prepare();
 
     _memory_group.acquire();
 
@@ -196,13 +194,6 @@ void CLWinogradConvolutionLayer::run()
     // Run batched matrix multiplication
     _batched_mm.run();
 
-    // Release reshaped weights if marked unused by CLGEMM
-    if(_is_first_run && !_input1.is_used())
-    {
-        CLScheduler::get().queue().finish();
-        _input1.allocator()->free();
-    }
-
     // Run output transform
     CLScheduler::get().enqueue(_output_transform);
 
@@ -212,6 +203,25 @@ void CLWinogradConvolutionLayer::run()
     }
 
     _memory_group.release();
+}
+
+void CLWinogradConvolutionLayer::prepare() // One-time weight preparation; run() calls this on entry.
+{
+    if(!_is_prepared) // configure() clears this flag, so the body below runs once after each configure() call.
+    {
+        // Run filter transform and mark original weights as unused (_input1 is allocated here rather than in configure())
+        _input1.allocator()->allocate();
+        CLScheduler::get().enqueue(_filter_transform, false);
+        _original_weights->mark_as_unused(); // _original_weights is the weights tensor pointer stored by configure()
+
+        // Prepare GEMM and release reshaped weights if marked unused by CLGEMM
+        _batched_mm.prepare();
+        if(!_input1.is_used())
+        {
+            _input1.allocator()->free(); // NOTE(review): free() is issued before queue().finish() below - confirm the allocator tolerates work still pending on this buffer
+        }
 
-    _is_first_run = false;
+        CLScheduler::get().queue().finish(); // presumably blocks until the queued preparation work has completed - confirm
+        _is_prepared = true;
+    }
 }
author	Georgios Pinitas <georgios.pinitas@arm.com>	2018-05-02 14:07:55 +0100
committer	Anthony Barbier <anthony.barbier@arm.com>	2018-11-02 16:51:17 +0000
commit	e043767d068da389308507011d944e6db9e4d676 (patch)
tree	30c8965d8d03d141c7630420c6e945f78485efc7 /src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp
parent	019634f8befde24b19bae9b749e75a9f3ae44801 (diff)
download	ComputeLibrary-e043767d068da389308507011d944e6db9e4d676.tar.gz