From 82b51482479951cf133c223eb81aae291cb4d590 Mon Sep 17 00:00:00 2001
From: Georgios Pinitas <georgios.pinitas@arm.com>
Date: Tue, 24 Apr 2018 15:14:12 +0100
Subject: COMPMID-959: Sets memory manager to CLWinograd

- Sets the memory manager on the Winograd functions
- Marks CLGEMM inputs as unused if needed

Change-Id: I425a3f864c756e0e2b4da895e1730b8822149ba8
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/128891
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
---
 src/runtime/CL/functions/CLConvolutionLayer.cpp         |  2 +-
 src/runtime/CL/functions/CLGEMM.cpp                     | 15 ++++++++++++---
 src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp     | 10 +++++++++-
 src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp | 11 +++++++++--
 4 files changed, 31 insertions(+), 7 deletions(-)

(limited to 'src/runtime/CL')
diff --git a/src/runtime/CL/functions/CLConvolutionLayer.cpp b/src/runtime/CL/functions/CLConvolutionLayer.cpp
index 643e24d638..97ef895434 100644
--- a/src/runtime/CL/functions/CLConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLConvolutionLayer.cpp
@@ -53,7 +53,7 @@ void CLConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, c
     {
         case ConvolutionMethod::WINOGRAD:
         {
-            auto f = arm_compute::support::cpp14::make_unique<CLWinogradConvolutionLayer>();
+            auto f = arm_compute::support::cpp14::make_unique<CLWinogradConvolutionLayer>(_memory_manager);
             f->configure(input, weights, biases, output, conv_info);
             _function = std::move(f);
             break;
diff --git a/src/runtime/CL/functions/CLGEMM.cpp b/src/runtime/CL/functions/CLGEMM.cpp
index cf41eccc35..bff5781300 100644
--- a/src/runtime/CL/functions/CLGEMM.cpp
+++ b/src/runtime/CL/functions/CLGEMM.cpp
@@ -92,8 +92,8 @@ Status validate_arguments(const ITensorInfo *a, const ITensorInfo *b, const ICLT
 } // namespace
 
 CLGEMM::CLGEMM(std::shared_ptr<IMemoryManager> memory_manager)
-    : _memory_group(std::move(memory_manager)), _interleave_kernel(), _transpose_kernel(), _mm_kernel(), _ma_kernel(), _tmp_a(), _tmp_b(), _is_interleaved_transposed(false), _run_addition(false),
-      _is_first_run(true), _reshape_b_only_on_first_run(false)
+    : _memory_group(std::move(memory_manager)), _interleave_kernel(), _transpose_kernel(), _mm_kernel(), _ma_kernel(), _tmp_a(), _tmp_b(), _original_b(nullptr), _is_interleaved_transposed(false),
+      _run_addition(false), _is_first_run(true), _reshape_b_only_on_first_run(false)
 {
 }
 
@@ -104,6 +104,9 @@ void CLGEMM::configure(const ICLTensor *a, const ICLTensor *b, const ICLTensor *
     // Perform validation step
     ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(a->info(), b->info(), c, output->info(), alpha, beta, gemm_info));
 
+    // Store original b matrix
+    _original_b = b;
+
     // Check if we need to reshape the matrix B only on the first run
     _reshape_b_only_on_first_run = gemm_info.reshape_b_only_on_first_run();
 
@@ -192,7 +195,11 @@ void CLGEMM::run()
             // Run transpose kernel
             CLScheduler::get().enqueue(_transpose_kernel, false);
 
-            _is_first_run = false;
+            // Mark original b matrix as unused
+            if(_reshape_b_only_on_first_run)
+            {
+                _original_b->mark_as_unused();
+            }
         }
         else if(!_reshape_b_only_on_first_run)
         {
@@ -211,4 +218,6 @@ void CLGEMM::run()
     }
 
     _memory_group.release();
+
+    _is_first_run = false;
 }
diff --git a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp
index 084c4df718..87c4a306aa 100644
--- a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp
@@ -374,7 +374,6 @@ void CLGEMMConvolutionLayer::run()
         ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());
 
         _reshape_weights.run();
-        _is_first_run = false;
 
         // Mark original weights tensor as unused
         _original_weights->mark_as_unused();
@@ -398,6 +397,13 @@ void CLGEMMConvolutionLayer::run()
     {
         // Run gemm
         _mm_gemm.run();
+
+        // Release reshaped weights if marked unused by CLGEMM
+        if(_is_first_run && !_weights_reshaped.is_used())
+        {
+            CLScheduler::get().queue().finish();
+            _weights_reshaped.allocator()->free();
+        }
     }
 
     // Reshape output matrix
@@ -410,4 +416,6 @@ void CLGEMMConvolutionLayer::run()
     }
 
     _memory_group.release();
+
+    _is_first_run = false;
 }
diff --git a/src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp b/src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp
index 86ccddac88..65747cf5d7 100644
--- a/src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp
@@ -151,8 +151,6 @@ void CLWinogradConvolutionLayer::run()
     {
         // Run filter transform
         CLScheduler::get().enqueue(_filter_transform, false);
-
-        _is_first_run = false;
     }
 
     _memory_group.acquire();
@@ -163,6 +161,13 @@ void CLWinogradConvolutionLayer::run()
     // Run batched matrix multiplication
     _batched_mm.run();
 
+    // Release reshaped weights if marked unused by CLGEMM
+    if(_is_first_run && !_input1.is_used())
+    {
+        CLScheduler::get().queue().finish();
+        _input1.allocator()->free();
+    }
+
     // Run output transform
     CLScheduler::get().enqueue(_output_transform);
 
@@ -172,4 +177,6 @@ void CLWinogradConvolutionLayer::run()
     }
 
     _memory_group.release();
+
+    _is_first_run = false;
 }
-- 
cgit v1.2.1