COMPMID-920: Introduce prepare() stage

Change-Id: I08ddb7f6e061178e7566518b48e4e18f8f078596
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/129825
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
author: Georgios Pinitas <georgios.pinitas@arm.com> 2018-05-02 14:07:55 +0100
committer: Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:51:17 +0000
commit: e043767d068da389308507011d944e6db9e4d676 (patch)
tree: 30c8965d8d03d141c7630420c6e945f78485efc7 /src/runtime/CL/functions/CLGEMM.cpp
parent: 019634f8befde24b19bae9b749e75a9f3ae44801 (diff)
download: ComputeLibrary-e043767d068da389308507011d944e6db9e4d676.tar.gz
1 files changed, 24 insertions, 15 deletions
diff --git a/src/runtime/CL/functions/CLGEMM.cpp b/src/runtime/CL/functions/CLGEMM.cpp
index 37fa0c5ba2..e735adba39 100644
--- a/src/runtime/CL/functions/CLGEMM.cpp
+++ b/src/runtime/CL/functions/CLGEMM.cpp
@@ -98,7 +98,7 @@ Status validate_arguments(const ITensorInfo *a, const ITensorInfo *b, const ICLT
 
 CLGEMM::CLGEMM(std::shared_ptr<IMemoryManager> memory_manager)
     : _memory_group(std::move(memory_manager)), _interleave_kernel(), _transpose_kernel(), _mm_kernel(), _ma_kernel(), _tmp_a(), _tmp_b(), _original_b(nullptr), _is_interleaved_transposed(false),
-      _run_addition(false), _is_first_run(true), _reshape_b_only_on_first_run(false)
+      _run_addition(false), _reshape_b_only_on_first_run(false), _is_prepared(false)
 {
 }
 
@@ -114,6 +114,7 @@ void CLGEMM::configure(const ICLTensor *a, const ICLTensor *b, const ICLTensor *
 
     // Check if we need to reshape the matrix B only on the first run
     _reshape_b_only_on_first_run = gemm_info.reshape_b_only_on_first_run();
+    _is_prepared                 = false;
 
     const ICLTensor *matrix_a = a;
     const ICLTensor *matrix_b = b;
@@ -169,7 +170,10 @@ void CLGEMM::configure(const ICLTensor *a, const ICLTensor *b, const ICLTensor *
     {
         // Allocate intermediate tensors
         _tmp_a.allocator()->allocate();
-        _tmp_b.allocator()->allocate();
+        if(!_reshape_b_only_on_first_run)
+        {
+            _tmp_b.allocator()->allocate();
+        }
     }
 
     // Configure matrix addition kernel
@@ -188,6 +192,8 @@ Status CLGEMM::validate(const ITensorInfo *a, const ITensorInfo *b, const ICLTen
 
 void CLGEMM::run()
 {
+    prepare();
+
     _memory_group.acquire();
 
     if(_is_interleaved_transposed)
@@ -195,18 +201,7 @@ void CLGEMM::run()
         // Run interleave kernel
         CLScheduler::get().enqueue(_interleave_kernel, false);
 
-        if(_is_first_run)
-        {
-            // Run transpose kernel
-            CLScheduler::get().enqueue(_transpose_kernel, false);
-
-            // Mark original b matrix as unused
-            if(_reshape_b_only_on_first_run)
-            {
-                _original_b->mark_as_unused();
-            }
-        }
-        else if(!_reshape_b_only_on_first_run)
+        if(!_reshape_b_only_on_first_run)
         {
             // Run transpose kernel
             CLScheduler::get().enqueue(_transpose_kernel, false);
@@ -223,6 +218,20 @@ void CLGEMM::run()
     }
 
     _memory_group.release();
+}
 
-    _is_first_run = false;
+void CLGEMM::prepare()
+{
+    if(!_is_prepared)
+    {
+        if(_is_interleaved_transposed && _reshape_b_only_on_first_run)
+        {
+            // Run transpose kernel
+            _tmp_b.allocator()->allocate();
+            CLScheduler::get().enqueue(_transpose_kernel, false);
+            _original_b->mark_as_unused();
+        }
+        CLScheduler::get().queue().finish();
+        _is_prepared = true;
+    }
 }
author	Georgios Pinitas <georgios.pinitas@arm.com>	2018-05-02 14:07:55 +0100
committer	Anthony Barbier <anthony.barbier@arm.com>	2018-11-02 16:51:17 +0000
commit	e043767d068da389308507011d944e6db9e4d676 (patch)
tree	30c8965d8d03d141c7630420c6e945f78485efc7 /src/runtime/CL/functions/CLGEMM.cpp
parent	019634f8befde24b19bae9b749e75a9f3ae44801 (diff)
download	ComputeLibrary-e043767d068da389308507011d944e6db9e4d676.tar.gz