COMPMID-1920: Fix destruction-order failure at exit when CLTuner is enabled.

Creates a profiling queue in every tuning iteration, as this gives better control over the lifetime of the OpenCL objects.

Change-Id: Id8629cc06086877b088c787cbb5f238c0979b6d9
Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-on: https://review.mlplatform.org/631
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michalis Spyrou <michalis.spyrou@arm.com>
author: Georgios Pinitas <georgios.pinitas@arm.com> 2019-02-06 14:47:59 +0000
committer: Georgios Pinitas <georgios.pinitas@arm.com> 2019-02-06 17:48:26 +0000
commit: 4632e5e44e9a78b15884d0947007bb030fde0aea (patch)
tree: 2993a95123b098f7b4ed1545e66781def5bd0b24
parent: a69a88b0b69c4c4018562afcfd560ae94412ec99 (diff)
download: ComputeLibrary-4632e5e44e9a78b15884d0947007bb030fde0aea.tar.gz
2 files changed, 25 insertions, 22 deletions
diff --git a/arm_compute/runtime/CL/CLTuner.h b/arm_compute/runtime/CL/CLTuner.h
index f789500de3..ee83f6933c 100644
--- a/arm_compute/runtime/CL/CLTuner.h
+++ b/arm_compute/runtime/CL/CLTuner.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -118,10 +118,8 @@ private:
     cl::NDRange find_optimal_lws(ICLKernel &kernel);
 
     std::unordered_map<std::string, cl::NDRange> _lws_table;
-    cl::CommandQueue _queue;
-    cl::CommandQueue _queue_profiler;
-    cl::Event        _kernel_event;
-    bool             _tune_new_kernels;
+    cl::Event _kernel_event;
+    bool      _tune_new_kernels;
 };
-}
+} // namespace arm_compute
 #endif /*__ARM_COMPUTE_CLTUNER_H__ */
diff --git a/src/runtime/CL/CLTuner.cpp b/src/runtime/CL/CLTuner.cpp
index c09914cea2..56baad82c3 100644
--- a/src/runtime/CL/CLTuner.cpp
+++ b/src/runtime/CL/CLTuner.cpp
@@ -66,7 +66,7 @@ void initialize_lws_values(std::vector<unsigned int> &lws, unsigned int gws, uns
 } // namespace
 
 CLTuner::CLTuner(bool tune_new_kernels)
-    : real_clEnqueueNDRangeKernel(nullptr), _lws_table(), _queue(), _queue_profiler(), _kernel_event(), _tune_new_kernels(tune_new_kernels)
+    : real_clEnqueueNDRangeKernel(nullptr), _lws_table(), _kernel_event(), _tune_new_kernels(tune_new_kernels)
 {
 }
 
@@ -132,26 +132,31 @@ void CLTuner::add_lws_to_table(const std::string &kernel_id, cl::NDRange optimal
 
 cl::NDRange CLTuner::find_optimal_lws(ICLKernel &kernel)
 {
+    // Profiling queue
+    cl::CommandQueue queue_profiler;
+
+    // Extract real OpenCL function to intercept
     if(real_clEnqueueNDRangeKernel == nullptr)
     {
         real_clEnqueueNDRangeKernel = CLSymbols::get().clEnqueueNDRangeKernel_ptr;
+    }
 
-        // Get the default queue
-        _queue = CLScheduler::get().queue();
+    // Get the default queue
+    cl::CommandQueue default_queue = CLScheduler::get().queue();
 
-        // Check if we can use the OpenCL timer with the default queue
-        cl_command_queue_properties props = _queue.getInfo<CL_QUEUE_PROPERTIES>();
+    // Check if we can use the OpenCL timer with the default queue
+    cl_command_queue_properties props = default_queue.getInfo<CL_QUEUE_PROPERTIES>();
 
-        if((props & CL_QUEUE_PROFILING_ENABLE) == 0)
-        {
-            // Set the queue for profiling
-            _queue_profiler = cl::CommandQueue(CLScheduler::get().context(), props | CL_QUEUE_PROFILING_ENABLE);
-        }
-        else
-        {
-            _queue_profiler = _queue;
-        }
+    if((props & CL_QUEUE_PROFILING_ENABLE) == 0)
+    {
+        // Set the queue for profiling
+        queue_profiler = cl::CommandQueue(CLScheduler::get().context(), props | CL_QUEUE_PROFILING_ENABLE);
     }
+    else
+    {
+        queue_profiler = default_queue;
+    }
+
     // Start intercepting enqueues:
     auto interceptor = [this](cl_command_queue command_queue, cl_kernel kernel, cl_uint work_dim, const size_t *gwo, const size_t *gws, const size_t *lws, cl_uint num_events_in_wait_list,
                               const cl_event * event_wait_list, cl_event * event)
@@ -212,9 +217,9 @@ cl::NDRange CLTuner::find_optimal_lws(ICLKernel &kernel)
                 kernel.set_lws_hint(lws_test);
 
                 // Run the kernel
-                kernel.run(kernel.window(), _queue_profiler);
+                kernel.run(kernel.window(), queue_profiler);
 
-                _queue_profiler.finish();
+                queue_profiler.finish();
 
                 const cl_ulong start = _kernel_event.getProfilingInfo<CL_PROFILING_COMMAND_START>();
                 const cl_ulong end   = _kernel_event.getProfilingInfo<CL_PROFILING_COMMAND_END>();
author	Georgios Pinitas <georgios.pinitas@arm.com>	2019-02-06 14:47:59 +0000
committer	Georgios Pinitas <georgios.pinitas@arm.com>	2019-02-06 17:48:26 +0000
commit	4632e5e44e9a78b15884d0947007bb030fde0aea (patch)
tree	2993a95123b098f7b4ed1545e66781def5bd0b24
parent	a69a88b0b69c4c4018562afcfd560ae94412ec99 (diff)
download	ComputeLibrary-4632e5e44e9a78b15884d0947007bb030fde0aea.tar.gz