aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Georgios Pinitas <georgios.pinitas@arm.com> 2019-02-06 14:47:59 +0000
committer: Georgios Pinitas <georgios.pinitas@arm.com> 2019-02-06 17:48:26 +0000
commit: 4632e5e44e9a78b15884d0947007bb030fde0aea (patch)
tree: 2993a95123b098f7b4ed1545e66781def5bd0b24
parent: a69a88b0b69c4c4018562afcfd560ae94412ec99 (diff)
download: ComputeLibrary-4632e5e44e9a78b15884d0947007bb030fde0aea.tar.gz
COMPMID-1920: Fix failure caused by destruction order at exit when CLTuner is enabled.
Creates a profiling queue in every tuning iteration, as this gives better control over the lifetime of the OpenCL objects.

Change-Id: Id8629cc06086877b088c787cbb5f238c0979b6d9
Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-on: https://review.mlplatform.org/631
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michalis Spyrou <michalis.spyrou@arm.com>
-rw-r--r-- arm_compute/runtime/CL/CLTuner.h | 10
-rw-r--r-- src/runtime/CL/CLTuner.cpp | 37
2 files changed, 25 insertions, 22 deletions
diff --git a/arm_compute/runtime/CL/CLTuner.h b/arm_compute/runtime/CL/CLTuner.h
index f789500de3..ee83f6933c 100644
--- a/arm_compute/runtime/CL/CLTuner.h
+++ b/arm_compute/runtime/CL/CLTuner.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -118,10 +118,8 @@ private:
cl::NDRange find_optimal_lws(ICLKernel &kernel);
std::unordered_map<std::string, cl::NDRange> _lws_table;
- cl::CommandQueue _queue;
- cl::CommandQueue _queue_profiler;
- cl::Event _kernel_event;
- bool _tune_new_kernels;
+ cl::Event _kernel_event;
+ bool _tune_new_kernels;
};
-}
+} // namespace arm_compute
#endif /*__ARM_COMPUTE_CLTUNER_H__ */
diff --git a/src/runtime/CL/CLTuner.cpp b/src/runtime/CL/CLTuner.cpp
index c09914cea2..56baad82c3 100644
--- a/src/runtime/CL/CLTuner.cpp
+++ b/src/runtime/CL/CLTuner.cpp
@@ -66,7 +66,7 @@ void initialize_lws_values(std::vector<unsigned int> &lws, unsigned int gws, uns
} // namespace
CLTuner::CLTuner(bool tune_new_kernels)
- : real_clEnqueueNDRangeKernel(nullptr), _lws_table(), _queue(), _queue_profiler(), _kernel_event(), _tune_new_kernels(tune_new_kernels)
+ : real_clEnqueueNDRangeKernel(nullptr), _lws_table(), _kernel_event(), _tune_new_kernels(tune_new_kernels)
{
}
@@ -132,26 +132,31 @@ void CLTuner::add_lws_to_table(const std::string &kernel_id, cl::NDRange optimal
cl::NDRange CLTuner::find_optimal_lws(ICLKernel &kernel)
{
+ // Profiling queue
+ cl::CommandQueue queue_profiler;
+
+ // Extract real OpenCL function to intercept
if(real_clEnqueueNDRangeKernel == nullptr)
{
real_clEnqueueNDRangeKernel = CLSymbols::get().clEnqueueNDRangeKernel_ptr;
+ }
- // Get the default queue
- _queue = CLScheduler::get().queue();
+ // Get the default queue
+ cl::CommandQueue default_queue = CLScheduler::get().queue();
- // Check if we can use the OpenCL timer with the default queue
- cl_command_queue_properties props = _queue.getInfo<CL_QUEUE_PROPERTIES>();
+ // Check if we can use the OpenCL timer with the default queue
+ cl_command_queue_properties props = default_queue.getInfo<CL_QUEUE_PROPERTIES>();
- if((props & CL_QUEUE_PROFILING_ENABLE) == 0)
- {
- // Set the queue for profiling
- _queue_profiler = cl::CommandQueue(CLScheduler::get().context(), props | CL_QUEUE_PROFILING_ENABLE);
- }
- else
- {
- _queue_profiler = _queue;
- }
+ if((props & CL_QUEUE_PROFILING_ENABLE) == 0)
+ {
+ // Set the queue for profiling
+ queue_profiler = cl::CommandQueue(CLScheduler::get().context(), props | CL_QUEUE_PROFILING_ENABLE);
}
+ else
+ {
+ queue_profiler = default_queue;
+ }
+
// Start intercepting enqueues:
auto interceptor = [this](cl_command_queue command_queue, cl_kernel kernel, cl_uint work_dim, const size_t *gwo, const size_t *gws, const size_t *lws, cl_uint num_events_in_wait_list,
const cl_event * event_wait_list, cl_event * event)
@@ -212,9 +217,9 @@ cl::NDRange CLTuner::find_optimal_lws(ICLKernel &kernel)
kernel.set_lws_hint(lws_test);
// Run the kernel
- kernel.run(kernel.window(), _queue_profiler);
+ kernel.run(kernel.window(), queue_profiler);
- _queue_profiler.finish();
+ queue_profiler.finish();
const cl_ulong start = _kernel_event.getProfilingInfo<CL_PROFILING_COMMAND_START>();
const cl_ulong end = _kernel_event.getProfilingInfo<CL_PROFILING_COMMAND_END>();