From 4632e5e44e9a78b15884d0947007bb030fde0aea Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Wed, 6 Feb 2019 14:47:59 +0000 Subject: COMPMID-1920: Fix failure on exit caused by destruction order when CLTuner is enabled. Create a profiling queue in every tuning iteration, as this gives better control over the lifetime of the OpenCL objects. Change-Id: Id8629cc06086877b088c787cbb5f238c0979b6d9 Signed-off-by: Georgios Pinitas Reviewed-on: https://review.mlplatform.org/631 Tested-by: Arm Jenkins Reviewed-by: Michalis Spyrou --- arm_compute/runtime/CL/CLTuner.h | 10 ++++------ src/runtime/CL/CLTuner.cpp | 37 +++++++++++++++++++++---------------- 2 files changed, 25 insertions(+), 22 deletions(-) diff --git a/arm_compute/runtime/CL/CLTuner.h b/arm_compute/runtime/CL/CLTuner.h index f789500de3..ee83f6933c 100644 --- a/arm_compute/runtime/CL/CLTuner.h +++ b/arm_compute/runtime/CL/CLTuner.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -118,10 +118,8 @@ private: cl::NDRange find_optimal_lws(ICLKernel &kernel); std::unordered_map<std::string, cl::NDRange> _lws_table; - cl::CommandQueue _queue; - cl::CommandQueue _queue_profiler; - cl::Event _kernel_event; - bool _tune_new_kernels; + cl::Event _kernel_event; + bool _tune_new_kernels; }; -} +} // namespace arm_compute #endif /*__ARM_COMPUTE_CLTUNER_H__ */ diff --git a/src/runtime/CL/CLTuner.cpp b/src/runtime/CL/CLTuner.cpp index c09914cea2..56baad82c3 100644 --- a/src/runtime/CL/CLTuner.cpp +++ b/src/runtime/CL/CLTuner.cpp @@ -66,7 +66,7 @@ void initialize_lws_values(std::vector<unsigned int> &lws, unsigned int gws, uns } // namespace CLTuner::CLTuner(bool tune_new_kernels) - : real_clEnqueueNDRangeKernel(nullptr), _lws_table(), _queue(), _queue_profiler(), _kernel_event(), _tune_new_kernels(tune_new_kernels) + : real_clEnqueueNDRangeKernel(nullptr), _lws_table(), _kernel_event(), _tune_new_kernels(tune_new_kernels) { } @@ -132,26 +132,31 @@ void CLTuner::add_lws_to_table(const std::string 
&kernel_id, cl::NDRange optimal cl::NDRange CLTuner::find_optimal_lws(ICLKernel &kernel) { + // Profiling queue + cl::CommandQueue queue_profiler; + + // Extract real OpenCL function to intercept if(real_clEnqueueNDRangeKernel == nullptr) { real_clEnqueueNDRangeKernel = CLSymbols::get().clEnqueueNDRangeKernel_ptr; + } - // Get the default queue - _queue = CLScheduler::get().queue(); + // Get the default queue + cl::CommandQueue default_queue = CLScheduler::get().queue(); - // Check if we can use the OpenCL timer with the default queue - cl_command_queue_properties props = _queue.getInfo<CL_QUEUE_PROPERTIES>(); + // Check if we can use the OpenCL timer with the default queue + cl_command_queue_properties props = default_queue.getInfo<CL_QUEUE_PROPERTIES>(); - if((props & CL_QUEUE_PROFILING_ENABLE) == 0) - { - // Set the queue for profiling - _queue_profiler = cl::CommandQueue(CLScheduler::get().context(), props | CL_QUEUE_PROFILING_ENABLE); - } - else - { - _queue_profiler = _queue; - } + if((props & CL_QUEUE_PROFILING_ENABLE) == 0) + { + // Set the queue for profiling + queue_profiler = cl::CommandQueue(CLScheduler::get().context(), props | CL_QUEUE_PROFILING_ENABLE); } + else + { + queue_profiler = default_queue; + } + // Start intercepting enqueues: auto interceptor = [this](cl_command_queue command_queue, cl_kernel kernel, cl_uint work_dim, const size_t *gwo, const size_t *gws, const size_t *lws, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) @@ -212,9 +217,9 @@ cl::NDRange CLTuner::find_optimal_lws(ICLKernel &kernel) kernel.set_lws_hint(lws_test); // Run the kernel - kernel.run(kernel.window(), _queue_profiler); + kernel.run(kernel.window(), queue_profiler); - _queue_profiler.finish(); + queue_profiler.finish(); const cl_ulong start = _kernel_event.getProfilingInfo<CL_PROFILING_COMMAND_START>(); const cl_ulong end = _kernel_event.getProfilingInfo<CL_PROFILING_COMMAND_END>(); -- cgit v1.2.1