aboutsummaryrefslogtreecommitdiff
path: root/src/runtime/CL/CLTuner.cpp
diff options
context:
space:
mode:
authorAnthony Barbier <anthony.barbier@arm.com>2018-02-28 18:04:45 +0000
committerAnthony Barbier <anthony.barbier@arm.com>2018-11-02 16:48:33 +0000
commitf5dcf79ba060cc042ed77c73c1a907780c5fe863 (patch)
treec394047ba87482d81e905b287eda89dd9baa3540 /src/runtime/CL/CLTuner.cpp
parent8b81195919eaab90a803fabae57ca05136e9430e (diff)
downloadComputeLibrary-f5dcf79ba060cc042ed77c73c1a907780c5fe863.tar.gz
COMPMID-978: Fixed bug in Load/Store tuning data and in the CLTuner interceptor
Change-Id: I6cdecef623ff5806cf82cb12a60aef8aefec32f5 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/122712 Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com> Tested-by: Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/runtime/CL/CLTuner.cpp')
-rw-r--r--src/runtime/CL/CLTuner.cpp30
1 files changed, 17 insertions, 13 deletions
diff --git a/src/runtime/CL/CLTuner.cpp b/src/runtime/CL/CLTuner.cpp
index 9fb9d1b396..07c24a85c7 100644
--- a/src/runtime/CL/CLTuner.cpp
+++ b/src/runtime/CL/CLTuner.cpp
@@ -73,7 +73,11 @@ cl_int Interceptor::operator()(cl_command_queue command_queue, cl_kernel kernel,
{
ARM_COMPUTE_ERROR_ON_MSG(event != nullptr, "Not supported");
ARM_COMPUTE_UNUSED(event);
-
+ if(_tuner.kernel_event_is_set())
+ {
+ // If the event is already set it means the kernel enqueue is sliced: given that we only time the first slice we can save time by skipping the other enqueues.
+ return CL_SUCCESS;
+ }
cl_event tmp;
cl_int retval = _tuner.real_clEnqueueNDRangeKernel(command_queue, kernel, work_dim, gwo, gws, lws, num_events_in_wait_list, event_wait_list, &tmp);
@@ -90,6 +94,10 @@ CLTuner::CLTuner(bool tune_new_kernels)
{
}
+bool CLTuner::kernel_event_is_set() const
+{
+ return _kernel_event() != nullptr;
+}
void CLTuner::set_cl_kernel_event(cl_event kernel_event)
{
_kernel_event = kernel_event;
@@ -163,9 +171,6 @@ cl::NDRange CLTuner::find_optimal_lws(ICLKernel &kernel)
_queue_profiler = _queue;
}
}
- // Set profiler queue
- CLScheduler::get().set_queue(_queue_profiler);
-
// Start intercepting enqueues:
CLSymbols::get().clEnqueueNDRangeKernel_ptr = Interceptor(*this);
@@ -189,11 +194,12 @@ cl::NDRange CLTuner::find_optimal_lws(ICLKernel &kernel)
// Run the kernel
kernel.run(kernel.window(), _queue_profiler);
- CLScheduler::get().sync();
+ _queue_profiler.finish();
const cl_ulong start = _kernel_event.getProfilingInfo<CL_PROFILING_COMMAND_START>();
const cl_ulong end = _kernel_event.getProfilingInfo<CL_PROFILING_COMMAND_END>();
const cl_ulong diff = end - start;
+ _kernel_event = nullptr;
min_exec_time = diff;
}
@@ -219,11 +225,12 @@ cl::NDRange CLTuner::find_optimal_lws(ICLKernel &kernel)
// Run the kernel
kernel.run(kernel.window(), _queue_profiler);
- CLScheduler::get().sync();
+ _queue_profiler.finish();
const cl_ulong start = _kernel_event.getProfilingInfo<CL_PROFILING_COMMAND_START>();
const cl_ulong end = _kernel_event.getProfilingInfo<CL_PROFILING_COMMAND_END>();
const cl_ulong diff = end - start;
+ _kernel_event = nullptr;
// Check the execution time
if(diff < min_exec_time)
@@ -238,9 +245,6 @@ cl::NDRange CLTuner::find_optimal_lws(ICLKernel &kernel)
// Restore real function
CLSymbols::get().clEnqueueNDRangeKernel_ptr = real_clEnqueueNDRangeKernel;
- // Restore queue
- CLScheduler::get().set_queue(_queue);
-
return opt_lws;
}
@@ -258,7 +262,7 @@ const std::unordered_map<std::string, cl::NDRange> &CLTuner::lws_table() const
void CLTuner::load_from_file(const std::string &filename)
{
std::ifstream fs;
- fs.exceptions(std::ifstream::failbit | std::ifstream::badbit);
+ fs.exceptions(std::ifstream::badbit);
fs.open(filename, std::ios::in);
if(fs.is_open())
{
@@ -267,9 +271,9 @@ void CLTuner::load_from_file(const std::string &filename)
{
std::istringstream ss(line);
std::string token;
- if(!std::getline(ss, token, ';'))
+ if(std::getline(ss, token, ';').fail())
{
- ARM_COMPUTE_ERROR("Malformed row '%s' (Should be of the form 'kernel_id;lws[0];lws[1];lws[2]')", ss.str().c_str());
+ ARM_COMPUTE_ERROR("Malformed row '%s' in %s (Should be of the form 'kernel_id;lws[0];lws[1];lws[2]')", ss.str().c_str(), filename.c_str());
}
std::string kernel_id = token;
cl::NDRange lws(1, 1, 1);
@@ -277,7 +281,7 @@ void CLTuner::load_from_file(const std::string &filename)
{
if(std::getline(ss, token, ';').fail())
{
- ARM_COMPUTE_ERROR("Malformed row '%s' (Should be of the form 'kernel_id;lws[0];lws[1];lws[2]')", ss.str().c_str());
+ ARM_COMPUTE_ERROR("Malformed row '%s' in %s (Should be of the form 'kernel_id;lws[0];lws[1];lws[2]')", ss.str().c_str(), filename.c_str());
}
lws.get()[i] = support::cpp11::stoi(token);
}