diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/runtime/CL/CLTuner.cpp | 51 |
1 files changed, 33 insertions, 18 deletions
diff --git a/src/runtime/CL/CLTuner.cpp b/src/runtime/CL/CLTuner.cpp index 7f5be86833..351f6751c3 100644 --- a/src/runtime/CL/CLTuner.cpp +++ b/src/runtime/CL/CLTuner.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -72,33 +72,48 @@ cl::NDRange CLTuner::find_optimal_lws(ICLKernel &kernel) double min_exec_time = std::numeric_limits<double>::max(); - cl::NDRange opt_lws = cl::NDRange(1, 1); + cl::NDRange opt_lws = cl::NDRange(1, 1, 1); - for(int y = 1; y <= 16; ++y) + const int x_step = std::max(1, kernel.window().x().step()); + const int y_step = std::max(1, kernel.window().y().step()); + const int z_step = std::max(1, kernel.window().z().step()); + const int x_end = kernel.window().x().end() - kernel.window().x().start() / x_step > 1 ? 16 : 1; + const int y_end = kernel.window().y().end() - kernel.window().y().start() / y_step > 1 ? 16 : 1; + const int z_end = kernel.window().z().end() - kernel.window().z().start() / z_step > 1 ? 8 : 1; + + for(int z = 1; z <= z_end; ++z) { - for(int x = 1; x <= 16; ++x) + for(int y = 1; y <= y_end; ++y) { - cl::NDRange lws_test = cl::NDRange(x, y); + for(int x = 1; x <= x_end; ++x) + { + if(x == 1 && y == 1 && z == 1) + { + continue; + } - //Set the Local-Workgroup-Size - kernel.set_lws_hint(lws_test); + cl::NDRange lws_test = cl::NDRange(x, y, z); - auto t_start = std::chrono::high_resolution_clock::now(); + //Set the Local-Workgroup-Size + kernel.set_lws_hint(lws_test); - // Run - kernel.run(kernel.window(), q); + auto t_start = std::chrono::high_resolution_clock::now(); - CLScheduler::get().sync(); + // Run + kernel.run(kernel.window(), q); - auto t_stop = std::chrono::high_resolution_clock::now(); + CLScheduler::get().sync(); - std::chrono::duration<double, std::nano> fp_nano = t_stop - t_start; + auto t_stop = std::chrono::high_resolution_clock::now(); - // Check the execution time - if(fp_nano.count() < min_exec_time) - { - min_exec_time = fp_nano.count(); - opt_lws = cl::NDRange(x, y); + std::chrono::duration<double, std::nano> fp_nano = t_stop - t_start; + + // Check the execution time + if(fp_nano.count() < min_exec_time) + { + min_exec_time = fp_nano.count(); + opt_lws = cl::NDRange(x, y, z); + } } } } |