aboutsummaryrefslogtreecommitdiff
path: root/src/runtime/CL/CLTuner.cpp
diff options
context:
space:
mode:
author: Gian Marco <gianmarco.iodice@arm.com> 2018-01-25 13:49:44 +0000
committer: Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:45:00 +0000
commit: c78d4bca25886d279edfe3914c179261a1bfe505 (patch)
tree: 14943572fafb2f00855a192c8f41b1a6edaa32c4 /src/runtime/CL/CLTuner.cpp
parent: 0434d75454bd83954ab3323e6384dcf0e3adbb5e (diff)
download: ComputeLibrary-c78d4bca25886d279edfe3914c179261a1bfe505.tar.gz
COMPMID-765 - Added third dimension for CLTuner
Change-Id: I0a7ea4cde1dbf8edd28908dfff80928ef7e996c4
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/117647
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Tested-by: Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/runtime/CL/CLTuner.cpp')
-rw-r--r-- src/runtime/CL/CLTuner.cpp 51
1 files changed, 33 insertions, 18 deletions
diff --git a/src/runtime/CL/CLTuner.cpp b/src/runtime/CL/CLTuner.cpp
index 7f5be86833..351f6751c3 100644
--- a/src/runtime/CL/CLTuner.cpp
+++ b/src/runtime/CL/CLTuner.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -72,33 +72,48 @@ cl::NDRange CLTuner::find_optimal_lws(ICLKernel &kernel)
double min_exec_time = std::numeric_limits<double>::max();
- cl::NDRange opt_lws = cl::NDRange(1, 1);
+ cl::NDRange opt_lws = cl::NDRange(1, 1, 1);
- for(int y = 1; y <= 16; ++y)
+ const int x_step = std::max(1, kernel.window().x().step());
+ const int y_step = std::max(1, kernel.window().y().step());
+ const int z_step = std::max(1, kernel.window().z().step());
+ const int x_end = kernel.window().x().end() - kernel.window().x().start() / x_step > 1 ? 16 : 1;
+ const int y_end = kernel.window().y().end() - kernel.window().y().start() / y_step > 1 ? 16 : 1;
+ const int z_end = kernel.window().z().end() - kernel.window().z().start() / z_step > 1 ? 8 : 1;
+
+ for(int z = 1; z <= z_end; ++z)
{
- for(int x = 1; x <= 16; ++x)
+ for(int y = 1; y <= y_end; ++y)
{
- cl::NDRange lws_test = cl::NDRange(x, y);
+ for(int x = 1; x <= x_end; ++x)
+ {
+ if(x == 1 && y == 1 && z == 1)
+ {
+ continue;
+ }
- //Set the Local-Workgroup-Size
- kernel.set_lws_hint(lws_test);
+ cl::NDRange lws_test = cl::NDRange(x, y, z);
- auto t_start = std::chrono::high_resolution_clock::now();
+ //Set the Local-Workgroup-Size
+ kernel.set_lws_hint(lws_test);
- // Run
- kernel.run(kernel.window(), q);
+ auto t_start = std::chrono::high_resolution_clock::now();
- CLScheduler::get().sync();
+ // Run
+ kernel.run(kernel.window(), q);
- auto t_stop = std::chrono::high_resolution_clock::now();
+ CLScheduler::get().sync();
- std::chrono::duration<double, std::nano> fp_nano = t_stop - t_start;
+ auto t_stop = std::chrono::high_resolution_clock::now();
- // Check the execution time
- if(fp_nano.count() < min_exec_time)
- {
- min_exec_time = fp_nano.count();
- opt_lws = cl::NDRange(x, y);
+ std::chrono::duration<double, std::nano> fp_nano = t_stop - t_start;
+
+ // Check the execution time
+ if(fp_nano.count() < min_exec_time)
+ {
+ min_exec_time = fp_nano.count();
+ opt_lws = cl::NDRange(x, y, z);
+ }
}
}
}