aboutsummaryrefslogtreecommitdiff
path: root/src/runtime/CL/CLTuner.cpp
diff options
context:
space:
mode:
author: Gian Marco <gianmarco.iodice@arm.com> 2017-09-08 16:13:11 +0100
committer: Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:35:24 +0000
commit: de691f055ac255c798a766483eef63465ac90c75 (patch)
tree: 0929f439b048ffa2b2fc8222388f9ee14f3a2a2e /src/runtime/CL/CLTuner.cpp
parent: 54f366afa63522b8c0ea3b0e5e8d3012a4412681 (diff)
download: ComputeLibrary-de691f055ac255c798a766483eef63465ac90c75.tar.gz
COMPMID-524 - Implemented CLTuner object
Change-Id: Idbdbecca1fc299ed042936119d90e2bed8db0938
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/87101
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Diffstat (limited to 'src/runtime/CL/CLTuner.cpp')
-rw-r--r-- src/runtime/CL/CLTuner.cpp 118
1 files changed, 118 insertions, 0 deletions
diff --git a/src/runtime/CL/CLTuner.cpp b/src/runtime/CL/CLTuner.cpp
new file mode 100644
index 0000000000..f3300d3f83
--- /dev/null
+++ b/src/runtime/CL/CLTuner.cpp
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/CL/CLTuner.h"
+
+#include "arm_compute/core/CL/ICLKernel.h"
+#include "arm_compute/runtime/CL/CLScheduler.h"
+
+#include <chrono>
+#include <limits>
+#include <string>
+
+using namespace arm_compute;
+
+/** Default constructor: the tuner starts with an empty table of Local-Workgroup-Sizes. */
+CLTuner::CLTuner()
+    : _lws_table{}
+{
+}
+
+/** Apply a tuned Local-Workgroup-Size (LWS) hint to a kernel.
+ *
+ * A kernel whose configuration ID is empty is left untouched. Otherwise the LWS stored under that ID
+ * is set as the kernel's hint; if the ID is not in the table yet, find_optimal_lws() is run once and
+ * its result is stored in the table before being applied.
+ *
+ * @param[in, out] kernel Kernel to tune
+ */
+void CLTuner::tune_kernel(ICLKernel &kernel)
+{
+    const std::string &id = kernel.config_id();
+
+    // An empty ID marks a kernel that does not require tuning
+    if(id.empty())
+    {
+        return;
+    }
+
+    auto entry = _lws_table.find(id);
+
+    // Already tuned: reuse the stored LWS
+    if(entry != _lws_table.end())
+    {
+        kernel.set_lws_hint(entry->second);
+        return;
+    }
+
+    // First time this configuration is seen: search, remember, then apply
+    cl::NDRange tuned_lws = find_optimal_lws(kernel);
+    _lws_table.emplace(id, tuned_lws);
+    kernel.set_lws_hint(tuned_lws);
+}
+
+/** Search for the fastest Local-Workgroup-Size (LWS) of a kernel by running it.
+ *
+ * Every LWS (x, y) with x and y in [1, 16] is set as the kernel's hint, the kernel is run over its own
+ * window on the scheduler's queue, and the host-side time from just before run() until
+ * CLScheduler::sync() has returned is measured with std::chrono::high_resolution_clock.
+ *
+ * @param[in, out] kernel Kernel to benchmark. The last hint set on it by this function is (16, 16).
+ *
+ * @return The candidate with the smallest measured time. On a tie the candidate tested first is kept.
+ *         Candidates are tested row by row: x varies fastest, then y.
+ */
+cl::NDRange CLTuner::find_optimal_lws(ICLKernel &kernel)
+{
+    // Upper bound (inclusive) of the search range in each dimension
+    const int max_lws_dim = 16;
+
+    cl::CommandQueue queue = CLScheduler::get().queue();
+
+    cl::NDRange best_lws     = cl::NDRange(1, 1);
+    double      best_time_ns = std::numeric_limits<double>::max();
+
+    for(int lws_y = 1; lws_y <= max_lws_dim; ++lws_y)
+    {
+        for(int lws_x = 1; lws_x <= max_lws_dim; ++lws_x)
+        {
+            // Kept as a named, non-const object because it is handed to set_lws_hint() by reference
+            cl::NDRange candidate = cl::NDRange(lws_x, lws_y);
+            kernel.set_lws_hint(candidate);
+
+            // Time one execution: the stop timestamp is taken only after sync() has returned
+            const auto begin = std::chrono::high_resolution_clock::now();
+            kernel.run(kernel.window(), queue);
+            CLScheduler::get().sync();
+            const auto end = std::chrono::high_resolution_clock::now();
+
+            const std::chrono::duration<double, std::nano> elapsed_ns = end - begin;
+
+            // Strict comparison, so an equally fast later candidate does not replace the current best
+            if(elapsed_ns.count() < best_time_ns)
+            {
+                best_time_ns = elapsed_ns.count();
+                best_lws     = cl::NDRange(lws_x, lws_y);
+            }
+        }
+    }
+
+    return best_lws;
+}
+
+/** Replace the tuner's LWS table with a copy of the given one.
+ *
+ * All entries previously held by the tuner are discarded.
+ *
+ * @param[in] lws_table Table mapping kernel configuration IDs to Local-Workgroup-Sizes. Passing the
+ *                      tuner's own table (e.g. the reference returned by export_lws_table()) leaves
+ *                      the table unchanged.
+ */
+void CLTuner::import_lws_table(const std::unordered_map<std::string, cl::NDRange> &lws_table)
+{
+    // Importing the tuner's own table must not lose its content. Clearing the destination before
+    // copying would empty the source as well when both names refer to the same object.
+    if(&lws_table == &_lws_table)
+    {
+        return;
+    }
+
+    // Copy assignment replaces the previous entries, so no explicit clear() is needed
+    _lws_table = lws_table;
+}
+
+/** Give read-only access to the tuner's LWS table.
+ *
+ * @return Const reference to the internal table that maps kernel configuration IDs to
+ *         Local-Workgroup-Sizes. No copy is made.
+ */
+const std::unordered_map<std::string, cl::NDRange> &CLTuner::export_lws_table()
+{
+    return _lws_table;
+}
+\ No newline at end of file