aboutsummaryrefslogtreecommitdiff
path: root/src/runtime/CL/CLTuner.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/runtime/CL/CLTuner.cpp')
-rw-r--r--src/runtime/CL/CLTuner.cpp102
1 files changed, 70 insertions, 32 deletions
diff --git a/src/runtime/CL/CLTuner.cpp b/src/runtime/CL/CLTuner.cpp
index 906021790e..e16d6808ed 100644
--- a/src/runtime/CL/CLTuner.cpp
+++ b/src/runtime/CL/CLTuner.cpp
@@ -32,13 +32,11 @@
#include <cerrno>
#include <fstream>
#include <limits>
-#include <memory>
-#include <string>
namespace arm_compute
{
CLTuner::CLTuner(bool tune_new_kernels, CLTuningInfo tuning_info)
- : real_clEnqueueNDRangeKernel(nullptr), _tuning_params_table(), _lws_table(), _kernel_event(), _tune_new_kernels(tune_new_kernels), _tuning_info(tuning_info), _tuner_mode(CLTunerMode::NORMAL)
+ : real_clEnqueueNDRangeKernel(nullptr), _tuning_params_table(), _lws_table(), _kernel_event(), _tune_new_kernels(tune_new_kernels), _tuning_info(tuning_info)
{
}
@@ -62,12 +60,12 @@ bool CLTuner::tune_new_kernels() const
void CLTuner::set_tuner_mode(CLTunerMode mode)
{
- _tuner_mode = mode;
+ _tuning_info.tuner_mode = mode;
}
CLTunerMode CLTuner::get_tuner_mode() const
{
- return _tuner_mode;
+ return _tuning_info.tuner_mode;
}
void CLTuner::tune_kernel_static(ICLKernel &kernel)
@@ -103,12 +101,20 @@ void CLTuner::tune_kernel_dynamic(ICLKernel &kernel, ITensorPack &tensors)
// Set Local-Workgroup-Size
kernel.set_lws_hint(opt_tuning_params.get_lws());
+ if(_tuning_info.tune_wbsm)
+ {
+ kernel.set_wbsm_hint(opt_tuning_params.get_wbsm());
+ }
}
}
else
{
// Set Local-Workgroup-Size
kernel.set_lws_hint(p->second.get_lws());
+ if(_tuning_info.tune_wbsm)
+ {
+ kernel.set_wbsm_hint(p->second.get_wbsm());
+ }
}
}
}
@@ -188,13 +194,15 @@ CLTuningParams CLTuner::find_optimal_tuning_params(ICLKernel &kernel, ITensorPac
cl_ulong min_exec_time = end - start;
_kernel_event = nullptr;
- cl::NDRange opt_lws = cl::NullRange;
+ CLTuningParams opt_tuning_params(cl::NullRange, 0);
// Construct the list of tuning parameters values to be tested based on the tuner mode.
- auto lws_list = cl_tuner::get_tuning_parameters_list(_tuner_mode, gws);
- for(size_t i = 0; i < lws_list->size(); ++i)
+ auto tuning_list = cl_tuner::get_tuning_parameters_list(_tuning_info, gws);
+ for(size_t i = 0; i < tuning_list->size(); ++i)
{
- cl::NDRange lws_test = (*lws_list)[i].get_lws();
+ CLTuningParams tuning_test = (*tuning_list)[i];
+ // Setting the lws
+ cl::NDRange lws_test = tuning_test.get_lws();
auto x = lws_test[0];
auto y = lws_test[1];
auto z = lws_test[2];
@@ -205,8 +213,12 @@ CLTuningParams CLTuner::find_optimal_tuning_params(ICLKernel &kernel, ITensorPac
continue;
}
- //Set the Local-Workgroup-Size
kernel.set_lws_hint(lws_test);
+ if(_tuning_info.tune_wbsm && CLKernelLibrary::get().is_wbsm_supported())
+ {
+ cl_int wbsm_test = tuning_test.get_wbsm();
+ kernel.set_wbsm_hint(wbsm_test);
+ }
// Run the kernel
inject_memory ? kernel.run_op(tensors, kernel.window(), queue_profiler) : kernel.run(kernel.window(), queue_profiler);
@@ -222,13 +234,17 @@ CLTuningParams CLTuner::find_optimal_tuning_params(ICLKernel &kernel, ITensorPac
if(diff < min_exec_time)
{
min_exec_time = diff;
- opt_lws = cl::NDRange(x, y, z);
+ opt_tuning_params.set_lws(tuning_test.get_lws());
+ if(_tuning_info.tune_wbsm)
+ {
+ opt_tuning_params.set_wbsm(tuning_test.get_wbsm());
+ }
}
}
// Restore real function
CLSymbols::get().clEnqueueNDRangeKernel_ptr = real_clEnqueueNDRangeKernel;
- return CLTuningParams(opt_lws);
+ return opt_tuning_params;
}
void CLTuner::import_lws_table(const std::unordered_map<std::string, cl::NDRange> &lws_table)
@@ -271,34 +287,46 @@ void CLTuner::load_from_file(const std::string &filename)
ARM_COMPUTE_ERROR_VAR("Failed to open '%s' (%s [%d])", filename.c_str(), strerror(errno), errno);
}
std::string line;
+ bool header_line = true;
while(!std::getline(fs, line).fail())
{
- std::istringstream ss(line);
- std::string token;
- if(std::getline(ss, token, ';').fail())
- {
- ARM_COMPUTE_ERROR_VAR("Malformed row '%s' in %s (Should be of the form 'kernel_id;lws[0];lws[1];lws[2]')", ss.str().c_str(), filename.c_str());
- }
- std::string kernel_id = token;
- cl::NDRange lws(1, 1, 1);
- for(int i = 0; i < 3; i++)
+ if(header_line)
{
- if(std::getline(ss, token, ';').fail())
+ header_line = false;
+ size_t pos_lws = line.find("lws");
+ size_t pos_wbsm = line.find("wbsm");
+ _tuning_info.tune_wbsm = false;
+ if(pos_lws != std::string::npos || pos_wbsm != std::string::npos)
{
- ARM_COMPUTE_ERROR_VAR("Malformed row '%s' in %s (Should be of the form 'kernel_id;lws[0];lws[1];lws[2]')", ss.str().c_str(), filename.c_str());
+ // The file has in the first line the parameters it has been tuned on
+ if(pos_wbsm != std::string::npos)
+ {
+ _tuning_info.tune_wbsm = true;
+ }
+ // Once the line with the tuning parameter is read we can
+ // read the next one to start collecting the values
+ if(std::getline(fs, line).fail())
+ {
+ break;
+ }
}
- lws.get()[i] = support::cpp11::stoi(token);
}
- // If all dimensions are 0: reset to NullRange (i.e nullptr)
- if(lws[0] == 0 && lws[1] == 0 && lws[2] == 0)
+ CLTuningParams tuning_params;
+ size_t pos = line.find(";");
+ if(pos == std::string::npos)
{
- lws = cl::NullRange;
+ ARM_COMPUTE_ERROR_VAR("Malformed row '%s' in %s", line.c_str(), filename.c_str());
}
- add_tuning_params(kernel_id, lws);
+ std::string kernel_id = line.substr(0, pos);
+ line.erase(0, pos + 1);
+ if(!tuning_params.from_string(_tuning_info, line))
+ {
+ ARM_COMPUTE_ERROR_VAR("Malformed row '%s' in %s", line.c_str(), filename.c_str());
+ }
+ add_tuning_params(kernel_id, tuning_params);
}
fs.close();
- _tuning_info.tune_lws = true;
}
bool CLTuner::save_to_file(const std::string &filename) const
@@ -307,14 +335,24 @@ bool CLTuner::save_to_file(const std::string &filename) const
{
return false;
}
-
std::ofstream fs;
fs.exceptions(std::ifstream::failbit | std::ifstream::badbit);
fs.open(filename, std::ios::out);
+ std::string header_string = "";
+ header_string += "lws";
+ if(_tuning_info.tune_wbsm)
+ {
+ if(!header_string.empty())
+ {
+ header_string += " ";
+ }
+ header_string += "wbsm";
+ }
+ fs << header_string << std::endl;
for(auto const &kernel_data : _tuning_params_table)
{
- const cl::NDRange lws = CLTuningParams(kernel_data.second).get_lws();
- fs << kernel_data.first << ";" << lws[0] << ";" << lws[1] << ";" << lws[2] << std::endl;
+ CLTuningParams tun_pams(kernel_data.second);
+ fs << kernel_data.first << tun_pams.to_string(_tuning_info) << std::endl;
}
fs.close();
return true;