diff options
author | Manuel Bottini <manuel.bottini@arm.com> | 2021-01-25 15:07:17 +0000 |
---|---|---|
committer | Georgios Pinitas <georgios.pinitas@arm.com> | 2021-02-03 17:35:00 +0000 |
commit | be9f9f9139b759d314f4f2a6d2ee747079666504 (patch) | |
tree | 461690abb95caeaeca40261fd85816a906c8446c /src/runtime/CL/CLTuner.cpp | |
parent | 7061eb283969f9a020c08349454447564e4dd5b3 (diff) | |
download | ComputeLibrary-be9f9f9139b759d314f4f2a6d2ee747079666504.tar.gz |
Add WBSM tuning to CLTuner
Add WBSM as possible parameter to be tuned
Add helper functions to check WBSM support and setting the value in the kernel
Update tuning parameter lists to use WBSM
Update CLTuner to use WBSM
The WBSM tuning is exposed as a parameter to be set at compile time by setting the CLTuningInfo
CLTuningInfo contains information about the tuning mode and if wbsm tuning enabled
Resolves: COMPMID-3936
Change-Id: Id53697c9c6d2cef41c049f368002f6197351b3ed
Signed-off-by: Manuel Bottini <manuel.bottini@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4914
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Diffstat (limited to 'src/runtime/CL/CLTuner.cpp')
-rw-r--r-- | src/runtime/CL/CLTuner.cpp | 102 |
1 files changed, 70 insertions, 32 deletions
diff --git a/src/runtime/CL/CLTuner.cpp b/src/runtime/CL/CLTuner.cpp index 906021790e..e16d6808ed 100644 --- a/src/runtime/CL/CLTuner.cpp +++ b/src/runtime/CL/CLTuner.cpp @@ -32,13 +32,11 @@ #include <cerrno> #include <fstream> #include <limits> -#include <memory> -#include <string> namespace arm_compute { CLTuner::CLTuner(bool tune_new_kernels, CLTuningInfo tuning_info) - : real_clEnqueueNDRangeKernel(nullptr), _tuning_params_table(), _lws_table(), _kernel_event(), _tune_new_kernels(tune_new_kernels), _tuning_info(tuning_info), _tuner_mode(CLTunerMode::NORMAL) + : real_clEnqueueNDRangeKernel(nullptr), _tuning_params_table(), _lws_table(), _kernel_event(), _tune_new_kernels(tune_new_kernels), _tuning_info(tuning_info) { } @@ -62,12 +60,12 @@ bool CLTuner::tune_new_kernels() const void CLTuner::set_tuner_mode(CLTunerMode mode) { - _tuner_mode = mode; + _tuning_info.tuner_mode = mode; } CLTunerMode CLTuner::get_tuner_mode() const { - return _tuner_mode; + return _tuning_info.tuner_mode; } void CLTuner::tune_kernel_static(ICLKernel &kernel) @@ -103,12 +101,20 @@ void CLTuner::tune_kernel_dynamic(ICLKernel &kernel, ITensorPack &tensors) // Set Local-Workgroup-Size kernel.set_lws_hint(opt_tuning_params.get_lws()); + if(_tuning_info.tune_wbsm) + { + kernel.set_wbsm_hint(opt_tuning_params.get_wbsm()); + } } } else { // Set Local-Workgroup-Size kernel.set_lws_hint(p->second.get_lws()); + if(_tuning_info.tune_wbsm) + { + kernel.set_wbsm_hint(p->second.get_wbsm()); + } } } } @@ -188,13 +194,15 @@ CLTuningParams CLTuner::find_optimal_tuning_params(ICLKernel &kernel, ITensorPac cl_ulong min_exec_time = end - start; _kernel_event = nullptr; - cl::NDRange opt_lws = cl::NullRange; + CLTuningParams opt_tuning_params(cl::NullRange, 0); // Construct the list of tuning parameters values to be tested based on the tuner mode. - auto lws_list = cl_tuner::get_tuning_parameters_list(_tuner_mode, gws); - for(size_t i = 0; i < lws_list->size(); ++i) + auto tuning_list = cl_tuner::get_tuning_parameters_list(_tuning_info, gws); + for(size_t i = 0; i < tuning_list->size(); ++i) { - cl::NDRange lws_test = (*lws_list)[i].get_lws(); + CLTuningParams tuning_test = (*tuning_list)[i]; + // Setting the lws + cl::NDRange lws_test = tuning_test.get_lws(); auto x = lws_test[0]; auto y = lws_test[1]; auto z = lws_test[2]; @@ -205,8 +213,12 @@ CLTuningParams CLTuner::find_optimal_tuning_params(ICLKernel &kernel, ITensorPac continue; } - //Set the Local-Workgroup-Size kernel.set_lws_hint(lws_test); + if(_tuning_info.tune_wbsm && CLKernelLibrary::get().is_wbsm_supported()) + { + cl_int wbsm_test = tuning_test.get_wbsm(); + kernel.set_wbsm_hint(wbsm_test); + } // Run the kernel inject_memory ? kernel.run_op(tensors, kernel.window(), queue_profiler) : kernel.run(kernel.window(), queue_profiler); @@ -222,13 +234,17 @@ CLTuningParams CLTuner::find_optimal_tuning_params(ICLKernel &kernel, ITensorPac if(diff < min_exec_time) { min_exec_time = diff; - opt_lws = cl::NDRange(x, y, z); + opt_tuning_params.set_lws(tuning_test.get_lws()); + if(_tuning_info.tune_wbsm) + { + opt_tuning_params.set_wbsm(tuning_test.get_wbsm()); + } } } // Restore real function CLSymbols::get().clEnqueueNDRangeKernel_ptr = real_clEnqueueNDRangeKernel; - return CLTuningParams(opt_lws); + return opt_tuning_params; } void CLTuner::import_lws_table(const std::unordered_map<std::string, cl::NDRange> &lws_table) @@ -271,34 +287,46 @@ void CLTuner::load_from_file(const std::string &filename) ARM_COMPUTE_ERROR_VAR("Failed to open '%s' (%s [%d])", filename.c_str(), strerror(errno), errno); } std::string line; + bool header_line = true; while(!std::getline(fs, line).fail()) { - std::istringstream ss(line); - std::string token; - if(std::getline(ss, token, ';').fail()) - { - ARM_COMPUTE_ERROR_VAR("Malformed row '%s' in %s (Should be of the form 'kernel_id;lws[0];lws[1];lws[2]')", ss.str().c_str(), filename.c_str()); - } - std::string kernel_id = token; - cl::NDRange lws(1, 1, 1); - for(int i = 0; i < 3; i++) + if(header_line) { - if(std::getline(ss, token, ';').fail()) + header_line = false; + size_t pos_lws = line.find("lws"); + size_t pos_wbsm = line.find("wbsm"); + _tuning_info.tune_wbsm = false; + if(pos_lws != std::string::npos || pos_wbsm != std::string::npos) { - ARM_COMPUTE_ERROR_VAR("Malformed row '%s' in %s (Should be of the form 'kernel_id;lws[0];lws[1];lws[2]')", ss.str().c_str(), filename.c_str()); + // The file has in the first line the parameters it has been tuned on + if(pos_wbsm != std::string::npos) + { + _tuning_info.tune_wbsm = true; + } + // Once the line with the tuning parameter is read we can + // read the next one to start collecting the values + if(std::getline(fs, line).fail()) + { + break; + } } - lws.get()[i] = support::cpp11::stoi(token); } - // If all dimensions are 0: reset to NullRange (i.e nullptr) - if(lws[0] == 0 && lws[1] == 0 && lws[2] == 0) + CLTuningParams tuning_params; + size_t pos = line.find(";"); + if(pos == std::string::npos) { - lws = cl::NullRange; + ARM_COMPUTE_ERROR_VAR("Malformed row '%s' in %s", line.c_str(), filename.c_str()); } - add_tuning_params(kernel_id, lws); + std::string kernel_id = line.substr(0, pos); + line.erase(0, pos + 1); + if(!tuning_params.from_string(_tuning_info, line)) + { + ARM_COMPUTE_ERROR_VAR("Malformed row '%s' in %s", line.c_str(), filename.c_str()); + } + add_tuning_params(kernel_id, tuning_params); } fs.close(); - _tuning_info.tune_lws = true; } bool CLTuner::save_to_file(const std::string &filename) const @@ -307,14 +335,24 @@ bool CLTuner::save_to_file(const std::string &filename) const { return false; } - std::ofstream fs; fs.exceptions(std::ifstream::failbit | std::ifstream::badbit); fs.open(filename, std::ios::out); + std::string header_string = ""; + header_string += "lws"; + if(_tuning_info.tune_wbsm) + { + if(!header_string.empty()) + { + header_string += " "; + } + header_string += "wbsm"; + } + fs << header_string << std::endl; for(auto const &kernel_data : _tuning_params_table) { - const cl::NDRange lws = CLTuningParams(kernel_data.second).get_lws(); - fs << kernel_data.first << ";" << lws[0] << ";" << lws[1] << ";" << lws[2] << std::endl; + CLTuningParams tun_pams(kernel_data.second); + fs << kernel_data.first << tun_pams.to_string(_tuning_info) << std::endl; } fs.close(); return true; |