From b56c1758dfc233452ff73149fabe30e1c460e9d3 Mon Sep 17 00:00:00 2001 From: Manuel Bottini Date: Wed, 18 Nov 2020 17:56:30 +0000 Subject: Generalization of CLTuner Rename lws to tuning parameters in functions used externally Add new generalized objects for the OpenCL Tuner to accommodate further possible tuning parameters Resolves: COMPMID-3935 Change-Id: I0f2a0f89bca5dae4a4e4adce2f7c7cae32ecb84a Signed-off-by: Manuel Bottini Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4584 Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins Reviewed-by: Georgios Pinitas --- src/runtime/CL/CLTuner.cpp | 80 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 56 insertions(+), 24 deletions(-) (limited to 'src/runtime/CL/CLTuner.cpp') diff --git a/src/runtime/CL/CLTuner.cpp b/src/runtime/CL/CLTuner.cpp index ed85e606cf..bcc50f6c28 100644 --- a/src/runtime/CL/CLTuner.cpp +++ b/src/runtime/CL/CLTuner.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,7 +22,7 @@ * SOFTWARE. */ #include "arm_compute/runtime/CL/CLTuner.h" -#include "arm_compute/runtime/CL/tuners/CLLWSList.h" +#include "arm_compute/runtime/CL/tuners/CLTuningParametersList.h" #include "arm_compute/core/Error.h" #include "arm_compute/runtime/CL/CLScheduler.h" @@ -38,8 +38,8 @@ namespace arm_compute { -CLTuner::CLTuner(bool tune_new_kernels) - : real_clEnqueueNDRangeKernel(nullptr), _lws_table(), _kernel_event(), _tune_new_kernels(tune_new_kernels), _tuner_mode(CLTunerMode::NORMAL) +CLTuner::CLTuner(bool tune_new_kernels, CLTuningInfo tuning_info) + : real_clEnqueueNDRangeKernel(nullptr), _tuning_params_table(), _lws_table(), _kernel_event(), _tune_new_kernels(tune_new_kernels), _tuning_info(tuning_info), _tuner_mode(CLTunerMode::NORMAL) { } @@ -65,6 +65,7 @@ void CLTuner::set_tuner_mode(CLTunerMode mode) { _tuner_mode = mode; } + CLTunerMode CLTuner::get_tuner_mode() const { return _tuner_mode; @@ -89,36 +90,41 @@ void CLTuner::tune_kernel_dynamic(ICLKernel &kernel, ITensorPack &tensors) // Check if we need to find the Optimal LWS. If the kernel's config_id is equal to default_config_id, the kernel does not require to be tuned if(kernel.config_id() != arm_compute::default_config_id) { - auto p = _lws_table.find(config_id); + auto p = _tuning_params_table.find(config_id); - if(p == _lws_table.end()) + if(p == _tuning_params_table.end()) { if(_tune_new_kernels) { // Find the optimal LWS for the kernel - cl::NDRange opt_lws = find_optimal_lws(kernel, tensors); + CLTuningParams opt_tuning_params = find_optimal_tuning_params(kernel, tensors); // Insert the optimal LWS in the table - add_lws_to_table(config_id, opt_lws); + add_tuning_params(config_id, opt_tuning_params); // Set Local-Workgroup-Size - kernel.set_lws_hint(opt_lws); + kernel.set_lws_hint(opt_tuning_params.get_lws()); } } else { // Set Local-Workgroup-Size - kernel.set_lws_hint(p->second); + kernel.set_lws_hint(p->second.get_lws()); } } } void CLTuner::add_lws_to_table(const std::string &kernel_id, cl::NDRange optimal_lws) { - _lws_table.emplace(kernel_id, optimal_lws); + add_tuning_params(kernel_id, CLTuningParams(optimal_lws)); } -cl::NDRange CLTuner::find_optimal_lws(ICLKernel &kernel, ITensorPack &tensors) +void CLTuner::add_tuning_params(const std::string &kernel_id, CLTuningParams optimal_tuning_params) +{ + _tuning_params_table.emplace(kernel_id, optimal_tuning_params); +} + +CLTuningParams CLTuner::find_optimal_tuning_params(ICLKernel &kernel, ITensorPack &tensors) { // Profiling queue cl::CommandQueue queue_profiler; @@ -185,11 +191,11 @@ cl::NDRange CLTuner::find_optimal_lws(ICLKernel &kernel, ITensorPack &tensors) cl::NDRange opt_lws = cl::NullRange; - // Construct the list of LWS values to be tested based on the tuner mode. - auto lws_list = cl_tuner::CLLWSListFactory::get_lws_list(_tuner_mode, gws); + // Construct the list of tuning parameters values to be tested based on the tuner mode. + auto lws_list = cl_tuner::get_tuning_parameters_list(_tuner_mode, gws); for(size_t i = 0; i < lws_list->size(); ++i) { - cl::NDRange lws_test = (*lws_list)[i]; + cl::NDRange lws_test = (*lws_list)[i].get_lws(); auto x = lws_test[0]; auto y = lws_test[1]; auto z = lws_test[2]; @@ -223,21 +229,39 @@ cl::NDRange CLTuner::find_optimal_lws(ICLKernel &kernel, ITensorPack &tensors) // Restore real function CLSymbols::get().clEnqueueNDRangeKernel_ptr = real_clEnqueueNDRangeKernel; - - return opt_lws; + return CLTuningParams(opt_lws); } void CLTuner::import_lws_table(const std::unordered_map &lws_table) { - _lws_table.clear(); - _lws_table = lws_table; + _tuning_params_table.clear(); + for(auto && params : lws_table) + { + add_tuning_params(params.first, CLTuningParams(params.second)); + } } -const std::unordered_map &CLTuner::lws_table() const +const std::unordered_map &CLTuner::lws_table() { + _lws_table.clear(); + for(auto && params : _tuning_params_table) + { + _lws_table.emplace(params.first, params.second.get_lws()); + } return _lws_table; } +const std::unordered_map &CLTuner::tuning_params_table() const +{ + return _tuning_params_table; +} + +void CLTuner::import_tuning_params(const std::unordered_map &tuning_params_table) +{ + _tuning_params_table.clear(); + _tuning_params_table = tuning_params_table; +} + void CLTuner::load_from_file(const std::string &filename) { std::ifstream fs; @@ -272,20 +296,28 @@ void CLTuner::load_from_file(const std::string &filename) { lws = cl::NullRange; } - add_lws_to_table(kernel_id, lws); + add_tuning_params(kernel_id, lws); } fs.close(); + _tuning_info.tune_lws = true; } -void CLTuner::save_to_file(const std::string &filename) const +bool CLTuner::save_to_file(const std::string &filename) const { + if(!_tune_new_kernels || _tuning_params_table.empty() || filename.empty()) + { + return false; + } + std::ofstream fs; fs.exceptions(std::ifstream::failbit | std::ifstream::badbit); fs.open(filename, std::ios::out); - for(auto const &kernel_data : _lws_table) + for(auto const &kernel_data : _tuning_params_table) { - fs << kernel_data.first << ";" << kernel_data.second[0] << ";" << kernel_data.second[1] << ";" << kernel_data.second[2] << std::endl; + const cl::NDRange lws = CLTuningParams(kernel_data.second).get_lws(); + fs << kernel_data.first << ";" << lws[0] << ";" << lws[1] << ";" << lws[2] << std::endl; } fs.close(); + return true; } } // namespace arm_compute -- cgit v1.2.1