diff options
-rw-r--r-- | arm_compute/runtime/CL/CLTuner.h | 5
-rw-r--r-- | arm_compute/runtime/CL/ICLTuner.h | 13
-rw-r--r-- | arm_compute/runtime/CL/tuners/BifrostTuner.h | 3
-rw-r--r-- | arm_compute/runtime/CL/tuners/MidgardTuner.h | 3
-rw-r--r-- | src/runtime/CL/CLScheduler.cpp | 14
-rw-r--r-- | src/runtime/CL/CLTuner.cpp | 16
-rw-r--r-- | src/runtime/CL/tuners/BifrostTuner.cpp | 5
-rw-r--r-- | src/runtime/CL/tuners/MidgardTuner.cpp | 7
8 files changed, 44 insertions, 22 deletions
diff --git a/arm_compute/runtime/CL/CLTuner.h b/arm_compute/runtime/CL/CLTuner.h index 745d57a959..aa31181d2d 100644 --- a/arm_compute/runtime/CL/CLTuner.h +++ b/arm_compute/runtime/CL/CLTuner.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -116,6 +116,7 @@ public: // Inherited methods overridden: void tune_kernel_static(ICLKernel &kernel) override; void tune_kernel_dynamic(ICLKernel &kernel) override; + void tune_kernel_dynamic(ICLKernel &kernel, const InputTensorMap &inputs, const OutputTensorMap &outputs) override; /** Is the kernel_event set ? * @@ -130,7 +131,7 @@ private: * * @return The optimal LWS to use */ - cl::NDRange find_optimal_lws(ICLKernel &kernel); + cl::NDRange find_optimal_lws(ICLKernel &kernel, const InputTensorMap &inputs, const OutputTensorMap &outputs); std::unordered_map<std::string, cl::NDRange> _lws_table; cl::Event _kernel_event; diff --git a/arm_compute/runtime/CL/ICLTuner.h b/arm_compute/runtime/CL/ICLTuner.h index 0b238180eb..4bc8ddf632 100644 --- a/arm_compute/runtime/CL/ICLTuner.h +++ b/arm_compute/runtime/CL/ICLTuner.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -24,6 +24,8 @@ #ifndef ARM_COMPUTE_ICLTUNER_H #define ARM_COMPUTE_ICLTUNER_H +#include "arm_compute/core/experimental/Types.h" + namespace arm_compute { class ICLKernel; @@ -49,6 +51,13 @@ public: * @param[in] kernel Kernel to tune */ virtual void tune_kernel_dynamic(ICLKernel &kernel) = 0; + /** Tune OpenCL kernel dynamically + * + * @param[in] kernel Kernel to tune + * @param[in] inputs Inputs for the kernel to use + * @param[in, out] outputs Outputs for the kernel to use + */ + virtual void tune_kernel_dynamic(ICLKernel &kernel, const InputTensorMap &inputs, const OutputTensorMap &outputs) = 0; }; -} +} // namespace arm_compute #endif /*ARM_COMPUTE_ICLTUNER_H */ diff --git a/arm_compute/runtime/CL/tuners/BifrostTuner.h b/arm_compute/runtime/CL/tuners/BifrostTuner.h index b7ce6e96f9..830f7d9067 100644 --- a/arm_compute/runtime/CL/tuners/BifrostTuner.h +++ b/arm_compute/runtime/CL/tuners/BifrostTuner.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -37,6 +37,7 @@ public: // Inherited overriden methods void tune_kernel_static(ICLKernel &kernel) override; void tune_kernel_dynamic(ICLKernel &kernel) override; + void tune_kernel_dynamic(ICLKernel &kernel, const InputTensorMap &inputs, const OutputTensorMap &outputs) override; }; } // namespace tuners } // namespace arm_compute diff --git a/arm_compute/runtime/CL/tuners/MidgardTuner.h b/arm_compute/runtime/CL/tuners/MidgardTuner.h index 418b80728d..c702e7a2aa 100644 --- a/arm_compute/runtime/CL/tuners/MidgardTuner.h +++ b/arm_compute/runtime/CL/tuners/MidgardTuner.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -37,6 +37,7 @@ public: // Inherited overriden methods void tune_kernel_static(ICLKernel &kernel) override; void tune_kernel_dynamic(ICLKernel &kernel) override; + void tune_kernel_dynamic(ICLKernel &kernel, const InputTensorMap &inputs, const OutputTensorMap &outputs) override; }; } // namespace tuners } // namespace arm_compute diff --git a/src/runtime/CL/CLScheduler.cpp b/src/runtime/CL/CLScheduler.cpp index 56f5f212a8..5ef66f456a 100644 --- a/src/runtime/CL/CLScheduler.cpp +++ b/src/runtime/CL/CLScheduler.cpp @@ -157,22 +157,16 @@ void CLScheduler::enqueue_common(ICLKernel &kernel, const InputTensorMap &inputs "The CLScheduler is not initialised yet! Please call the CLScheduler::get().default_init(), \ or CLScheduler::get()::init() and CLKernelLibrary::get()::init() function before running functions!"); + const bool inject_memory = !inputs.empty(); + // Tune the kernel if the CLTuner has been provided if(_cl_tuner != nullptr) { - // Tune the OpenCL kernel - _cl_tuner->tune_kernel_dynamic(kernel); + inject_memory ? _cl_tuner->tune_kernel_dynamic(kernel, inputs, outputs) : _cl_tuner->tune_kernel_dynamic(kernel); } // Run kernel - if(inputs.empty()) - { - kernel.run(kernel.window(), _queue); - } - else - { - kernel.run_op(inputs, outputs, kernel.window(), _queue); - } + inject_memory ? 
kernel.run_op(inputs, outputs, kernel.window(), _queue) : kernel.run(kernel.window(), _queue); if(flush) { diff --git a/src/runtime/CL/CLTuner.cpp b/src/runtime/CL/CLTuner.cpp index e3119c1db9..b2e3476e20 100644 --- a/src/runtime/CL/CLTuner.cpp +++ b/src/runtime/CL/CLTuner.cpp @@ -77,6 +77,11 @@ void CLTuner::tune_kernel_static(ICLKernel &kernel) void CLTuner::tune_kernel_dynamic(ICLKernel &kernel) { + tune_kernel_dynamic(kernel, {}, {}); +} + +void CLTuner::tune_kernel_dynamic(ICLKernel &kernel, const InputTensorMap &inputs, const OutputTensorMap &outputs) +{ // Get the configuration ID from the kernel and append GPU target name and number of available compute units const std::string config_id = kernel.config_id() + "_" + string_from_target(kernel.get_target()) + "_MP" + support::cpp11::to_string(CLKernelLibrary::get().get_num_compute_units()); @@ -90,7 +95,7 @@ void CLTuner::tune_kernel_dynamic(ICLKernel &kernel) if(_tune_new_kernels) { // Find the optimal LWS for the kernel - cl::NDRange opt_lws = find_optimal_lws(kernel); + cl::NDRange opt_lws = find_optimal_lws(kernel, inputs, outputs); // Insert the optimal LWS in the table add_lws_to_table(config_id, opt_lws); @@ -112,7 +117,7 @@ void CLTuner::add_lws_to_table(const std::string &kernel_id, cl::NDRange optimal _lws_table.emplace(kernel_id, optimal_lws); } -cl::NDRange CLTuner::find_optimal_lws(ICLKernel &kernel) +cl::NDRange CLTuner::find_optimal_lws(ICLKernel &kernel, const InputTensorMap &inputs, const OutputTensorMap &outputs) { // Profiling queue cl::CommandQueue queue_profiler; @@ -167,7 +172,8 @@ cl::NDRange CLTuner::find_optimal_lws(ICLKernel &kernel) cl::NDRange gws = ICLKernel::gws_from_window(kernel.window()); // Run the kernel with default lws to be used as baseline - kernel.run(kernel.window(), queue_profiler); + const bool inject_memory = !inputs.empty(); + inject_memory ? 
kernel.run_op(inputs, outputs, kernel.window(), queue_profiler) : kernel.run(kernel.window(), queue_profiler); queue_profiler.finish(); @@ -178,7 +184,7 @@ cl::NDRange CLTuner::find_optimal_lws(ICLKernel &kernel) cl::NDRange opt_lws = cl::NullRange; - //Construct the list of LWS values to be tested based on the tuner mode. + // Construct the list of LWS values to be tested based on the tuner mode. auto lws_list = cl_tuner::CLLWSListFactory::get_lws_list(_tuner_mode, gws); for(size_t i = 0; i < lws_list->size(); ++i) { @@ -197,7 +203,7 @@ cl::NDRange CLTuner::find_optimal_lws(ICLKernel &kernel) kernel.set_lws_hint(lws_test); // Run the kernel - kernel.run(kernel.window(), queue_profiler); + inject_memory ? kernel.run_op(inputs, outputs, kernel.window(), queue_profiler) : kernel.run(kernel.window(), queue_profiler); queue_profiler.finish(); diff --git a/src/runtime/CL/tuners/BifrostTuner.cpp b/src/runtime/CL/tuners/BifrostTuner.cpp index 3fecd04455..1797c2ceb1 100644 --- a/src/runtime/CL/tuners/BifrostTuner.cpp +++ b/src/runtime/CL/tuners/BifrostTuner.cpp @@ -315,5 +315,10 @@ void BifrostTuner::tune_kernel_dynamic(ICLKernel &kernel) { ARM_COMPUTE_UNUSED(kernel); } + +void BifrostTuner::tune_kernel_dynamic(ICLKernel &kernel, const InputTensorMap &inputs, const OutputTensorMap &outputs) +{ + ARM_COMPUTE_UNUSED(kernel, inputs, outputs); +} } // namespace tuners } // namespace arm_compute
\ No newline at end of file diff --git a/src/runtime/CL/tuners/MidgardTuner.cpp b/src/runtime/CL/tuners/MidgardTuner.cpp index a95ca1998e..68c98cebe7 100644 --- a/src/runtime/CL/tuners/MidgardTuner.cpp +++ b/src/runtime/CL/tuners/MidgardTuner.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -73,5 +73,10 @@ void MidgardTuner::tune_kernel_dynamic(ICLKernel &kernel) { ARM_COMPUTE_UNUSED(kernel); } + +void MidgardTuner::tune_kernel_dynamic(ICLKernel &kernel, const InputTensorMap &inputs, const OutputTensorMap &outputs) +{ + ARM_COMPUTE_UNUSED(kernel, inputs, outputs); +} } // namespace tuners } // namespace arm_compute |