diff options
author | SiCong Li <sicong.li@arm.com> | 2022-04-07 17:41:51 +0100 |
---|---|---|
committer | SiCong Li <sicong.li@arm.com> | 2022-04-14 12:59:27 +0000 |
commit | 0a486cf66c70b4bd9b0ea8ba9dc5b42f52ed16c3 (patch) | |
tree | 88eea0a182ea6dd8ec45b6aca7843d98cfd73764 /src/runtime/CL/CLScheduler.cpp | |
parent | ca364dfd87cab4cdb9179b68c42f10ff16e55002 (diff) | |
download | ComputeLibrary-0a486cf66c70b4bd9b0ea8ba9dc5b42f52ed16c3.tar.gz |
Enable dynamic cl tuning for dynamically fused kernels
* Add new tune_kernel_dynamic interface
* Add generate_config_id
Resolves: COMPMID-5154
Signed-off-by: SiCong Li <sicong.li@arm.com>
Change-Id: I39870e59fceda875487970061ceb2048995c5a45
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/7400
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Giorgio Arena <giorgio.arena@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/runtime/CL/CLScheduler.cpp')
-rw-r--r-- | src/runtime/CL/CLScheduler.cpp | 38 |
1 files changed, 23 insertions, 15 deletions
diff --git a/src/runtime/CL/CLScheduler.cpp b/src/runtime/CL/CLScheduler.cpp index 18fd52232d..4cff707f1a 100644 --- a/src/runtime/CL/CLScheduler.cpp +++ b/src/runtime/CL/CLScheduler.cpp @@ -181,19 +181,12 @@ void CLScheduler::enqueue_common(ICLKernel &kernel, ITensorPack &tensors, bool f // Run kernel inject_memory ? kernel.run_op(tensors, kernel.window(), _queue) : kernel.run(kernel.window(), _queue); - if(_job_chaining_enabled) { - if(++_job_chaining_count >= _job_chaining_size) - { - _job_chaining_count = 0; - _queue.flush(); - } - } - else if(flush) - { - _queue.flush(); + ++_job_chaining_count; } + + flush_queue(flush); } #if defined(ENABLE_EXPERIMENTAL_DYNAMIC_FUSION) @@ -204,14 +197,31 @@ void CLScheduler::enqueue_common(ICLKernel &kernel, experimental::dynamic_fusion "The CLScheduler is not initialised yet! Please call the CLScheduler::get().default_init(), \ or CLScheduler::get()::init() and CLKernelLibrary::get()::init() function before running functions!"); - const bool inject_memory = !tensors._binding.empty(); + // ClCompositeKernel is stateless thus alway requires memory injection + + // Tune the kernel if the CLTuner has been provided + if(_cl_tuner != nullptr) + { + _cl_tuner->tune_kernel_dynamic(kernel, tensors, exec_desc); + } // Run kernel - inject_memory ? kernel.run_composite_op(tensors, kernel.window(), _queue, exec_desc) : kernel.run(kernel.window(), _queue); + kernel.run_composite_op(tensors, kernel.window(), _queue, exec_desc); + if(_job_chaining_enabled) + { + ++_job_chaining_count; + } + + flush_queue(flush); +} + +#endif // defined(ENABLE_EXPERIMENTAL_DYNAMIC_FUSION) +void CLScheduler::flush_queue(bool flush) +{ if(_job_chaining_enabled) { - if(++_job_chaining_count >= _job_chaining_size) + if(_job_chaining_count >= _job_chaining_size) { _job_chaining_count = 0; _queue.flush(); @@ -223,8 +233,6 @@ void CLScheduler::enqueue_common(ICLKernel &kernel, experimental::dynamic_fusion } } -#endif // defined(ENABLE_EXPERIMENTAL_DYNAMIC_FUSION) - void CLScheduler::enqueue(ICLKernel &kernel, bool flush) { ITensorPack pack; |