From 8afe6c7631891601897d7dd5b9ac68375ac01b19 Mon Sep 17 00:00:00 2001
From: Giorgio Arena
Date: Mon, 6 Sep 2021 14:32:19 +0100
Subject: OpenCL job chaining support

Resolve COMPMID-4714

Change-Id: I53b74956da10a8d7ffa2c5681f3b7f74acaa3201
Signed-off-by: Giorgio Arena
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6215
Reviewed-by: Gian Marco Iodice
Comments-Addressed: Arm Jenkins
Tested-by: Arm Jenkins
---
 arm_compute/runtime/CL/CLScheduler.h | 13 +++++++++++--
 src/runtime/CL/CLScheduler.cpp       | 19 +++++++++++++++++--
 2 files changed, 28 insertions(+), 4 deletions(-)

diff --git a/arm_compute/runtime/CL/CLScheduler.h b/arm_compute/runtime/CL/CLScheduler.h
index 56852aec6e..37d9e2ad05 100644
--- a/arm_compute/runtime/CL/CLScheduler.h
+++ b/arm_compute/runtime/CL/CLScheduler.h
@@ -76,14 +76,14 @@ public:
     /** Schedule the execution of the passed kernel if possible.
      *
      * @param[in] kernel Kernel to execute.
-     * @param[in] flush  (Optional) Specifies if the command queue will be flushed after running the kernel.
+     * @param[in] flush  (Optional) Specifies if the command queue will be flushed after running the kernel. This will be ignored if job chaining is enabled.
      */
     void enqueue(ICLKernel &kernel, bool flush = true);
     /** Schedule the execution of the passed kernel if possible.
      *
      * @param[in] kernel  Kernel to execute.
      * @param[in] tensors Vector containing the tensors to operate on.
-     * @param[in] flush   (Optional) Specifies if the command queue will be flushed after running the kernel.
+     * @param[in] flush   (Optional) Specifies if the command queue will be flushed after running the kernel. This will be ignored if job chaining is enabled.
      */
     void enqueue_op(ICLKernel &kernel, ITensorPack &tensors, bool flush = true);
 
@@ -163,6 +163,12 @@ public:
      */
     void tune_kernel_static(ICLKernel &kernel);
 
+    /** Enable job chaining. The command queue will only be flushed when @p job_chaining_size kernels have been enqueued.
+     *
+     * @param[in] job_chaining_size Kernels to enqueue before flushing
+     */
+    void enable_job_chaining(int job_chaining_size);
+
     bool is_initialised() const;
 
 private:
@@ -177,6 +183,9 @@ private:
     ICLTuner               *_cl_tuner;
     CLGEMMHeuristicsHandle *_gemm_heuristics;
     CLBackendType           _backend_type;
+    bool                    _job_chaining_enabled;
+    int                     _job_chaining_size;
+    int                     _job_chaining_count;
 };
 } // namespace arm_compute
 #endif /* ARM_COMPUTE_CLSCHEDULER_H */
diff --git a/src/runtime/CL/CLScheduler.cpp b/src/runtime/CL/CLScheduler.cpp
index cb5f04ce8b..9d340438b8 100644
--- a/src/runtime/CL/CLScheduler.cpp
+++ b/src/runtime/CL/CLScheduler.cpp
@@ -95,7 +95,8 @@ bool CLScheduler::is_initialised() const
 std::once_flag CLScheduler::_initialize_symbols;
 
 CLScheduler::CLScheduler()
-    : _context(), _queue(), _target(GPUTarget::MIDGARD), _is_initialised(false), _cl_tuner(nullptr), _gemm_heuristics(nullptr), _backend_type(CLBackendType::Native)
+    : _context(), _queue(), _target(GPUTarget::MIDGARD), _is_initialised(false), _cl_tuner(nullptr), _gemm_heuristics(nullptr), _backend_type(CLBackendType::Native), _job_chaining_enabled(false),
+      _job_chaining_size(), _job_chaining_count(0)
 {
 }
 
@@ -170,7 +171,15 @@ void CLScheduler::enqueue_common(ICLKernel &kernel, ITensorPack &tensors, bool f
     // Run kernel
     inject_memory ? kernel.run_op(tensors, kernel.window(), _queue) : kernel.run(kernel.window(), _queue);
 
-    if(flush)
+    if(_job_chaining_enabled)
+    {
+        if(++_job_chaining_count >= _job_chaining_size)
+        {
+            _job_chaining_count = 0;
+            _queue.flush();
+        }
+    }
+    else if(flush)
     {
         _queue.flush();
     }
@@ -186,4 +195,10 @@ void CLScheduler::enqueue_op(ICLKernel &kernel, ITensorPack &tensors, bool flush
 {
     enqueue_common(kernel, tensors, flush);
 }
+
+void CLScheduler::enable_job_chaining(int job_chaining_size)
+{
+    _job_chaining_enabled = true;
+    _job_chaining_size = job_chaining_size;
+}
 } // namespace arm_compute
-- 
cgit v1.2.1
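
Note (not part of the patch): a minimal sketch of how a caller might use the new entry point, assuming the usual arm_compute::CLScheduler singleton flow (CLScheduler::get(), default_init(), sync()). The batch size of 16 is an arbitrary example value, not a recommendation from the change.

    #include "arm_compute/runtime/CL/CLScheduler.h"

    using namespace arm_compute;

    int main()
    {
        // Set up the scheduler's default CL context and command queue.
        CLScheduler::get().default_init();

        // Flush the command queue only after every 16 enqueued kernels; while
        // job chaining is enabled, the per-call 'flush' argument of
        // enqueue()/enqueue_op() is ignored.
        CLScheduler::get().enable_job_chaining(16);

        // ... configure and run CL functions as usual; the kernels they enqueue
        // are now batched into chains of 16 before each flush ...

        // Block until all enqueued work has completed before reading results.
        CLScheduler::get().sync();
        return 0;
    }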