diff options
author | Giorgio Arena <giorgio.arena@arm.com> | 2021-09-06 14:32:19 +0100 |
---|---|---|
committer | Giorgio Arena <giorgio.arena@arm.com> | 2021-09-06 16:22:26 +0000 |
commit | 8afe6c7631891601897d7dd5b9ac68375ac01b19 (patch) | |
tree | 2d13f9118a59cef22be841892550dce8c8148650 | |
parent | 2c74f138878c5cffc5a9fceb411a9d1592241080 (diff) | |
download | ComputeLibrary-8afe6c7631891601897d7dd5b9ac68375ac01b19.tar.gz |
OpenCL job chaining support
Resolve COMPMID-4714
Change-Id: I53b74956da10a8d7ffa2c5681f3b7f74acaa3201
Signed-off-by: Giorgio Arena <giorgio.arena@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6215
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r-- | arm_compute/runtime/CL/CLScheduler.h | 13 | ||||
-rw-r--r-- | src/runtime/CL/CLScheduler.cpp | 19 |
2 files changed, 28 insertions, 4 deletions
diff --git a/arm_compute/runtime/CL/CLScheduler.h b/arm_compute/runtime/CL/CLScheduler.h
index 56852aec6e..37d9e2ad05 100644
--- a/arm_compute/runtime/CL/CLScheduler.h
+++ b/arm_compute/runtime/CL/CLScheduler.h
@@ -76,14 +76,14 @@ public:
     /** Schedule the execution of the passed kernel if possible.
      *
      * @param[in] kernel Kernel to execute.
-     * @param[in] flush (Optional) Specifies if the command queue will be flushed after running the kernel.
+     * @param[in] flush (Optional) Specifies if the command queue will be flushed after running the kernel. This will be ignored if job chaining is enabled.
      */
     void enqueue(ICLKernel &kernel, bool flush = true);
     /** Schedule the execution of the passed kernel if possible.
      *
      * @param[in] kernel Kernel to execute.
      * @param[in] tensors Vector containing the tensors to operate on.
-     * @param[in] flush (Optional) Specifies if the command queue will be flushed after running the kernel.
+     * @param[in] flush (Optional) Specifies if the command queue will be flushed after running the kernel. This will be ignored if job chaining is enabled.
      */
     void enqueue_op(ICLKernel &kernel, ITensorPack &tensors, bool flush = true);
 
@@ -163,6 +163,12 @@ public:
      */
     void tune_kernel_static(ICLKernel &kernel);
 
+    /** Enable job chaining. The command queue will only be flushed when @p job_chaining_size kernels have been enqueued.
+     *
+     * @param[in] job_chaining_size Kernels to enqueue before flushing
+     */
+    void enable_job_chaining(int job_chaining_size);
+
     bool is_initialised() const;
 
 private:
@@ -177,6 +183,9 @@ private:
     ICLTuner *_cl_tuner;
     CLGEMMHeuristicsHandle *_gemm_heuristics;
     CLBackendType _backend_type;
+    bool _job_chaining_enabled;
+    int _job_chaining_size;
+    int _job_chaining_count;
 };
 } // namespace arm_compute
 #endif /* ARM_COMPUTE_CLSCHEDULER_H */
diff --git a/src/runtime/CL/CLScheduler.cpp b/src/runtime/CL/CLScheduler.cpp
index cb5f04ce8b..9d340438b8 100644
--- a/src/runtime/CL/CLScheduler.cpp
+++ b/src/runtime/CL/CLScheduler.cpp
@@ -95,7 +95,8 @@ bool CLScheduler::is_initialised() const
 std::once_flag CLScheduler::_initialize_symbols;
 
 CLScheduler::CLScheduler()
-    : _context(), _queue(), _target(GPUTarget::MIDGARD), _is_initialised(false), _cl_tuner(nullptr), _gemm_heuristics(nullptr), _backend_type(CLBackendType::Native)
+    : _context(), _queue(), _target(GPUTarget::MIDGARD), _is_initialised(false), _cl_tuner(nullptr), _gemm_heuristics(nullptr), _backend_type(CLBackendType::Native), _job_chaining_enabled(false),
+      _job_chaining_size(), _job_chaining_count(0)
 {
 }
 
@@ -170,7 +171,15 @@ void CLScheduler::enqueue_common(ICLKernel &kernel, ITensorPack &tensors, bool f
     // Run kernel
     inject_memory ? kernel.run_op(tensors, kernel.window(), _queue) : kernel.run(kernel.window(), _queue);
 
-    if(flush)
+    if(_job_chaining_enabled)
+    {
+        if(++_job_chaining_count >= _job_chaining_size)
+        {
+            _job_chaining_count = 0;
+            _queue.flush();
+        }
+    }
+    else if(flush)
     {
         _queue.flush();
     }
@@ -186,4 +195,10 @@ void CLScheduler::enqueue_op(ICLKernel &kernel, ITensorPack &tensors, bool flush
 {
     enqueue_common(kernel, tensors, flush);
 }
+
+void CLScheduler::enable_job_chaining(int job_chaining_size)
+{
+    _job_chaining_enabled = true;
+    _job_chaining_size = job_chaining_size;
+}
 } // namespace arm_compute