aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGiorgio Arena <giorgio.arena@arm.com>2021-09-06 14:32:19 +0100
committerGiorgio Arena <giorgio.arena@arm.com>2021-09-06 16:22:26 +0000
commit8afe6c7631891601897d7dd5b9ac68375ac01b19 (patch)
tree2d13f9118a59cef22be841892550dce8c8148650
parent2c74f138878c5cffc5a9fceb411a9d1592241080 (diff)
downloadComputeLibrary-8afe6c7631891601897d7dd5b9ac68375ac01b19.tar.gz
OpenCL job chaining support
Resolve COMPMID-4714 Change-Id: I53b74956da10a8d7ffa2c5681f3b7f74acaa3201 Signed-off-by: Giorgio Arena <giorgio.arena@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6215 Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r--arm_compute/runtime/CL/CLScheduler.h13
-rw-r--r--src/runtime/CL/CLScheduler.cpp19
2 files changed, 28 insertions, 4 deletions
diff --git a/arm_compute/runtime/CL/CLScheduler.h b/arm_compute/runtime/CL/CLScheduler.h
index 56852aec6e..37d9e2ad05 100644
--- a/arm_compute/runtime/CL/CLScheduler.h
+++ b/arm_compute/runtime/CL/CLScheduler.h
@@ -76,14 +76,14 @@ public:
/** Schedule the execution of the passed kernel if possible.
*
* @param[in] kernel Kernel to execute.
- * @param[in] flush (Optional) Specifies if the command queue will be flushed after running the kernel.
+ * @param[in] flush (Optional) Specifies if the command queue will be flushed after running the kernel. This will be ignored if job chaining is enabled.
*/
void enqueue(ICLKernel &kernel, bool flush = true);
/** Schedule the execution of the passed kernel if possible.
*
* @param[in] kernel Kernel to execute.
* @param[in] tensors Vector containing the tensors to operate on.
- * @param[in] flush (Optional) Specifies if the command queue will be flushed after running the kernel.
+ * @param[in] flush (Optional) Specifies if the command queue will be flushed after running the kernel. This will be ignored if job chaining is enabled.
*/
void enqueue_op(ICLKernel &kernel, ITensorPack &tensors, bool flush = true);
@@ -163,6 +163,12 @@ public:
*/
void tune_kernel_static(ICLKernel &kernel);
+ /** Enable job chaining. The command queue will only be flushed when @p job_chaining_size kernels have been enqueued.
+ *
+ * @param[in] job_chaining_size Number of kernels to enqueue before the command queue is flushed.
+ */
+ void enable_job_chaining(int job_chaining_size);
+
bool is_initialised() const;
private:
@@ -177,6 +183,9 @@ private:
ICLTuner *_cl_tuner;
CLGEMMHeuristicsHandle *_gemm_heuristics;
CLBackendType _backend_type;
+ bool _job_chaining_enabled;
+ int _job_chaining_size;
+ int _job_chaining_count;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_CLSCHEDULER_H */
diff --git a/src/runtime/CL/CLScheduler.cpp b/src/runtime/CL/CLScheduler.cpp
index cb5f04ce8b..9d340438b8 100644
--- a/src/runtime/CL/CLScheduler.cpp
+++ b/src/runtime/CL/CLScheduler.cpp
@@ -95,7 +95,8 @@ bool CLScheduler::is_initialised() const
std::once_flag CLScheduler::_initialize_symbols;
CLScheduler::CLScheduler()
- : _context(), _queue(), _target(GPUTarget::MIDGARD), _is_initialised(false), _cl_tuner(nullptr), _gemm_heuristics(nullptr), _backend_type(CLBackendType::Native)
+ : _context(), _queue(), _target(GPUTarget::MIDGARD), _is_initialised(false), _cl_tuner(nullptr), _gemm_heuristics(nullptr), _backend_type(CLBackendType::Native), _job_chaining_enabled(false),
+ _job_chaining_size(), _job_chaining_count(0) // Job chaining starts disabled; the size is value-initialised (0 for int) and the enqueued-kernel counter starts at 0.
{
}
@@ -170,7 +171,15 @@ void CLScheduler::enqueue_common(ICLKernel &kernel, ITensorPack &tensors, bool f
// Run kernel
inject_memory ? kernel.run_op(tensors, kernel.window(), _queue) : kernel.run(kernel.window(), _queue);
- if(flush)
+ if(_job_chaining_enabled)
+ {
+ if(++_job_chaining_count >= _job_chaining_size)
+ {
+ _job_chaining_count = 0;
+ _queue.flush();
+ }
+ }
+ else if(flush)
{
_queue.flush();
}
@@ -186,4 +195,10 @@ void CLScheduler::enqueue_op(ICLKernel &kernel, ITensorPack &tensors, bool flush
{
enqueue_common(kernel, tensors, flush);
}
+
+void CLScheduler::enable_job_chaining(int job_chaining_size) // Turn on job chaining and record how many kernels to enqueue between flushes.
+{
+ _job_chaining_enabled = true; // From now on enqueue_common() ignores its flush argument and flushes by kernel count instead.
+ _job_chaining_size = job_chaining_size; // Flush threshold; a value <= 1 makes every enqueue flush. _job_chaining_count is not reset here.
+}
} // namespace arm_compute