aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAnthony Barbier <anthony.barbier@arm.com>2018-05-25 14:17:21 +0100
committerAnthony Barbier <anthony.barbier@arm.com>2018-11-02 16:53:09 +0000
commit376c85f3d826526b8b197c55e22c10765a97631e (patch)
tree049c80a392a404b2b3b29e8a245b580ea34ad9d6
parent8e74f4488daf1b628ca718396d5fc72fea95a83d (diff)
downloadComputeLibrary-376c85f3d826526b8b197c55e22c10765a97631e.tar.gz
COMPMID-1180: Add support for bucket multi-threading (Part2)
- Introduced some Hints allowing the function to set its favourite splitting method for a given workload - Implemented the bucket split (Disabled by default) Change-Id: I3a48dfb0bd0ec8b69a44d9c4a4c77ad3f6dc9827 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/133079 Tested-by: Jenkins <bsgcomp@arm.com> Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
-rw-r--r--arm_compute/runtime/CPP/CPPScheduler.h6
-rw-r--r--arm_compute/runtime/IScheduler.h72
-rw-r--r--arm_compute/runtime/OMP/OMPScheduler.h8
-rw-r--r--arm_compute/runtime/SingleThreadScheduler.h6
-rw-r--r--src/runtime/CPP/CPPScheduler.cpp29
-rw-r--r--src/runtime/CPP/SingleThreadScheduler.cpp4
-rw-r--r--src/runtime/OMP/OMPScheduler.cpp6
-rw-r--r--tests/framework/instruments/SchedulerTimer.cpp4
8 files changed, 109 insertions, 26 deletions
diff --git a/arm_compute/runtime/CPP/CPPScheduler.h b/arm_compute/runtime/CPP/CPPScheduler.h
index 6462ac6f2c..30bc4c8b70 100644
--- a/arm_compute/runtime/CPP/CPPScheduler.h
+++ b/arm_compute/runtime/CPP/CPPScheduler.h
@@ -56,10 +56,10 @@ public:
* - ICPPKernel::is_parallelisable() returns false
* - The scheduler has been initialized with only one thread.
*
- * @param[in] kernel Kernel to execute.
- * @param[in] split_dimension Dimension along which to split the kernel's execution window.
+ * @param[in] kernel Kernel to execute.
+ * @param[in] hints Hints for the scheduler.
*/
- void schedule(ICPPKernel *kernel, unsigned int split_dimension) override;
+ void schedule(ICPPKernel *kernel, const Hints &hints) override;
/** Will run the workloads in parallel using num_threads
*
diff --git a/arm_compute/runtime/IScheduler.h b/arm_compute/runtime/IScheduler.h
index 76ff5a3de0..1f90f4ef9c 100644
--- a/arm_compute/runtime/IScheduler.h
+++ b/arm_compute/runtime/IScheduler.h
@@ -36,6 +36,72 @@ class ICPPKernel;
class IScheduler
{
public:
+ /** Strategies available to split a workload
+ *
+ * Passed to a scheduler through Hints; it expresses a preference for how the
+ * kernel's execution window is divided into sub-windows.
+ */
+ enum class StrategyHint
+ {
+ STATIC, /**< Split the workload evenly among the threads (strategy used by Hints when none is given) */
+ DYNAMIC, /**< Split the workload dynamically using a bucket system */
+ };
+ /** Scheduler hints
+ *
+ * Collection of preferences set by the function regarding how to split a given workload.
+ *
+ * The constructor is not explicit, so a bare unsigned int split dimension converts
+ * implicitly to a Hints object that uses the StrategyHint::STATIC strategy.
+ */
+ class Hints
+ {
+ public:
+ /** Constructor
+ *
+ * @param[in] split_dimension Dimension along which to split the kernel's execution window.
+ * @param[in] strategy (Optional) Split strategy. Defaults to StrategyHint::STATIC.
+ */
+ Hints(unsigned int split_dimension, StrategyHint strategy = StrategyHint::STATIC)
+ : _split_dimension(split_dimension), _strategy(strategy)
+ {
+ }
+ /** Set the split_dimension hint
+ *
+ * @param[in] split_dimension Dimension along which to split the kernel's execution window.
+ *
+ * @return A reference to this Hints object, so that setter calls can be chained.
+ */
+ Hints &set_split_dimension(unsigned int split_dimension)
+ {
+ _split_dimension = split_dimension;
+ return *this;
+ }
+ /** Return the preferred split dimension
+ *
+ * @return The split dimension
+ */
+ unsigned int split_dimension() const
+ {
+ return _split_dimension;
+ }
+
+ /** Set the strategy hint
+ *
+ * @param[in] strategy Preferred strategy to use to split the workload
+ *
+ * @return A reference to this Hints object, so that setter calls can be chained.
+ */
+ Hints &set_strategy(StrategyHint strategy)
+ {
+ _strategy = strategy;
+ return *this;
+ }
+ /** Return the preferred strategy to use to split the workload.
+ *
+ * @return The strategy
+ */
+ StrategyHint strategy() const
+ {
+ return _strategy;
+ }
+
+ private:
+ unsigned int _split_dimension;
+ StrategyHint _strategy;
+ };
/** Signature for the workloads to execute */
using Workload = std::function<void(const ThreadInfo &)>;
/** Default constructor. */
@@ -58,10 +124,10 @@ public:
/** Runs the kernel in the same thread as the caller synchronously.
*
- * @param[in] kernel Kernel to execute.
- * @param[in] split_dimension Dimension along which to split the kernel's execution window.
+ * @param[in] kernel Kernel to execute.
+ * @param[in] hints Hints for the scheduler.
*/
- virtual void schedule(ICPPKernel *kernel, unsigned int split_dimension) = 0;
+ virtual void schedule(ICPPKernel *kernel, const Hints &hints) = 0;
/** Execute all the passed workloads
*
diff --git a/arm_compute/runtime/OMP/OMPScheduler.h b/arm_compute/runtime/OMP/OMPScheduler.h
index 21df6a699d..681a36aef5 100644
--- a/arm_compute/runtime/OMP/OMPScheduler.h
+++ b/arm_compute/runtime/OMP/OMPScheduler.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -53,10 +53,10 @@ public:
* - ICPPKernel::is_parallelisable() returns false
* - The scheduler has been initialized with only one thread.
*
- * @param[in] kernel Kernel to execute.
- * @param[in] split_dimension Dimension along which to split the kernel's execution window.
+ * @param[in] kernel Kernel to execute.
+ * @param[in] hints Hints for the scheduler.
*/
- void schedule(ICPPKernel *kernel, unsigned int split_dimension) override;
+ void schedule(ICPPKernel *kernel, const Hints &hints) override;
private:
/** Constructor. */
diff --git a/arm_compute/runtime/SingleThreadScheduler.h b/arm_compute/runtime/SingleThreadScheduler.h
index 5672b622f2..6924601903 100644
--- a/arm_compute/runtime/SingleThreadScheduler.h
+++ b/arm_compute/runtime/SingleThreadScheduler.h
@@ -49,10 +49,10 @@ public:
static SingleThreadScheduler &get();
/** Runs the kernel in the same thread as the caller synchronously.
*
- * @param[in] kernel Kernel to execute.
- * @param[in] split_dimension Dimension along which to split the kernel's execution window.
+ * @param[in] kernel Kernel to execute.
+ * @param[in] hints Hints for the scheduler.
*/
- void schedule(ICPPKernel *kernel, unsigned int split_dimension) override;
+ void schedule(ICPPKernel *kernel, const Hints &hints) override;
/** Will run the workloads sequentially and in order.
*
diff --git a/src/runtime/CPP/CPPScheduler.cpp b/src/runtime/CPP/CPPScheduler.cpp
index ab91b1071c..0da9892cb2 100644
--- a/src/runtime/CPP/CPPScheduler.cpp
+++ b/src/runtime/CPP/CPPScheduler.cpp
@@ -270,12 +270,12 @@ void CPPScheduler::run_workloads(std::vector<IScheduler::Workload> &workloads)
}
}
-void CPPScheduler::schedule(ICPPKernel *kernel, unsigned int split_dimension)
+void CPPScheduler::schedule(ICPPKernel *kernel, const Hints &hints)
{
ARM_COMPUTE_ERROR_ON_MSG(!kernel, "The child class didn't set the kernel");
const Window &max_window = kernel->window();
- const unsigned int num_iterations = max_window.num_iterations(split_dimension);
+ const unsigned int num_iterations = max_window.num_iterations(hints.split_dimension());
const unsigned int num_threads = std::min(num_iterations, _num_threads);
if(num_iterations == 0)
@@ -291,12 +291,29 @@ void CPPScheduler::schedule(ICPPKernel *kernel, unsigned int split_dimension)
}
else
{
- std::vector<IScheduler::Workload> workloads(num_threads);
- for(unsigned int t = 0; t < num_threads; t++)
+ unsigned int num_windows = 0;
+ switch(hints.strategy())
{
- workloads[t] = [&](const ThreadInfo & info)
+ case StrategyHint::STATIC:
+ num_windows = num_threads;
+ break;
+ case StrategyHint::DYNAMIC:
{
- Window win = max_window.split_window(split_dimension, info.thread_id, info.num_threads);
+ // Cap the number of windows at 3 per thread: windows which are too small might create some contention on the ThreadFeeder
+ const unsigned int max_iterations = static_cast<unsigned int>(_num_threads) * 3;
+ num_windows = num_iterations > max_iterations ? max_iterations : num_iterations;
+ break;
+ }
+ default:
+ ARM_COMPUTE_ERROR("Unknown strategy");
+ }
+ std::vector<IScheduler::Workload> workloads(num_windows);
+ for(unsigned int t = 0; t < num_windows; t++)
+ {
+ //Capture 't' by copy, all the other variables by reference:
+ workloads[t] = [t, &hints, &max_window, &num_windows, &kernel](const ThreadInfo & info)
+ {
+ Window win = max_window.split_window(hints.split_dimension(), t, num_windows);
win.validate();
kernel->run(win, info);
};
diff --git a/src/runtime/CPP/SingleThreadScheduler.cpp b/src/runtime/CPP/SingleThreadScheduler.cpp
index 6099e2cab5..37011595fd 100644
--- a/src/runtime/CPP/SingleThreadScheduler.cpp
+++ b/src/runtime/CPP/SingleThreadScheduler.cpp
@@ -41,9 +41,9 @@ void SingleThreadScheduler::set_num_threads(unsigned int num_threads)
ARM_COMPUTE_ERROR_ON(num_threads != 1);
}
-void SingleThreadScheduler::schedule(ICPPKernel *kernel, unsigned int split_dimension)
+void SingleThreadScheduler::schedule(ICPPKernel *kernel, const Hints &hints)
{
- ARM_COMPUTE_UNUSED(split_dimension);
+ ARM_COMPUTE_UNUSED(hints);
ThreadInfo info;
info.cpu_info = &_cpu_info;
kernel->run(kernel->window(), info);
diff --git a/src/runtime/OMP/OMPScheduler.cpp b/src/runtime/OMP/OMPScheduler.cpp
index 795c96caf0..43a8a6de54 100644
--- a/src/runtime/OMP/OMPScheduler.cpp
+++ b/src/runtime/OMP/OMPScheduler.cpp
@@ -56,7 +56,7 @@ void OMPScheduler::set_num_threads(unsigned int num_threads)
_num_threads = (num_threads == 0) ? num_cores : num_threads;
}
-void OMPScheduler::schedule(ICPPKernel *kernel, unsigned int split_dimension)
+void OMPScheduler::schedule(ICPPKernel *kernel, const Hints &hints)
{
ARM_COMPUTE_ERROR_ON_MSG(!kernel, "The child class didn't set the kernel");
@@ -64,7 +64,7 @@ void OMPScheduler::schedule(ICPPKernel *kernel, unsigned int split_dimension)
info.cpu_info = &_cpu_info;
const Window &max_window = kernel->window();
- const unsigned int num_iterations = max_window.num_iterations(split_dimension);
+ const unsigned int num_iterations = max_window.num_iterations(hints.split_dimension());
info.num_threads = std::min(num_iterations, _num_threads);
if(!kernel->is_parallelisable() || info.num_threads == 1)
@@ -76,7 +76,7 @@ void OMPScheduler::schedule(ICPPKernel *kernel, unsigned int split_dimension)
#pragma omp parallel firstprivate(info) num_threads(info.num_threads)
{
const int tid = omp_get_thread_num();
- Window win = max_window.split_window(split_dimension, tid, info.num_threads);
+ Window win = max_window.split_window(hints.split_dimension(), tid, info.num_threads);
info.thread_id = tid;
kernel->run(win, info);
}
diff --git a/tests/framework/instruments/SchedulerTimer.cpp b/tests/framework/instruments/SchedulerTimer.cpp
index 49d94d76eb..50d77dd5b9 100644
--- a/tests/framework/instruments/SchedulerTimer.cpp
+++ b/tests/framework/instruments/SchedulerTimer.cpp
@@ -63,10 +63,10 @@ public:
_prefix = std::move(prefix);
}
- void schedule(ICPPKernel *kernel, unsigned int split_dimension) override
+ void schedule(ICPPKernel *kernel, const Hints &hints) override
{
_timer.start();
- _real_scheduler.schedule(kernel, split_dimension);
+ _real_scheduler.schedule(kernel, hints.split_dimension());
_timer.stop();
SchedulerTimer::kernel_info info;