From 376c85f3d826526b8b197c55e22c10765a97631e Mon Sep 17 00:00:00 2001 From: Anthony Barbier Date: Fri, 25 May 2018 14:17:21 +0100 Subject: COMPMID-1180: Add support for bucket multi-threading (Part2) - Introduced some Hints allowing the function to set its favourite splitting method for a given workload - Implemented the bucket split (Disabled by default) Change-Id: I3a48dfb0bd0ec8b69a44d9c4a4c77ad3f6dc9827 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/133079 Tested-by: Jenkins Reviewed-by: Gian Marco Iodice --- arm_compute/runtime/CPP/CPPScheduler.h | 6 +-- arm_compute/runtime/IScheduler.h | 72 ++++++++++++++++++++++++-- arm_compute/runtime/OMP/OMPScheduler.h | 8 +-- arm_compute/runtime/SingleThreadScheduler.h | 6 +-- src/runtime/CPP/CPPScheduler.cpp | 29 ++++++++--- src/runtime/CPP/SingleThreadScheduler.cpp | 4 +- src/runtime/OMP/OMPScheduler.cpp | 6 +-- tests/framework/instruments/SchedulerTimer.cpp | 4 +- 8 files changed, 109 insertions(+), 26 deletions(-) diff --git a/arm_compute/runtime/CPP/CPPScheduler.h b/arm_compute/runtime/CPP/CPPScheduler.h index 6462ac6f2c..30bc4c8b70 100644 --- a/arm_compute/runtime/CPP/CPPScheduler.h +++ b/arm_compute/runtime/CPP/CPPScheduler.h @@ -56,10 +56,10 @@ public: * - ICPPKernel::is_parallelisable() returns false * - The scheduler has been initialized with only one thread. * - * @param[in] kernel Kernel to execute. - * @param[in] split_dimension Dimension along which to split the kernel's execution window. + * @param[in] kernel Kernel to execute. + * @param[in] hints Hints for the scheduler. 
*/ - void schedule(ICPPKernel *kernel, unsigned int split_dimension) override; + void schedule(ICPPKernel *kernel, const Hints &hints) override; /** Will run the workloads in parallel using num_threads * diff --git a/arm_compute/runtime/IScheduler.h b/arm_compute/runtime/IScheduler.h index 76ff5a3de0..1f90f4ef9c 100644 --- a/arm_compute/runtime/IScheduler.h +++ b/arm_compute/runtime/IScheduler.h @@ -36,6 +36,72 @@ class ICPPKernel; class IScheduler { public: + /** Strategies available to split a workload */ + enum class StrategyHint + { + STATIC, /**< Split the workload evenly among the threads */ + DYNAMIC, /**< Split the workload dynamically using a bucket system */ + }; + /** Scheduler hints + * + * Collection of preferences set by the function regarding how to split a given workload + */ + class Hints + { + public: + /** Constructor + * + * @param[in] split_dimension Dimension along which to split the kernel's execution window. + * @param[in] strategy (Optional) Split strategy. + */ + Hints(unsigned int split_dimension, StrategyHint strategy = StrategyHint::STATIC) + : _split_dimension(split_dimension), _strategy(strategy) + { + } + /** Set the split_dimension hint + * + * @param[in] split_dimension Dimension along which to split the kernel's execution window. + * + * @return the Hints object + */ + Hints &set_split_dimension(unsigned int split_dimension) + { + _split_dimension = split_dimension; + return *this; + } + /** Return the prefered split dimension + * + * @return The split dimension + */ + unsigned int split_dimension() const + { + return _split_dimension; + } + + /** Set the strategy hint + * + * @param[in] strategy Prefered strategy to use to split the workload + * + * @return the Hints object + */ + Hints &set_strategy(StrategyHint strategy) + { + _strategy = strategy; + return *this; + } + /** Return the prefered strategy to use to split workload. 
+ * + * @return The strategy + */ + StrategyHint strategy() const + { + return _strategy; + } + + private: + unsigned int _split_dimension; + StrategyHint _strategy; + }; /** Signature for the workloads to execute */ using Workload = std::function<void(const ThreadInfo &)>; /** Default constructor. */ @@ -58,10 +124,10 @@ public: /** Runs the kernel in the same thread as the caller synchronously. * - * @param[in] kernel Kernel to execute. - * @param[in] split_dimension Dimension along which to split the kernel's execution window. + * @param[in] kernel Kernel to execute. + * @param[in] hints Hints for the scheduler. */ - virtual void schedule(ICPPKernel *kernel, unsigned int split_dimension) = 0; + virtual void schedule(ICPPKernel *kernel, const Hints &hints) = 0; /** Execute all the passed workloads * diff --git a/arm_compute/runtime/OMP/OMPScheduler.h b/arm_compute/runtime/OMP/OMPScheduler.h index 21df6a699d..681a36aef5 100644 --- a/arm_compute/runtime/OMP/OMPScheduler.h +++ b/arm_compute/runtime/OMP/OMPScheduler.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -53,10 +53,10 @@ public: * - ICPPKernel::is_parallelisable() returns false * - The scheduler has been initialized with only one thread. * - * @param[in] kernel Kernel to execute. - * @param[in] split_dimension Dimension along which to split the kernel's execution window. + * @param[in] kernel Kernel to execute. + * @param[in] hints Hints for the scheduler. */ - void schedule(ICPPKernel *kernel, unsigned int split_dimension) override; + void schedule(ICPPKernel *kernel, const Hints &hints) override; private: /** Constructor. 
*/ diff --git a/arm_compute/runtime/SingleThreadScheduler.h b/arm_compute/runtime/SingleThreadScheduler.h index 5672b622f2..6924601903 100644 --- a/arm_compute/runtime/SingleThreadScheduler.h +++ b/arm_compute/runtime/SingleThreadScheduler.h @@ -49,10 +49,10 @@ public: static SingleThreadScheduler &get(); /** Runs the kernel in the same thread as the caller synchronously. * - * @param[in] kernel Kernel to execute. - * @param[in] split_dimension Dimension along which to split the kernel's execution window. + * @param[in] kernel Kernel to execute. + * @param[in] hints Hints for the scheduler. */ - void schedule(ICPPKernel *kernel, unsigned int split_dimension) override; + void schedule(ICPPKernel *kernel, const Hints &hints) override; /** Will run the workloads sequentially and in order. * diff --git a/src/runtime/CPP/CPPScheduler.cpp b/src/runtime/CPP/CPPScheduler.cpp index ab91b1071c..0da9892cb2 100644 --- a/src/runtime/CPP/CPPScheduler.cpp +++ b/src/runtime/CPP/CPPScheduler.cpp @@ -270,12 +270,12 @@ void CPPScheduler::run_workloads(std::vector<IScheduler::Workload> &workloads) } } -void CPPScheduler::schedule(ICPPKernel *kernel, unsigned int split_dimension) +void CPPScheduler::schedule(ICPPKernel *kernel, const Hints &hints) { ARM_COMPUTE_ERROR_ON_MSG(!kernel, "The child class didn't set the kernel"); const Window &max_window = kernel->window(); - const unsigned int num_iterations = max_window.num_iterations(split_dimension); + const unsigned int num_iterations = max_window.num_iterations(hints.split_dimension()); const unsigned int num_threads = std::min(num_iterations, _num_threads); if(num_iterations == 0) @@ -291,12 +291,29 @@ void CPPScheduler::schedule(ICPPKernel *kernel, unsigned int split_dimension) } else { - std::vector<IScheduler::Workload> workloads(num_threads); - for(unsigned int t = 0; t < num_threads; t++) + unsigned int num_windows = 0; + switch(hints.strategy()) { - workloads[t] = [&](const ThreadInfo & info) + case StrategyHint::STATIC: + num_windows = num_threads; + break; + case 
StrategyHint::DYNAMIC: { - Window win = max_window.split_window(split_dimension, info.thread_id, info.num_threads); + // Make sure we don't use some windows which are too small as this might create some contention on the ThreadFeeder + const unsigned int max_iterations = static_cast<unsigned int>(_num_threads) * 3; + num_windows = num_iterations > max_iterations ? max_iterations : num_iterations; + break; + } + default: + ARM_COMPUTE_ERROR("Unknown strategy"); + } + std::vector<IScheduler::Workload> workloads(num_windows); + for(unsigned int t = 0; t < num_windows; t++) + { + //Capture 't' by copy, all the other variables by reference: + workloads[t] = [t, &hints, &max_window, &num_windows, &kernel](const ThreadInfo & info) + { + Window win = max_window.split_window(hints.split_dimension(), t, num_windows); win.validate(); kernel->run(win, info); }; diff --git a/src/runtime/CPP/SingleThreadScheduler.cpp b/src/runtime/CPP/SingleThreadScheduler.cpp index 6099e2cab5..37011595fd 100644 --- a/src/runtime/CPP/SingleThreadScheduler.cpp +++ b/src/runtime/CPP/SingleThreadScheduler.cpp @@ -41,9 +41,9 @@ void SingleThreadScheduler::set_num_threads(unsigned int num_threads) ARM_COMPUTE_ERROR_ON(num_threads != 1); } -void SingleThreadScheduler::schedule(ICPPKernel *kernel, unsigned int split_dimension) +void SingleThreadScheduler::schedule(ICPPKernel *kernel, const Hints &hints) { - ARM_COMPUTE_UNUSED(split_dimension); + ARM_COMPUTE_UNUSED(hints); ThreadInfo info; info.cpu_info = &_cpu_info; kernel->run(kernel->window(), info); diff --git a/src/runtime/OMP/OMPScheduler.cpp b/src/runtime/OMP/OMPScheduler.cpp index 795c96caf0..43a8a6de54 100644 --- a/src/runtime/OMP/OMPScheduler.cpp +++ b/src/runtime/OMP/OMPScheduler.cpp @@ -56,7 +56,7 @@ void OMPScheduler::set_num_threads(unsigned int num_threads) _num_threads = (num_threads == 0) ? 
num_cores : num_threads; } -void OMPScheduler::schedule(ICPPKernel *kernel, unsigned int split_dimension) +void OMPScheduler::schedule(ICPPKernel *kernel, const Hints &hints) { ARM_COMPUTE_ERROR_ON_MSG(!kernel, "The child class didn't set the kernel"); @@ -64,7 +64,7 @@ void OMPScheduler::schedule(ICPPKernel *kernel, unsigned int split_dimension) info.cpu_info = &_cpu_info; const Window &max_window = kernel->window(); - const unsigned int num_iterations = max_window.num_iterations(split_dimension); + const unsigned int num_iterations = max_window.num_iterations(hints.split_dimension()); info.num_threads = std::min(num_iterations, _num_threads); if(!kernel->is_parallelisable() || info.num_threads == 1) @@ -76,7 +76,7 @@ void OMPScheduler::schedule(ICPPKernel *kernel, unsigned int split_dimension) #pragma omp parallel firstprivate(info) num_threads(info.num_threads) { const int tid = omp_get_thread_num(); - Window win = max_window.split_window(split_dimension, tid, info.num_threads); + Window win = max_window.split_window(hints.split_dimension(), tid, info.num_threads); info.thread_id = tid; kernel->run(win, info); } diff --git a/tests/framework/instruments/SchedulerTimer.cpp b/tests/framework/instruments/SchedulerTimer.cpp index 49d94d76eb..50d77dd5b9 100644 --- a/tests/framework/instruments/SchedulerTimer.cpp +++ b/tests/framework/instruments/SchedulerTimer.cpp @@ -63,10 +63,10 @@ public: _prefix = std::move(prefix); } - void schedule(ICPPKernel *kernel, unsigned int split_dimension) override + void schedule(ICPPKernel *kernel, const Hints &hints) override { _timer.start(); - _real_scheduler.schedule(kernel, split_dimension); + _real_scheduler.schedule(kernel, hints.split_dimension()); _timer.stop(); SchedulerTimer::kernel_info info; -- cgit v1.2.1