From 376c85f3d826526b8b197c55e22c10765a97631e Mon Sep 17 00:00:00 2001 From: Anthony Barbier Date: Fri, 25 May 2018 14:17:21 +0100 Subject: COMPMID-1180: Add support for bucket multi-threading (Part2) - Introduced some Hints allowing the function to set its favourite splitting method for a given workload - Implemented the bucket split (Disabled by default) Change-Id: I3a48dfb0bd0ec8b69a44d9c4a4c77ad3f6dc9827 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/133079 Tested-by: Jenkins Reviewed-by: Gian Marco Iodice --- src/runtime/CPP/CPPScheduler.cpp | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) (limited to 'src/runtime/CPP/CPPScheduler.cpp') diff --git a/src/runtime/CPP/CPPScheduler.cpp b/src/runtime/CPP/CPPScheduler.cpp index ab91b1071c..0da9892cb2 100644 --- a/src/runtime/CPP/CPPScheduler.cpp +++ b/src/runtime/CPP/CPPScheduler.cpp @@ -270,12 +270,12 @@ void CPPScheduler::run_workloads(std::vector &workloads) } } -void CPPScheduler::schedule(ICPPKernel *kernel, unsigned int split_dimension) +void CPPScheduler::schedule(ICPPKernel *kernel, const Hints &hints) { ARM_COMPUTE_ERROR_ON_MSG(!kernel, "The child class didn't set the kernel"); const Window &max_window = kernel->window(); - const unsigned int num_iterations = max_window.num_iterations(split_dimension); + const unsigned int num_iterations = max_window.num_iterations(hints.split_dimension()); const unsigned int num_threads = std::min(num_iterations, _num_threads); if(num_iterations == 0) @@ -291,12 +291,29 @@ void CPPScheduler::schedule(ICPPKernel *kernel, unsigned int split_dimension) } else { - std::vector workloads(num_threads); - for(unsigned int t = 0; t < num_threads; t++) + unsigned int num_windows = 0; + switch(hints.strategy()) { - workloads[t] = [&](const ThreadInfo & info) + case StrategyHint::STATIC: + num_windows = num_threads; + break; + case StrategyHint::DYNAMIC: { - Window win = max_window.split_window(split_dimension, info.thread_id, info.num_threads); + // Make sure we don't use some windows which are too small as this might create some contention on the ThreadFeeder + const unsigned int max_iterations = static_cast(_num_threads) * 3; + num_windows = num_iterations > max_iterations ? max_iterations : num_iterations; + break; + } + default: + ARM_COMPUTE_ERROR("Unknown strategy"); + } + std::vector workloads(num_windows); + for(unsigned int t = 0; t < num_windows; t++) + { + //Capture 't' by copy, all the other variables by reference: + workloads[t] = [t, &hints, &max_window, &num_windows, &kernel](const ThreadInfo & info) + { + Window win = max_window.split_window(hints.split_dimension(), t, num_windows); win.validate(); kernel->run(win, info); }; -- cgit v1.2.1