aboutsummaryrefslogtreecommitdiff
path: root/src/runtime/OMP
diff options
context:
space:
mode:
authorGeorgios Pinitas <georgios.pinitas@arm.com>2018-06-29 18:25:22 +0100
committerAnthony Barbier <anthony.barbier@arm.com>2018-11-02 16:54:10 +0000
commit584252d2343dc8fa8ac5ba4ab8d3397b69a62c19 (patch)
tree8214c9fcbd7ad6d06e30107cc6370e4032306905 /src/runtime/OMP
parent5f707736413aeac77818c42838296966f8dc6761 (diff)
downloadComputeLibrary-584252d2343dc8fa8ac5ba4ab8d3397b69a62c19.tar.gz
COMPMID-1355: OpenMP compilation is broken.
Change-Id: If821ff02a68551d2181b2b7fdc3028cb5343341f Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/138150 Tested-by: Jenkins <bsgcomp@arm.com> Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'src/runtime/OMP')
-rw-r--r--src/runtime/OMP/OMPScheduler.cpp46
1 files changed, 36 insertions, 10 deletions
diff --git a/src/runtime/OMP/OMPScheduler.cpp b/src/runtime/OMP/OMPScheduler.cpp
index 43a8a6de54..f4253c8848 100644
--- a/src/runtime/OMP/OMPScheduler.cpp
+++ b/src/runtime/OMP/OMPScheduler.cpp
@@ -59,26 +59,52 @@ void OMPScheduler::set_num_threads(unsigned int num_threads)
void OMPScheduler::schedule(ICPPKernel *kernel, const Hints &hints)
{
ARM_COMPUTE_ERROR_ON_MSG(!kernel, "The child class didn't set the kernel");
-
- ThreadInfo info;
- info.cpu_info = &_cpu_info;
+ ARM_COMPUTE_ERROR_ON_MSG(hints.strategy() == StrategyHint::DYNAMIC,
+ "Dynamic scheduling is not supported in OMPScheduler");
const Window &max_window = kernel->window();
const unsigned int num_iterations = max_window.num_iterations(hints.split_dimension());
- info.num_threads = std::min(num_iterations, _num_threads);
+ const unsigned int num_threads = std::min(num_iterations, _num_threads);
- if(!kernel->is_parallelisable() || info.num_threads == 1)
+ if(!kernel->is_parallelisable() || num_threads == 1)
{
+ ThreadInfo info;
+ info.cpu_info = &_cpu_info;
kernel->run(max_window, info);
}
else
{
- #pragma omp parallel firstprivate(info) num_threads(info.num_threads)
+ const unsigned int num_windows = num_threads;
+ std::vector<IScheduler::Workload> workloads(num_windows);
+ for(unsigned int t = 0; t < num_windows; t++)
{
- const int tid = omp_get_thread_num();
- Window win = max_window.split_window(hints.split_dimension(), tid, info.num_threads);
- info.thread_id = tid;
- kernel->run(win, info);
+ //Capture 't' by copy, all the other variables by reference:
+ workloads[t] = [t, &hints, &max_window, &num_windows, &kernel](const ThreadInfo & info)
+ {
+ Window win = max_window.split_window(hints.split_dimension(), t, num_windows);
+ win.validate();
+ kernel->run(win, info);
+ };
}
+ run_workloads(workloads);
+ }
+}
+
+void OMPScheduler::run_workloads(std::vector<arm_compute::IScheduler::Workload> &workloads)
+{
+ const unsigned int num_threads = std::min(_num_threads, static_cast<unsigned int>(workloads.size()));
+ if(num_threads < 1)
+ {
+ return;
+ }
+
+ ThreadInfo info;
+ info.cpu_info = &_cpu_info;
+ info.num_threads = num_threads;
+ #pragma omp parallel firstprivate(info) num_threads(num_threads)
+ {
+ const int tid = omp_get_thread_num();
+ info.thread_id = tid;
+ workloads[tid](info);
}
}