diff options
author | Milos Puzovic <milos.puzovic@arm.com> | 2022-07-27 18:35:28 +0000 |
---|---|---|
committer | Pablo Marquez Tello <pablo.tello@arm.com> | 2022-08-01 16:08:33 +0000 |
commit | 385dad2bffecbf395aa9aad257809de81c727ac7 (patch) | |
tree | d164db0aac156cc9b9f5762d718ec9564f6b5787 /src | |
parent | e748346ab548e64a363a2df7e27088331f453115 (diff) | |
download | ComputeLibrary-385dad2bffecbf395aa9aad257809de81c727ac7.tar.gz |
Fix for OpenMP scheduler work breakdown
If the number of work items is greater than the number of available threads,
the OpenMP scheduler executes only as many work items as there are
threads. This fix makes sure that we iterate through all work items and
execute all of them.
Change-Id: I3ad4b732c01fadc70dacaf09af3007d2b31086c7
Signed-off-by: Milos Puzovic <milos.puzovic@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8001
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Pablo Marquez Tello <pablo.tello@arm.com>
Diffstat (limited to 'src')
-rw-r--r-- | src/runtime/OMP/OMPScheduler.cpp | 13 |
1 files changed, 7 insertions, 6 deletions
```diff
diff --git a/src/runtime/OMP/OMPScheduler.cpp b/src/runtime/OMP/OMPScheduler.cpp
index e9b0bf4426..aad24b4f01 100644
--- a/src/runtime/OMP/OMPScheduler.cpp
+++ b/src/runtime/OMP/OMPScheduler.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2022 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -89,20 +89,21 @@ void OMPScheduler::schedule_op(ICPPKernel *kernel, const Hints &hints, const Win
 #ifndef DOXYGEN_SKIP_THIS
 void OMPScheduler::run_workloads(std::vector<arm_compute::IScheduler::Workload> &workloads)
 {
-    const unsigned int num_threads = std::min(_num_threads, static_cast<unsigned int>(workloads.size()));
-    if(num_threads < 1)
+    const unsigned int amount_of_work = static_cast<unsigned int>(workloads.size());
+    if(amount_of_work < 1 || _num_threads == 1)
     {
         return;
     }
     ThreadInfo info;
     info.cpu_info = &cpu_info();
-    info.num_threads = num_threads;
-    #pragma omp parallel firstprivate(info) num_threads(num_threads)
+    info.num_threads = _num_threads;
+    #pragma omp parallel for firstprivate(info) num_threads(_num_threads) default(shared) proc_bind(close) schedule(static, 1)
+    for(unsigned int wid = 0; wid < amount_of_work; ++wid)
     {
         const int tid = omp_get_thread_num();
         info.thread_id = tid;
-        workloads[tid](info);
+        workloads[wid](info);
     }
 }
 #endif /* DOXYGEN_SKIP_THIS */
```