diff options
Diffstat (limited to 'src/runtime/OMP/OMPScheduler.cpp')
-rw-r--r-- | src/runtime/OMP/OMPScheduler.cpp | 43 |
1 file changed, 26 insertions(+), 17 deletions(-)
diff --git a/src/runtime/OMP/OMPScheduler.cpp b/src/runtime/OMP/OMPScheduler.cpp index f67f06fc94..d4d6193fce 100644 --- a/src/runtime/OMP/OMPScheduler.cpp +++ b/src/runtime/OMP/OMPScheduler.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -27,7 +27,6 @@ #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/Utils.h" -#include "arm_compute/runtime/CPUUtils.h" #include <omp.h> @@ -51,55 +50,65 @@ void OMPScheduler::set_num_threads(unsigned int num_threads) void OMPScheduler::schedule(ICPPKernel *kernel, const Hints &hints) { + ITensorPack tensors; + schedule_common(kernel, hints, kernel->window(), tensors); +} + +void OMPScheduler::schedule_op(ICPPKernel *kernel, const Hints &hints, const Window &window, ITensorPack &tensors) +{ ARM_COMPUTE_ERROR_ON_MSG(!kernel, "The child class didn't set the kernel"); ARM_COMPUTE_ERROR_ON_MSG(hints.strategy() == StrategyHint::DYNAMIC, "Dynamic scheduling is not supported in OMPScheduler"); - const Window &max_window = kernel->window(); + const Window &max_window = window; const unsigned int num_iterations = max_window.num_iterations(hints.split_dimension()); const unsigned int num_threads = std::min(num_iterations, _num_threads); - if(!kernel->is_parallelisable() || num_threads == 1) + if (!kernel->is_parallelisable() || num_threads == 1) { ThreadInfo info; - info.cpu_info = &_cpu_info; - kernel->run(max_window, info); + info.cpu_info = &cpu_info(); + kernel->run_op(tensors, max_window, info); } else { const unsigned int num_windows = num_threads; std::vector<IScheduler::Workload> workloads(num_windows); - for(unsigned int t = 0; t < num_windows; t++) + for (unsigned int t = 0; t < num_windows; t++) { //Capture 't' by copy, all the other variables by reference: - workloads[t] = [t, &hints, &max_window, &num_windows, &kernel](const ThreadInfo & info) + workloads[t] = [t, &hints, &max_window, &num_windows, &kernel, &tensors](const ThreadInfo &info) { Window win = max_window.split_window(hints.split_dimension(), t, num_windows); win.validate(); - kernel->run(win, info); + kernel->run_op(tensors, win, info); }; } run_workloads(workloads); } } - #ifndef DOXYGEN_SKIP_THIS void OMPScheduler::run_workloads(std::vector<arm_compute::IScheduler::Workload> &workloads) { - const unsigned int num_threads = std::min(_num_threads, static_cast<unsigned int>(workloads.size())); - if(num_threads < 1) + const unsigned int amount_of_work = static_cast<unsigned int>(workloads.size()); + const unsigned int num_threads_to_use = std::min(_num_threads, amount_of_work); + + if (num_threads_to_use < 1) { return; } ThreadInfo info; - info.cpu_info = &_cpu_info; - info.num_threads = num_threads; - #pragma omp parallel firstprivate(info) num_threads(num_threads) + info.cpu_info = &cpu_info(); + info.num_threads = num_threads_to_use; +#pragma omp parallel for firstprivate(info) num_threads(num_threads_to_use) default(shared) proc_bind(close) \ + schedule(static, 1) + for (unsigned int wid = 0; wid < amount_of_work; ++wid) { - const int tid = omp_get_thread_num(); + const int tid = omp_get_thread_num(); + info.thread_id = tid; - workloads[tid](info); + workloads[wid](info); } } #endif /* DOXYGEN_SKIP_THIS */ |