From c186b574e52b81c75e551cee46a6c4cc7d500c90 Mon Sep 17 00:00:00 2001 From: Moritz Pflanzer Date: Thu, 7 Sep 2017 09:48:04 +0100 Subject: COMPMID-481: Add thread info parameter Change-Id: Iebb50a88d017445b6b37a86563ebd4abd86c5cf5 Reviewed-on: http://mpd-gerrit.cambridge.arm.com/86788 Tested-by: Kaizen Reviewed-by: Anthony Barbier --- src/runtime/CL/functions/CLHOGMultiDetection.cpp | 3 +- src/runtime/CL/functions/CLHarrisCorners.cpp | 2 +- src/runtime/CPP/CPPScheduler.cpp | 49 ++++++++++------------ src/runtime/CPP/SingleThreadScheduler.cpp | 5 ++- src/runtime/NEON/INESimpleFunction.cpp | 2 +- src/runtime/NEON/functions/NECannyEdge.cpp | 6 +-- src/runtime/NEON/functions/NEConvolution.cpp | 2 +- src/runtime/NEON/functions/NEDerivative.cpp | 2 +- .../NEON/functions/NEDirectConvolutionLayer.cpp | 2 +- src/runtime/NEON/functions/NEEqualizeHistogram.cpp | 2 +- src/runtime/NEON/functions/NEFastCorners.cpp | 2 +- src/runtime/NEON/functions/NEGaussian5x5.cpp | 2 +- src/runtime/NEON/functions/NEGaussianPyramid.cpp | 2 +- src/runtime/NEON/functions/NEHOGMultiDetection.cpp | 2 +- src/runtime/NEON/functions/NEHarrisCorners.cpp | 6 +-- src/runtime/NEON/functions/NEMeanStdDev.cpp | 2 +- src/runtime/NEON/functions/NESobel5x5.cpp | 2 +- src/runtime/NEON/functions/NESobel7x7.cpp | 2 +- 18 files changed, 46 insertions(+), 49 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/CL/functions/CLHOGMultiDetection.cpp b/src/runtime/CL/functions/CLHOGMultiDetection.cpp index 6def2dedc9..9eed355710 100644 --- a/src/runtime/CL/functions/CLHOGMultiDetection.cpp +++ b/src/runtime/CL/functions/CLHOGMultiDetection.cpp @@ -29,6 +29,7 @@ #include "arm_compute/runtime/CL/CLArray.h" #include "arm_compute/runtime/CL/CLScheduler.h" #include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/Scheduler.h" #include "support/ToolchainSupport.h" using namespace arm_compute; @@ -246,7 +247,7 @@ void CLHOGMultiDetection::run() { // Map detection windows array before computing non maxima suppression _detection_windows->map(CLScheduler::get().queue(), true); - _non_maxima_kernel->run(_non_maxima_kernel->window()); + Scheduler::get().schedule(_non_maxima_kernel.get(), Window::DimY); _detection_windows->unmap(CLScheduler::get().queue()); } } diff --git a/src/runtime/CL/functions/CLHarrisCorners.cpp b/src/runtime/CL/functions/CLHarrisCorners.cpp index 8f9fcdc58f..2140240753 100644 --- a/src/runtime/CL/functions/CLHarrisCorners.cpp +++ b/src/runtime/CL/functions/CLHarrisCorners.cpp @@ -165,6 +165,6 @@ void CLHarrisCorners::run() _nonmax.unmap(); _corners->map(CLScheduler::get().queue(), true); - _sort_euclidean.run(_sort_euclidean.window()); + Scheduler::get().schedule(&_sort_euclidean, Window::DimY); _corners->unmap(CLScheduler::get().queue()); } diff --git a/src/runtime/CPP/CPPScheduler.cpp b/src/runtime/CPP/CPPScheduler.cpp index 2a321a1101..9cc3f033c2 100644 --- a/src/runtime/CPP/CPPScheduler.cpp +++ b/src/runtime/CPP/CPPScheduler.cpp @@ -52,7 +52,7 @@ public: * This function will return as soon as the kernel has been sent to the worker thread. * wait() needs to be called to ensure the execution is complete. */ - void start(ICPPKernel *kernel, const Window &window); + void start(ICPPKernel *kernel, const Window &window, const ThreadInfo &info); /** Wait for the current kernel execution to complete */ void wait(); @@ -64,13 +64,14 @@ private: std::thread _thread; ICPPKernel *_kernel{ nullptr }; Window _window; + ThreadInfo _info; sem_t _wait_for_work; sem_t _job_complete; std::exception_ptr _current_exception; }; Thread::Thread() - : _thread(), _window(), _wait_for_work(), _job_complete(), _current_exception(nullptr) + : _thread(), _window(), _info(), _wait_for_work(), _job_complete(), _current_exception(nullptr) { int ret = sem_init(&_wait_for_work, 0, 0); ARM_COMPUTE_ERROR_ON(ret < 0); @@ -87,7 +88,7 @@ Thread::~Thread() { ARM_COMPUTE_ERROR_ON(!_thread.joinable()); - start(nullptr, Window()); + start(nullptr, Window(), ThreadInfo()); _thread.join(); int ret = sem_destroy(&_wait_for_work); @@ -99,10 +100,11 @@ Thread::~Thread() ARM_COMPUTE_UNUSED(ret); } -void Thread::start(ICPPKernel *kernel, const Window &window) +void Thread::start(ICPPKernel *kernel, const Window &window, const ThreadInfo &info) { _kernel = kernel; _window = window; + _info = info; int ret = sem_post(&_wait_for_work); ARM_COMPUTE_UNUSED(ret); ARM_COMPUTE_ERROR_ON(ret < 0); @@ -133,7 +135,7 @@ void Thread::worker_thread() try { _window.validate(); - _kernel->run(_window); + _kernel->run(_window, _info); } catch(...) { @@ -163,8 +165,7 @@ CPPScheduler &CPPScheduler::get() CPPScheduler::CPPScheduler() : _num_threads(std::thread::hardware_concurrency()), - _threads(std::unique_ptr(new Thread[std::thread::hardware_concurrency() - 1], delete_threads)), - _target(CPUTarget::INTRINSICS) + _threads(std::unique_ptr(new Thread[std::thread::hardware_concurrency() - 1], delete_threads)) { } @@ -179,50 +180,42 @@ unsigned int CPPScheduler::num_threads() const return _num_threads; } -void CPPScheduler::set_target(CPUTarget target) -{ - _target = target; -} - -CPUTarget CPPScheduler::target() const -{ - return _target; -} - void CPPScheduler::schedule(ICPPKernel *kernel, unsigned int split_dimension) { ARM_COMPUTE_ERROR_ON_MSG(!kernel, "The child class didn't set the kernel"); /** [Scheduler example] */ + ThreadInfo info; + info.cpu = _target; + const Window &max_window = kernel->window(); const unsigned int num_iterations = max_window.num_iterations(split_dimension); - const unsigned int num_threads = std::min(num_iterations, _num_threads); + info.num_threads = std::min(num_iterations, _num_threads); - if(!kernel->is_parallelisable() || 1 == num_threads) + if(!kernel->is_parallelisable() || info.num_threads == 1) { - kernel->run(max_window); + kernel->run(max_window, info); } else { - for(unsigned int t = 0; t < num_threads; ++t) + for(int t = 0; t < info.num_threads; ++t) { - Window win = max_window.split_window(split_dimension, t, num_threads); - win.set_thread_id(t); - win.set_num_threads(num_threads); + Window win = max_window.split_window(split_dimension, t, info.num_threads); + info.thread_id = t; - if(t != num_threads - 1) + if(t != info.num_threads - 1) { - _threads[t].start(kernel, win); + _threads[t].start(kernel, win, info); } else { - kernel->run(win); + kernel->run(win, info); } } try { - for(unsigned int t = 1; t < num_threads; ++t) + for(int t = 1; t < info.num_threads; ++t) { _threads[t - 1].wait(); } diff --git a/src/runtime/CPP/SingleThreadScheduler.cpp b/src/runtime/CPP/SingleThreadScheduler.cpp index f086813e91..4e46a59fd0 100644 --- a/src/runtime/CPP/SingleThreadScheduler.cpp +++ b/src/runtime/CPP/SingleThreadScheduler.cpp @@ -38,12 +38,15 @@ SingleThreadScheduler &SingleThreadScheduler::get() void SingleThreadScheduler::set_num_threads(unsigned int num_threads) { ARM_COMPUTE_UNUSED(num_threads); + ARM_COMPUTE_ERROR_ON(num_threads != 1); } void SingleThreadScheduler::schedule(ICPPKernel *kernel, unsigned int split_dimension) { ARM_COMPUTE_UNUSED(split_dimension); - kernel->run(kernel->window()); + ThreadInfo info; + info.cpu = _target; + kernel->run(kernel->window(), info); } unsigned int SingleThreadScheduler::num_threads() const diff --git a/src/runtime/NEON/INESimpleFunction.cpp b/src/runtime/NEON/INESimpleFunction.cpp index a748a1e4ad..23d9872294 100644 --- a/src/runtime/NEON/INESimpleFunction.cpp +++ b/src/runtime/NEON/INESimpleFunction.cpp @@ -35,6 +35,6 @@ INESimpleFunction::INESimpleFunction() // NOLINT void INESimpleFunction::run() { - _border_handler.run(_border_handler.window()); + NEScheduler::get().schedule(&_border_handler, Window::DimZ); NEScheduler::get().schedule(_kernel.get(), Window::DimY); } diff --git a/src/runtime/NEON/functions/NECannyEdge.cpp b/src/runtime/NEON/functions/NECannyEdge.cpp index ca8877e2fc..318cea2342 100644 --- a/src/runtime/NEON/functions/NECannyEdge.cpp +++ b/src/runtime/NEON/functions/NECannyEdge.cpp @@ -161,7 +161,7 @@ void NECannyEdge::run() _sobel->run(); // Fill border before non-maxima suppression. Nop for border mode undefined. - _border_mag_gradient.run(_border_mag_gradient.window()); + NEScheduler::get().schedule(&_border_mag_gradient, Window::DimZ); // Run gradient NEScheduler::get().schedule(_gradient.get(), Window::DimY); @@ -173,8 +173,8 @@ void NECannyEdge::run() memset(_output->buffer(), 0, _output->info()->total_size()); // Fill border before edge trace - _border_edge_trace.run(_border_edge_trace.window()); + NEScheduler::get().schedule(&_border_edge_trace, Window::DimZ); // Run edge tracing - _edge_trace.run(_edge_trace.window()); + NEScheduler::get().schedule(&_edge_trace, Window::DimY); } diff --git a/src/runtime/NEON/functions/NEConvolution.cpp b/src/runtime/NEON/functions/NEConvolution.cpp index 4ad6450c67..249274ba32 100644 --- a/src/runtime/NEON/functions/NEConvolution.cpp +++ b/src/runtime/NEON/functions/NEConvolution.cpp @@ -94,7 +94,7 @@ void NEConvolutionSquare::configure(ITensor *input, ITensor *output template void NEConvolutionSquare::run() { - _border_handler.run(_border_handler.window()); + NEScheduler::get().schedule(&_border_handler, Window::DimZ); if(_is_separable) { diff --git a/src/runtime/NEON/functions/NEDerivative.cpp b/src/runtime/NEON/functions/NEDerivative.cpp index c50db14746..81180307f6 100644 --- a/src/runtime/NEON/functions/NEDerivative.cpp +++ b/src/runtime/NEON/functions/NEDerivative.cpp @@ -47,6 +47,6 @@ void NEDerivative::configure(ITensor *input, ITensor *output_x, ITensor *output_ void NEDerivative::run() { - _border_handler.run(_border_handler.window()); + NEScheduler::get().schedule(&_border_handler, Window::DimZ); NEScheduler::get().schedule(&_kernel, Window::DimY); } diff --git a/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp b/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp index 2e3a6835dc..810efe539f 100644 --- a/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp @@ -85,7 +85,7 @@ void NEDirectConvolutionLayer::configure(ITensor *input, const ITensor *weights, void NEDirectConvolutionLayer::run() { - _input_border_handler.run(_input_border_handler.window()); + NEScheduler::get().schedule(&_input_border_handler, Window::DimZ); NEScheduler::get().schedule(&_conv_kernel, Window::DimZ); NEScheduler::get().schedule(&_accumulate_bias_kernel, Window::DimY); diff --git a/src/runtime/NEON/functions/NEEqualizeHistogram.cpp b/src/runtime/NEON/functions/NEEqualizeHistogram.cpp index f6ec677e44..70b93cae9e 100644 --- a/src/runtime/NEON/functions/NEEqualizeHistogram.cpp +++ b/src/runtime/NEON/functions/NEEqualizeHistogram.cpp @@ -55,7 +55,7 @@ void NEEqualizeHistogram::run() NEScheduler::get().schedule(&_histogram_kernel, Window::DimY); // Calculate cumulative distribution of histogram and create LUT. - _cd_histogram_kernel.run(_cd_histogram_kernel.window()); + NEScheduler::get().schedule(&_cd_histogram_kernel, Window::DimY); // Map input to output using created LUT. NEScheduler::get().schedule(&_map_histogram_kernel, Window::DimY); diff --git a/src/runtime/NEON/functions/NEFastCorners.cpp b/src/runtime/NEON/functions/NEFastCorners.cpp index 33a58f1904..265041fc42 100644 --- a/src/runtime/NEON/functions/NEFastCorners.cpp +++ b/src/runtime/NEON/functions/NEFastCorners.cpp @@ -88,7 +88,7 @@ void NEFastCorners::configure(IImage *input, float threshold, bool nonmax_suppre void NEFastCorners::run() { - _border_handler.run(_border_handler.window()); + NEScheduler::get().schedule(&_border_handler, Window::DimZ); NEScheduler::get().schedule(&_fast_corners_kernel, Window::DimY); diff --git a/src/runtime/NEON/functions/NEGaussian5x5.cpp b/src/runtime/NEON/functions/NEGaussian5x5.cpp index 69639d0d43..a1ce985633 100644 --- a/src/runtime/NEON/functions/NEGaussian5x5.cpp +++ b/src/runtime/NEON/functions/NEGaussian5x5.cpp @@ -54,7 +54,7 @@ void NEGaussian5x5::configure(ITensor *input, ITensor *output, BorderMode border void NEGaussian5x5::run() { - _border_handler.run(_border_handler.window()); + NEScheduler::get().schedule(&_border_handler, Window::DimZ); NEScheduler::get().schedule(&_kernel_hor, Window::DimY); NEScheduler::get().schedule(&_kernel_vert, Window::DimY); } diff --git a/src/runtime/NEON/functions/NEGaussianPyramid.cpp b/src/runtime/NEON/functions/NEGaussianPyramid.cpp index e857aabd3b..90bd5842eb 100644 --- a/src/runtime/NEON/functions/NEGaussianPyramid.cpp +++ b/src/runtime/NEON/functions/NEGaussianPyramid.cpp @@ -109,7 +109,7 @@ void NEGaussianPyramidHalf::run() for(unsigned int i = 0; i < num_levels - 1; ++i) { - _border_handler[i].run(_border_handler[i].window()); + NEScheduler::get().schedule(_border_handler.get() + i, Window::DimZ); NEScheduler::get().schedule(_horizontal_reduction.get() + i, Window::DimY); NEScheduler::get().schedule(_vertical_reduction.get() + i, Window::DimY); } diff --git a/src/runtime/NEON/functions/NEHOGMultiDetection.cpp b/src/runtime/NEON/functions/NEHOGMultiDetection.cpp index 8b3d01423c..1a038a2f62 100644 --- a/src/runtime/NEON/functions/NEHOGMultiDetection.cpp +++ b/src/runtime/NEON/functions/NEHOGMultiDetection.cpp @@ -239,6 +239,6 @@ void NEHOGMultiDetection::run() // Run non-maxima suppression kernel if enabled if(_non_maxima_suppression) { - _non_maxima_kernel->run(_non_maxima_kernel->window()); + NEScheduler::get().schedule(_non_maxima_kernel.get(), Window::DimY); } } diff --git a/src/runtime/NEON/functions/NEHarrisCorners.cpp b/src/runtime/NEON/functions/NEHarrisCorners.cpp index 24b2bcb5b3..7ec681dce9 100644 --- a/src/runtime/NEON/functions/NEHarrisCorners.cpp +++ b/src/runtime/NEON/functions/NEHarrisCorners.cpp @@ -207,8 +207,8 @@ void NEHarrisCorners::run() _sobel->run(); // Fill border before harris score kernel - _border_gx.run(_border_gx.window()); - _border_gy.run(_border_gy.window()); + NEScheduler::get().schedule(&_border_gx, Window::DimZ); + NEScheduler::get().schedule(&_border_gy, Window::DimZ); // Run harris score kernel NEScheduler::get().schedule(_harris_score.get(), Window::DimY); @@ -220,5 +220,5 @@ void NEHarrisCorners::run() NEScheduler::get().schedule(&_candidates, Window::DimY); // Run sort & euclidean distance - _sort_euclidean.run(_sort_euclidean.window()); + NEScheduler::get().schedule(&_sort_euclidean, Window::DimY); } diff --git a/src/runtime/NEON/functions/NEMeanStdDev.cpp b/src/runtime/NEON/functions/NEMeanStdDev.cpp index ab8e72bf1d..2304bc80d7 100644 --- a/src/runtime/NEON/functions/NEMeanStdDev.cpp +++ b/src/runtime/NEON/functions/NEMeanStdDev.cpp @@ -43,6 +43,6 @@ void NEMeanStdDev::run() _global_sum = 0; _global_sum_squared = 0; - _fill_border_kernel.run(_fill_border_kernel.window()); + NEScheduler::get().schedule(&_fill_border_kernel, Window::DimZ); NEScheduler::get().schedule(&_mean_stddev_kernel, Window::DimY); } diff --git a/src/runtime/NEON/functions/NESobel5x5.cpp b/src/runtime/NEON/functions/NESobel5x5.cpp index 8967a22ba1..305d21122e 100644 --- a/src/runtime/NEON/functions/NESobel5x5.cpp +++ b/src/runtime/NEON/functions/NESobel5x5.cpp @@ -75,7 +75,7 @@ void NESobel5x5::configure(ITensor *input, ITensor *output_x, ITensor *output_y, void NESobel5x5::run() { - _border_handler.run(_border_handler.window()); + NEScheduler::get().schedule(&_border_handler, Window::DimZ); NEScheduler::get().schedule(&_sobel_hor, Window::DimY); NEScheduler::get().schedule(&_sobel_vert, Window::DimY); } diff --git a/src/runtime/NEON/functions/NESobel7x7.cpp b/src/runtime/NEON/functions/NESobel7x7.cpp index f628da9709..57fe028567 100644 --- a/src/runtime/NEON/functions/NESobel7x7.cpp +++ b/src/runtime/NEON/functions/NESobel7x7.cpp @@ -75,7 +75,7 @@ void NESobel7x7::configure(ITensor *input, ITensor *output_x, ITensor *output_y, void NESobel7x7::run() { - _border_handler.run(_border_handler.window()); + NEScheduler::get().schedule(&_border_handler, Window::DimZ); NEScheduler::get().schedule(&_sobel_hor, Window::DimY); NEScheduler::get().schedule(&_sobel_vert, Window::DimY); } -- cgit v1.2.1