From 77d42528b796f3b8f5033785d3bbb8d9cb3fc637 Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Tue, 5 Nov 2019 13:35:47 +0000 Subject: COMPMID-2853: VGG16 regression for fp32 * Caps dynamic scheduling granule to a max number Change-Id: I35a9239bc9984dbc1b416c40c4c1b4ac7f5808bd Signed-off-by: Georgios Pinitas Reviewed-on: https://review.mlplatform.org/c/2223 Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins Reviewed-by: Michele Di Giorgio --- arm_compute/runtime/IScheduler.h | 18 ++++++++++++++---- src/runtime/CPP/CPPScheduler.cpp | 4 ++-- .../NEON/functions/NEGEMMAssemblyDispatch.cpp | 20 ++++++++++++++------ tests/framework/instruments/SchedulerTimer.cpp | 2 +- 4 files changed, 31 insertions(+), 13 deletions(-) diff --git a/arm_compute/runtime/IScheduler.h b/arm_compute/runtime/IScheduler.h index 14acf04439..1c3e3e7abb 100644 --- a/arm_compute/runtime/IScheduler.h +++ b/arm_compute/runtime/IScheduler.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -53,9 +53,10 @@ public: * * @param[in] split_dimension Dimension along which to split the kernel's execution window. * @param[in] strategy (Optional) Split strategy. + * @param[in] threshold (Optional) Dynamic scheduling capping threshold. */ - Hints(unsigned int split_dimension, StrategyHint strategy = StrategyHint::STATIC) - : _split_dimension(split_dimension), _strategy(strategy) + Hints(unsigned int split_dimension, StrategyHint strategy = StrategyHint::STATIC, int threshold = 0) + : _split_dimension(split_dimension), _strategy(strategy), _threshold(threshold) { } /** Set the split_dimension hint @@ -97,10 +98,19 @@ public: { return _strategy; } + /** Return the granule capping threshold to be used by dynamic scheduling. 
+ * + * @return The capping threshold + */ + int threshold() const + { + return _threshold; + } private: unsigned int _split_dimension; StrategyHint _strategy; + int _threshold; }; /** Signature for the workloads to execute */ using Workload = std::function<void(const ThreadInfo &)>; @@ -165,5 +175,5 @@ protected: private: unsigned int _num_threads_hint = {}; }; -} +} // namespace arm_compute #endif /* __ARM_COMPUTE_ISCHEDULER_H__ */ diff --git a/src/runtime/CPP/CPPScheduler.cpp b/src/runtime/CPP/CPPScheduler.cpp index 5849218536..e684eeee98 100644 --- a/src/runtime/CPP/CPPScheduler.cpp +++ b/src/runtime/CPP/CPPScheduler.cpp @@ -338,9 +338,9 @@ void CPPScheduler::schedule(ICPPKernel *kernel, const Hints &hints) break; case StrategyHint::DYNAMIC: { + const unsigned int granule_threshold = (hints.threshold() <= 0) ? num_threads : static_cast<unsigned int>(hints.threshold()); // Make sure we don't use some windows which are too small as this might create some contention on the ThreadFeeder - const unsigned int max_iterations = static_cast<unsigned int>(_impl->_num_threads) * 3; - num_windows = num_iterations > max_iterations ? max_iterations : num_iterations; + num_windows = num_iterations > granule_threshold ? 
granule_threshold : num_iterations; break; } default: diff --git a/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp b/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp index 43e531579a..88e060109a 100644 --- a/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp +++ b/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp @@ -201,6 +201,8 @@ private: IWeightsManager *_weights_manager{ nullptr }; /** Weights transform object */ FallbackTransform _weights_transform{}; + /** GEMM kernel description */ + arm_gemm::KernelDescription _kernel_info{}; }; template @@ -208,12 +210,12 @@ void Fallback::configure(const ITensor *a, c arm_gemm::GemmArgs args, const GEMMInfo &gemm_info, MemoryGroup &memory_group, IWeightsManager *weights_manager, const OutputStage &os) { - arm_gemm::GemmConfig gemm_cfg; - const arm_gemm::KernelDescription gemm_kernel_info = arm_gemm::get_gemm_method(args, os); - _weights_manager = weights_manager; - if(gemm_kernel_info.method != arm_gemm::GemmMethod::GEMV_BATCHED) + arm_gemm::GemmConfig gemm_cfg; + _kernel_info = arm_gemm::get_gemm_method(args, os); + _weights_manager = weights_manager; + if(_kernel_info.method != arm_gemm::GemmMethod::GEMV_BATCHED) { - gemm_cfg.filter = gemm_kernel_info.name; + gemm_cfg.filter = _kernel_info.name; args._cfg = &gemm_cfg; } _gemm_kernel_asm = arm_gemm::gemm(args, os); @@ -387,7 +389,13 @@ void Fallback::run() bias, 0); // Schedule assembly kernel - NEScheduler::get().schedule(_optimised_kernel.get(), Window::DimX); + IScheduler::Hints scheduling_hint = IScheduler::Hints(Window::DimX); + if(_kernel_info.method == arm_gemm::GemmMethod::GEMM_INTERLEAVED) + { + constexpr int granule_threshold = 200; + scheduling_hint = IScheduler::Hints(Window::DimX, IScheduler::StrategyHint::DYNAMIC, granule_threshold); + } + NEScheduler::get().schedule(_optimised_kernel.get(), scheduling_hint); } template diff --git a/tests/framework/instruments/SchedulerTimer.cpp b/tests/framework/instruments/SchedulerTimer.cpp index 
98c9b878d9..9e8bba28e8 100644 --- a/tests/framework/instruments/SchedulerTimer.cpp +++ b/tests/framework/instruments/SchedulerTimer.cpp @@ -76,7 +76,7 @@ public: void schedule(ICPPKernel *kernel, const Hints &hints) override { _timer.start(); - _real_scheduler.schedule(kernel, hints.split_dimension()); + _real_scheduler.schedule(kernel, hints); _timer.stop(); typename SchedulerClock::kernel_info info; -- cgit v1.2.1