about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Georgios Pinitas <georgios.pinitas@arm.com> 2019-11-05 13:35:47 +0000
committer Georgios Pinitas <georgios.pinitas@arm.com> 2019-11-11 12:33:13 +0000
commit 77d42528b796f3b8f5033785d3bbb8d9cb3fc637 (patch)
tree 9e8055e63f8475c5dde7d03768307e3eb7580a8a
parent 9aaf09ec557dcce63cd1b5de173ce8947108327d (diff)
download ComputeLibrary-77d42528b796f3b8f5033785d3bbb8d9cb3fc637.tar.gz
COMPMID-2853: VGG16 regression for fp32
* Caps dynamic scheduling granule to a max number

Change-Id: I35a9239bc9984dbc1b416c40c4c1b4ac7f5808bd
Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-on: https://review.mlplatform.org/c/2223
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
-rw-r--r-- arm_compute/runtime/IScheduler.h 18
-rw-r--r-- src/runtime/CPP/CPPScheduler.cpp 4
-rw-r--r-- src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp 20
-rw-r--r-- tests/framework/instruments/SchedulerTimer.cpp 2
4 files changed, 31 insertions, 13 deletions
diff --git a/arm_compute/runtime/IScheduler.h b/arm_compute/runtime/IScheduler.h
index 14acf04439..1c3e3e7abb 100644
--- a/arm_compute/runtime/IScheduler.h
+++ b/arm_compute/runtime/IScheduler.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -53,9 +53,10 @@ public:
*
* @param[in] split_dimension Dimension along which to split the kernel's execution window.
* @param[in] strategy (Optional) Split strategy.
+ * @param[in] threshold (Optional) Dynamic scheduling capping threshold.
*/
- Hints(unsigned int split_dimension, StrategyHint strategy = StrategyHint::STATIC)
- : _split_dimension(split_dimension), _strategy(strategy)
+ Hints(unsigned int split_dimension, StrategyHint strategy = StrategyHint::STATIC, int threshold = 0)
+ : _split_dimension(split_dimension), _strategy(strategy), _threshold(threshold)
{
}
/** Set the split_dimension hint
@@ -97,10 +98,19 @@ public:
{
return _strategy;
}
+ /** Return the granule capping threshold to be used by dynamic scheduling.
+ *
+ * @return The capping threshold
+ */
+ int threshold() const
+ {
+ return _threshold;
+ }
private:
unsigned int _split_dimension;
StrategyHint _strategy;
+ int _threshold;
};
/** Signature for the workloads to execute */
using Workload = std::function<void(const ThreadInfo &)>;
@@ -165,5 +175,5 @@ protected:
private:
unsigned int _num_threads_hint = {};
};
-}
+} // namespace arm_compute
#endif /* __ARM_COMPUTE_ISCHEDULER_H__ */
diff --git a/src/runtime/CPP/CPPScheduler.cpp b/src/runtime/CPP/CPPScheduler.cpp
index 5849218536..e684eeee98 100644
--- a/src/runtime/CPP/CPPScheduler.cpp
+++ b/src/runtime/CPP/CPPScheduler.cpp
@@ -338,9 +338,9 @@ void CPPScheduler::schedule(ICPPKernel *kernel, const Hints &hints)
break;
case StrategyHint::DYNAMIC:
{
+ const unsigned int granule_threshold = (hints.threshold() <= 0) ? num_threads : static_cast<unsigned int>(hints.threshold());
// Make sure we don't use some windows which are too small as this might create some contention on the ThreadFeeder
- const unsigned int max_iterations = static_cast<unsigned int>(_impl->_num_threads) * 3;
- num_windows = num_iterations > max_iterations ? max_iterations : num_iterations;
+ num_windows = num_iterations > granule_threshold ? granule_threshold : num_iterations;
break;
}
default:
diff --git a/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp b/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp
index 43e531579a..88e060109a 100644
--- a/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp
+++ b/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp
@@ -201,6 +201,8 @@ private:
IWeightsManager *_weights_manager{ nullptr };
/** Weights transform object */
FallbackTransform<TypeInput, TypeOutput> _weights_transform{};
+ /** GEMM kernel description */
+ arm_gemm::KernelDescription _kernel_info{};
};
template <typename TypeInput, typename TypeOutput, class OutputStage>
@@ -208,12 +210,12 @@ void Fallback<TypeInput, TypeOutput, OutputStage>::configure(const ITensor *a, c
arm_gemm::GemmArgs args, const GEMMInfo &gemm_info,
MemoryGroup &memory_group, IWeightsManager *weights_manager, const OutputStage &os)
{
- arm_gemm::GemmConfig gemm_cfg;
- const arm_gemm::KernelDescription gemm_kernel_info = arm_gemm::get_gemm_method<TypeInput, TypeOutput, OutputStage>(args, os);
- _weights_manager = weights_manager;
- if(gemm_kernel_info.method != arm_gemm::GemmMethod::GEMV_BATCHED)
+ arm_gemm::GemmConfig gemm_cfg;
+ _kernel_info = arm_gemm::get_gemm_method<TypeInput, TypeOutput, OutputStage>(args, os);
+ _weights_manager = weights_manager;
+ if(_kernel_info.method != arm_gemm::GemmMethod::GEMV_BATCHED)
{
- gemm_cfg.filter = gemm_kernel_info.name;
+ gemm_cfg.filter = _kernel_info.name;
args._cfg = &gemm_cfg;
}
_gemm_kernel_asm = arm_gemm::gemm<TypeInput, TypeOutput, OutputStage>(args, os);
@@ -387,7 +389,13 @@ void Fallback<TypeInput, TypeOutput, OutputStage>::run()
bias, 0);
// Schedule assembly kernel
- NEScheduler::get().schedule(_optimised_kernel.get(), Window::DimX);
+ IScheduler::Hints scheduling_hint = IScheduler::Hints(Window::DimX);
+ if(_kernel_info.method == arm_gemm::GemmMethod::GEMM_INTERLEAVED)
+ {
+ constexpr int granule_threshold = 200;
+ scheduling_hint = IScheduler::Hints(Window::DimX, IScheduler::StrategyHint::DYNAMIC, granule_threshold);
+ }
+ NEScheduler::get().schedule(_optimised_kernel.get(), scheduling_hint);
}
template <typename TypeInput, typename TypeOutput>
diff --git a/tests/framework/instruments/SchedulerTimer.cpp b/tests/framework/instruments/SchedulerTimer.cpp
index 98c9b878d9..9e8bba28e8 100644
--- a/tests/framework/instruments/SchedulerTimer.cpp
+++ b/tests/framework/instruments/SchedulerTimer.cpp
@@ -76,7 +76,7 @@ public:
void schedule(ICPPKernel *kernel, const Hints &hints) override
{
_timer.start();
- _real_scheduler.schedule(kernel, hints.split_dimension());
+ _real_scheduler.schedule(kernel, hints);
_timer.stop();
typename SchedulerClock<output_timestamps>::kernel_info info;