author     Matthew Sloyan <matthew.sloyan@arm.com>    2021-02-10 16:50:53 +0000
committer  Matthew Sloyan <matthew.sloyan@arm.com>    2021-02-12 17:56:58 +0000
commit     0a7dc6bba5d0810fe2ed6f84b0376a8b0674c0b3 (patch)
tree       6931067f0e0ff25be7d6a01d379f4b1497dff424 /src/backends
parent     b454c5c65efb238c130b042ace390b2bc7f0bf75 (diff)
download   armnn-0a7dc6bba5d0810fe2ed6f84b0376a8b0674c0b3.tar.gz
IVGCVSW-5685 Add CpuAcc specific configuration option numberOfThreads
* Added ability to set number of threads used in CpuAcc backend
* Enabled number-of-threads option in ExecuteNetwork
* Added TfLiteDelegate ModelOptions test
* Added unsigned int type to BackendOptions.hpp
Signed-off-by: Matthew Sloyan <matthew.sloyan@arm.com>
Change-Id: Ia576d4f45cbe5df3654bc730bb5ebd5181d82b5a
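For context, a minimal sketch of how an application could request the new option when optimizing a network for CpuAcc. It mirrors the NumberOfThreadsTestOnCpuAcc test added in this patch; `net` (INetworkPtr) and `runtime` (IRuntimePtr) are assumed to have been created beforehand, as in that test:

    // Sketch only: ask the CpuAcc (Neon) backend to use 2 threads via ModelOptions.
    // 'net' and 'runtime' are assumed to exist already.
    unsigned int numberOfThreads = 2;

    std::vector<armnn::BackendId> backends = {armnn::Compute::CpuAcc};
    armnn::OptimizerOptions optimizerOptions;
    armnn::BackendOptions cpuAccOptions("CpuAcc", {{"NumberOfThreads", numberOfThreads}});
    optimizerOptions.m_ModelOptions.push_back(cpuAccOptions);

    armnn::IOptimizedNetworkPtr optimizedNet = armnn::Optimize(
            *net, backends, runtime->GetDeviceSpec(), optimizerOptions);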
Diffstat (limited to 'src/backends')
-rw-r--r--   src/backends/neon/NeonBackendModelContext.cpp           20
-rw-r--r--   src/backends/neon/NeonBackendModelContext.hpp            5
-rw-r--r--   src/backends/neon/NeonWorkloadFactory.cpp               21
-rw-r--r--   src/backends/neon/NeonWorkloadFactory.hpp                4
-rw-r--r--   src/backends/neon/test/NeonOptimizedNetworkTests.cpp    32
5 files changed, 81 insertions, 1 deletion
diff --git a/src/backends/neon/NeonBackendModelContext.cpp b/src/backends/neon/NeonBackendModelContext.cpp
index 2be71e5ded..270592e94d 100644
--- a/src/backends/neon/NeonBackendModelContext.cpp
+++ b/src/backends/neon/NeonBackendModelContext.cpp
@@ -17,13 +17,22 @@ bool ParseBool(const armnn::BackendOptions::Var& value, bool defaultValue)
     return defaultValue;
 }
 
+unsigned int ParseUnsignedInt(const armnn::BackendOptions::Var& value, unsigned int defaultValue)
+{
+    if (value.IsUnsignedInt())
+    {
+        return value.AsUnsignedInt();
+    }
+    return defaultValue;
+}
+
 } // namespace anonymous
 
 namespace armnn
 {
 
 NeonBackendModelContext::NeonBackendModelContext(const ModelOptions& modelOptions)
-    : m_IsFastMathEnabled(false)
+    : m_IsFastMathEnabled(false), m_NumberOfThreads(0)
 {
     if (!modelOptions.empty())
     {
@@ -33,6 +42,10 @@ NeonBackendModelContext::NeonBackendModelContext(const ModelOptions& modelOption
             {
                 m_IsFastMathEnabled |= ParseBool(value, false);
             }
+            if (name == "NumberOfThreads")
+            {
+                m_NumberOfThreads |= ParseUnsignedInt(value, 0);
+            }
         });
     }
 }
@@ -42,4 +55,9 @@ bool NeonBackendModelContext::IsFastMathEnabled() const
     return m_IsFastMathEnabled;
 }
 
+unsigned int NeonBackendModelContext::GetNumberOfThreads() const
+{
+    return m_NumberOfThreads;
+}
+
 } // namespace armnn
\ No newline at end of file
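The helper above only reads values that were stored as unsigned int; anything else falls back to the supplied default. A small illustration of that behaviour, assuming the BackendOptions::Var interface used by ParseBool and ParseUnsignedInt (the variable names are just for the example):

    // A value stored with the matching type is read back; a mismatched type falls back.
    armnn::BackendOptions::Var threads{static_cast<unsigned int>(4)};
    armnn::BackendOptions::Var wrongType{true}; // stored as bool, not unsigned int

    unsigned int a = threads.IsUnsignedInt()   ? threads.AsUnsignedInt()   : 0; // 4
    unsigned int b = wrongType.IsUnsignedInt() ? wrongType.AsUnsignedInt() : 0; // 0 (default)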
diff --git a/src/backends/neon/NeonBackendModelContext.hpp b/src/backends/neon/NeonBackendModelContext.hpp
index 430ae45a6f..e736efc1d8 100644
--- a/src/backends/neon/NeonBackendModelContext.hpp
+++ b/src/backends/neon/NeonBackendModelContext.hpp
@@ -14,6 +14,8 @@ namespace armnn
 /// - "FastMathEnabled"\n
 ///   Using the fast_math flag can lead to performance improvements in fp32 and fp16 layers but may result in\n
 ///   results with reduced or different precision. The fast_math flag will not have any effect on int8 performance.
+/// - "NumberOfThreads"\n
+///   Specify the number of threads used by the CpuAcc backend.
 class NeonBackendModelContext : public IBackendModelContext
 {
 public:
@@ -21,8 +23,11 @@ public:
 
     bool IsFastMathEnabled() const;
 
+    unsigned int GetNumberOfThreads() const;
+
 private:
     bool m_IsFastMathEnabled;
+    unsigned int m_NumberOfThreads;
 };
 
 } // namespace armnn
\ No newline at end of file
diff --git a/src/backends/neon/NeonWorkloadFactory.cpp b/src/backends/neon/NeonWorkloadFactory.cpp
index 7d0942874e..28352011dc 100644
--- a/src/backends/neon/NeonWorkloadFactory.cpp
+++ b/src/backends/neon/NeonWorkloadFactory.cpp
@@ -51,15 +51,36 @@ const BackendId& NeonWorkloadFactory::GetBackendId() const
     return s_Id;
 }
 
+void NeonWorkloadFactory::SetNumberOfThreads()
+{
+    if (m_ModelContextPtr)
+    {
+        const unsigned int MIN_THREADS = 1;
+        const unsigned int MAX_THREADS = 64;
+
+        // Set the number of threads to be used if the user has set NumberOfThreads param
+        // Only set if within limit or valid input
+        auto modelOptions = dynamic_cast<NeonBackendModelContext*>(m_ModelContextPtr.get());
+        auto numberOfThreads = modelOptions->GetNumberOfThreads();
+
+        if (numberOfThreads != 0 && numberOfThreads >= MIN_THREADS && numberOfThreads <= MAX_THREADS)
+        {
+            arm_compute::Scheduler::get().set_num_threads(numberOfThreads);
+        }
+    }
+}
+
 NeonWorkloadFactory::NeonWorkloadFactory(const std::shared_ptr<NeonMemoryManager>& memoryManager)
     : m_MemoryManager(memoryManager), m_ModelContextPtr(IBackendInternal::IBackendSpecificModelContextPtr{})
 {
+    SetNumberOfThreads();
 }
 
 NeonWorkloadFactory::NeonWorkloadFactory(const std::shared_ptr<NeonMemoryManager>& memoryManager,
                                          const IBackendInternal::IBackendSpecificModelContextPtr& modelContextPtr)
     : m_MemoryManager(memoryManager), m_ModelContextPtr(modelContextPtr)
 {
+    SetNumberOfThreads();
 }
 
 std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateSubTensorHandle(ITensorHandle& parent,
diff --git a/src/backends/neon/NeonWorkloadFactory.hpp b/src/backends/neon/NeonWorkloadFactory.hpp
index 4817a06540..ee11002033 100644
--- a/src/backends/neon/NeonWorkloadFactory.hpp
+++ b/src/backends/neon/NeonWorkloadFactory.hpp
@@ -11,6 +11,8 @@
 #include <aclCommon/BaseMemoryManager.hpp>
 #include <armnn/utility/IgnoreUnused.hpp>
 
+#include <arm_compute/runtime/IScheduler.h>
+
 namespace armnn
 {
 
@@ -255,6 +257,8 @@ public:
                               const WorkloadInfo& info) const override;
 
 private:
+    void SetNumberOfThreads();
+
     mutable std::shared_ptr<NeonMemoryManager> m_MemoryManager;
     const IBackendInternal::IBackendSpecificModelContextPtr m_ModelContextPtr;
 };
diff --git a/src/backends/neon/test/NeonOptimizedNetworkTests.cpp b/src/backends/neon/test/NeonOptimizedNetworkTests.cpp
index 302711029e..85f06174c7 100644
--- a/src/backends/neon/test/NeonOptimizedNetworkTests.cpp
+++ b/src/backends/neon/test/NeonOptimizedNetworkTests.cpp
@@ -110,4 +110,36 @@ BOOST_AUTO_TEST_CASE(FastMathEnabledTestOnCpuAcc)
     BOOST_TEST(modelOptionsOut[0].GetOption(0).GetValue().AsBool() == true);
 }
 
+BOOST_AUTO_TEST_CASE(NumberOfThreadsTestOnCpuAcc)
+{
+    armnn::INetworkPtr net(armnn::INetwork::Create());
+
+    armnn::IConnectableLayer* input = net->AddInputLayer(0);
+    armnn::IConnectableLayer* output = net->AddOutputLayer(0);
+
+    input->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+    input->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32));
+
+    armnn::IRuntime::CreationOptions options;
+    armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
+
+    unsigned int numberOfThreads = 2;
+
+    std::vector<armnn::BackendId> backends = {armnn::Compute::CpuAcc};
+    armnn::OptimizerOptions optimizerOptions;
+    armnn::BackendOptions modelOptions("CpuAcc", {{"NumberOfThreads", numberOfThreads}});
+    optimizerOptions.m_ModelOptions.push_back(modelOptions);
+
+    armnn::IOptimizedNetworkPtr optimizedNet = armnn::Optimize(
+            *net, backends, runtime->GetDeviceSpec(), optimizerOptions);
+
+    BOOST_CHECK(optimizedNet);
+
+    auto modelOptionsOut = static_cast<armnn::OptimizedNetwork*>(optimizedNet.get())->GetModelOptions();
+
+    BOOST_TEST(modelOptionsOut.size() == 1);
+    BOOST_TEST(modelOptionsOut[0].GetOption(0).GetName() == "NumberOfThreads");
+    BOOST_TEST(modelOptionsOut[0].GetOption(0).GetValue().AsUnsignedInt() == numberOfThreads);
+}
+
 BOOST_AUTO_TEST_SUITE_END()
\ No newline at end of file
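As a usage note, SetNumberOfThreads() ignores a value of 0 and only forwards requests in the 1 to 64 range to the Arm Compute Library scheduler, so a caller that derives the count from the host can clamp it the same way before building the option. A hedged sketch, not part of this patch:

    #include <algorithm>
    #include <thread>

    #include <armnn/BackendOptions.hpp>

    // Derive a CpuAcc thread count from the host and clamp it to the range the
    // backend accepts (1..64); a value of 0 would leave the scheduler default untouched.
    unsigned int requested = std::max(1u, std::min(64u, std::thread::hardware_concurrency()));
    armnn::BackendOptions cpuAccOptions("CpuAcc", {{"NumberOfThreads", requested}});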