about summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
authorMatthew Sloyan <matthew.sloyan@arm.com>2021-02-10 16:50:53 +0000
committerMatthew Sloyan <matthew.sloyan@arm.com>2021-02-12 17:56:58 +0000
commit0a7dc6bba5d0810fe2ed6f84b0376a8b0674c0b3 (patch)
tree6931067f0e0ff25be7d6a01d379f4b1497dff424 /src
parentb454c5c65efb238c130b042ace390b2bc7f0bf75 (diff)
downloadarmnn-0a7dc6bba5d0810fe2ed6f84b0376a8b0674c0b3.tar.gz
IVGCVSW-5685 Add CpuAcc specific configuration option numberOfThreads
* Added ability to set number of threads used in CpuAcc backend * Enabled number-of-threads option in ExecuteNetwork * Added TfLiteDelegate ModelOptions test * Added unsigned int type to BackendOptions.hpp Signed-off-by: Matthew Sloyan <matthew.sloyan@arm.com> Change-Id: Ia576d4f45cbe5df3654bc730bb5ebd5181d82b5a
Diffstat (limited to 'src')
-rw-r--r--src/backends/neon/NeonBackendModelContext.cpp20
-rw-r--r--src/backends/neon/NeonBackendModelContext.hpp5
-rw-r--r--src/backends/neon/NeonWorkloadFactory.cpp21
-rw-r--r--src/backends/neon/NeonWorkloadFactory.hpp4
-rw-r--r--src/backends/neon/test/NeonOptimizedNetworkTests.cpp32
5 files changed, 81 insertions, 1 deletion
diff --git a/src/backends/neon/NeonBackendModelContext.cpp b/src/backends/neon/NeonBackendModelContext.cpp
index 2be71e5ded..270592e94d 100644
--- a/src/backends/neon/NeonBackendModelContext.cpp
+++ b/src/backends/neon/NeonBackendModelContext.cpp
@@ -17,13 +17,22 @@ bool ParseBool(const armnn::BackendOptions::Var& value, bool defaultValue)
return defaultValue;
}
+unsigned int ParseUnsignedInt(const armnn::BackendOptions::Var& value, unsigned int defaultValue)
+{
+ if (value.IsUnsignedInt())
+ {
+ return value.AsUnsignedInt();
+ }
+ return defaultValue;
+}
+
} // namespace anonymous
namespace armnn
{
NeonBackendModelContext::NeonBackendModelContext(const ModelOptions& modelOptions)
- : m_IsFastMathEnabled(false)
+ : m_IsFastMathEnabled(false), m_NumberOfThreads(0)
{
if (!modelOptions.empty())
{
@@ -33,6 +42,10 @@ NeonBackendModelContext::NeonBackendModelContext(const ModelOptions& modelOption
{
m_IsFastMathEnabled |= ParseBool(value, false);
}
+ if (name == "NumberOfThreads")
+ {
+ m_NumberOfThreads |= ParseUnsignedInt(value, 0);
+ }
});
}
}
@@ -42,4 +55,9 @@ bool NeonBackendModelContext::IsFastMathEnabled() const
return m_IsFastMathEnabled;
}
+unsigned int NeonBackendModelContext::GetNumberOfThreads() const
+{
+ return m_NumberOfThreads;
+}
+
} // namespace armnn \ No newline at end of file
diff --git a/src/backends/neon/NeonBackendModelContext.hpp b/src/backends/neon/NeonBackendModelContext.hpp
index 430ae45a6f..e736efc1d8 100644
--- a/src/backends/neon/NeonBackendModelContext.hpp
+++ b/src/backends/neon/NeonBackendModelContext.hpp
@@ -14,6 +14,8 @@ namespace armnn
/// - "FastMathEnabled"\n
/// Using the fast_math flag can lead to performance improvements in fp32 and fp16 layers but may result in\n
/// results with reduced or different precision. The fast_math flag will not have any effect on int8 performance.
+/// - "NumberOfThreads"\n
+/// Specify the number of threads used by the CpuAcc backend.
class NeonBackendModelContext : public IBackendModelContext
{
public:
@@ -21,8 +23,11 @@ public:
bool IsFastMathEnabled() const;
+ unsigned int GetNumberOfThreads() const;
+
private:
bool m_IsFastMathEnabled;
+ unsigned int m_NumberOfThreads;
};
} // namespace armnn \ No newline at end of file
diff --git a/src/backends/neon/NeonWorkloadFactory.cpp b/src/backends/neon/NeonWorkloadFactory.cpp
index 7d0942874e..28352011dc 100644
--- a/src/backends/neon/NeonWorkloadFactory.cpp
+++ b/src/backends/neon/NeonWorkloadFactory.cpp
@@ -51,15 +51,36 @@ const BackendId& NeonWorkloadFactory::GetBackendId() const
return s_Id;
}
+void NeonWorkloadFactory::SetNumberOfThreads()
+{
+ if (m_ModelContextPtr)
+ {
+ const unsigned int MIN_THREADS = 1;
+ const unsigned int MAX_THREADS = 64;
+
+ // Set the number of threads to be used if the user has set NumberOfThreads param
+ // Only set if within limit or valid input
+ auto modelOptions = dynamic_cast<NeonBackendModelContext*>(m_ModelContextPtr.get());
+ auto numberOfThreads = modelOptions->GetNumberOfThreads();
+
+ if (numberOfThreads != 0 && numberOfThreads >= MIN_THREADS && numberOfThreads <= MAX_THREADS)
+ {
+ arm_compute::Scheduler::get().set_num_threads(numberOfThreads);
+ }
+ }
+}
+
NeonWorkloadFactory::NeonWorkloadFactory(const std::shared_ptr<NeonMemoryManager>& memoryManager)
: m_MemoryManager(memoryManager), m_ModelContextPtr(IBackendInternal::IBackendSpecificModelContextPtr{})
{
+ SetNumberOfThreads();
}
NeonWorkloadFactory::NeonWorkloadFactory(const std::shared_ptr<NeonMemoryManager>& memoryManager,
const IBackendInternal::IBackendSpecificModelContextPtr& modelContextPtr)
: m_MemoryManager(memoryManager), m_ModelContextPtr(modelContextPtr)
{
+ SetNumberOfThreads();
}
std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateSubTensorHandle(ITensorHandle& parent,
diff --git a/src/backends/neon/NeonWorkloadFactory.hpp b/src/backends/neon/NeonWorkloadFactory.hpp
index 4817a06540..ee11002033 100644
--- a/src/backends/neon/NeonWorkloadFactory.hpp
+++ b/src/backends/neon/NeonWorkloadFactory.hpp
@@ -11,6 +11,8 @@
#include <aclCommon/BaseMemoryManager.hpp>
#include <armnn/utility/IgnoreUnused.hpp>
+#include <arm_compute/runtime/IScheduler.h>
+
namespace armnn
{
@@ -255,6 +257,8 @@ public:
const WorkloadInfo& info) const override;
private:
+ void SetNumberOfThreads();
+
mutable std::shared_ptr<NeonMemoryManager> m_MemoryManager;
const IBackendInternal::IBackendSpecificModelContextPtr m_ModelContextPtr;
};
diff --git a/src/backends/neon/test/NeonOptimizedNetworkTests.cpp b/src/backends/neon/test/NeonOptimizedNetworkTests.cpp
index 302711029e..85f06174c7 100644
--- a/src/backends/neon/test/NeonOptimizedNetworkTests.cpp
+++ b/src/backends/neon/test/NeonOptimizedNetworkTests.cpp
@@ -110,4 +110,36 @@ BOOST_AUTO_TEST_CASE(FastMathEnabledTestOnCpuAcc)
BOOST_TEST(modelOptionsOut[0].GetOption(0).GetValue().AsBool() == true);
}
+BOOST_AUTO_TEST_CASE(NumberOfThreadsTestOnCpuAcc)
+{
+ armnn::INetworkPtr net(armnn::INetwork::Create());
+
+ armnn::IConnectableLayer* input = net->AddInputLayer(0);
+ armnn::IConnectableLayer* output = net->AddOutputLayer(0);
+
+ input->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+ input->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32));
+
+ armnn::IRuntime::CreationOptions options;
+ armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
+
+ unsigned int numberOfThreads = 2;
+
+ std::vector<armnn::BackendId> backends = {armnn::Compute::CpuAcc};
+ armnn::OptimizerOptions optimizerOptions;
+ armnn::BackendOptions modelOptions("CpuAcc", {{"NumberOfThreads", numberOfThreads}});
+ optimizerOptions.m_ModelOptions.push_back(modelOptions);
+
+ armnn::IOptimizedNetworkPtr optimizedNet = armnn::Optimize(
+ *net, backends, runtime->GetDeviceSpec(), optimizerOptions);
+
+ BOOST_CHECK(optimizedNet);
+
+ auto modelOptionsOut = static_cast<armnn::OptimizedNetwork*>(optimizedNet.get())->GetModelOptions();
+
+ BOOST_TEST(modelOptionsOut.size() == 1);
+ BOOST_TEST(modelOptionsOut[0].GetOption(0).GetName() == "NumberOfThreads");
+ BOOST_TEST(modelOptionsOut[0].GetOption(0).GetValue().AsUnsignedInt() == numberOfThreads);
+}
+
BOOST_AUTO_TEST_SUITE_END() \ No newline at end of file