From 0a7dc6bba5d0810fe2ed6f84b0376a8b0674c0b3 Mon Sep 17 00:00:00 2001
From: Matthew Sloyan <matthew.sloyan@arm.com>
Date: Wed, 10 Feb 2021 16:50:53 +0000
Subject: IVGCVSW-5685 Add CpuAcc specific configuration option numberOfThreads

* Added the ability to set the number of threads used by the CpuAcc backend
* Enabled the number-of-threads option in ExecuteNetwork
* Added a TfLiteDelegate ModelOptions test
* Added the unsigned int type to BackendOptions.hpp

Signed-off-by: Matthew Sloyan <matthew.sloyan@arm.com>
Change-Id: Ia576d4f45cbe5df3654bc730bb5ebd5181d82b5a
---
 delegate/src/test/DelegateOptionsTest.cpp     | 36 ++++++++++++++++++++++
 include/armnn/BackendOptions.hpp              | 14 +++++++++
 src/backends/neon/NeonBackendModelContext.cpp | 20 +++++++++++-
 src/backends/neon/NeonBackendModelContext.hpp |  5 +++
 src/backends/neon/NeonWorkloadFactory.cpp     | 21 +++++++++++++
 src/backends/neon/NeonWorkloadFactory.hpp     |  4 +++
 .../neon/test/NeonOptimizedNetworkTests.cpp   | 32 +++++++++++++++++++
 tests/ExecuteNetwork/ExecuteNetwork.cpp       |  1 +
 tests/ExecuteNetwork/ExecuteNetworkParams.hpp |  1 +
 .../ExecuteNetworkProgramOptions.cpp          |  6 ++++
 tests/InferenceModel.hpp                      |  5 ++-
 11 files changed, 143 insertions(+), 2 deletions(-)

diff --git a/delegate/src/test/DelegateOptionsTest.cpp b/delegate/src/test/DelegateOptionsTest.cpp
index 23510c7777..6024754be8 100644
--- a/delegate/src/test/DelegateOptionsTest.cpp
+++ b/delegate/src/test/DelegateOptionsTest.cpp
@@ -150,4 +150,40 @@ TEST_CASE ("ArmnnDelegateOptimizerOptionsImport")
 
 }
 
+TEST_SUITE("DelegateOptions_CpuAccTests")
+{
+
+TEST_CASE ("ArmnnDelegateModelOptions_CpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc };
+    std::vector<int32_t> tensorShape { 1, 2, 2, 1 };
+    std::vector<float> inputData = { 1, 2, 3, 4 };
+    std::vector<float> divData = { 2, 2, 3, 4 };
+    std::vector<float> expectedResult = { 1, 2, 2, 2 };
+
+    unsigned int numberOfThreads = 2;
+
+    armnn::ModelOptions modelOptions;
+    armnn::BackendOptions cpuAcc("CpuAcc",
+                                 {
+                                     { "FastMathEnabled", true },
+                                     { "NumberOfThreads", numberOfThreads }
+                                 });
+    modelOptions.push_back(cpuAcc);
+
+    armnn::OptimizerOptions optimizerOptions(false, false, false, false, modelOptions);
+    armnnDelegate::DelegateOptions delegateOptions(backends, optimizerOptions);
+
+    DelegateOptionTest<float>(::tflite::TensorType_FLOAT32,
+                              backends,
+                              tensorShape,
+                              inputData,
+                              inputData,
+                              divData,
+                              expectedResult,
+                              delegateOptions);
+}
+
+}
+
 } // namespace armnnDelegate
diff --git a/include/armnn/BackendOptions.hpp b/include/armnn/BackendOptions.hpp
index 4aee070866..b705f41505 100644
--- a/include/armnn/BackendOptions.hpp
+++ b/include/armnn/BackendOptions.hpp
@@ -24,6 +24,7 @@ private:
     struct CheckAllowed
     {
         static const bool value = std::is_same<T, int>::value ||
+                                  std::is_same<T, unsigned int>::value ||
                                   std::is_same<T, float>::value ||
                                   std::is_same<T, bool>::value ||
                                   std::is_same<T, std::string>::value ||
@@ -38,6 +39,7 @@ public:
 public:
     /// Constructors
     explicit Var(int i) : m_Vals(i), m_Type(VarTypes::Integer) {};
+    explicit Var(unsigned int u) : m_Vals(u), m_Type(VarTypes::UnsignedInteger) {};
     explicit Var(float f) : m_Vals(f), m_Type(VarTypes::Float) {};
     explicit Var(bool b) : m_Vals(b), m_Type(VarTypes::Boolean) {};
     explicit Var(const char* s) : m_Vals(s), m_Type(VarTypes::String) {};
@@ -107,12 +109,14 @@ public:
     /// Type getters
     bool IsBool() const { return m_Type == VarTypes::Boolean; }
     bool IsInt() const { return m_Type == VarTypes::Integer; }
+    bool IsUnsignedInt() const { return m_Type == VarTypes::UnsignedInteger; }
     bool IsFloat() const { return m_Type == VarTypes::Float; }
     bool IsString() const { return m_Type == VarTypes::String; }
 
     /// Value getters
     bool AsBool() const { assert(IsBool()); return m_Vals.b; }
     int AsInt() const { assert(IsInt()); return m_Vals.i; }
+    unsigned int AsUnsignedInt() const { assert(IsUnsignedInt()); return m_Vals.u; }
     float AsFloat() const { assert(IsFloat()); return m_Vals.f; }
     std::string AsString() const { assert(IsString()); return m_Vals.s; }
 
@@ -136,6 +140,10 @@ public:
         {
             func(m_Vals.i, other.m_Vals.i);
         }
+        else if (other.IsUnsignedInt())
+        {
+            func(m_Vals.u, other.m_Vals.u);
+        }
         else if (other.IsFloat())
         {
             func(m_Vals.f, other.m_Vals.f);
@@ -163,12 +171,14 @@ public:
         Integer,
         Float,
         String,
+        UnsignedInteger
     };
 
     /// Union of potential type values.
     union Vals
     {
         int i;
+        unsigned int u;
         float f;
         bool b;
         std::string s;
@@ -177,6 +187,7 @@ public:
         ~Vals(){}
 
         explicit Vals(int i) : i(i) {};
+        explicit Vals(unsigned int u) : u(u) {};
         explicit Vals(float f) : f(f) {};
         explicit Vals(bool b) : b(b) {};
         explicit Vals(const char* s) : s(std::string(s)) {}
@@ -196,6 +207,9 @@ public:
     BackendOption(std::string name, int value)
         : m_Name(name), m_Value(value)
     {}
+    BackendOption(std::string name, unsigned int value)
+        : m_Name(name), m_Value(value)
+    {}
     BackendOption(std::string name, float value)
         : m_Name(name), m_Value(value)
     {}
diff --git a/src/backends/neon/NeonBackendModelContext.cpp b/src/backends/neon/NeonBackendModelContext.cpp
index 2be71e5ded..270592e94d 100644
--- a/src/backends/neon/NeonBackendModelContext.cpp
+++ b/src/backends/neon/NeonBackendModelContext.cpp
@@ -17,13 +17,22 @@ bool ParseBool(const armnn::BackendOptions::Var& value, bool defaultValue)
     return defaultValue;
 }
 
+unsigned int ParseUnsignedInt(const armnn::BackendOptions::Var& value, unsigned int defaultValue)
+{
+    if (value.IsUnsignedInt())
+    {
+        return value.AsUnsignedInt();
+    }
+    return defaultValue;
+}
+
 } // namespace anonymous
 
 namespace armnn
 {
 
 NeonBackendModelContext::NeonBackendModelContext(const ModelOptions& modelOptions)
-    : m_IsFastMathEnabled(false)
+    : m_IsFastMathEnabled(false), m_NumberOfThreads(0)
 {
     if (!modelOptions.empty())
     {
@@ -33,6 +42,10 @@ NeonBackendModelContext::NeonBackendModelContext(const ModelOptions& modelOption
             {
                 m_IsFastMathEnabled |= ParseBool(value, false);
             }
+            if (name == "NumberOfThreads")
+            {
+                m_NumberOfThreads = ParseUnsignedInt(value, 0);
+            }
         });
     }
 }
@@ -42,4 +55,9 @@ bool NeonBackendModelContext::IsFastMathEnabled() const
     return m_IsFastMathEnabled;
 }
 
+unsigned int NeonBackendModelContext::GetNumberOfThreads() const
+{
+    return m_NumberOfThreads;
+}
+
 } // namespace armnn
\ No newline at end of file
diff --git a/src/backends/neon/NeonBackendModelContext.hpp b/src/backends/neon/NeonBackendModelContext.hpp
index 430ae45a6f..e736efc1d8 100644
--- a/src/backends/neon/NeonBackendModelContext.hpp
+++ b/src/backends/neon/NeonBackendModelContext.hpp
@@ -14,6 +14,8 @@ namespace armnn
 /// - "FastMathEnabled"\n
 ///    Using the fast_math flag can lead to performance improvements in fp32 and fp16 layers but may result in\n
 ///    results with reduced or different precision. The fast_math flag will not have any effect on int8 performance.
+/// - "NumberOfThreads"\n
+///    Specifies the number of threads used by the CpuAcc backend.
 class NeonBackendModelContext : public IBackendModelContext
 {
 public:
@@ -21,8 +23,11 @@ public:
 
     bool IsFastMathEnabled() const;
 
+    unsigned int GetNumberOfThreads() const;
+
 private:
     bool m_IsFastMathEnabled;
+    unsigned int m_NumberOfThreads;
 };
 
 } // namespace armnn
\ No newline at end of file
diff --git a/src/backends/neon/NeonWorkloadFactory.cpp b/src/backends/neon/NeonWorkloadFactory.cpp
index 7d0942874e..28352011dc 100644
--- a/src/backends/neon/NeonWorkloadFactory.cpp
+++ b/src/backends/neon/NeonWorkloadFactory.cpp
@@ -51,15 +51,36 @@ const BackendId& NeonWorkloadFactory::GetBackendId() const
     return s_Id;
 }
 
+void NeonWorkloadFactory::SetNumberOfThreads()
+{
+    if (m_ModelContextPtr)
+    {
+        const unsigned int MIN_THREADS = 1;
+        const unsigned int MAX_THREADS = 64;
+
+        // Set the number of threads to use if the user has set the NumberOfThreads option,
+        // but only apply it when the requested value is within the valid range.
+        auto modelOptions = dynamic_cast<NeonBackendModelContext*>(m_ModelContextPtr.get());
+        auto numberOfThreads = modelOptions->GetNumberOfThreads();
+
+        if (numberOfThreads != 0 && numberOfThreads >= MIN_THREADS && numberOfThreads <= MAX_THREADS)
+        {
+            arm_compute::Scheduler::get().set_num_threads(numberOfThreads);
+        }
+    }
+}
+
 NeonWorkloadFactory::NeonWorkloadFactory(const std::shared_ptr<NeonMemoryManager>& memoryManager)
     : m_MemoryManager(memoryManager), m_ModelContextPtr(IBackendInternal::IBackendSpecificModelContextPtr{})
 {
+    SetNumberOfThreads();
 }
 
 NeonWorkloadFactory::NeonWorkloadFactory(const std::shared_ptr<NeonMemoryManager>& memoryManager,
                                          const IBackendInternal::IBackendSpecificModelContextPtr& modelContextPtr)
     : m_MemoryManager(memoryManager), m_ModelContextPtr(modelContextPtr)
 {
+    SetNumberOfThreads();
 }
 
 std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateSubTensorHandle(ITensorHandle& parent,
diff --git a/src/backends/neon/NeonWorkloadFactory.hpp b/src/backends/neon/NeonWorkloadFactory.hpp
index 4817a06540..ee11002033 100644
--- a/src/backends/neon/NeonWorkloadFactory.hpp
+++ b/src/backends/neon/NeonWorkloadFactory.hpp
@@ -11,6 +11,8 @@
 #include <armnn/Optional.hpp>
 #include <aclCommon/BaseMemoryManager.hpp>
 
+#include <arm_compute/runtime/IScheduler.h>
+
 namespace armnn
 {
 
@@ -255,6 +257,8 @@ public:
                                              const WorkloadInfo& info) const override;
 
 private:
+    void SetNumberOfThreads();
+
     mutable std::shared_ptr<NeonMemoryManager> m_MemoryManager;
     const IBackendInternal::IBackendSpecificModelContextPtr m_ModelContextPtr;
 };
diff --git a/src/backends/neon/test/NeonOptimizedNetworkTests.cpp b/src/backends/neon/test/NeonOptimizedNetworkTests.cpp
index 302711029e..85f06174c7 100644
--- a/src/backends/neon/test/NeonOptimizedNetworkTests.cpp
+++ b/src/backends/neon/test/NeonOptimizedNetworkTests.cpp
@@ -110,4 +110,36 @@ BOOST_AUTO_TEST_CASE(FastMathEnabledTestOnCpuAcc)
     BOOST_TEST(modelOptionsOut[0].GetOption(0).GetValue().AsBool() == true);
 }
 
+BOOST_AUTO_TEST_CASE(NumberOfThreadsTestOnCpuAcc)
+{
+    armnn::INetworkPtr net(armnn::INetwork::Create());
+
+    armnn::IConnectableLayer* input  = net->AddInputLayer(0);
+    armnn::IConnectableLayer* output = net->AddOutputLayer(0);
+
+    input->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+    input->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32));
+
+    armnn::IRuntime::CreationOptions options;
+    armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
+
+    unsigned int numberOfThreads = 2;
+
+    std::vector<armnn::BackendId> backends = {armnn::Compute::CpuAcc};
+    armnn::OptimizerOptions optimizerOptions;
+    armnn::BackendOptions modelOptions("CpuAcc", {{"NumberOfThreads", numberOfThreads}});
+    optimizerOptions.m_ModelOptions.push_back(modelOptions);
+
+    armnn::IOptimizedNetworkPtr optimizedNet = armnn::Optimize(
+            *net, backends, runtime->GetDeviceSpec(), optimizerOptions);
+
+    BOOST_CHECK(optimizedNet);
+
+    auto modelOptionsOut = static_cast<armnn::OptimizedNetwork*>(optimizedNet.get())->GetModelOptions();
+
+    BOOST_TEST(modelOptionsOut.size() == 1);
+    BOOST_TEST(modelOptionsOut[0].GetOption(0).GetName() == "NumberOfThreads");
+    BOOST_TEST(modelOptionsOut[0].GetOption(0).GetValue().AsUnsignedInt() == numberOfThreads);
+}
+
 BOOST_AUTO_TEST_SUITE_END()
\ No newline at end of file
diff --git a/tests/ExecuteNetwork/ExecuteNetwork.cpp b/tests/ExecuteNetwork/ExecuteNetwork.cpp
index 5df5dfbce7..c19f519c73 100644
--- a/tests/ExecuteNetwork/ExecuteNetwork.cpp
+++ b/tests/ExecuteNetwork/ExecuteNetwork.cpp
@@ -308,6 +308,7 @@ int MainImpl(const ExecuteNetworkParams& params,
         inferenceModelParams.m_EnableFastMath = params.m_EnableFastMath;
         inferenceModelParams.m_SaveCachedNetwork = params.m_SaveCachedNetwork;
         inferenceModelParams.m_CachedNetworkFilePath = params.m_CachedNetworkFilePath;
+        inferenceModelParams.m_NumberOfThreads = params.m_NumberOfThreads;
 
         for(const std::string& inputName: params.m_InputNames)
         {
diff --git a/tests/ExecuteNetwork/ExecuteNetworkParams.hpp b/tests/ExecuteNetwork/ExecuteNetworkParams.hpp
index 56d32907b8..830270adbc 100644
--- a/tests/ExecuteNetwork/ExecuteNetworkParams.hpp
+++ b/tests/ExecuteNetwork/ExecuteNetworkParams.hpp
@@ -34,6 +34,7 @@ struct ExecuteNetworkParams
     size_t                    m_Iterations;
     std::string               m_ModelFormat;
     std::string               m_ModelPath;
+    unsigned int              m_NumberOfThreads;
     std::vector<std::string>  m_OutputNames;
     std::vector<std::string>  m_OutputTensorFiles;
     std::vector<std::string>  m_OutputTypes;
diff --git a/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp b/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp
index a080e57d0c..73da1f1d1d 100644
--- a/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp
+++ b/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp
@@ -276,6 +276,12 @@ ProgramOptions::ProgramOptions() : m_CxxOptions{"ExecuteNetwork",
                 "performance improvements but may result in reduced or different precision.",
                 cxxopts::value<bool>(m_ExNetParams.m_EnableFastMath)->default_value("false")->implicit_value("true"))
 
+        ("number-of-threads",
+                "Assign the number of threads used by the CpuAcc backend. "
+                "Input value must be between 1 and 64. "
+                "Default is set to 0 (the backend decides the number of threads to use).",
+                cxxopts::value<unsigned int>(m_ExNetParams.m_NumberOfThreads)->default_value("0"))
+
         ("save-cached-network",
                 "Enables saving of the cached network to a file given with the cached-network-filepath option. "
                 "See also --cached-network-filepath",
diff --git a/tests/InferenceModel.hpp b/tests/InferenceModel.hpp
index 936d0bf9ea..d20bb2271f 100644
--- a/tests/InferenceModel.hpp
+++ b/tests/InferenceModel.hpp
@@ -99,6 +99,7 @@ struct Params
     bool                            m_EnableFastMath;
     bool                            m_SaveCachedNetwork;
     std::string                     m_CachedNetworkFilePath;
+    unsigned int                    m_NumberOfThreads;
 
     Params()
         : m_ComputeDevices{}
@@ -113,6 +114,7 @@ struct Params
         , m_EnableFastMath(false)
         , m_SaveCachedNetwork(false)
         , m_CachedNetworkFilePath("")
+        , m_NumberOfThreads(0)
     {}
 };
 
@@ -436,7 +438,8 @@ public:
             });
             armnn::BackendOptions cpuAcc("CpuAcc",
             {
-                { "FastMathEnabled", params.m_EnableFastMath }
+                { "FastMathEnabled", params.m_EnableFastMath },
+                { "NumberOfThreads", params.m_NumberOfThreads }
             });
             options.m_ModelOptions.push_back(gpuAcc);
             options.m_ModelOptions.push_back(cpuAcc);
-- 
cgit v1.2.1
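
For reference, a minimal sketch of how an application might opt in to the new
NumberOfThreads option once this change is in place. The trivial input-to-output
network and the thread count of 4 are illustrative assumptions that mirror the
NumberOfThreadsTestOnCpuAcc case above, not part of the change itself:

    #include <armnn/ArmNN.hpp>
    #include <armnn/BackendOptions.hpp>

    #include <utility>
    #include <vector>

    int main()
    {
        // Build a trivial network: one input connected straight to one output.
        armnn::INetworkPtr net(armnn::INetwork::Create());
        armnn::IConnectableLayer* input  = net->AddInputLayer(0);
        armnn::IConnectableLayer* output = net->AddOutputLayer(0);
        input->GetOutputSlot(0).Connect(output->GetInputSlot(0));
        input->GetOutputSlot(0).SetTensorInfo(
            armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32));

        armnn::IRuntime::CreationOptions creationOptions;
        armnn::IRuntimePtr runtime(armnn::IRuntime::Create(creationOptions));

        // Request four scheduler threads from CpuAcc. The value must be an
        // unsigned int, or the option is ignored by ParseUnsignedInt above.
        unsigned int numberOfThreads = 4; // illustrative value
        armnn::OptimizerOptions optimizerOptions;
        optimizerOptions.m_ModelOptions.push_back(
            armnn::BackendOptions("CpuAcc", {{ "NumberOfThreads", numberOfThreads }}));

        std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc };
        armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(
            *net, backends, runtime->GetDeviceSpec(), optimizerOptions);

        // The option takes effect when the network is loaded and the Neon
        // workload factory is constructed (see SetNumberOfThreads above).
        armnn::NetworkId networkId;
        runtime->LoadNetwork(networkId, std::move(optNet));
        return 0;
    }

The same control is exposed on the ExecuteNetwork command line, for example
(model path and format shown for illustration):

    ExecuteNetwork -c CpuAcc -f tflite-binary -m model.tflite --number-of-threads 4

Values outside the 1-64 range, including the default of 0, leave the Arm
Compute Library scheduler's own thread-count default in place.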