diff options
author | Finn Williams <Finn.Williams@arm.com> | 2021-07-08 13:07:19 +0100 |
---|---|---|
committer | Finn Williams <Finn.Williams@arm.com> | 2021-07-12 17:09:06 +0100 |
commit | fdf2eaea1773d066dbb48e3d214ccd5446fa918a (patch) | |
tree | 5b68a1a30e65bf0cdac65199bef5ea11eefaa442 | |
parent | f769f292ba506784c8d21d691de821f4e3b53fec (diff) | |
download | android-nn-driver-fdf2eaea1773d066dbb48e3d214ccd5446fa918a.tar.gz |
IVGCVSW-6062 update ArmnnPreparedModels to have a single static instance of the threadpool
Signed-off-by: Finn Williams <Finn.Williams@arm.com>
Change-Id: Ie350a11eab5d677dd6a5571ea4094aa51b23c501
-rw-r--r-- | ArmnnPreparedModel.cpp | 21 | ||||
-rw-r--r-- | ArmnnPreparedModel.hpp | 20 | ||||
-rw-r--r-- | ArmnnPreparedModel_1_2.cpp | 21 | ||||
-rw-r--r-- | ArmnnPreparedModel_1_2.hpp | 3 | ||||
-rw-r--r-- | ArmnnPreparedModel_1_3.cpp | 21 | ||||
-rw-r--r-- | ArmnnPreparedModel_1_3.hpp | 27 |
6 files changed, 85 insertions, 28 deletions
diff --git a/ArmnnPreparedModel.cpp b/ArmnnPreparedModel.cpp
index 388a1116..f14560a1 100644
--- a/ArmnnPreparedModel.cpp
+++ b/ArmnnPreparedModel.cpp
@@ -90,6 +90,9 @@ RequestThread<ArmnnPreparedModel, HalVersion, CallbackContext_1_0>
     ArmnnPreparedModel<HalVersion>::m_RequestThread;
 
 template<typename HalVersion>
+std::unique_ptr<armnn::Threadpool> ArmnnPreparedModel<HalVersion>::m_Threadpool(nullptr);
+
+template<typename HalVersion>
 template <typename TensorBindingCollection>
 void ArmnnPreparedModel<HalVersion>::DumpTensorsIfRequired(char const* tensorNamePrefix,
                                                            const TensorBindingCollection& tensorBindings)
@@ -126,7 +129,7 @@ ArmnnPreparedModel<HalVersion>::ArmnnPreparedModel(armnn::NetworkId networkId,
     // Enable profiling if required.
     m_Runtime->GetProfiler(m_NetworkId)->EnableProfiling(m_GpuProfilingEnabled);
 
-    if (asyncModelExecutionEnabled)
+    if (m_AsyncModelExecutionEnabled)
     {
         std::vector<std::shared_ptr<armnn::IWorkingMemHandle>> memHandles;
         for (unsigned int i=0; i < numberOfThreads; ++i)
@@ -134,8 +137,16 @@ ArmnnPreparedModel<HalVersion>::ArmnnPreparedModel(armnn::NetworkId networkId,
             memHandles.emplace_back(m_Runtime->CreateWorkingMemHandle(networkId));
         }
 
+        if (!m_Threadpool)
+        {
+            m_Threadpool = std::make_unique<armnn::Threadpool>(numberOfThreads, runtime, memHandles);
+        }
+        else
+        {
+            m_Threadpool->LoadMemHandles(memHandles);
+        }
+
         m_WorkingMemHandle = memHandles.back();
-        m_Threadpool = std::make_unique<armnn::Threadpool>(numberOfThreads, runtime, memHandles);
     }
 }
 
@@ -148,6 +159,12 @@ ArmnnPreparedModel<HalVersion>::~ArmnnPreparedModel()
     // Unload the network associated with this model.
     m_Runtime->UnloadNetwork(m_NetworkId);
 
+    // Unload the network memhandles from the threadpool
+    if (m_AsyncModelExecutionEnabled)
+    {
+        m_Threadpool->UnloadMemHandles(m_NetworkId);
+    }
+
     // Dump the profiling info to a file if required.
     DumpJsonProfilingIfRequired(m_GpuProfilingEnabled, m_RequestInputsAndOutputsDumpDir, m_NetworkId, profiler.get());
 }
diff --git a/ArmnnPreparedModel.hpp b/ArmnnPreparedModel.hpp
index 58177d92..685d950e 100644
--- a/ArmnnPreparedModel.hpp
+++ b/ArmnnPreparedModel.hpp
@@ -96,17 +96,19 @@ private:
             std::shared_ptr<armnn::OutputTensors>& outputTensors,
             CallbackContext m_CallbackContext);
 
-    armnn::NetworkId m_NetworkId;
-    armnn::IRuntime* m_Runtime;
-    std::unique_ptr<armnn::Threadpool> m_Threadpool;
-    HalModel m_Model;
+    armnn::NetworkId m_NetworkId;
+    armnn::IRuntime* m_Runtime;
+    HalModel m_Model;
     // There must be a single RequestThread for all ArmnnPreparedModel objects to ensure serial execution of workloads
     // It is specific to this class, so it is declared as static here
-    static RequestThread<ArmnnPreparedModel, HalVersion, CallbackContext_1_0> m_RequestThread;
-    uint32_t m_RequestCount;
-    const std::string& m_RequestInputsAndOutputsDumpDir;
-    const bool m_GpuProfilingEnabled;
-
+    static RequestThread<ArmnnPreparedModel,
+                         HalVersion,
+                         CallbackContext_1_0> m_RequestThread;
+    uint32_t m_RequestCount;
+    const std::string& m_RequestInputsAndOutputsDumpDir;
+    const bool m_GpuProfilingEnabled;
+    // Static to allow sharing of threadpool between ArmnnPreparedModel instances
+    static std::unique_ptr<armnn::Threadpool> m_Threadpool;
     std::shared_ptr<armnn::IWorkingMemHandle> m_WorkingMemHandle;
     const bool m_AsyncModelExecutionEnabled;
 };
diff --git a/ArmnnPreparedModel_1_2.cpp b/ArmnnPreparedModel_1_2.cpp
index e46b5be3..5a10d546 100644
--- a/ArmnnPreparedModel_1_2.cpp
+++ b/ArmnnPreparedModel_1_2.cpp
@@ -125,6 +125,9 @@ RequestThread<ArmnnPreparedModel_1_2, HalVersion, CallbackContext_1_2>
     ArmnnPreparedModel_1_2<HalVersion>::m_RequestThread;
 
 template<typename HalVersion>
+std::unique_ptr<armnn::Threadpool> ArmnnPreparedModel_1_2<HalVersion>::m_Threadpool(nullptr);
+
+template<typename HalVersion>
 template<typename TensorBindingCollection>
 void ArmnnPreparedModel_1_2<HalVersion>::DumpTensorsIfRequired(char const* tensorNamePrefix,
                                                                const TensorBindingCollection& tensorBindings)
@@ -161,7 +164,7 @@ ArmnnPreparedModel_1_2<HalVersion>::ArmnnPreparedModel_1_2(armnn::NetworkId netw
     // Enable profiling if required.
     m_Runtime->GetProfiler(m_NetworkId)->EnableProfiling(m_GpuProfilingEnabled);
 
-    if (asyncModelExecutionEnabled)
+    if (m_AsyncModelExecutionEnabled)
     {
         std::vector<std::shared_ptr<armnn::IWorkingMemHandle>> memHandles;
         for (unsigned int i=0; i < numberOfThreads; ++i)
@@ -169,8 +172,16 @@ ArmnnPreparedModel_1_2<HalVersion>::ArmnnPreparedModel_1_2(armnn::NetworkId netw
             memHandles.emplace_back(m_Runtime->CreateWorkingMemHandle(networkId));
         }
 
+        if (!m_Threadpool)
+        {
+            m_Threadpool = std::make_unique<armnn::Threadpool>(numberOfThreads, runtime, memHandles);
+        }
+        else
+        {
+            m_Threadpool->LoadMemHandles(memHandles);
+        }
+
         m_WorkingMemHandle = memHandles.back();
-        m_Threadpool = std::make_unique<armnn::Threadpool>(numberOfThreads, runtime, memHandles);
     }
 }
 
@@ -183,6 +194,12 @@ ArmnnPreparedModel_1_2<HalVersion>::~ArmnnPreparedModel_1_2()
     // Unload the network associated with this model.
     m_Runtime->UnloadNetwork(m_NetworkId);
 
+    // Unload the network memhandles from the threadpool
+    if (m_AsyncModelExecutionEnabled)
+    {
+        m_Threadpool->UnloadMemHandles(m_NetworkId);
+    }
+
     // Dump the profiling info to a file if required.
     DumpJsonProfilingIfRequired(m_GpuProfilingEnabled, m_RequestInputsAndOutputsDumpDir, m_NetworkId, profiler.get());
 }
diff --git a/ArmnnPreparedModel_1_2.hpp b/ArmnnPreparedModel_1_2.hpp
index 4ee2b817..c64c891e 100644
--- a/ArmnnPreparedModel_1_2.hpp
+++ b/ArmnnPreparedModel_1_2.hpp
@@ -143,7 +143,6 @@ private:
 
     armnn::NetworkId m_NetworkId;
     armnn::IRuntime* m_Runtime;
-    std::unique_ptr<armnn::Threadpool> m_Threadpool;
     V1_2::Model m_Model;
     // There must be a single RequestThread for all ArmnnPreparedModel objects to ensure serial execution of workloads
     // It is specific to this class, so it is declared as static here
@@ -153,6 +152,8 @@ private:
     uint32_t m_RequestCount;
     const std::string& m_RequestInputsAndOutputsDumpDir;
     const bool m_GpuProfilingEnabled;
+    // Static to allow sharing of threadpool between ArmnnPreparedModel instances
+    static std::unique_ptr<armnn::Threadpool> m_Threadpool;
     std::shared_ptr<IWorkingMemHandle> m_WorkingMemHandle;
     const bool m_AsyncModelExecutionEnabled;
 };
diff --git a/ArmnnPreparedModel_1_3.cpp b/ArmnnPreparedModel_1_3.cpp
index dcac2813..16ea113c 100644
--- a/ArmnnPreparedModel_1_3.cpp
+++ b/ArmnnPreparedModel_1_3.cpp
@@ -145,6 +145,9 @@ RequestThread_1_3<ArmnnPreparedModel_1_3, HalVersion, CallbackContext_1_3>
     ArmnnPreparedModel_1_3<HalVersion>::m_RequestThread;
 
 template<typename HalVersion>
+std::unique_ptr<armnn::Threadpool> ArmnnPreparedModel_1_3<HalVersion>::m_Threadpool(nullptr);
+
+template<typename HalVersion>
 template<typename TensorBindingCollection>
 void ArmnnPreparedModel_1_3<HalVersion>::DumpTensorsIfRequired(char const* tensorNamePrefix,
                                                                const TensorBindingCollection& tensorBindings)
@@ -183,7 +186,7 @@ ArmnnPreparedModel_1_3<HalVersion>::ArmnnPreparedModel_1_3(armnn::NetworkId netw
     // Enable profiling if required.
     m_Runtime->GetProfiler(m_NetworkId)->EnableProfiling(m_GpuProfilingEnabled);
 
-    if (asyncModelExecutionEnabled)
+    if (m_AsyncModelExecutionEnabled)
     {
         std::vector<std::shared_ptr<armnn::IWorkingMemHandle>> memHandles;
         for (unsigned int i=0; i < numberOfThreads; ++i)
@@ -191,8 +194,16 @@ ArmnnPreparedModel_1_3<HalVersion>::ArmnnPreparedModel_1_3(armnn::NetworkId netw
             memHandles.emplace_back(m_Runtime->CreateWorkingMemHandle(networkId));
         }
 
+        if (!m_Threadpool)
+        {
+            m_Threadpool = std::make_unique<armnn::Threadpool>(numberOfThreads, runtime, memHandles);
+        }
+        else
+        {
+            m_Threadpool->LoadMemHandles(memHandles);
+        }
+
         m_WorkingMemHandle = memHandles.back();
-        m_Threadpool = std::make_unique<armnn::Threadpool>(numberOfThreads, runtime, memHandles);
     }
 }
 
@@ -205,6 +216,12 @@ ArmnnPreparedModel_1_3<HalVersion>::~ArmnnPreparedModel_1_3()
     // Unload the network associated with this model.
     m_Runtime->UnloadNetwork(m_NetworkId);
 
+    // Unload the network memhandles from the threadpool
+    if (m_AsyncModelExecutionEnabled)
+    {
+        m_Threadpool->UnloadMemHandles(m_NetworkId);
+    }
+
     // Dump the profiling info to a file if required.
     DumpJsonProfilingIfRequired(m_GpuProfilingEnabled, m_RequestInputsAndOutputsDumpDir, m_NetworkId, profiler.get());
 }
diff --git a/ArmnnPreparedModel_1_3.hpp b/ArmnnPreparedModel_1_3.hpp
index 46798cde..a245cc4c 100644
--- a/ArmnnPreparedModel_1_3.hpp
+++ b/ArmnnPreparedModel_1_3.hpp
@@ -175,20 +175,23 @@ private:
             CallbackContext m_CallbackContext,
             armnn::QosExecPriority priority);
 
-    armnn::NetworkId m_NetworkId;
-    armnn::IRuntime* m_Runtime;
-    std::unique_ptr<armnn::Threadpool> m_Threadpool;
-    V1_3::Model m_Model;
+    armnn::NetworkId m_NetworkId;
+    armnn::IRuntime* m_Runtime;
+    V1_3::Model m_Model;
     // There must be a single RequestThread for all ArmnnPreparedModel objects to ensure serial execution of workloads
     // It is specific to this class, so it is declared as static here
-    static RequestThread_1_3<ArmnnPreparedModel_1_3, HalVersion, CallbackContext_1_3> m_RequestThread;
-    uint32_t m_RequestCount;
-    const std::string& m_RequestInputsAndOutputsDumpDir;
-    const bool m_GpuProfilingEnabled;
-    V1_3::Priority m_ModelPriority;
-
-    std::shared_ptr<IWorkingMemHandle> m_WorkingMemHandle;
-    const bool m_AsyncModelExecutionEnabled;
+    static RequestThread_1_3<ArmnnPreparedModel_1_3,
+                             HalVersion,
+                             CallbackContext_1_3> m_RequestThread;
+    uint32_t m_RequestCount;
+    const std::string& m_RequestInputsAndOutputsDumpDir;
+    const bool m_GpuProfilingEnabled;
+    V1_3::Priority m_ModelPriority;
+
+    // Static to allow sharing of threadpool between ArmnnPreparedModel instances
+    static std::unique_ptr<armnn::Threadpool> m_Threadpool;
+    std::shared_ptr<IWorkingMemHandle> m_WorkingMemHandle;
+    const bool m_AsyncModelExecutionEnabled;
 };
 
 }