From fdf2eaea1773d066dbb48e3d214ccd5446fa918a Mon Sep 17 00:00:00 2001 From: Finn Williams Date: Thu, 8 Jul 2021 13:07:19 +0100 Subject: IVGCVSW-6062 update ArmnnPreparedModels to have a single static instance of the threadpool Signed-off-by: Finn Williams Change-Id: Ie350a11eab5d677dd6a5571ea4094aa51b23c501 --- ArmnnPreparedModel.cpp | 21 +++++++++++++++++++-- ArmnnPreparedModel.hpp | 20 +++++++++++--------- ArmnnPreparedModel_1_2.cpp | 21 +++++++++++++++++++-- ArmnnPreparedModel_1_2.hpp | 3 ++- ArmnnPreparedModel_1_3.cpp | 21 +++++++++++++++++++-- ArmnnPreparedModel_1_3.hpp | 27 +++++++++++++++------------ 6 files changed, 85 insertions(+), 28 deletions(-) diff --git a/ArmnnPreparedModel.cpp b/ArmnnPreparedModel.cpp index 388a1116..f14560a1 100644 --- a/ArmnnPreparedModel.cpp +++ b/ArmnnPreparedModel.cpp @@ -89,6 +89,9 @@ template RequestThread ArmnnPreparedModel::m_RequestThread; +template +std::unique_ptr ArmnnPreparedModel::m_Threadpool(nullptr); + template template void ArmnnPreparedModel::DumpTensorsIfRequired(char const* tensorNamePrefix, @@ -126,7 +129,7 @@ ArmnnPreparedModel::ArmnnPreparedModel(armnn::NetworkId networkId, // Enable profiling if required. m_Runtime->GetProfiler(m_NetworkId)->EnableProfiling(m_GpuProfilingEnabled); - if (asyncModelExecutionEnabled) + if (m_AsyncModelExecutionEnabled) { std::vector> memHandles; for (unsigned int i=0; i < numberOfThreads; ++i) @@ -134,8 +137,16 @@ ArmnnPreparedModel::ArmnnPreparedModel(armnn::NetworkId networkId, memHandles.emplace_back(m_Runtime->CreateWorkingMemHandle(networkId)); } + if (!m_Threadpool) + { + m_Threadpool = std::make_unique(numberOfThreads, runtime, memHandles); + } + else + { + m_Threadpool->LoadMemHandles(memHandles); + } + m_WorkingMemHandle = memHandles.back(); - m_Threadpool = std::make_unique(numberOfThreads, runtime, memHandles); } } @@ -148,6 +159,12 @@ ArmnnPreparedModel::~ArmnnPreparedModel() // Unload the network associated with this model. m_Runtime->UnloadNetwork(m_NetworkId); + // Unload the network memhandles from the threadpool + if (m_AsyncModelExecutionEnabled) + { + m_Threadpool->UnloadMemHandles(m_NetworkId); + } + // Dump the profiling info to a file if required. DumpJsonProfilingIfRequired(m_GpuProfilingEnabled, m_RequestInputsAndOutputsDumpDir, m_NetworkId, profiler.get()); } diff --git a/ArmnnPreparedModel.hpp b/ArmnnPreparedModel.hpp index 58177d92..685d950e 100644 --- a/ArmnnPreparedModel.hpp +++ b/ArmnnPreparedModel.hpp @@ -96,17 +96,19 @@ private: std::shared_ptr& outputTensors, CallbackContext m_CallbackContext); - armnn::NetworkId m_NetworkId; - armnn::IRuntime* m_Runtime; - std::unique_ptr m_Threadpool; - HalModel m_Model; + armnn::NetworkId m_NetworkId; + armnn::IRuntime* m_Runtime; + HalModel m_Model; // There must be a single RequestThread for all ArmnnPreparedModel objects to ensure serial execution of workloads // It is specific to this class, so it is declared as static here - static RequestThread m_RequestThread; - uint32_t m_RequestCount; - const std::string& m_RequestInputsAndOutputsDumpDir; - const bool m_GpuProfilingEnabled; - + static RequestThread m_RequestThread; + uint32_t m_RequestCount; + const std::string& m_RequestInputsAndOutputsDumpDir; + const bool m_GpuProfilingEnabled; + // Static to allow sharing of threadpool between ArmnnPreparedModel instances + static std::unique_ptr m_Threadpool; std::shared_ptr m_WorkingMemHandle; const bool m_AsyncModelExecutionEnabled; }; diff --git a/ArmnnPreparedModel_1_2.cpp b/ArmnnPreparedModel_1_2.cpp index e46b5be3..5a10d546 100644 --- a/ArmnnPreparedModel_1_2.cpp +++ b/ArmnnPreparedModel_1_2.cpp @@ -124,6 +124,9 @@ template RequestThread ArmnnPreparedModel_1_2::m_RequestThread; +template +std::unique_ptr ArmnnPreparedModel_1_2::m_Threadpool(nullptr); + template template void ArmnnPreparedModel_1_2::DumpTensorsIfRequired(char const* tensorNamePrefix, @@ -161,7 +164,7 @@ ArmnnPreparedModel_1_2::ArmnnPreparedModel_1_2(armnn::NetworkId netw // Enable profiling if required. m_Runtime->GetProfiler(m_NetworkId)->EnableProfiling(m_GpuProfilingEnabled); - if (asyncModelExecutionEnabled) + if (m_AsyncModelExecutionEnabled) { std::vector> memHandles; for (unsigned int i=0; i < numberOfThreads; ++i) @@ -169,8 +172,16 @@ ArmnnPreparedModel_1_2::ArmnnPreparedModel_1_2(armnn::NetworkId netw memHandles.emplace_back(m_Runtime->CreateWorkingMemHandle(networkId)); } + if (!m_Threadpool) + { + m_Threadpool = std::make_unique(numberOfThreads, runtime, memHandles); + } + else + { + m_Threadpool->LoadMemHandles(memHandles); + } + m_WorkingMemHandle = memHandles.back(); - m_Threadpool = std::make_unique(numberOfThreads, runtime, memHandles); } } @@ -183,6 +194,12 @@ ArmnnPreparedModel_1_2::~ArmnnPreparedModel_1_2() // Unload the network associated with this model. m_Runtime->UnloadNetwork(m_NetworkId); + // Unload the network memhandles from the threadpool + if (m_AsyncModelExecutionEnabled) + { + m_Threadpool->UnloadMemHandles(m_NetworkId); + } + // Dump the profiling info to a file if required. DumpJsonProfilingIfRequired(m_GpuProfilingEnabled, m_RequestInputsAndOutputsDumpDir, m_NetworkId, profiler.get()); } diff --git a/ArmnnPreparedModel_1_2.hpp b/ArmnnPreparedModel_1_2.hpp index 4ee2b817..c64c891e 100644 --- a/ArmnnPreparedModel_1_2.hpp +++ b/ArmnnPreparedModel_1_2.hpp @@ -143,7 +143,6 @@ private: armnn::NetworkId m_NetworkId; armnn::IRuntime* m_Runtime; - std::unique_ptr m_Threadpool; V1_2::Model m_Model; // There must be a single RequestThread for all ArmnnPreparedModel objects to ensure serial execution of workloads // It is specific to this class, so it is declared as static here @@ -153,6 +152,8 @@ private: uint32_t m_RequestCount; const std::string& m_RequestInputsAndOutputsDumpDir; const bool m_GpuProfilingEnabled; + // Static to allow sharing of threadpool between ArmnnPreparedModel instances + static std::unique_ptr m_Threadpool; std::shared_ptr m_WorkingMemHandle; const bool m_AsyncModelExecutionEnabled; }; diff --git a/ArmnnPreparedModel_1_3.cpp b/ArmnnPreparedModel_1_3.cpp index dcac2813..16ea113c 100644 --- a/ArmnnPreparedModel_1_3.cpp +++ b/ArmnnPreparedModel_1_3.cpp @@ -144,6 +144,9 @@ template RequestThread_1_3 ArmnnPreparedModel_1_3::m_RequestThread; +template +std::unique_ptr ArmnnPreparedModel_1_3::m_Threadpool(nullptr); + template template void ArmnnPreparedModel_1_3::DumpTensorsIfRequired(char const* tensorNamePrefix, @@ -183,7 +186,7 @@ ArmnnPreparedModel_1_3::ArmnnPreparedModel_1_3(armnn::NetworkId netw // Enable profiling if required. m_Runtime->GetProfiler(m_NetworkId)->EnableProfiling(m_GpuProfilingEnabled); - if (asyncModelExecutionEnabled) + if (m_AsyncModelExecutionEnabled) { std::vector> memHandles; for (unsigned int i=0; i < numberOfThreads; ++i) @@ -191,8 +194,16 @@ ArmnnPreparedModel_1_3::ArmnnPreparedModel_1_3(armnn::NetworkId netw memHandles.emplace_back(m_Runtime->CreateWorkingMemHandle(networkId)); } + if (!m_Threadpool) + { + m_Threadpool = std::make_unique(numberOfThreads, runtime, memHandles); + } + else + { + m_Threadpool->LoadMemHandles(memHandles); + } + m_WorkingMemHandle = memHandles.back(); - m_Threadpool = std::make_unique(numberOfThreads, runtime, memHandles); } } @@ -205,6 +216,12 @@ ArmnnPreparedModel_1_3::~ArmnnPreparedModel_1_3() // Unload the network associated with this model. m_Runtime->UnloadNetwork(m_NetworkId); + // Unload the network memhandles from the threadpool + if (m_AsyncModelExecutionEnabled) + { + m_Threadpool->UnloadMemHandles(m_NetworkId); + } + // Dump the profiling info to a file if required. DumpJsonProfilingIfRequired(m_GpuProfilingEnabled, m_RequestInputsAndOutputsDumpDir, m_NetworkId, profiler.get()); } diff --git a/ArmnnPreparedModel_1_3.hpp b/ArmnnPreparedModel_1_3.hpp index 46798cde..a245cc4c 100644 --- a/ArmnnPreparedModel_1_3.hpp +++ b/ArmnnPreparedModel_1_3.hpp @@ -175,20 +175,23 @@ private: CallbackContext m_CallbackContext, armnn::QosExecPriority priority); - armnn::NetworkId m_NetworkId; - armnn::IRuntime* m_Runtime; - std::unique_ptr m_Threadpool; - V1_3::Model m_Model; + armnn::NetworkId m_NetworkId; + armnn::IRuntime* m_Runtime; + V1_3::Model m_Model; // There must be a single RequestThread for all ArmnnPreparedModel objects to ensure serial execution of workloads // It is specific to this class, so it is declared as static here - static RequestThread_1_3 m_RequestThread; - uint32_t m_RequestCount; - const std::string& m_RequestInputsAndOutputsDumpDir; - const bool m_GpuProfilingEnabled; - V1_3::Priority m_ModelPriority; - - std::shared_ptr m_WorkingMemHandle; - const bool m_AsyncModelExecutionEnabled; + static RequestThread_1_3 m_RequestThread; + uint32_t m_RequestCount; + const std::string& m_RequestInputsAndOutputsDumpDir; + const bool m_GpuProfilingEnabled; + V1_3::Priority m_ModelPriority; + + // Static to allow sharing of threadpool between ArmnnPreparedModel instances + static std::unique_ptr m_Threadpool; + std::shared_ptr m_WorkingMemHandle; + const bool m_AsyncModelExecutionEnabled; }; } -- cgit v1.2.1