about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Finn Williams <Finn.Williams@arm.com> 2021-07-08 13:07:19 +0100
committer: Finn Williams <Finn.Williams@arm.com> 2021-07-12 17:09:06 +0100
commit: fdf2eaea1773d066dbb48e3d214ccd5446fa918a (patch)
tree: 5b68a1a30e65bf0cdac65199bef5ea11eefaa442
parent: f769f292ba506784c8d21d691de821f4e3b53fec (diff)
download: android-nn-driver-fdf2eaea1773d066dbb48e3d214ccd5446fa918a.tar.gz
IVGCVSW-6062 update ArmnnPreparedModels to have a single static instance of the threadpool
Signed-off-by: Finn Williams <Finn.Williams@arm.com>
Change-Id: Ie350a11eab5d677dd6a5571ea4094aa51b23c501
-rw-r--r-- ArmnnPreparedModel.cpp 21
-rw-r--r-- ArmnnPreparedModel.hpp 20
-rw-r--r-- ArmnnPreparedModel_1_2.cpp 21
-rw-r--r-- ArmnnPreparedModel_1_2.hpp 3
-rw-r--r-- ArmnnPreparedModel_1_3.cpp 21
-rw-r--r-- ArmnnPreparedModel_1_3.hpp 27
6 files changed, 85 insertions, 28 deletions
diff --git a/ArmnnPreparedModel.cpp b/ArmnnPreparedModel.cpp
index 388a1116..f14560a1 100644
--- a/ArmnnPreparedModel.cpp
+++ b/ArmnnPreparedModel.cpp
@@ -90,6 +90,9 @@ RequestThread<ArmnnPreparedModel, HalVersion, CallbackContext_1_0>
ArmnnPreparedModel<HalVersion>::m_RequestThread;
template<typename HalVersion>
+std::unique_ptr<armnn::Threadpool> ArmnnPreparedModel<HalVersion>::m_Threadpool(nullptr);
+
+template<typename HalVersion>
template <typename TensorBindingCollection>
void ArmnnPreparedModel<HalVersion>::DumpTensorsIfRequired(char const* tensorNamePrefix,
const TensorBindingCollection& tensorBindings)
@@ -126,7 +129,7 @@ ArmnnPreparedModel<HalVersion>::ArmnnPreparedModel(armnn::NetworkId networkId,
// Enable profiling if required.
m_Runtime->GetProfiler(m_NetworkId)->EnableProfiling(m_GpuProfilingEnabled);
- if (asyncModelExecutionEnabled)
+ if (m_AsyncModelExecutionEnabled)
{
std::vector<std::shared_ptr<armnn::IWorkingMemHandle>> memHandles;
for (unsigned int i=0; i < numberOfThreads; ++i)
@@ -134,8 +137,16 @@ ArmnnPreparedModel<HalVersion>::ArmnnPreparedModel(armnn::NetworkId networkId,
memHandles.emplace_back(m_Runtime->CreateWorkingMemHandle(networkId));
}
+ if (!m_Threadpool)
+ {
+ m_Threadpool = std::make_unique<armnn::Threadpool>(numberOfThreads, runtime, memHandles);
+ }
+ else
+ {
+ m_Threadpool->LoadMemHandles(memHandles);
+ }
+
m_WorkingMemHandle = memHandles.back();
- m_Threadpool = std::make_unique<armnn::Threadpool>(numberOfThreads, runtime, memHandles);
}
}
@@ -148,6 +159,12 @@ ArmnnPreparedModel<HalVersion>::~ArmnnPreparedModel()
// Unload the network associated with this model.
m_Runtime->UnloadNetwork(m_NetworkId);
+ // Unload the network memhandles from the threadpool
+ if (m_AsyncModelExecutionEnabled)
+ {
+ m_Threadpool->UnloadMemHandles(m_NetworkId);
+ }
+
// Dump the profiling info to a file if required.
DumpJsonProfilingIfRequired(m_GpuProfilingEnabled, m_RequestInputsAndOutputsDumpDir, m_NetworkId, profiler.get());
}
diff --git a/ArmnnPreparedModel.hpp b/ArmnnPreparedModel.hpp
index 58177d92..685d950e 100644
--- a/ArmnnPreparedModel.hpp
+++ b/ArmnnPreparedModel.hpp
@@ -96,17 +96,19 @@ private:
std::shared_ptr<armnn::OutputTensors>& outputTensors,
CallbackContext m_CallbackContext);
- armnn::NetworkId m_NetworkId;
- armnn::IRuntime* m_Runtime;
- std::unique_ptr<armnn::Threadpool> m_Threadpool;
- HalModel m_Model;
+ armnn::NetworkId m_NetworkId;
+ armnn::IRuntime* m_Runtime;
+ HalModel m_Model;
// There must be a single RequestThread for all ArmnnPreparedModel objects to ensure serial execution of workloads
// It is specific to this class, so it is declared as static here
- static RequestThread<ArmnnPreparedModel, HalVersion, CallbackContext_1_0> m_RequestThread;
- uint32_t m_RequestCount;
- const std::string& m_RequestInputsAndOutputsDumpDir;
- const bool m_GpuProfilingEnabled;
-
+ static RequestThread<ArmnnPreparedModel,
+ HalVersion,
+ CallbackContext_1_0> m_RequestThread;
+ uint32_t m_RequestCount;
+ const std::string& m_RequestInputsAndOutputsDumpDir;
+ const bool m_GpuProfilingEnabled;
+ // Static to allow sharing of threadpool between ArmnnPreparedModel instances
+ static std::unique_ptr<armnn::Threadpool> m_Threadpool;
std::shared_ptr<armnn::IWorkingMemHandle> m_WorkingMemHandle;
const bool m_AsyncModelExecutionEnabled;
};
diff --git a/ArmnnPreparedModel_1_2.cpp b/ArmnnPreparedModel_1_2.cpp
index e46b5be3..5a10d546 100644
--- a/ArmnnPreparedModel_1_2.cpp
+++ b/ArmnnPreparedModel_1_2.cpp
@@ -125,6 +125,9 @@ RequestThread<ArmnnPreparedModel_1_2, HalVersion, CallbackContext_1_2>
ArmnnPreparedModel_1_2<HalVersion>::m_RequestThread;
template<typename HalVersion>
+std::unique_ptr<armnn::Threadpool> ArmnnPreparedModel_1_2<HalVersion>::m_Threadpool(nullptr);
+
+template<typename HalVersion>
template<typename TensorBindingCollection>
void ArmnnPreparedModel_1_2<HalVersion>::DumpTensorsIfRequired(char const* tensorNamePrefix,
const TensorBindingCollection& tensorBindings)
@@ -161,7 +164,7 @@ ArmnnPreparedModel_1_2<HalVersion>::ArmnnPreparedModel_1_2(armnn::NetworkId netw
// Enable profiling if required.
m_Runtime->GetProfiler(m_NetworkId)->EnableProfiling(m_GpuProfilingEnabled);
- if (asyncModelExecutionEnabled)
+ if (m_AsyncModelExecutionEnabled)
{
std::vector<std::shared_ptr<armnn::IWorkingMemHandle>> memHandles;
for (unsigned int i=0; i < numberOfThreads; ++i)
@@ -169,8 +172,16 @@ ArmnnPreparedModel_1_2<HalVersion>::ArmnnPreparedModel_1_2(armnn::NetworkId netw
memHandles.emplace_back(m_Runtime->CreateWorkingMemHandle(networkId));
}
+ if (!m_Threadpool)
+ {
+ m_Threadpool = std::make_unique<armnn::Threadpool>(numberOfThreads, runtime, memHandles);
+ }
+ else
+ {
+ m_Threadpool->LoadMemHandles(memHandles);
+ }
+
m_WorkingMemHandle = memHandles.back();
- m_Threadpool = std::make_unique<armnn::Threadpool>(numberOfThreads, runtime, memHandles);
}
}
@@ -183,6 +194,12 @@ ArmnnPreparedModel_1_2<HalVersion>::~ArmnnPreparedModel_1_2()
// Unload the network associated with this model.
m_Runtime->UnloadNetwork(m_NetworkId);
+ // Unload the network memhandles from the threadpool
+ if (m_AsyncModelExecutionEnabled)
+ {
+ m_Threadpool->UnloadMemHandles(m_NetworkId);
+ }
+
// Dump the profiling info to a file if required.
DumpJsonProfilingIfRequired(m_GpuProfilingEnabled, m_RequestInputsAndOutputsDumpDir, m_NetworkId, profiler.get());
}
diff --git a/ArmnnPreparedModel_1_2.hpp b/ArmnnPreparedModel_1_2.hpp
index 4ee2b817..c64c891e 100644
--- a/ArmnnPreparedModel_1_2.hpp
+++ b/ArmnnPreparedModel_1_2.hpp
@@ -143,7 +143,6 @@ private:
armnn::NetworkId m_NetworkId;
armnn::IRuntime* m_Runtime;
- std::unique_ptr<armnn::Threadpool> m_Threadpool;
V1_2::Model m_Model;
// There must be a single RequestThread for all ArmnnPreparedModel objects to ensure serial execution of workloads
// It is specific to this class, so it is declared as static here
@@ -153,6 +152,8 @@ private:
uint32_t m_RequestCount;
const std::string& m_RequestInputsAndOutputsDumpDir;
const bool m_GpuProfilingEnabled;
+ // Static to allow sharing of threadpool between ArmnnPreparedModel instances
+ static std::unique_ptr<armnn::Threadpool> m_Threadpool;
std::shared_ptr<IWorkingMemHandle> m_WorkingMemHandle;
const bool m_AsyncModelExecutionEnabled;
};
diff --git a/ArmnnPreparedModel_1_3.cpp b/ArmnnPreparedModel_1_3.cpp
index dcac2813..16ea113c 100644
--- a/ArmnnPreparedModel_1_3.cpp
+++ b/ArmnnPreparedModel_1_3.cpp
@@ -145,6 +145,9 @@ RequestThread_1_3<ArmnnPreparedModel_1_3, HalVersion, CallbackContext_1_3>
ArmnnPreparedModel_1_3<HalVersion>::m_RequestThread;
template<typename HalVersion>
+std::unique_ptr<armnn::Threadpool> ArmnnPreparedModel_1_3<HalVersion>::m_Threadpool(nullptr);
+
+template<typename HalVersion>
template<typename TensorBindingCollection>
void ArmnnPreparedModel_1_3<HalVersion>::DumpTensorsIfRequired(char const* tensorNamePrefix,
const TensorBindingCollection& tensorBindings)
@@ -183,7 +186,7 @@ ArmnnPreparedModel_1_3<HalVersion>::ArmnnPreparedModel_1_3(armnn::NetworkId netw
// Enable profiling if required.
m_Runtime->GetProfiler(m_NetworkId)->EnableProfiling(m_GpuProfilingEnabled);
- if (asyncModelExecutionEnabled)
+ if (m_AsyncModelExecutionEnabled)
{
std::vector<std::shared_ptr<armnn::IWorkingMemHandle>> memHandles;
for (unsigned int i=0; i < numberOfThreads; ++i)
@@ -191,8 +194,16 @@ ArmnnPreparedModel_1_3<HalVersion>::ArmnnPreparedModel_1_3(armnn::NetworkId netw
memHandles.emplace_back(m_Runtime->CreateWorkingMemHandle(networkId));
}
+ if (!m_Threadpool)
+ {
+ m_Threadpool = std::make_unique<armnn::Threadpool>(numberOfThreads, runtime, memHandles);
+ }
+ else
+ {
+ m_Threadpool->LoadMemHandles(memHandles);
+ }
+
m_WorkingMemHandle = memHandles.back();
- m_Threadpool = std::make_unique<armnn::Threadpool>(numberOfThreads, runtime, memHandles);
}
}
@@ -205,6 +216,12 @@ ArmnnPreparedModel_1_3<HalVersion>::~ArmnnPreparedModel_1_3()
// Unload the network associated with this model.
m_Runtime->UnloadNetwork(m_NetworkId);
+ // Unload the network memhandles from the threadpool
+ if (m_AsyncModelExecutionEnabled)
+ {
+ m_Threadpool->UnloadMemHandles(m_NetworkId);
+ }
+
// Dump the profiling info to a file if required.
DumpJsonProfilingIfRequired(m_GpuProfilingEnabled, m_RequestInputsAndOutputsDumpDir, m_NetworkId, profiler.get());
}
diff --git a/ArmnnPreparedModel_1_3.hpp b/ArmnnPreparedModel_1_3.hpp
index 46798cde..a245cc4c 100644
--- a/ArmnnPreparedModel_1_3.hpp
+++ b/ArmnnPreparedModel_1_3.hpp
@@ -175,20 +175,23 @@ private:
CallbackContext m_CallbackContext,
armnn::QosExecPriority priority);
- armnn::NetworkId m_NetworkId;
- armnn::IRuntime* m_Runtime;
- std::unique_ptr<armnn::Threadpool> m_Threadpool;
- V1_3::Model m_Model;
+ armnn::NetworkId m_NetworkId;
+ armnn::IRuntime* m_Runtime;
+ V1_3::Model m_Model;
// There must be a single RequestThread for all ArmnnPreparedModel objects to ensure serial execution of workloads
// It is specific to this class, so it is declared as static here
- static RequestThread_1_3<ArmnnPreparedModel_1_3, HalVersion, CallbackContext_1_3> m_RequestThread;
- uint32_t m_RequestCount;
- const std::string& m_RequestInputsAndOutputsDumpDir;
- const bool m_GpuProfilingEnabled;
- V1_3::Priority m_ModelPriority;
-
- std::shared_ptr<IWorkingMemHandle> m_WorkingMemHandle;
- const bool m_AsyncModelExecutionEnabled;
+ static RequestThread_1_3<ArmnnPreparedModel_1_3,
+ HalVersion,
+ CallbackContext_1_3> m_RequestThread;
+ uint32_t m_RequestCount;
+ const std::string& m_RequestInputsAndOutputsDumpDir;
+ const bool m_GpuProfilingEnabled;
+ V1_3::Priority m_ModelPriority;
+
+ // Static to allow sharing of threadpool between ArmnnPreparedModel instances
+ static std::unique_ptr<armnn::Threadpool> m_Threadpool;
+ std::shared_ptr<IWorkingMemHandle> m_WorkingMemHandle;
+ const bool m_AsyncModelExecutionEnabled;
};
}