author     Finn Williams <Finn.Williams@arm.com>  2021-06-11 15:04:02 +0100
committer  Finn Williams <Finn.Williams@arm.com>  2021-06-23 13:21:01 +0100
commit     ca3a3e0fd86a07de9e073ba31dc2b42d6ca84536 (patch)
tree       755d270b8c743a24e7ead37dc84f6ca69ad783c4
parent     dc873f6309784d5fd6914ca5432d32ae6c3de0c2 (diff)
IVGCVSW-6062 Rework the async threadpool
!armnn:5801

Signed-off-by: Finn Williams <Finn.Williams@arm.com>
Change-Id: I9964d0899ce752441f380edddbd974010257b2dd
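In outline, the patch stops passing the thread count through INetworkProperties and instead has the prepared-model constructors, when async execution is enabled, create one working memory handle per thread and hand them, together with the runtime, to an armnn::Threadpool owned by the prepared model. A minimal sketch of that pattern follows; the Threadpool constructor and CreateWorkingMemHandle calls are taken from the hunks below, while the free-standing helper name is illustrative only.

    // Sketch only: mirrors the constructor logic added in this patch.
    // <armnn/Threadpool.hpp> comes from the matching Arm NN change (!armnn:5801).
    #include <armnn/ArmNN.hpp>
    #include <armnn/Threadpool.hpp>

    #include <memory>
    #include <vector>

    // Hypothetical helper showing the per-thread working memory setup.
    std::unique_ptr<armnn::Threadpool> MakeThreadpool(armnn::IRuntime* runtime,
                                                      armnn::NetworkId networkId,
                                                      unsigned int numberOfThreads)
    {
        // One working memory handle per worker thread so inferences can run concurrently.
        std::vector<std::shared_ptr<armnn::IWorkingMemHandle>> memHandles;
        for (unsigned int i = 0; i < numberOfThreads; ++i)
        {
            memHandles.emplace_back(runtime->CreateWorkingMemHandle(networkId));
        }

        // The threadpool takes the runtime and the pre-created handles; the driver also
        // keeps the last handle for the existing synchronous execution path.
        return std::make_unique<armnn::Threadpool>(numberOfThreads, runtime, memHandles);
    }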
-rw-r--r--  1.2/ArmnnDriverImpl.cpp      6
-rw-r--r--  1.3/ArmnnDriverImpl.cpp      6
-rw-r--r--  ArmnnDriverImpl.cpp          6
-rw-r--r--  ArmnnPreparedModel.cpp      22
-rw-r--r--  ArmnnPreparedModel.hpp      35
-rw-r--r--  ArmnnPreparedModel_1_2.cpp  22
-rw-r--r--  ArmnnPreparedModel_1_2.hpp  46
-rw-r--r--  ArmnnPreparedModel_1_3.cpp  22
-rw-r--r--  ArmnnPreparedModel_1_3.hpp  30
9 files changed, 81 insertions, 114 deletions
diff --git a/1.2/ArmnnDriverImpl.cpp b/1.2/ArmnnDriverImpl.cpp
index 3eae526f..afe3060d 100644
--- a/1.2/ArmnnDriverImpl.cpp
+++ b/1.2/ArmnnDriverImpl.cpp
@@ -191,8 +191,7 @@ Return<V1_0::ErrorStatus> ArmnnDriverImpl::prepareArmnnModel_1_2(
std::string msg;
armnn::INetworkProperties networkProperties(options.isAsyncModelExecutionEnabled(),
MemorySource::Undefined,
- MemorySource::Undefined,
- options.getNoOfArmnnThreads());
+ MemorySource::Undefined);
try
{
if (runtime->LoadNetwork(netId, move(optNet), msg, networkProperties) != armnn::Status::Success)
@@ -222,7 +221,8 @@ Return<V1_0::ErrorStatus> ArmnnDriverImpl::prepareArmnnModel_1_2(
model,
options.GetRequestInputsAndOutputsDumpDir(),
options.IsGpuProfilingEnabled(),
- options.isAsyncModelExecutionEnabled()));
+ options.isAsyncModelExecutionEnabled(),
+ options.getNoOfArmnnThreads()));
// Run a single 'dummy' inference of the model. This means that CL kernels will get compiled (and tuned if
// this is enabled) before the first 'real' inference which removes the overhead of the first inference.
diff --git a/1.3/ArmnnDriverImpl.cpp b/1.3/ArmnnDriverImpl.cpp
index 5c5e6071..6f1b5098 100644
--- a/1.3/ArmnnDriverImpl.cpp
+++ b/1.3/ArmnnDriverImpl.cpp
@@ -202,8 +202,7 @@ Return<V1_3::ErrorStatus> ArmnnDriverImpl::prepareArmnnModel_1_3(
std::string msg;
armnn::INetworkProperties networkProperties(options.isAsyncModelExecutionEnabled(),
MemorySource::Undefined,
- MemorySource::Undefined,
- options.getNoOfArmnnThreads());
+ MemorySource::Undefined);
try
{
if (runtime->LoadNetwork(netId, move(optNet), msg, networkProperties) != armnn::Status::Success)
@@ -234,7 +233,8 @@ Return<V1_3::ErrorStatus> ArmnnDriverImpl::prepareArmnnModel_1_3(
options.GetRequestInputsAndOutputsDumpDir(),
options.IsGpuProfilingEnabled(),
priority,
- options.isAsyncModelExecutionEnabled()));
+ options.isAsyncModelExecutionEnabled(),
+ options.getNoOfArmnnThreads()));
// Run a single 'dummy' inference of the model. This means that CL kernels will get compiled (and tuned if
// this is enabled) before the first 'real' inference which removes the overhead of the first inference.
diff --git a/ArmnnDriverImpl.cpp b/ArmnnDriverImpl.cpp
index 0e6e8b18..78ef12fe 100644
--- a/ArmnnDriverImpl.cpp
+++ b/ArmnnDriverImpl.cpp
@@ -166,8 +166,7 @@ Return<V1_0::ErrorStatus> ArmnnDriverImpl<HalPolicy>::prepareModel(
std::string msg;
armnn::INetworkProperties networkProperties(options.isAsyncModelExecutionEnabled(),
armnn::MemorySource::Undefined,
- armnn::MemorySource::Undefined,
- options.getNoOfArmnnThreads());
+ armnn::MemorySource::Undefined);
try
{
@@ -198,7 +197,8 @@ Return<V1_0::ErrorStatus> ArmnnDriverImpl<HalPolicy>::prepareModel(
model,
options.GetRequestInputsAndOutputsDumpDir(),
options.IsGpuProfilingEnabled(),
- options.isAsyncModelExecutionEnabled()));
+ options.isAsyncModelExecutionEnabled(),
+ options.getNoOfArmnnThreads()));
// Run a single 'dummy' inference of the model. This means that CL kernels will get compiled (and tuned if
// this is enabled) before the first 'real' inference which removes the overhead of the first inference.
diff --git a/ArmnnPreparedModel.cpp b/ArmnnPreparedModel.cpp
index 259d1b8c..77709b0e 100644
--- a/ArmnnPreparedModel.cpp
+++ b/ArmnnPreparedModel.cpp
@@ -113,7 +113,8 @@ ArmnnPreparedModel<HalVersion>::ArmnnPreparedModel(armnn::NetworkId networkId,
const HalModel& model,
const std::string& requestInputsAndOutputsDumpDir,
const bool gpuProfilingEnabled,
- const bool asyncModelExecutionEnabled)
+ const bool asyncModelExecutionEnabled,
+ const unsigned int numberOfThreads)
: m_NetworkId(networkId)
, m_Runtime(runtime)
, m_Model(model)
@@ -127,7 +128,14 @@ ArmnnPreparedModel<HalVersion>::ArmnnPreparedModel(armnn::NetworkId networkId,
if (asyncModelExecutionEnabled)
{
- m_WorkingMemHandle = m_Runtime->CreateWorkingMemHandle(networkId);
+ std::vector<std::shared_ptr<armnn::IWorkingMemHandle>> memHandles;
+ for (int i=0; i < numberOfThreads; ++i)
+ {
+ memHandles.emplace_back(m_Runtime->CreateWorkingMemHandle(networkId));
+ }
+
+ m_WorkingMemHandle = memHandles.back();
+ m_Threadpool = std::make_unique<armnn::Threadpool>(numberOfThreads, runtime, memHandles);
}
}
@@ -397,11 +405,11 @@ void ArmnnPreparedModel<HalVersion>::ScheduleGraphForExecution(
outputTensors,
callbackContext);
- m_Runtime->Schedule(m_NetworkId,
- *tpCb->m_InputTensors,
- *tpCb->m_OutputTensors,
- armnn::QosExecPriority::High,
- tpCb);
+ m_Threadpool->Schedule(m_NetworkId,
+ *tpCb->m_InputTensors,
+ *tpCb->m_OutputTensors,
+ armnn::QosExecPriority::Medium,
+ tpCb);
ALOGV("ArmnnPreparedModel::ScheduleGraphForExecution end");
}
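The submission side changes the same way in all three prepared-model variants: work is no longer pushed to the runtime's internal scheduler via m_Runtime->Schedule but to the driver-owned threadpool, with the HAL 1.0/1.2 paths dropping from High to Medium priority while the 1.3 path keeps forwarding the model's own priority. Below is a hedged sketch of a submission, assuming the trimmed-down callback interface implied by the header hunks (only Notify remains to be overridden) and an assumed include path for it; the callback class name and Submit helper are illustrative.

    // Sketch only: the callback base-class header path and namespace are assumed
    // from the driver-side override shown in the headers of this patch.
    #include <armnn/ArmNN.hpp>
    #include <armnn/IAsyncExecutionCallback.hpp>
    #include <armnn/Threadpool.hpp>

    #include <memory>

    class LoggingCallback : public armnn::experimental::IAsyncExecutionCallback
    {
    public:
        // After the rework the driver-side callback only implements Notify.
        void Notify(armnn::Status status, armnn::InferenceTimingPair timeTaken) override
        {
            // Record status and timings, then unblock the waiting HAL request.
            (void)status;
            (void)timeTaken;
        }
    };

    void Submit(armnn::Threadpool& threadpool,
                armnn::NetworkId networkId,
                const armnn::InputTensors& inputs,
                const armnn::OutputTensors& outputs)
    {
        auto callback = std::make_shared<LoggingCallback>();

        // HAL 1.0/1.2 requests now go in at Medium priority; the 1.3 path passes
        // the priority derived from the model instead.
        threadpool.Schedule(networkId, inputs, outputs, armnn::QosExecPriority::Medium, callback);
    }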
diff --git a/ArmnnPreparedModel.hpp b/ArmnnPreparedModel.hpp
index d1c830d4..58177d92 100644
--- a/ArmnnPreparedModel.hpp
+++ b/ArmnnPreparedModel.hpp
@@ -11,6 +11,7 @@
#include <NeuralNetworks.h>
#include <armnn/ArmNN.hpp>
+#include <armnn/Threadpool.hpp>
#include <string>
#include <vector>
@@ -39,7 +40,8 @@ public:
const HalModel& model,
const std::string& requestInputsAndOutputsDumpDir,
const bool gpuProfilingEnabled,
- const bool asyncModelExecutionEnabled = false);
+ const bool asyncModelExecutionEnabled = false,
+ const unsigned int numberOfThreads = 1);
virtual ~ArmnnPreparedModel();
@@ -76,28 +78,6 @@ private:
void Notify(armnn::Status status, armnn::InferenceTimingPair timeTaken) override;
- // Retrieve the ArmNN Status from the AsyncExecutionCallback that has been notified
- virtual armnn::Status GetStatus() const override
- {
- return armnn::Status::Success;
- }
-
- // Block the calling thread until the AsyncExecutionCallback object allows it to proceed
- virtual void Wait() const override
- {}
-
- // Retrieve the start time before executing the inference
- virtual armnn::HighResolutionClock GetStartTime() const override
- {
- return std::chrono::high_resolution_clock::now();
- }
-
- // Retrieve the time after executing the inference
- virtual armnn::HighResolutionClock GetEndTime() const override
- {
- return std::chrono::high_resolution_clock::now();
- }
-
ArmnnPreparedModel<HalVersion>* m_Model;
std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>> m_MemPools;
std::shared_ptr<armnn::InputTensors> m_InputTensors;
@@ -116,9 +96,10 @@ private:
std::shared_ptr<armnn::OutputTensors>& outputTensors,
CallbackContext m_CallbackContext);
- armnn::NetworkId m_NetworkId;
- armnn::IRuntime* m_Runtime;
- HalModel m_Model;
+ armnn::NetworkId m_NetworkId;
+ armnn::IRuntime* m_Runtime;
+ std::unique_ptr<armnn::Threadpool> m_Threadpool;
+ HalModel m_Model;
// There must be a single RequestThread for all ArmnnPreparedModel objects to ensure serial execution of workloads
// It is specific to this class, so it is declared as static here
static RequestThread<ArmnnPreparedModel, HalVersion, CallbackContext_1_0> m_RequestThread;
@@ -126,7 +107,7 @@ private:
const std::string& m_RequestInputsAndOutputsDumpDir;
const bool m_GpuProfilingEnabled;
- std::unique_ptr<armnn::IWorkingMemHandle> m_WorkingMemHandle;
+ std::shared_ptr<armnn::IWorkingMemHandle> m_WorkingMemHandle;
const bool m_AsyncModelExecutionEnabled;
};
diff --git a/ArmnnPreparedModel_1_2.cpp b/ArmnnPreparedModel_1_2.cpp
index 83e4aac5..07fd5a6d 100644
--- a/ArmnnPreparedModel_1_2.cpp
+++ b/ArmnnPreparedModel_1_2.cpp
@@ -148,7 +148,8 @@ ArmnnPreparedModel_1_2<HalVersion>::ArmnnPreparedModel_1_2(armnn::NetworkId netw
const V1_2::Model& model,
const std::string& requestInputsAndOutputsDumpDir,
const bool gpuProfilingEnabled,
- const bool asyncModelExecutionEnabled)
+ const bool asyncModelExecutionEnabled,
+ const unsigned int numberOfThreads)
: m_NetworkId(networkId)
, m_Runtime(runtime)
, m_Model(model)
@@ -162,7 +163,14 @@ ArmnnPreparedModel_1_2<HalVersion>::ArmnnPreparedModel_1_2(armnn::NetworkId netw
if (asyncModelExecutionEnabled)
{
- m_WorkingMemHandle = m_Runtime->CreateWorkingMemHandle(networkId);
+ std::vector<std::shared_ptr<armnn::IWorkingMemHandle>> memHandles;
+ for (int i=0; i < numberOfThreads; ++i)
+ {
+ memHandles.emplace_back(m_Runtime->CreateWorkingMemHandle(networkId));
+ }
+
+ m_WorkingMemHandle = memHandles.back();
+ m_Threadpool = std::make_unique<armnn::Threadpool>(numberOfThreads, runtime, memHandles);
}
}
@@ -661,11 +669,11 @@ void ArmnnPreparedModel_1_2<HalVersion>::ScheduleGraphForExecution(
outputTensors,
callbackContext);
- m_Runtime->Schedule(m_NetworkId,
- *tpCb->m_InputTensors,
- *tpCb->m_OutputTensors,
- armnn::QosExecPriority::High,
- tpCb);
+ m_Threadpool->Schedule(m_NetworkId,
+ *tpCb->m_InputTensors,
+ *tpCb->m_OutputTensors,
+ armnn::QosExecPriority::Medium,
+ tpCb);
ALOGV("ArmnnPreparedModel_1_2::ScheduleGraphForExecution end");
}
diff --git a/ArmnnPreparedModel_1_2.hpp b/ArmnnPreparedModel_1_2.hpp
index 6c630c56..4ee2b817 100644
--- a/ArmnnPreparedModel_1_2.hpp
+++ b/ArmnnPreparedModel_1_2.hpp
@@ -12,6 +12,7 @@
#include <NeuralNetworks.h>
#include <armnn/ArmNN.hpp>
+#include <armnn/Threadpool.hpp>
#include <string>
#include <vector>
@@ -45,7 +46,8 @@ public:
const HalModel& model,
const std::string& requestInputsAndOutputsDumpDir,
const bool gpuProfilingEnabled,
- const bool asyncModelExecutionEnabled = false);
+ const bool asyncModelExecutionEnabled = false,
+ const unsigned int numberOfThreads = 1);
virtual ~ArmnnPreparedModel_1_2();
@@ -98,28 +100,6 @@ private:
void Notify(armnn::Status status, armnn::InferenceTimingPair timeTaken) override;
- // Retrieve the Arm NN Status from the AsyncExecutionCallback that has been notified
- virtual armnn::Status GetStatus() const override
- {
- return armnn::Status::Success;
- }
-
- // Block the calling thread until the AsyncExecutionCallback object allows it to proceed
- virtual void Wait() const override
- {}
-
- // Retrieve the start time before executing the inference
- virtual armnn::HighResolutionClock GetStartTime() const override
- {
- return std::chrono::high_resolution_clock::now();
- }
-
- // Retrieve the time after executing the inference
- virtual armnn::HighResolutionClock GetEndTime() const override
- {
- return std::chrono::high_resolution_clock::now();
- }
-
ArmnnPreparedModel_1_2<HalVersion>* m_Model;
std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>> m_MemPools;
std::vector<V1_2::OutputShape> m_OutputShapes;
@@ -161,20 +141,20 @@ private:
std::shared_ptr<armnn::OutputTensors>& outputTensors,
CallbackContext m_CallbackContext);
- armnn::NetworkId m_NetworkId;
- armnn::IRuntime* m_Runtime;
- V1_2::Model m_Model;
+ armnn::NetworkId m_NetworkId;
+ armnn::IRuntime* m_Runtime;
+ std::unique_ptr<armnn::Threadpool> m_Threadpool;
+ V1_2::Model m_Model;
// There must be a single RequestThread for all ArmnnPreparedModel objects to ensure serial execution of workloads
// It is specific to this class, so it is declared as static here
static RequestThread<ArmnnPreparedModel_1_2,
HalVersion,
- CallbackContext_1_2> m_RequestThread;
- uint32_t m_RequestCount;
- const std::string& m_RequestInputsAndOutputsDumpDir;
- const bool m_GpuProfilingEnabled;
-
- std::unique_ptr<IWorkingMemHandle> m_WorkingMemHandle;
- const bool m_AsyncModelExecutionEnabled;
+ CallbackContext_1_2> m_RequestThread;
+ uint32_t m_RequestCount;
+ const std::string& m_RequestInputsAndOutputsDumpDir;
+ const bool m_GpuProfilingEnabled;
+ std::shared_ptr<IWorkingMemHandle> m_WorkingMemHandle;
+ const bool m_AsyncModelExecutionEnabled;
};
}
diff --git a/ArmnnPreparedModel_1_3.cpp b/ArmnnPreparedModel_1_3.cpp
index 63918725..cd3df516 100644
--- a/ArmnnPreparedModel_1_3.cpp
+++ b/ArmnnPreparedModel_1_3.cpp
@@ -169,7 +169,8 @@ ArmnnPreparedModel_1_3<HalVersion>::ArmnnPreparedModel_1_3(armnn::NetworkId netw
const std::string& requestInputsAndOutputsDumpDir,
const bool gpuProfilingEnabled,
V1_3::Priority priority,
- const bool asyncModelExecutionEnabled)
+ const bool asyncModelExecutionEnabled,
+ const unsigned int numberOfThreads)
: m_NetworkId(networkId)
, m_Runtime(runtime)
, m_Model(model)
@@ -184,7 +185,14 @@ ArmnnPreparedModel_1_3<HalVersion>::ArmnnPreparedModel_1_3(armnn::NetworkId netw
if (asyncModelExecutionEnabled)
{
- m_WorkingMemHandle = m_Runtime->CreateWorkingMemHandle(networkId);
+ std::vector<std::shared_ptr<armnn::IWorkingMemHandle>> memHandles;
+ for (int i=0; i < numberOfThreads; ++i)
+ {
+ memHandles.emplace_back(m_Runtime->CreateWorkingMemHandle(networkId));
+ }
+
+ m_WorkingMemHandle = memHandles.back();
+ m_Threadpool = std::make_unique<armnn::Threadpool>(numberOfThreads, runtime, memHandles);
}
}
@@ -821,11 +829,11 @@ void ArmnnPreparedModel_1_3<HalVersion>::ScheduleGraphForExecution(
outputTensors,
callbackContext);
- m_Runtime->Schedule(m_NetworkId,
- *tpCb->m_InputTensors,
- *tpCb->m_OutputTensors,
- priority,
- tpCb);
+ m_Threadpool->Schedule(m_NetworkId,
+ *tpCb->m_InputTensors,
+ *tpCb->m_OutputTensors,
+ priority,
+ tpCb);
ALOGV("ArmnnPreparedModel_1_3::ScheduleGraphForExecution end");
}
diff --git a/ArmnnPreparedModel_1_3.hpp b/ArmnnPreparedModel_1_3.hpp
index 11299cc4..46798cde 100644
--- a/ArmnnPreparedModel_1_3.hpp
+++ b/ArmnnPreparedModel_1_3.hpp
@@ -12,6 +12,8 @@
#include <NeuralNetworks.h>
#include <armnn/ArmNN.hpp>
+#include <armnn/Threadpool.hpp>
+
#include <string>
#include <vector>
@@ -52,7 +54,8 @@ public:
const std::string& requestInputsAndOutputsDumpDir,
const bool gpuProfilingEnabled,
V1_3::Priority priority = V1_3::Priority::MEDIUM,
- const bool asyncModelExecutionEnabled = false);
+ const bool asyncModelExecutionEnabled = false,
+ const unsigned int numberOfThreads = 1);
virtual ~ArmnnPreparedModel_1_3();
@@ -131,28 +134,6 @@ private:
void Notify(armnn::Status status, armnn::InferenceTimingPair timeTaken) override;
- // Retrieve the Arm NN Status from the AsyncExecutionCallback that has been notified
- virtual armnn::Status GetStatus() const override
- {
- return armnn::Status::Success;
- }
-
- // Block the calling thread until the AsyncExecutionCallback object allows it to proceed
- virtual void Wait() const override
- {}
-
- // Retrieve the start time before executing the inference
- virtual armnn::HighResolutionClock GetStartTime() const override
- {
- return std::chrono::high_resolution_clock::now();
- }
-
- // Retrieve the time after executing the inference
- virtual armnn::HighResolutionClock GetEndTime() const override
- {
- return std::chrono::high_resolution_clock::now();
- }
-
ArmnnPreparedModel_1_3<HalVersion>* m_Model;
std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>> m_MemPools;
std::vector<V1_2::OutputShape> m_OutputShapes;
@@ -196,6 +177,7 @@ private:
armnn::NetworkId m_NetworkId;
armnn::IRuntime* m_Runtime;
+ std::unique_ptr<armnn::Threadpool> m_Threadpool;
V1_3::Model m_Model;
// There must be a single RequestThread for all ArmnnPreparedModel objects to ensure serial execution of workloads
// It is specific to this class, so it is declared as static here
@@ -205,7 +187,7 @@ private:
const bool m_GpuProfilingEnabled;
V1_3::Priority m_ModelPriority;
- std::unique_ptr<IWorkingMemHandle> m_WorkingMemHandle;
+ std::shared_ptr<IWorkingMemHandle> m_WorkingMemHandle;
const bool m_AsyncModelExecutionEnabled;
};