author    Derek Lamberti <derek.lamberti@arm.com>    2020-06-19 14:33:05 +0100
committer Derek Lamberti <derek.lamberti@arm.com>    2020-06-25 11:36:51 +0100
commit    a08d29b815987e98e7f45519e6a55eee0f085e5f (patch)
tree      c098756dd4d067ad3500b6d89cae32a3f4299193 /src
parent    d5ba9aad6fa12345744d442ba0d865686ae3aea3 (diff)
download  armnn-a08d29b815987e98e7f45519e6a55eee0f085e5f.tar.gz
Minor improvement of inference profiling
* Start inference profiling at the actual beginning
* Add profiling events for EnqueueInputs and EnqueueOutputs
* Add profiling event for working memory allocation
* Refactor Execute body to remove code duplication
* Forward arguments to constructors rather than copy

Change-Id: Iacab85f0a02e88e2423885f86f97e4dba4037319
Signed-off-by: Derek Lamberti <derek.lamberti@arm.com>
Diffstat (limited to 'src')
-rw-r--r--  src/armnn/LoadedNetwork.cpp  89
-rw-r--r--  src/armnn/LoadedNetwork.hpp   2
-rw-r--r--  src/armnn/Profiling.hpp      10
-rw-r--r--  src/armnn/Runtime.cpp         1
4 files changed, 46 insertions(+), 56 deletions(-)
diff --git a/src/armnn/LoadedNetwork.cpp b/src/armnn/LoadedNetwork.cpp
index fbf8cfbb4c..b35dfd1107 100644
--- a/src/armnn/LoadedNetwork.cpp
+++ b/src/armnn/LoadedNetwork.cpp
@@ -451,8 +451,6 @@ private:
 Status LoadedNetwork::EnqueueWorkload(const InputTensors& inputTensors,
                                       const OutputTensors& outputTensors)
 {
-    ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "EnqueueWorkload");
-
     const Graph& graph = m_OptimizedNetwork->GetGraph();

     // Walk graph to determine the order of execution.
@@ -471,21 +469,27 @@ Status LoadedNetwork::EnqueueWorkload(const InputTensors& inputTensors,
     }

     // For each input to the network, call EnqueueInput with the data passed by the user.
-    m_InputQueue.clear();
-    m_InputQueue.reserve(graph.GetNumInputs());
-    for (const BindableLayer* inputLayer : graph.GetInputLayers())
     {
-        const TensorPin& pin = workloadData.GetInputTensorPin(inputLayer->GetBindingId());
-        EnqueueInput(*inputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
+        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "PrepareInputs");
+        m_InputQueue.clear();
+        m_InputQueue.reserve(graph.GetNumInputs());
+        for (const BindableLayer* inputLayer : graph.GetInputLayers())
+        {
+            const TensorPin& pin = workloadData.GetInputTensorPin(inputLayer->GetBindingId());
+            EnqueueInput(*inputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
+        }
     }

     // For each output to the network, call EnqueueOutput with the data passed by the user.
-    m_OutputQueue.clear();
-    m_OutputQueue.reserve(graph.GetNumOutputs());
-    for (const BindableLayer* outputLayer : graph.GetOutputLayers())
     {
-        const TensorPin& pin = workloadData.GetOutputTensorPin(outputLayer->GetBindingId());
-        EnqueueOutput(*outputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
+        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "PrepareOutputs");
+        m_OutputQueue.clear();
+        m_OutputQueue.reserve(graph.GetNumOutputs());
+        for (const BindableLayer* outputLayer : graph.GetOutputLayers())
+        {
+            const TensorPin& pin = workloadData.GetOutputTensorPin(outputLayer->GetBindingId());
+            EnqueueOutput(*outputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
+        }
     }

     std::unique_ptr<TimelineUtilityMethods> timelineUtils =
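
The braces added around each queue-preparation block exist so that each ARMNN_SCOPED_PROFILING_EVENT gets its own lifetime: the macro constructs an RAII object that starts timing when the scope is entered and stops when it closes. A minimal sketch of that pattern, using a hypothetical ScopedTimer that prints to stdout instead of reporting to the armnn Profiler:

    #include <chrono>
    #include <iostream>
    #include <string>

    class ScopedTimer
    {
    public:
        explicit ScopedTimer(std::string name)
            : m_Name(std::move(name))
            , m_Start(std::chrono::steady_clock::now())
        {}

        ~ScopedTimer() // stops the clock when the enclosing scope ends
        {
            auto us = std::chrono::duration_cast<std::chrono::microseconds>(
                std::chrono::steady_clock::now() - m_Start).count();
            std::cout << m_Name << ": " << us << " us\n";
        }

    private:
        std::string m_Name;
        std::chrono::steady_clock::time_point m_Start;
    };

    void PrepareQueues()
    {
        {
            ScopedTimer timer("PrepareInputs");  // timing starts here
            // ... fill the input queue ...
        }                                        // "PrepareInputs" reported here
        {
            ScopedTimer timer("PrepareOutputs"); // separately timed scope
            // ... fill the output queue ...
        }
    }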
@@ -684,8 +688,13 @@ void LoadedNetwork::EnqueueOutput(const BindableLayer& layer, ITensorHandle* ten
     }
 }

-void LoadedNetwork::AllocateWorkingMemory()
+void LoadedNetwork::AllocateWorkingMemory(std::lock_guard<std::mutex>& lock)
 {
+    ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Working Memory Allocation");
+
+    // this unused parameter makes sure we can only call this function with a valid lock
+    IgnoreUnused(lock);
+
     if (m_IsWorkingMemAllocated)
     {
         return;
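
Taking a std::lock_guard<std::mutex>& that is only passed to IgnoreUnused is a compile-time "lock witness": a caller cannot construct a lock_guard without actually locking a mutex, so the signature makes it impossible to reach the allocation path unguarded by accident. A standalone sketch of the idiom, with a hypothetical Cache class and a (void) cast standing in for armnn's IgnoreUnused:

    #include <mutex>
    #include <vector>

    class Cache
    {
    public:
        void Update(int value)
        {
            std::lock_guard<std::mutex> lock(m_Mutex);
            GrowStorage(lock);      // fine: we demonstrably hold a lock
            m_Data.push_back(value);
        }

    private:
        // Callable only with a live lock_guard; the parameter is evidence,
        // not data, so it is deliberately unused.
        void GrowStorage(std::lock_guard<std::mutex>& lock)
        {
            (void)lock;             // same role as IgnoreUnused(lock)
            if (m_Data.size() == m_Data.capacity())
            {
                m_Data.reserve(m_Data.size() * 2 + 1);
            }
        }

        std::mutex m_Mutex;
        std::vector<int> m_Data;
    };

Note the idiom proves that some mutex is held, not which one; pairing it with a single, clearly named mutex (m_WorkingMemMutex here) keeps that gap harmless.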
@@ -736,49 +745,29 @@ bool LoadedNetwork::Execute(std::unique_ptr<TimelineUtilityMethods>& timelineUti
     try
     {
         std::lock_guard<std::mutex> lockGuard(m_WorkingMemMutex);
-        AllocateWorkingMemory();
+        AllocateWorkingMemory(lockGuard);

         ProfilingDynamicGuid workloadInferenceID(0);
-        for (auto& input : m_InputQueue)
+        auto ExecuteQueue = [&timelineUtils, &workloadInferenceID, &inferenceGuid](WorkloadQueue& queue)
         {
-            if(timelineUtils)
+            for (auto& workload : queue)
             {
-                workloadInferenceID = timelineUtils->RecordWorkloadInferenceAndStartOfLifeEvent(input->GetGuid(),
-                                                                                                inferenceGuid);
-            }
-            input->Execute();
-            if(timelineUtils)
-            {
-                timelineUtils->RecordEndOfLifeEvent(workloadInferenceID);
+                if(timelineUtils)
+                {
+                    workloadInferenceID = timelineUtils->RecordWorkloadInferenceAndStartOfLifeEvent(workload->GetGuid(),
+                                                                                                    inferenceGuid);
+                }
+                workload->Execute();
+                if(timelineUtils)
+                {
+                    timelineUtils->RecordEndOfLifeEvent(workloadInferenceID);
+                }
             }
-        }
+        };

-        for (auto& workload : m_WorkloadQueue)
-        {
-            if(timelineUtils)
-            {
-                workloadInferenceID = timelineUtils->RecordWorkloadInferenceAndStartOfLifeEvent(workload->GetGuid(),
-                                                                                                inferenceGuid);
-            }
-            workload->Execute();
-            if(timelineUtils)
-            {
-                timelineUtils->RecordEndOfLifeEvent(workloadInferenceID);
-            }
-        }
-        for (auto& output: m_OutputQueue)
-        {
-            if(timelineUtils)
-            {
-                workloadInferenceID = timelineUtils->RecordWorkloadInferenceAndStartOfLifeEvent(output->GetGuid(),
-                                                                                                inferenceGuid);
-            }
-            output->Execute();
-            if(timelineUtils)
-            {
-                timelineUtils->RecordEndOfLifeEvent(workloadInferenceID);
-            }
-        }
+        ExecuteQueue(m_InputQueue);
+        ExecuteQueue(m_WorkloadQueue);
+        ExecuteQueue(m_OutputQueue);
     }
     catch (const RuntimeException& error)
     {
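
The three near-identical timeline/Execute loops collapse into one local lambda that takes the queue as a parameter and captures the shared bookkeeping by reference. The same shape in isolation, with a hypothetical Job type standing in for armnn's workloads:

    #include <iostream>
    #include <vector>

    struct Job
    {
        int id;
        void Execute() const { std::cout << "job " << id << "\n"; }
    };

    void RunAll(std::vector<Job>& inputs,
                std::vector<Job>& workloads,
                std::vector<Job>& outputs)
    {
        int lastId = 0; // shared state, like workloadInferenceID above

        auto executeQueue = [&lastId](std::vector<Job>& queue)
        {
            for (auto& job : queue)
            {
                lastId = job.id; // per-item bookkeeping written once
                job.Execute();
            }
        };

        executeQueue(inputs);    // one loop body,
        executeQueue(workloads); // three call sites
        executeQueue(outputs);
    }

Any future change to the per-workload bookkeeping now happens in one place instead of three.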
diff --git a/src/armnn/LoadedNetwork.hpp b/src/armnn/LoadedNetwork.hpp
index 918375ac38..8c2103019e 100644
--- a/src/armnn/LoadedNetwork.hpp
+++ b/src/armnn/LoadedNetwork.hpp
@@ -59,7 +59,7 @@ public:
     void SendNetworkStructure();

 private:
-    void AllocateWorkingMemory();
+    void AllocateWorkingMemory(std::lock_guard<std::mutex>& lock);

     LoadedNetwork(std::unique_ptr<OptimizedNetwork> net,
                   const INetworkProperties& networkProperties,
diff --git a/src/armnn/Profiling.hpp b/src/armnn/Profiling.hpp
index 08d7f7ba21..08e55a14c5 100644
--- a/src/armnn/Profiling.hpp
+++ b/src/armnn/Profiling.hpp
@@ -115,7 +115,7 @@ public:
     using InstrumentPtr = std::unique_ptr<Instrument>;

     template<typename... Args>
-    ScopedProfilingEvent(const BackendId& backendId, const std::string& name, Args... args)
+    ScopedProfilingEvent(const BackendId& backendId, const std::string& name, Args&&... args)
         : m_Event(nullptr)
         , m_Profiler(ProfilerManager::GetInstance().GetProfiler())
     {
@@ -123,7 +123,7 @@ public:
         {
             std::vector<InstrumentPtr> instruments(0);
             instruments.reserve(sizeof...(args)); //One allocation
-            ConstructNextInVector(instruments, args...);
+            ConstructNextInVector(instruments, std::forward<Args>(args)...);
             m_Event = m_Profiler->BeginEvent(backendId, name, std::move(instruments));
         }
     }
@@ -144,10 +144,10 @@ private:
     }

     template<typename Arg, typename... Args>
-    void ConstructNextInVector(std::vector<InstrumentPtr>& instruments, Arg arg, Args... args)
+    void ConstructNextInVector(std::vector<InstrumentPtr>& instruments, Arg&& arg, Args&&... args)
     {
-        instruments.emplace_back(std::make_unique<Arg>(arg));
-        ConstructNextInVector(instruments, args...);
+        instruments.emplace_back(std::make_unique<Arg>(std::forward<Arg>(arg)));
+        ConstructNextInVector(instruments, std::forward<Args>(args)...);
     }

     Event* m_Event; ///< Event to track
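
Switching Args... to Args&&... and adding std::forward preserves each argument's value category, so instrument objects passed as temporaries are moved into the vector rather than copied. A sketch of the difference, using a hypothetical Probe type in place of an Instrument such as WallClockTimer (and decaying Arg before make_unique so the sketch also accepts lvalues):

    #include <iostream>
    #include <memory>
    #include <type_traits>
    #include <utility>

    struct Probe
    {
        Probe() = default;
        Probe(const Probe&) { std::cout << "copied\n"; }
        Probe(Probe&&) noexcept { std::cout << "moved\n"; }
    };

    // By value, like the old signature: lvalue arguments are copied in.
    template<typename Arg>
    std::unique_ptr<Arg> MakeByValue(Arg arg)
    {
        return std::make_unique<Arg>(std::move(arg));
    }

    // Forwarding reference, like the new signature: no intermediate object.
    template<typename Arg>
    auto MakeForwarded(Arg&& arg)
    {
        using Decayed = typename std::decay<Arg>::type;
        return std::make_unique<Decayed>(std::forward<Arg>(arg));
    }

    int main()
    {
        Probe p;
        MakeByValue(p);         // "copied" then "moved": two constructions
        MakeForwarded(p);       // "copied" once, straight into the unique_ptr
        MakeForwarded(Probe{}); // "moved" once: the temporary is forwarded
    }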
diff --git a/src/armnn/Runtime.cpp b/src/armnn/Runtime.cpp
index 5692494836..28e2df22ab 100644
--- a/src/armnn/Runtime.cpp
+++ b/src/armnn/Runtime.cpp
@@ -308,6 +308,7 @@ Status Runtime::EnqueueWorkload(NetworkId networkId,
                                 const InputTensors& inputTensors,
                                 const OutputTensors& outputTensors)
 {
+    ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "EnqueueWorkload");
     LoadedNetwork* loadedNetwork = GetLoadedNetworkPtr(networkId);

     static thread_local NetworkId lastId = networkId;
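
Relocating the "EnqueueWorkload" event from LoadedNetwork::EnqueueWorkload to Runtime::EnqueueWorkload is what the first bullet of the commit message means by "the actual beginning": network lookup and the rest of the Runtime-level work now fall inside the timed region, and the events added above nest beneath it. The resulting event hierarchy has roughly this shape (illustrative only; the exact layout depends on the profiler's output format):

    EnqueueWorkload
    ├── PrepareInputs
    ├── PrepareOutputs
    ├── Working Memory Allocation
    └── per-workload events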