diff options
Diffstat (limited to 'src/armnn')
-rw-r--r-- | src/armnn/LoadedNetwork.cpp | 89 | ||||
-rw-r--r-- | src/armnn/LoadedNetwork.hpp | 2 | ||||
-rw-r--r-- | src/armnn/Profiling.hpp | 10 | ||||
-rw-r--r-- | src/armnn/Runtime.cpp | 1 |
4 files changed, 46 insertions, 56 deletions
diff --git a/src/armnn/LoadedNetwork.cpp b/src/armnn/LoadedNetwork.cpp index fbf8cfbb4c..b35dfd1107 100644 --- a/src/armnn/LoadedNetwork.cpp +++ b/src/armnn/LoadedNetwork.cpp @@ -451,8 +451,6 @@ private: Status LoadedNetwork::EnqueueWorkload(const InputTensors& inputTensors, const OutputTensors& outputTensors) { - ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "EnqueueWorkload"); - const Graph& graph = m_OptimizedNetwork->GetGraph(); // Walk graph to determine the order of execution. @@ -471,21 +469,27 @@ Status LoadedNetwork::EnqueueWorkload(const InputTensors& inputTensors, } // For each input to the network, call EnqueueInput with the data passed by the user. - m_InputQueue.clear(); - m_InputQueue.reserve(graph.GetNumInputs()); - for (const BindableLayer* inputLayer : graph.GetInputLayers()) { - const TensorPin& pin = workloadData.GetInputTensorPin(inputLayer->GetBindingId()); - EnqueueInput(*inputLayer, pin.GetTensorHandle(), pin.GetTensorInfo()); + ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "PrepareInputs"); + m_InputQueue.clear(); + m_InputQueue.reserve(graph.GetNumInputs()); + for (const BindableLayer* inputLayer : graph.GetInputLayers()) + { + const TensorPin& pin = workloadData.GetInputTensorPin(inputLayer->GetBindingId()); + EnqueueInput(*inputLayer, pin.GetTensorHandle(), pin.GetTensorInfo()); + } } // For each output to the network, call EnqueueOutput with the data passed by the user. - m_OutputQueue.clear(); - m_OutputQueue.reserve(graph.GetNumOutputs()); - for (const BindableLayer* outputLayer : graph.GetOutputLayers()) { - const TensorPin& pin = workloadData.GetOutputTensorPin(outputLayer->GetBindingId()); - EnqueueOutput(*outputLayer, pin.GetTensorHandle(), pin.GetTensorInfo()); + ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "PrepareOutputs"); + m_OutputQueue.clear(); + m_OutputQueue.reserve(graph.GetNumOutputs()); + for (const BindableLayer* outputLayer : graph.GetOutputLayers()) + { + const TensorPin& pin = workloadData.GetOutputTensorPin(outputLayer->GetBindingId()); + EnqueueOutput(*outputLayer, pin.GetTensorHandle(), pin.GetTensorInfo()); + } } std::unique_ptr<TimelineUtilityMethods> timelineUtils = @@ -684,8 +688,13 @@ void LoadedNetwork::EnqueueOutput(const BindableLayer& layer, ITensorHandle* ten } } -void LoadedNetwork::AllocateWorkingMemory() +void LoadedNetwork::AllocateWorkingMemory(std::lock_guard<std::mutex>& lock) { + ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Working Memory Allocation"); + + // this unused parameter makes sure we can only call this function with a valid lock + IgnoreUnused(lock); + if (m_IsWorkingMemAllocated) { return; @@ -736,49 +745,29 @@ bool LoadedNetwork::Execute(std::unique_ptr<TimelineUtilityMethods>& timelineUti try { std::lock_guard<std::mutex> lockGuard(m_WorkingMemMutex); - AllocateWorkingMemory(); + AllocateWorkingMemory(lockGuard); ProfilingDynamicGuid workloadInferenceID(0); - for (auto& input : m_InputQueue) + auto ExecuteQueue = [&timelineUtils, &workloadInferenceID, &inferenceGuid](WorkloadQueue& queue) { - if(timelineUtils) + for (auto& workload : queue) { - workloadInferenceID = timelineUtils->RecordWorkloadInferenceAndStartOfLifeEvent(input->GetGuid(), - inferenceGuid); - } - input->Execute(); - if(timelineUtils) - { - timelineUtils->RecordEndOfLifeEvent(workloadInferenceID); + if(timelineUtils) + { + workloadInferenceID = timelineUtils->RecordWorkloadInferenceAndStartOfLifeEvent(workload->GetGuid(), + inferenceGuid); + } + workload->Execute(); + if(timelineUtils) + { + timelineUtils->RecordEndOfLifeEvent(workloadInferenceID); + } } - } + }; - for (auto& workload : m_WorkloadQueue) - { - if(timelineUtils) - { - workloadInferenceID = timelineUtils->RecordWorkloadInferenceAndStartOfLifeEvent(workload->GetGuid(), - inferenceGuid); - } - workload->Execute(); - if(timelineUtils) - { - timelineUtils->RecordEndOfLifeEvent(workloadInferenceID); - } - } - for (auto& output: m_OutputQueue) - { - if(timelineUtils) - { - workloadInferenceID = timelineUtils->RecordWorkloadInferenceAndStartOfLifeEvent(output->GetGuid(), - inferenceGuid); - } - output->Execute(); - if(timelineUtils) - { - timelineUtils->RecordEndOfLifeEvent(workloadInferenceID); - } - } + ExecuteQueue(m_InputQueue); + ExecuteQueue(m_WorkloadQueue); + ExecuteQueue(m_OutputQueue); } catch (const RuntimeException& error) { diff --git a/src/armnn/LoadedNetwork.hpp b/src/armnn/LoadedNetwork.hpp index 918375ac38..8c2103019e 100644 --- a/src/armnn/LoadedNetwork.hpp +++ b/src/armnn/LoadedNetwork.hpp @@ -59,7 +59,7 @@ public: void SendNetworkStructure(); private: - void AllocateWorkingMemory(); + void AllocateWorkingMemory(std::lock_guard<std::mutex>& lock); LoadedNetwork(std::unique_ptr<OptimizedNetwork> net, const INetworkProperties& networkProperties, diff --git a/src/armnn/Profiling.hpp b/src/armnn/Profiling.hpp index 08d7f7ba21..08e55a14c5 100644 --- a/src/armnn/Profiling.hpp +++ b/src/armnn/Profiling.hpp @@ -115,7 +115,7 @@ public: using InstrumentPtr = std::unique_ptr<Instrument>; template<typename... Args> - ScopedProfilingEvent(const BackendId& backendId, const std::string& name, Args... args) + ScopedProfilingEvent(const BackendId& backendId, const std::string& name, Args&&... args) : m_Event(nullptr) , m_Profiler(ProfilerManager::GetInstance().GetProfiler()) { @@ -123,7 +123,7 @@ public: { std::vector<InstrumentPtr> instruments(0); instruments.reserve(sizeof...(args)); //One allocation - ConstructNextInVector(instruments, args...); + ConstructNextInVector(instruments, std::forward<Args>(args)...); m_Event = m_Profiler->BeginEvent(backendId, name, std::move(instruments)); } } @@ -144,10 +144,10 @@ private: } template<typename Arg, typename... Args> - void ConstructNextInVector(std::vector<InstrumentPtr>& instruments, Arg arg, Args... args) + void ConstructNextInVector(std::vector<InstrumentPtr>& instruments, Arg&& arg, Args&&... args) { - instruments.emplace_back(std::make_unique<Arg>(arg)); - ConstructNextInVector(instruments, args...); + instruments.emplace_back(std::make_unique<Arg>(std::forward<Arg>(arg))); + ConstructNextInVector(instruments, std::forward<Args>(args)...); } Event* m_Event; ///< Event to track diff --git a/src/armnn/Runtime.cpp b/src/armnn/Runtime.cpp index 5692494836..28e2df22ab 100644 --- a/src/armnn/Runtime.cpp +++ b/src/armnn/Runtime.cpp @@ -308,6 +308,7 @@ Status Runtime::EnqueueWorkload(NetworkId networkId, const InputTensors& inputTensors, const OutputTensors& outputTensors) { + ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "EnqueueWorkload"); LoadedNetwork* loadedNetwork = GetLoadedNetworkPtr(networkId); static thread_local NetworkId lastId = networkId; |