ArmNN 21.05
LoadedNetwork.hpp
//
// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#pragma once

#include <armnn/Tensor.hpp>
#include <armnn/Types.hpp>

#include "Network.hpp"
#include "LayerFwd.hpp"
#include "Profiling.hpp"

#include <armnn/backends/IBackendInternal.hpp>
#include <backendsCommon/TensorHandleFactoryRegistry.hpp>
#include <backendsCommon/Workload.hpp>
#include <backendsCommon/WorkloadFactory.hpp>
#include <ProfilingService.hpp>
#include <TimelineUtilityMethods.hpp>

#include <mutex>
#include <condition_variable>
#include <unordered_map>
namespace cl
{
class Context;
class CommandQueue;
class Device;
}

namespace armnn
{

class LoadedNetwork
{
public:
    using WorkloadQueue = std::vector<std::unique_ptr<IWorkload>>;

    using ExecutionTuple = std::tuple<InputTensors,
                                      OutputTensors,
                                      std::shared_ptr<IAsyncExecutionCallback>>;

    using ExecutionQueue = std::queue<std::shared_ptr<ExecutionTuple>>;

    ~LoadedNetwork()
    {
        FreeWorkingMemory();
        TerminateThreadPool();
    }

    /// Create a new unique WorkingMemHandle object. Create multiple handles if you wish to have
    /// overlapped execution by calling this function from different threads.
    std::unique_ptr<IWorkingMemHandle> CreateWorkingMemHandle(NetworkId networkId);

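    // A minimal overlapped-execution sketch (hedged: 'loadedNet', 'inputs', 'outputs0',
    // 'outputs1' and 'netId' are placeholders, not names from this header):
    //
    //     auto handle0 = loadedNet->CreateWorkingMemHandle(netId);
    //     auto handle1 = loadedNet->CreateWorkingMemHandle(netId);
    //     std::thread t0([&] { loadedNet->Execute(inputs, outputs0, *handle0); });
    //     std::thread t1([&] { loadedNet->Execute(inputs, outputs1, *handle1); });
    //     t0.join();
    //     t1.join();
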
    TensorInfo GetInputTensorInfo(LayerBindingId layerId) const;
    TensorInfo GetOutputTensorInfo(LayerBindingId layerId) const;

    /// Single-threaded execution of the loaded network.
    Status EnqueueWorkload(const InputTensors& inputTensors, const OutputTensors& outputTensors);

    /// Thread-safe execution of the loaded network.
    Status Execute(const InputTensors& inputTensors,
                   const OutputTensors& outputTensors,
                   IWorkingMemHandle& workingMemHandle);

    /// Schedule an asynchronous execution on the loaded network.
    void Schedule(const InputTensors& inputTensors,
                  const OutputTensors& outputTensors,
                  const QosExecPriority priority,
                  std::shared_ptr<IAsyncExecutionCallback> cb);
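
    // Async scheduling sketch (hedged: 'loadedNet' and 'myCallback' are placeholders, and
    // QosExecPriority::Medium is an assumption about the enum defined in Types.hpp):
    //
    //     loadedNet->Schedule(inputs, outputs, QosExecPriority::Medium, myCallback);
    //     // myCallback is notified through IAsyncExecutionCallback once the inference completes.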

    static std::unique_ptr<LoadedNetwork> MakeLoadedNetwork(std::unique_ptr<IOptimizedNetwork> net,
                                                            std::string& errorMessage,
                                                            const INetworkProperties& networkProperties,
                                                            profiling::ProfilingService& profilingService,
                                                            const NetworkId networkIdOut);

    // NOTE: we return by reference because the purpose of this method is only to provide
    // access to the private m_Profiler; in theory we should not need to increment
    // the shared_ptr's reference counter.
    const std::shared_ptr<IProfiler>& GetProfiler() const { return m_Profiler; }

    void FreeWorkingMemory();

    void RegisterDebugCallback(const DebugCallbackFunction& func);

    void SendNetworkStructure();

    bool IsAsyncEnabled()
    {
        return m_NetworkProperties.m_AsyncEnabled;
    }

    profiling::ProfilingGuid GetNetworkGuid();

private:
    using WorkloadFactoryWithMemoryManager =
        std::pair<IBackendInternal::IWorkloadFactoryPtr, IBackendInternal::IMemoryManagerSharedPtr>;

    using WorkloadFactoryMap = std::unordered_map<BackendId, WorkloadFactoryWithMemoryManager>;

    void AllocateWorkingMemory(std::lock_guard<std::mutex>& lock);
    void AllocateAndExecuteConstantWorkloads();

    std::unordered_map<LayerGuid, ITensorHandle*> m_ConstantTensorHandles;
    std::unordered_map<LayerGuid, std::unique_ptr<IWorkload>> m_ConstantWorkloads;

    LoadedNetwork(std::unique_ptr<IOptimizedNetwork> net,
                  const INetworkProperties& networkProperties,
                  profiling::ProfilingService& profilingService,
                  const NetworkId networkIdOut);

    void EnqueueInput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo);

    void EnqueueOutput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo);

    void EnqueueInput(const BindableLayer& layer, const ConstTensor& inputTensor, WorkingMemHandle& handle);

    void EnqueueOutput(const BindableLayer& layer, const Tensor& outputTensor, WorkingMemHandle& handle);

    void ProcessExecPriorities(std::unique_ptr<IWorkingMemHandle> workingMemHandle);

    bool Execute(std::unique_ptr<profiling::TimelineUtilityMethods>& timelineUtils,
                 profiling::ProfilingGuid inferenceGuid);

    void CreateThreadPool(std::size_t numThreads);

    void TerminateThreadPool() noexcept;

    const IWorkloadFactory& GetWorkloadFactory(const Layer& layer) const;

    using BackendPtrMap = std::unordered_map<BackendId, IBackendInternalUniquePtr>;

    BackendPtrMap m_Backends;
    WorkloadFactoryMap m_WorkloadFactories;

    std::unique_ptr<IOptimizedNetwork> m_OptimizedNetwork;
    std::shared_ptr<IProfiler> m_Profiler;

    WorkloadQueue m_InputQueue;
    WorkloadQueue m_WorkloadQueue;
    WorkloadQueue m_OutputQueue;

    mutable std::mutex m_WorkingMemMutex;

    bool m_IsWorkingMemAllocated = false;

    std::vector<std::unique_ptr<std::thread>> m_Threads;
    std::stack<IWorkingMemHandle> m_WorkingMemHandles;

    ExecutionQueue m_HighPriorityQueue;
    ExecutionQueue m_MediumPriorityQueue;
    ExecutionQueue m_LowPriorityQueue;

    // Condition variables require a mutex to guard the shared state: has an event
    // happened (a stop signal, for example)?
    std::condition_variable m_ThreadPoolEvent;
    std::mutex m_ThreadPoolMutex;

    // The shared state for the condition variable.
    bool m_TerminatePool = false;
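
    // The worker threads presumably block on m_ThreadPoolEvent roughly like this (a sketch,
    // not the actual implementation; 'HaveWork()' is a hypothetical predicate over the
    // three priority queues):
    //
    //     std::unique_lock<std::mutex> lock(m_ThreadPoolMutex);
    //     m_ThreadPoolEvent.wait(lock, [this] { return m_TerminatePool || HaveWork(); });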

    INetworkProperties m_NetworkProperties;

    const NetworkId m_NetworkId;

    TensorHandleFactoryRegistry m_TensorHandleFactoryRegistry;

    profiling::ProfilingService& m_ProfilingService;
};

} // namespace armnn
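
Putting it together, a hedged end-to-end sketch. LoadedNetwork is an internal class normally
driven by the Runtime; 'optNet', 'props', 'profilingService', 'netId' and the tensor vectors
below are placeholders supplied by the caller, not values defined in this header:

    std::string errorMessage;
    std::unique_ptr<armnn::LoadedNetwork> net =
        armnn::LoadedNetwork::MakeLoadedNetwork(std::move(optNet), errorMessage,
                                                props, profilingService, netId);
    if (net)
    {
        // Synchronous, single-threaded path:
        armnn::Status status = net->EnqueueWorkload(inputTensors, outputTensors);

        // Thread-safe path: one working-memory handle per concurrent caller,
        // as in the overlapped-execution sketch next to CreateWorkingMemHandle above:
        auto workingMem = net->CreateWorkingMemHandle(netId);
        status = net->Execute(inputTensors, outputTensors, *workingMem);
    }
    // On failure, net is null and errorMessage explains why.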