ArmNN 21.05
LoadedNetwork.hpp
//
// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#pragma once

#include <armnn/Tensor.hpp>
#include <armnn/Types.hpp>

#include "Network.hpp"
#include "LayerFwd.hpp"
#include "Profiling.hpp"

#include <armnn/backends/IBackendInternal.hpp>
#include <backendsCommon/TensorHandleFactoryRegistry.hpp>
#include <backendsCommon/Workload.hpp>
#include <backendsCommon/WorkloadFactory.hpp>
#include <ProfilingService.hpp>
#include <TimelineUtilityMethods.hpp>

#include <mutex>
#include <condition_variable>
#include <unordered_map>
namespace cl
{
class Context;
class CommandQueue;
class Device;
}

namespace armnn
{

class LoadedNetwork
{
public:
    using WorkloadQueue = std::vector<std::unique_ptr<IWorkload>>;

    using ExecutionTuple = std::tuple<InputTensors,
                                      OutputTensors,
                                      std::shared_ptr<IAsyncExecutionCallback>>;

    using ExecutionQueue = std::queue<std::shared_ptr<ExecutionTuple>>;

    ~LoadedNetwork()
    {
        FreeWorkingMemory();
        TerminateThreadPool();
    }

    /// Create a new unique WorkingMemHandle object. Create multiple handles if you wish to have
    /// overlapped execution by calling this function from different threads.
    std::unique_ptr<IWorkingMemHandle> CreateWorkingMemHandle(NetworkId networkId);

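    // A minimal overlapped-execution sketch (hedged: 'loadedNet', 'inputs', 'outputs0',
    // 'outputs1' and 'netId' are placeholders, not names from this header):
    //
    //     auto handle0 = loadedNet->CreateWorkingMemHandle(netId);
    //     auto handle1 = loadedNet->CreateWorkingMemHandle(netId);
    //     std::thread t0([&] { loadedNet->Execute(inputs, outputs0, *handle0); });
    //     std::thread t1([&] { loadedNet->Execute(inputs, outputs1, *handle1); });
    //     t0.join();
    //     t1.join();
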
    TensorInfo GetInputTensorInfo(LayerBindingId layerId) const;
    TensorInfo GetOutputTensorInfo(LayerBindingId layerId) const;

    /// Single-threaded execution of the loaded network.
    Status EnqueueWorkload(const InputTensors& inputTensors, const OutputTensors& outputTensors);

    /// Thread-safe execution of the loaded network.
    Status Execute(const InputTensors& inputTensors,
                   const OutputTensors& outputTensors,
                   IWorkingMemHandle& workingMemHandle);

    /// Schedule an asynchronous execution on the loaded network.
    void Schedule(const InputTensors& inputTensors,
                  const OutputTensors& outputTensors,
                  const QosExecPriority priority,
                  std::shared_ptr<IAsyncExecutionCallback> cb);
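
    // Async scheduling sketch (hedged: 'loadedNet' and 'myCallback' are placeholders, and
    // QosExecPriority::Medium is an assumption about the enum defined in Types.hpp):
    //
    //     loadedNet->Schedule(inputs, outputs, QosExecPriority::Medium, myCallback);
    //     // myCallback is notified through IAsyncExecutionCallback once the inference completes.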

    static std::unique_ptr<LoadedNetwork> MakeLoadedNetwork(std::unique_ptr<IOptimizedNetwork> net,
                                                            std::string& errorMessage,
                                                            const INetworkProperties& networkProperties,
                                                            profiling::ProfilingService& profilingService,
                                                            const NetworkId networkIdOut);

    // NOTE: we return by reference because the purpose of this method is only to provide
    // access to the private m_Profiler; in theory we should not need to increment
    // the shared_ptr's reference counter.
    const std::shared_ptr<IProfiler>& GetProfiler() const { return m_Profiler; }

    void FreeWorkingMemory();

    void RegisterDebugCallback(const DebugCallbackFunction& func);

    void SendNetworkStructure();

    bool IsAsyncEnabled()
    {
        return m_NetworkProperties.m_AsyncEnabled;
    }

    profiling::ProfilingGuid GetNetworkGuid();

private:
    using WorkloadFactoryWithMemoryManager =
        std::pair<IBackendInternal::IWorkloadFactoryPtr, IBackendInternal::IMemoryManagerSharedPtr>;

    using WorkloadFactoryMap = std::unordered_map<BackendId, WorkloadFactoryWithMemoryManager>;

    void AllocateWorkingMemory(std::lock_guard<std::mutex>& lock);
    void AllocateAndExecuteConstantWorkloads();

    std::unordered_map<LayerGuid, ITensorHandle*> m_ConstantTensorHandles;
    std::unordered_map<LayerGuid, std::unique_ptr<IWorkload>> m_ConstantWorkloads;

    LoadedNetwork(std::unique_ptr<IOptimizedNetwork> net,
                  const INetworkProperties& networkProperties,
                  profiling::ProfilingService& profilingService,
                  const NetworkId networkIdOut);

    void EnqueueInput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo);

    void EnqueueOutput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo);

    void EnqueueInput(const BindableLayer& layer, const ConstTensor& inputTensor, WorkingMemHandle& handle);

    void EnqueueOutput(const BindableLayer& layer, const Tensor& outputTensor, WorkingMemHandle& handle);

    void ProcessExecPriorities(std::unique_ptr<IWorkingMemHandle> workingMemHandle);

    bool Execute(std::unique_ptr<profiling::TimelineUtilityMethods>& timelineUtils,
                 profiling::ProfilingGuid inferenceGuid);

    void CreateThreadPool(std::size_t numThreads);

    void TerminateThreadPool() noexcept;

    const IWorkloadFactory& GetWorkloadFactory(const Layer& layer) const;

    using BackendPtrMap = std::unordered_map<BackendId, IBackendInternalUniquePtr>;

    BackendPtrMap m_Backends;
    WorkloadFactoryMap m_WorkloadFactories;

    std::unique_ptr<IOptimizedNetwork> m_OptimizedNetwork;
    std::shared_ptr<IProfiler> m_Profiler;

    WorkloadQueue m_InputQueue;
    WorkloadQueue m_WorkloadQueue;
    WorkloadQueue m_OutputQueue;

    mutable std::mutex m_WorkingMemMutex;

    bool m_IsWorkingMemAllocated = false;

    std::vector<std::unique_ptr<std::thread>> m_Threads;
    std::stack<IWorkingMemHandle> m_WorkingMemHandles;

    ExecutionQueue m_HighPriorityQueue;
    ExecutionQueue m_MediumPriorityQueue;
    ExecutionQueue m_LowPriorityQueue;

    // Condition variables require a mutex to guard the shared state: has an event
    // happened (a stop signal, for example)?
    std::condition_variable m_ThreadPoolEvent;
    std::mutex m_ThreadPoolMutex;

    // The shared state for the condition variable.
    bool m_TerminatePool = false;
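
    // The worker threads presumably block on m_ThreadPoolEvent roughly like this (a sketch,
    // not the actual implementation; 'HaveWork()' is a hypothetical predicate over the
    // three priority queues):
    //
    //     std::unique_lock<std::mutex> lock(m_ThreadPoolMutex);
    //     m_ThreadPoolEvent.wait(lock, [this] { return m_TerminatePool || HaveWork(); });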

    INetworkProperties m_NetworkProperties;

    const NetworkId m_NetworkId;

    TensorHandleFactoryRegistry m_TensorHandleFactoryRegistry;

    profiling::ProfilingService& m_ProfilingService;
};

} // namespace armnn
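
Putting it together, a hedged end-to-end sketch. LoadedNetwork is an internal class normally
driven by the Runtime; 'optNet', 'props', 'profilingService', 'netId' and the tensor vectors
below are placeholders supplied by the caller, not values defined in this header:

    std::string errorMessage;
    std::unique_ptr<armnn::LoadedNetwork> net =
        armnn::LoadedNetwork::MakeLoadedNetwork(std::move(optNet), errorMessage,
                                                props, profilingService, netId);
    if (net)
    {
        // Synchronous, single-threaded path:
        armnn::Status status = net->EnqueueWorkload(inputTensors, outputTensors);

        // Thread-safe path: one working-memory handle per concurrent caller,
        // as in the overlapped-execution sketch next to CreateWorkingMemHandle above:
        auto workingMem = net->CreateWorkingMemHandle(netId);
        status = net->Execute(inputTensors, outputTensors, *workingMem);
    }
    // On failure, net is null and errorMessage explains why.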