ArmNN
 21.11
IRuntime.hpp
Go to the documentation of this file.
1 //
2 // Copyright © 2017 Arm Ltd. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 #pragma once
6 
7 #include "BackendOptions.hpp"
8 #include "INetwork.hpp"
9 #include "IProfiler.hpp"
10 #include "IWorkingMemHandle.hpp"
12 #include "Tensor.hpp"
13 #include "Types.hpp"
14 #include "TypesUtils.hpp"
16 
19 #include <memory>
20 #include <map>
21 
22 namespace armnn
23 {
24 
25 using NetworkId = int;
26 
28 
29 struct RuntimeImpl;
30 class IRuntime;
31 using IRuntimePtr = std::unique_ptr<IRuntime, void(*)(IRuntime* runtime)>;
32 
34 {
35  ARMNN_DEPRECATED_MSG_REMOVAL_DATE("Please use INetworkProperties constructor with MemorySource argument", "22.02")
36  INetworkProperties(bool importEnabled = false,
37  bool exportEnabled = false,
38  bool asyncEnabled = false,
39  bool profilingEnabled = false)
40  : m_ImportEnabled(importEnabled),
41  m_ExportEnabled(exportEnabled),
42  m_AsyncEnabled(asyncEnabled),
43  m_ProfilingEnabled(profilingEnabled),
48  {}
49 
50  ARMNN_DEPRECATED_MSG_REMOVAL_DATE("Please use INetworkProperties constructor without numThreads argument", "22.02")
51  INetworkProperties(bool asyncEnabled,
52  MemorySource inputSource,
53  MemorySource outputSource,
54  size_t numThreads,
55  bool profilingEnabled = false)
56  : m_ImportEnabled(inputSource != MemorySource::Undefined),
57  m_ExportEnabled(outputSource != MemorySource::Undefined),
58  m_AsyncEnabled(asyncEnabled),
59  m_ProfilingEnabled(profilingEnabled),
61  m_InputSource(inputSource),
62  m_OutputSource(outputSource),
64  {
65  armnn::IgnoreUnused(numThreads);
66  }
67 
68  INetworkProperties(bool asyncEnabled,
69  MemorySource inputSource,
70  MemorySource outputSource,
71  bool profilingEnabled = false,
73  bool externalMemoryManagementEnabled = false)
74  : m_ImportEnabled(inputSource != MemorySource::Undefined),
75  m_ExportEnabled(outputSource != MemorySource::Undefined),
76  m_AsyncEnabled(asyncEnabled),
77  m_ProfilingEnabled(profilingEnabled),
78  m_OutputNetworkDetailsMethod(detailsMethod),
79  m_InputSource(inputSource),
80  m_OutputSource(outputSource),
81  m_ExternalMemoryManagementEnabled(externalMemoryManagementEnabled)
82  {}
83 
84  /// Deprecated and will be removed in future release.
85  const bool m_ImportEnabled;
86  /// Deprecated and will be removed in future release.
87  const bool m_ExportEnabled;
88 
89  const bool m_AsyncEnabled;
90 
91  const bool m_ProfilingEnabled;
92 
94 
97 
99 
100  virtual ~INetworkProperties() {}
101 };
102 
103 using namespace armnn::experimental;
104 
105 class IRuntime
106 {
107 public:
109  {
111  : m_GpuAccTunedParameters(nullptr)
112  , m_EnableGpuProfiling(false)
113  , m_DynamicBackendsPath("")
114  , m_ProtectedMode(false)
115  , m_CustomAllocatorMap()
116  , m_MemoryOptimizerStrategyMap()
117  {}
118 
119  /// If set, uses the GpuAcc tuned parameters from the given object when executing GPU workloads.
120  /// It will also be updated with new tuned parameters if it is configured to do so.
121  std::shared_ptr<IGpuAccTunedParameters> m_GpuAccTunedParameters;
122 
123  /// Setting this flag will allow the user to obtain GPU profiling information from the runtime.
125 
126  /// Setting this value will override the paths set by the DYNAMIC_BACKEND_PATHS compiler directive
127  /// Only a single path is allowed for the override
128  /// It defines the path to search for any [dynamic backend libraries](src/dynamic/README.md).
130 
131  /// Setting this flag will allow the user to create the Runtime in protected mode.
132  /// It will run all the inferences on protected memory and will make sure that
133  /// INetworkProperties::m_ImportEnabled set to true with MemorySource::DmaBufProtected option
134  /// This requires that the backend supports Protected Memory and has an allocator capable of
135  /// allocating Protected Memory associated with it.
137 
138  /// @brief A map to define a custom memory allocator for specific backend Ids.
139  ///
140  /// @details A Custom Allocator is used for allocation of working memory in the backends.
141  /// Set this if you need to take control of how memory is allocated on a backend. Required for
142  /// Protected Mode in order to correctly allocate Protected Memory
143  ///
144  /// @note Only supported for GpuAcc
145  std::map<BackendId, std::shared_ptr<ICustomAllocator>> m_CustomAllocatorMap;
146 
147  /// @brief A map to define a custom memory optimizer strategy for specific backend Ids.
148  ///
149  /// @details A Memory Optimizer Strategy provides a solution to an abstract representation of
150  /// a network's memory requirements. This can also be used to return a pre-computed solution
151  /// for a specific network. Set this if you want to implement a Custom Memory Optimizer Strategy
152  /// for a given backend.
153  std::map<BackendId, std::shared_ptr<IMemoryOptimizerStrategy>> m_MemoryOptimizerStrategyMap;
154 
156  {
158  : m_EnableProfiling(false)
159  , m_TimelineEnabled(false)
160  , m_OutgoingCaptureFile("")
161  , m_IncomingCaptureFile("")
162  , m_FileOnly(false)
163  , m_CapturePeriod(LOWEST_CAPTURE_PERIOD)
164  , m_FileFormat("binary")
165  , m_LocalPacketHandlers()
166  {}
167 
168  /// Indicates whether external profiling is enabled or not.
170  /// Indicates whether external timeline profiling is enabled or not.
172  /// Path to a file in which outgoing timeline profiling messages will be stored.
174  /// Path to a file in which incoming timeline profiling messages will be stored.
176  /// Enable profiling output to file only.
178  /// The duration at which captured profiling messages will be flushed.
179  uint32_t m_CapturePeriod;
180  /// The format of the file used for outputting profiling data.
181  std::string m_FileFormat;
182  std::vector<armnn::profiling::ILocalPacketHandlerSharedPtr> m_LocalPacketHandlers;
183  };
185 
186  /// Pass backend specific options.
187  ///
188  /// For example, to enable GpuAcc tuning add the following
189  /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~.cpp
190  /// m_BackendOption.emplace_back(
191  /// BackendOptions{"GpuAcc",
192  /// {
193  /// {"TuningLevel", 2},
194  /// {"TuningFile", filename}
195  /// {"MemoryOptimizerStrategy", strategyname}
196  /// }
197  /// });
198  /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
199  /// Execute representative workloads through the runtime to generate tuning data.
200  /// The tuning file is written once the runtime is destroyed
201 
202  /// To execute with the tuning data, start up with just the tuning file specified.
203  /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~.cpp
204  /// m_BackendOption.emplace_back(
205  /// BackendOptions{"GpuAcc",
206  /// {
207  /// {"TuningFile", filename}
208  /// }
209  /// });
210  /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
211 
212  /// The following backend options are available:
213  /// AllBackends:
 214  /// "MemoryOptimizerStrategy" : string [strategynameString]
215  /// (Existing Memory Optimizer Strategies: ConstantMemoryStrategy)
216  /// GpuAcc:
217  /// "TuningLevel" : int [0..3] (0=UseOnly(default) | 1=RapidTuning | 2=NormalTuning | 3=ExhaustiveTuning)
218  /// "TuningFile" : string [filenameString]
219  /// "KernelProfilingEnabled" : bool [true | false]
220  std::vector<BackendOptions> m_BackendOptions;
221  };
222 
223  static IRuntime* CreateRaw(const CreationOptions& options);
224  static IRuntimePtr Create(const CreationOptions& options);
225  static void Destroy(IRuntime* runtime);
226 
227  /// Loads a complete network into the IRuntime.
228  /// @param [out] networkIdOut - Unique identifier for the network is returned in this reference.
229  /// @param [in] network - Complete network to load into the IRuntime.
230  /// The runtime takes ownership of the network once passed in.
231  /// @return armnn::Status
232  Status LoadNetwork(NetworkId& networkIdOut, IOptimizedNetworkPtr network);
233 
234  /// Load a complete network into the IRuntime.
235  /// @param [out] networkIdOut Unique identifier for the network is returned in this reference.
236  /// @param [in] network Complete network to load into the IRuntime.
237  /// @param [out] errorMessage Error message if there were any errors.
238  /// The runtime takes ownership of the network once passed in.
239  /// @return armnn::Status
240  Status LoadNetwork(NetworkId& networkIdOut,
241  IOptimizedNetworkPtr network,
242  std::string& errorMessage);
243 
244  Status LoadNetwork(NetworkId& networkIdOut,
245  IOptimizedNetworkPtr network,
246  std::string& errorMessage,
247  const INetworkProperties& networkProperties);
248 
249  TensorInfo GetInputTensorInfo(NetworkId networkId, LayerBindingId layerId) const;
250  TensorInfo GetOutputTensorInfo(NetworkId networkId, LayerBindingId layerId) const;
251 
252 
253  /// ImportInputs separates the importing and mapping of InputTensors from network execution.
254  /// Allowing for a set of InputTensors to be imported and mapped once, but used in execution many times.
255  /// This function is not thread safe and must not be used while other threads are calling Execute().
256  /// Only compatible with AsyncEnabled networks
257  std::vector<ImportedInputId> ImportInputs(NetworkId networkId, const InputTensors& inputTensors);
258 
259  /// ImportOutputs separates the importing and mapping of OutputTensors from network execution.
260  /// Allowing for a set of OutputTensors to be imported and mapped once, but used in execution many times.
261  /// This function is not thread safe and must not be used while other threads are calling Execute().
262  /// Only compatible with AsyncEnabled networks
263  std::vector<ImportedOutputId> ImportOutputs(NetworkId networkId, const OutputTensors& outputTensors);
264 
265  /// Un-import and delete the imported InputTensor/s
266  /// This function is not thread safe and must not be used while other threads are calling Execute().
267  /// Only compatible with AsyncEnabled networks
268  void ClearImportedInputs(NetworkId networkId, const std::vector<ImportedInputId> inputIds);
269 
270  /// Un-import and delete the imported OutputTensor/s
271  /// This function is not thread safe and must not be used while other threads are calling Execute().
272  /// Only compatible with AsyncEnabled networks
273  void ClearImportedOutputs(NetworkId networkId, const std::vector<ImportedOutputId> outputIds);
274 
275  /// Evaluates a network using input in inputTensors and outputs filled into outputTensors
276  Status EnqueueWorkload(NetworkId networkId,
277  const InputTensors& inputTensors,
278  const OutputTensors& outputTensors);
279 
280  /// This is an experimental function.
281  /// Evaluates a network using input in inputTensors and outputs filled into outputTensors.
282  /// This function performs a thread safe execution of the network. Returns once execution is complete.
283  /// Will block until this and any other thread using the same workingMem object completes.
284  Status Execute(IWorkingMemHandle& workingMemHandle,
285  const InputTensors& inputTensors,
286  const OutputTensors& outputTensors,
287  std::vector<ImportedInputId> preImportedInputs = {},
288  std::vector<ImportedOutputId> preImportedOutputs = {});
289 
290  /// Unloads a network from the IRuntime.
291  /// At the moment this only removes the network from the m_Impl->m_Network.
292  /// This might need more work in the future to be AndroidNN compliant.
293  /// @param [in] networkId - Unique identifier for the network to be unloaded. Generated in LoadNetwork().
294  /// @return armnn::Status
295  Status UnloadNetwork(NetworkId networkId);
296 
297  const IDeviceSpec& GetDeviceSpec() const;
298 
299  /// Create a new unique WorkingMemHandle object. Create multiple handles if you wish to have
300  /// overlapped Execution by calling this function from different threads.
301  std::unique_ptr<IWorkingMemHandle> CreateWorkingMemHandle(NetworkId networkId);
302 
303  /// Gets the profiler corresponding to the given network id.
304  /// @param networkId The id of the network for which to get the profile.
305  /// @return A pointer to the requested profiler, or nullptr if not found.
306  const std::shared_ptr<IProfiler> GetProfiler(NetworkId networkId) const;
307 
308  /// Registers a callback function to debug layers performing custom computations on intermediate tensors.
309  /// @param networkId The id of the network to register the callback.
310  /// @param func callback function to pass to the debug layer.
311  void RegisterDebugCallback(NetworkId networkId, const DebugCallbackFunction& func);
312 
313 protected:
314  IRuntime();
315  IRuntime(const IRuntime::CreationOptions& options);
316  ~IRuntime();
317 
318  std::unique_ptr<RuntimeImpl> pRuntimeImpl;
319 };
320 
321 
322 /// The following API is replaced by the backend options API.
323 using IGpuAccTunedParametersPtr = std::shared_ptr<IGpuAccTunedParameters>;
324 
325 /// Manages a set of GpuAcc parameters which have been tuned for maximum performance.
326 /// Passes an instance of this object to the IRuntime::Create() method (via IRuntime::CreationOptions) to use it
327 /// for all GPU workload execution.
328 ///
329 /// Can be created in two modes:
330 /// - In UseTunedParameters mode, the parameters stored in this object are used to execute GPU workloads.
331 /// - In UpdateTunedParameters mode, additionally, whenever a GPU workload is executed for the first time, the
332 /// optimum parameters will be found and stored in this object. WARNING - This tuning can be slow.
333 ///
334 /// The parameters can be loaded from and saved to a file so that you can first run a slow initial read-write
335 /// execution, save the parameters for later and then run fast read-only executions using the optimised parameters.
337 {
338 public:
339  enum class Mode
340  {
341  UseTunedParameters,
342  UpdateTunedParameters
343  };
344 
345  enum class TuningLevel
346  {
347  Rapid = 1,
348  Normal = 2,
349  Exhaustive = 3
350  };
351 
352  /// Creates an IClTunedParameters with the given mode.
353  /// @{
354  static IGpuAccTunedParameters* CreateRaw(Mode mode, TuningLevel tunerMode);
355  static IGpuAccTunedParametersPtr Create(Mode mode, TuningLevel tunerMode);
356  /// @}
357  static void Destroy(IGpuAccTunedParameters* params);
358 
359  /// Loads an existing set of tuned parameters from the given file.
360  /// If there is an error loading the file, an armnn::Exception is thrown.
361  virtual void Load(const char* filename) = 0;
362 
363  /// Saves the current set of tuned parameters to the given file.
364  /// If there is an error saving to the file, an armnn::Exception is thrown.
365  virtual void Save(const char* filename) const = 0;
366 
367 protected:
369 };
370 
371 } // namespace armnn
const MemorySource m_InputSource
Definition: IRuntime.hpp:95
const bool m_ImportEnabled
Deprecated and will be removed in future release.
Definition: IRuntime.hpp:85
std::string m_OutgoingCaptureFile
Path to a file in which outgoing timeline profiling messages will be stored.
Definition: IRuntime.hpp:173
std::unique_ptr< IRuntime, void(*)(IRuntime *runtime)> IRuntimePtr
Definition: IRuntime.hpp:31
std::vector< std::pair< LayerBindingId, class ConstTensor > > InputTensors
Definition: Tensor.hpp:392
const ProfilingDetailsMethod m_OutputNetworkDetailsMethod
Definition: IRuntime.hpp:93
std::map< BackendId, std::shared_ptr< IMemoryOptimizerStrategy > > m_MemoryOptimizerStrategyMap
A map to define a custom memory optimizer strategy for specific backend Ids.
Definition: IRuntime.hpp:153
Copyright (c) 2021 ARM Limited and Contributors.
void IgnoreUnused(Ts &&...)
std::string m_IncomingCaptureFile
Path to a file in which incoming timeline profiling messages will be stored.
Definition: IRuntime.hpp:175
bool m_EnableProfiling
Indicates whether external profiling is enabled or not.
Definition: IRuntime.hpp:169
bool m_FileOnly
Enable profiling output to file only.
Definition: IRuntime.hpp:177
std::function< void(LayerGuid guid, unsigned int slotIndex, ITensorHandle *tensorHandle)> DebugCallbackFunction
Define the type of callback for the Debug layer to call.
Definition: Types.hpp:357
int LayerBindingId
Type of identifiers for bindable layers (inputs, outputs).
Definition: Types.hpp:277
ProfilingDetailsMethod
Define the behaviour of the internal profiler when outputting network details.
Definition: Types.hpp:58
std::shared_ptr< IGpuAccTunedParameters > m_GpuAccTunedParameters
If set, uses the GpuAcc tuned parameters from the given object when executing GPU workloads...
Definition: IRuntime.hpp:121
std::vector< BackendOptions > m_BackendOptions
Pass backend specific options.
Definition: IRuntime.hpp:220
const bool m_ExternalMemoryManagementEnabled
Definition: IRuntime.hpp:98
std::vector< armnn::profiling::ILocalPacketHandlerSharedPtr > m_LocalPacketHandlers
Definition: IRuntime.hpp:182
int NetworkId
Definition: IRuntime.hpp:25
std::vector< std::pair< LayerBindingId, class Tensor > > OutputTensors
Definition: Tensor.hpp:393
Status
enumeration
Definition: Types.hpp:29
std::unique_ptr< IOptimizedNetwork, void(*)(IOptimizedNetwork *network)> IOptimizedNetworkPtr
Definition: INetwork.hpp:198
INetworkProperties(bool asyncEnabled, MemorySource inputSource, MemorySource outputSource, bool profilingEnabled=false, ProfilingDetailsMethod detailsMethod=ProfilingDetailsMethod::Undefined, bool externalMemoryManagementEnabled=false)
Definition: IRuntime.hpp:68
const bool m_ExportEnabled
Deprecated and will be removed in future release.
Definition: IRuntime.hpp:87
std::map< BackendId, std::shared_ptr< ICustomAllocator > > m_CustomAllocatorMap
A map to define a custom memory allocator for specific backend Ids.
Definition: IRuntime.hpp:145
std::unique_ptr< RuntimeImpl > pRuntimeImpl
Definition: IRuntime.hpp:318
Device specific knowledge to be passed to the optimizer.
Definition: Types.hpp:267
constexpr unsigned int LOWEST_CAPTURE_PERIOD
The lowest performance data capture interval we support is 10 milliseconds.
Definition: Types.hpp:21
class ARMNN_DEPRECATED_MSG_REMOVAL_DATE("Use ABI stable IStrategy instead.", "22.05") ILayerVisitor
std::shared_ptr< IGpuAccTunedParameters > IGpuAccTunedParametersPtr
The following API is replaced by the backend options API.
Definition: IRuntime.hpp:323
bool m_ProtectedMode
Setting this flag will allow the user to create the Runtime in protected mode.
Definition: IRuntime.hpp:136
std::string m_DynamicBackendsPath
Setting this value will override the paths set by the DYNAMIC_BACKEND_PATHS compiler directive. Only a single path is allowed for the override.
Definition: IRuntime.hpp:129
bool m_EnableGpuProfiling
Setting this flag will allow the user to obtain GPU profiling information from the runtime...
Definition: IRuntime.hpp:124
Manages a set of GpuAcc parameters which have been tuned for maximum performance. ...
Definition: IRuntime.hpp:336
MemorySource
Define the Memory Source to reduce copies.
Definition: Types.hpp:217
uint32_t m_CapturePeriod
The duration at which captured profiling messages will be flushed.
Definition: IRuntime.hpp:179
bool m_TimelineEnabled
Indicates whether external timeline profiling is enabled or not.
Definition: IRuntime.hpp:171
const MemorySource m_OutputSource
Definition: IRuntime.hpp:96
ExternalProfilingOptions m_ProfilingOptions
Definition: IRuntime.hpp:184
const bool m_ProfilingEnabled
Definition: IRuntime.hpp:91
std::string m_FileFormat
The format of the file used for outputting profiling data.
Definition: IRuntime.hpp:181