ArmNN
 22.08
IRuntime.hpp
//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//
#pragma once

#include "BackendOptions.hpp"
#include "INetwork.hpp"
#include "IProfiler.hpp"
#include "IWorkingMemHandle.hpp"
#include "armnn/MemorySources.hpp"
#include "Tensor.hpp"
#include "Types.hpp"
#include "TypesUtils.hpp"

#include <armnn/backends/ICustomAllocator.hpp>
#include <armnn/backends/IMemoryOptimizerStrategy.hpp>

#include <client/include/ILocalPacketHandler.hpp>

#include <memory>
#include <map>

namespace armnn
{

using NetworkId = int;

class IGpuAccTunedParameters;

struct RuntimeImpl;
class IRuntime;
using IRuntimePtr = std::unique_ptr<IRuntime, void(*)(IRuntime* runtime)>;

struct INetworkProperties
{
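    /// Example (an illustrative sketch, not part of this header): constructing properties
    /// that request asynchronous execution with imported inputs and exported outputs.
    /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~.cpp
    /// INetworkProperties networkProperties(true,                  // asyncEnabled
    ///                                      MemorySource::Malloc,  // inputSource
    ///                                      MemorySource::Malloc,  // outputSource
    ///                                      true);                 // profilingEnabled
    /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~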
    INetworkProperties(bool asyncEnabled,
                       MemorySource inputSource,
                       MemorySource outputSource,
                       bool profilingEnabled = false,
                       ProfilingDetailsMethod detailsMethod = ProfilingDetailsMethod::Undefined,
                       bool externalMemoryManagementEnabled = false)
        : m_ImportEnabled(inputSource != MemorySource::Undefined),
          m_ExportEnabled(outputSource != MemorySource::Undefined),
          m_AsyncEnabled(asyncEnabled),
          m_ProfilingEnabled(profilingEnabled),
          m_OutputNetworkDetailsMethod(detailsMethod),
          m_InputSource(inputSource),
          m_OutputSource(outputSource),
          m_ExternalMemoryManagementEnabled(externalMemoryManagementEnabled)
    {}

    /// Deprecated and will be removed in future release.
    const bool m_ImportEnabled;
    /// Deprecated and will be removed in future release.
    const bool m_ExportEnabled;

    const bool m_AsyncEnabled;

    const bool m_ProfilingEnabled;

    const ProfilingDetailsMethod m_OutputNetworkDetailsMethod;

    const MemorySource m_InputSource;
    const MemorySource m_OutputSource;

    const bool m_ExternalMemoryManagementEnabled;

    virtual ~INetworkProperties() {}
};

using namespace armnn::experimental;

class IRuntime
{
public:
    struct CreationOptions
    {
        CreationOptions()
            : m_GpuAccTunedParameters(nullptr)
            , m_EnableGpuProfiling(false)
            , m_DynamicBackendsPath("")
            , m_ProtectedMode(false)
            , m_CustomAllocatorMap()
            , m_MemoryOptimizerStrategyMap()
        {}

        /// If set, uses the GpuAcc tuned parameters from the given object when executing GPU workloads.
        /// It will also be updated with new tuned parameters if it is configured to do so.
        std::shared_ptr<IGpuAccTunedParameters> m_GpuAccTunedParameters;

        /// Setting this flag will allow the user to obtain GPU profiling information from the runtime.
        bool m_EnableGpuProfiling;

        /// Setting this value will override the paths set by the DYNAMIC_BACKEND_PATHS compiler directive.
        /// Only a single path is allowed for the override.
        /// It defines the path to search for any [dynamic backend libraries](src/dynamic/README.md).
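        /// Example (an illustrative sketch; the path shown is hypothetical):
        /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~.cpp
        /// IRuntime::CreationOptions options;
        /// options.m_DynamicBackendsPath = "/path/to/dynamic/backends"; // hypothetical location
        /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~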
        std::string m_DynamicBackendsPath;

        /// Setting this flag will allow the user to create the Runtime in protected mode.
        /// It will run all the inferences on protected memory and will make sure that
        /// INetworkProperties::m_ImportEnabled is set to true with the MemorySource::DmaBufProtected option.
        /// This requires that the backend supports Protected Memory and has an allocator capable of
        /// allocating Protected Memory associated with it.
        bool m_ProtectedMode;

        /// @brief A map to define a custom memory allocator for specific backend Ids.
        ///
        /// @details A Custom Allocator is used for allocation of working memory in the backends.
        /// Set this if you need to take control of how memory is allocated on a backend. Required for
        /// Protected Mode in order to correctly allocate Protected Memory.
        ///
        /// @note Only supported for GpuAcc.
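        /// Example (an illustrative sketch; MyGpuAllocator stands for a user-defined class
        /// deriving from armnn::ICustomAllocator and is not part of ArmNN):
        /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~.cpp
        /// IRuntime::CreationOptions options;
        /// options.m_CustomAllocatorMap["GpuAcc"] = std::make_shared<MyGpuAllocator>();
        /// IRuntimePtr runtime = IRuntime::Create(options);
        /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~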
        std::map<BackendId, std::shared_ptr<ICustomAllocator>> m_CustomAllocatorMap;

        /// @brief A map to define a custom memory optimizer strategy for specific backend Ids.
        ///
        /// @details A Memory Optimizer Strategy provides a solution to an abstract representation of
        /// a network's memory requirements. This can also be used to return a pre-computed solution
        /// for a specific network. Set this if you want to implement a Custom Memory Optimizer Strategy
        /// for a given backend.
        std::map<BackendId, std::shared_ptr<IMemoryOptimizerStrategy>> m_MemoryOptimizerStrategyMap;

        struct ExternalProfilingOptions
        {
            ExternalProfilingOptions()
                : m_EnableProfiling(false)
                , m_TimelineEnabled(false)
                , m_OutgoingCaptureFile("")
                , m_IncomingCaptureFile("")
                , m_FileOnly(false)
                , m_CapturePeriod(LOWEST_CAPTURE_PERIOD)
                , m_FileFormat("binary")
                , m_LocalPacketHandlers()
            {}

            /// Indicates whether external profiling is enabled or not.
            bool m_EnableProfiling;
            /// Indicates whether external timeline profiling is enabled or not.
            bool m_TimelineEnabled;
            /// Path to a file in which outgoing timeline profiling messages will be stored.
            std::string m_OutgoingCaptureFile;
            /// Path to a file in which incoming timeline profiling messages will be stored.
            std::string m_IncomingCaptureFile;
            /// Enable profiling output to file only.
            bool m_FileOnly;
            /// The duration at which captured profiling messages will be flushed.
            uint32_t m_CapturePeriod;
            /// The format of the file used for outputting profiling data.
            std::string m_FileFormat;
            std::vector<arm::pipe::ILocalPacketHandlerSharedPtr> m_LocalPacketHandlers;
        };
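        /// External profiling configuration to apply when the runtime is created.
        /// Example (an illustrative sketch; the capture file name is hypothetical):
        /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~.cpp
        /// IRuntime::CreationOptions options;
        /// options.m_ProfilingOptions.m_EnableProfiling = true;
        /// options.m_ProfilingOptions.m_TimelineEnabled = true;
        /// options.m_ProfilingOptions.m_OutgoingCaptureFile = "outgoing.capture"; // hypothetical
        /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~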
        ExternalProfilingOptions m_ProfilingOptions;

        /// Pass backend specific options.
        ///
        /// For example, to enable GpuAcc tuning add the following
        /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~.cpp
        /// m_BackendOptions.emplace_back(
        ///     BackendOptions{"GpuAcc",
        ///                    {
        ///                        {"TuningLevel", 2},
        ///                        {"TuningFile", filename},
        ///                        {"MemoryOptimizerStrategy", strategyname}
        ///                    }
        ///     });
        /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        /// Execute representative workloads through the runtime to generate tuning data.
        /// The tuning file is written once the runtime is destroyed.

        /// To execute with the tuning data, start up with just the tuning file specified.
        /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~.cpp
        /// m_BackendOptions.emplace_back(
        ///     BackendOptions{"GpuAcc",
        ///                    {
        ///                        {"TuningFile", filename}
        ///                    }
        ///     });
        /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

        /// The following backend options are available:
        /// AllBackends:
        ///     "MemoryOptimizerStrategy" : string [strategynameString]
        ///         (Existing Memory Optimizer Strategies: ConstantMemoryStrategy)
        /// GpuAcc:
        ///     "TuningLevel" : int [0..3] (0=UseOnly(default) | 1=RapidTuning | 2=NormalTuning | 3=ExhaustiveTuning)
        ///     "TuningFile" : string [filenameString]
        ///     "KernelProfilingEnabled" : bool [true | false]
        std::vector<BackendOptions> m_BackendOptions;
    };

    static IRuntime* CreateRaw(const CreationOptions& options);
    static IRuntimePtr Create(const CreationOptions& options);
    static void Destroy(IRuntime* runtime);

    /// Loads a complete network into the IRuntime.
    /// @param [out] networkIdOut - Unique identifier for the network is returned in this reference.
    /// @param [in] network - Complete network to load into the IRuntime.
    /// The runtime takes ownership of the network once passed in.
    /// @return armnn::Status
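    /// Example (an illustrative sketch; assumes optNet is an IOptimizedNetworkPtr produced by armnn::Optimize()):
    /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~.cpp
    /// IRuntimePtr runtime = IRuntime::Create(IRuntime::CreationOptions());
    /// NetworkId networkId = 0;
    /// if (runtime->LoadNetwork(networkId, std::move(optNet)) != Status::Success)
    /// {
    ///     // Handle the failure.
    /// }
    /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~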
    Status LoadNetwork(NetworkId& networkIdOut, IOptimizedNetworkPtr network);

    /// Loads a complete network into the IRuntime.
    /// @param [out] networkIdOut Unique identifier for the network is returned in this reference.
    /// @param [in] network Complete network to load into the IRuntime.
    /// @param [out] errorMessage Error message if there were any errors.
    /// The runtime takes ownership of the network once passed in.
    /// @return armnn::Status
    Status LoadNetwork(NetworkId& networkIdOut,
                       IOptimizedNetworkPtr network,
                       std::string& errorMessage);

    /// Loads a complete network into the IRuntime, with additional control over its behaviour.
    /// @param [out] networkIdOut Unique identifier for the network is returned in this reference.
    /// @param [in] network Complete network to load into the IRuntime.
    /// @param [out] errorMessage Error message if there were any errors.
    /// @param [in] networkProperties Properties controlling import, export, async execution and profiling.
    /// @return armnn::Status
    Status LoadNetwork(NetworkId& networkIdOut,
                       IOptimizedNetworkPtr network,
                       std::string& errorMessage,
                       const INetworkProperties& networkProperties);

    TensorInfo GetInputTensorInfo(NetworkId networkId, LayerBindingId layerId) const;
    TensorInfo GetOutputTensorInfo(NetworkId networkId, LayerBindingId layerId) const;

    /// ImportInputs separates the importing and mapping of InputTensors from network execution,
    /// allowing a set of InputTensors to be imported and mapped once but used in execution many times.
    /// This function is not thread safe and must not be used while other threads are calling Execute().
    /// No exceptions are thrown for failed imports. It is the caller's responsibility to check whether
    /// tensors have been successfully imported by comparing returned ids with those passed in the InputTensors.
    /// Whether a tensor can be imported or not is backend specific.
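    /// Example (an illustrative sketch; runtime, networkId and inputTensors are assumed to exist,
    /// with inputTensors wrapping caller-owned buffers):
    /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~.cpp
    /// std::vector<ImportedInputId> importedIds =
    ///     runtime->ImportInputs(networkId, inputTensors, MemorySource::Malloc);
    /// // Compare importedIds with the binding ids in inputTensors to see which imports succeeded.
    /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~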
    std::vector<ImportedInputId> ImportInputs(NetworkId networkId, const InputTensors& inputTensors,
                                              MemorySource forceImportMemorySource = MemorySource::Undefined);

    /// ImportOutputs separates the importing and mapping of OutputTensors from network execution,
    /// allowing a set of OutputTensors to be imported and mapped once but used in execution many times.
    /// This function is not thread safe and must not be used while other threads are calling Execute().
    /// No exceptions are thrown for failed imports. It is the caller's responsibility to check whether
    /// tensors have been successfully imported by comparing returned ids with those passed in the OutputTensors.
    /// Whether a tensor can be imported or not is backend specific.
    std::vector<ImportedOutputId> ImportOutputs(NetworkId networkId, const OutputTensors& outputTensors,
                                                MemorySource forceImportMemorySource = MemorySource::Undefined);

    /// Un-imports and deletes the imported InputTensor(s).
    /// This function is not thread safe and must not be used while other threads are calling Execute().
    /// Only compatible with AsyncEnabled networks.
    void ClearImportedInputs(NetworkId networkId, const std::vector<ImportedInputId> inputIds);

    /// Un-imports and deletes the imported OutputTensor(s).
    /// This function is not thread safe and must not be used while other threads are calling Execute().
    /// Only compatible with AsyncEnabled networks.
    void ClearImportedOutputs(NetworkId networkId, const std::vector<ImportedOutputId> outputIds);

    /// Evaluates a network using input in inputTensors and outputs filled into outputTensors.
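    /// Example (an illustrative sketch; inputData/outputData are caller-owned buffers of the
    /// correct size, and binding id 0 is assumed for both the input and the output layer):
    /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~.cpp
    /// TensorInfo inputInfo = runtime->GetInputTensorInfo(networkId, 0);
    /// inputInfo.SetConstant(true); // input data is not modified during execution
    /// InputTensors  inputTensors{{0, ConstTensor(inputInfo, inputData.data())}};
    /// OutputTensors outputTensors{{0, Tensor(runtime->GetOutputTensorInfo(networkId, 0), outputData.data())}};
    /// runtime->EnqueueWorkload(networkId, inputTensors, outputTensors);
    /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~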
    Status EnqueueWorkload(NetworkId networkId,
                           const InputTensors& inputTensors,
                           const OutputTensors& outputTensors,
                           std::vector<ImportedInputId> preImportedInputIds = {},
                           std::vector<ImportedOutputId> preImportedOutputIds = {});

    /// This is an experimental function.
    /// Evaluates a network using input in inputTensors and outputs filled into outputTensors.
    /// This function performs a thread safe execution of the network. Returns once execution is complete.
    /// Will block until this and any other thread using the same workingMem object completes.
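    /// Example (an illustrative sketch; assumes the network was loaded with
    /// INetworkProperties::m_AsyncEnabled set to true):
    /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~.cpp
    /// // One working-memory handle per thread; see CreateWorkingMemHandle() below.
    /// std::unique_ptr<IWorkingMemHandle> workingMemHandle = runtime->CreateWorkingMemHandle(networkId);
    /// runtime->Execute(*workingMemHandle, inputTensors, outputTensors);
    /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~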
    Status Execute(IWorkingMemHandle& workingMemHandle,
                   const InputTensors& inputTensors,
                   const OutputTensors& outputTensors,
                   std::vector<ImportedInputId> preImportedInputs = {},
                   std::vector<ImportedOutputId> preImportedOutputs = {});

    /// Unloads a network from the IRuntime.
    /// At the moment this only removes the network from the m_Impl->m_Network.
    /// This might need more work in the future to be AndroidNN compliant.
    /// @param [in] networkId - Unique identifier for the network to be unloaded. Generated in LoadNetwork().
    /// @return armnn::Status
    Status UnloadNetwork(NetworkId networkId);

    const IDeviceSpec& GetDeviceSpec() const;

    /// Creates a new unique WorkingMemHandle object. Create multiple handles if you wish to have
    /// overlapped execution by calling this function from different threads.
    std::unique_ptr<IWorkingMemHandle> CreateWorkingMemHandle(NetworkId networkId);

    /// Gets the profiler corresponding to the given network id.
    /// @param networkId The id of the network for which to get the profiler.
    /// @return A pointer to the requested profiler, or nullptr if not found.
    const std::shared_ptr<IProfiler> GetProfiler(NetworkId networkId) const;

    /// Registers a callback function to debug layers performing custom computations on intermediate tensors.
    /// @param networkId The id of the network to register the callback.
    /// @param func Callback function to pass to the debug layer.
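    /// Example (an illustrative sketch; the callback body is left to the caller):
    /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~.cpp
    /// runtime->RegisterDebugCallback(networkId,
    ///     [](LayerGuid guid, unsigned int slotIndex, ITensorHandle* tensorHandle)
    ///     {
    ///         // Inspect the intermediate tensor here.
    ///     });
    /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~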
    void RegisterDebugCallback(NetworkId networkId, const DebugCallbackFunction& func);

protected:
    IRuntime();
    IRuntime(const IRuntime::CreationOptions& options);
    ~IRuntime();

    std::unique_ptr<RuntimeImpl> pRuntimeImpl;
};


/// The following API is replaced by the backend options API.
using IGpuAccTunedParametersPtr = std::shared_ptr<IGpuAccTunedParameters>;

/// Manages a set of GpuAcc parameters which have been tuned for maximum performance.
/// Pass an instance of this object to the IRuntime::Create() method (via IRuntime::CreationOptions) to use it
/// for all GPU workload execution.
///
/// Can be created in two modes:
/// - In UseTunedParameters mode, the parameters stored in this object are used to execute GPU workloads.
/// - In UpdateTunedParameters mode, additionally, whenever a GPU workload is executed for the first time, the
///   optimum parameters will be found and stored in this object. WARNING - This tuning can be slow.
///
/// The parameters can be loaded from and saved to a file so that you can first run a slow initial read-write
/// execution, save the parameters for later and then run fast read-only executions using the optimised parameters.
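///
/// Example (an illustrative sketch; "tuned.bin" is a hypothetical file name):
/// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~.cpp
/// IGpuAccTunedParametersPtr tunedParams = IGpuAccTunedParameters::Create(
///     IGpuAccTunedParameters::Mode::UpdateTunedParameters,
///     IGpuAccTunedParameters::TuningLevel::Normal);
/// IRuntime::CreationOptions options;
/// options.m_GpuAccTunedParameters = tunedParams;
/// IRuntimePtr runtime = IRuntime::Create(options);
/// // ... run representative workloads, then persist the tuned parameters ...
/// tunedParams->Save("tuned.bin"); // hypothetical file name
/// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~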
class IGpuAccTunedParameters
{
public:
    enum class Mode
    {
        UseTunedParameters,
        UpdateTunedParameters
    };

    enum class TuningLevel
    {
        Rapid = 1,
        Normal = 2,
        Exhaustive = 3
    };

    /// Creates an IGpuAccTunedParameters with the given mode.
    /// @{
    static IGpuAccTunedParameters* CreateRaw(Mode mode, TuningLevel tunerMode);
    static IGpuAccTunedParametersPtr Create(Mode mode, TuningLevel tunerMode);
    /// @}
    static void Destroy(IGpuAccTunedParameters* params);

    /// Loads an existing set of tuned parameters from the given file.
    /// If there is an error loading the file, an armnn::Exception is thrown.
    virtual void Load(const char* filename) = 0;

    /// Saves the current set of tuned parameters to the given file.
    /// If there is an error saving to the file, an armnn::Exception is thrown.
    virtual void Save(const char* filename) const = 0;

protected:
    virtual ~IGpuAccTunedParameters() {};
};

} // namespace armnn