ArmNN
 21.05
IRuntime.hpp
Go to the documentation of this file.
1 //
2 // Copyright © 2017 Arm Ltd. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 #pragma once
6 
7 #include "BackendOptions.hpp"
8 #include "INetwork.hpp"
9 #include "IProfiler.hpp"
10 #include "IWorkingMemHandle.hpp"
12 #include "Tensor.hpp"
13 #include "Types.hpp"
14 #include "TypesUtils.hpp"
16 
17 #include <memory>
18 
19 namespace armnn
20 {
21 
22 using NetworkId = int;
23 
25 
26 struct RuntimeImpl;
27 class IRuntime;
28 using IRuntimePtr = std::unique_ptr<IRuntime, void(*)(IRuntime* runtime)>;
29 
31 {
32  ARMNN_DEPRECATED_MSG("Please use INetworkProperties constructor with MemorySource argument")
33  INetworkProperties(bool importEnabled = false,
34  bool exportEnabled = false,
35  bool asyncEnabled = false,
36  size_t numThreads = 0)
37  : m_ImportEnabled(importEnabled)
38  , m_ExportEnabled(exportEnabled)
39  , m_AsyncEnabled(asyncEnabled)
40  , m_NumThreads(numThreads)
43  {}
44 
45  INetworkProperties(bool asyncEnabled,
48  size_t numThreads = 0)
49  : m_ImportEnabled(m_InputSource != MemorySource::Undefined)
50  , m_ExportEnabled(m_OutputSource != MemorySource::Undefined)
51  , m_AsyncEnabled(asyncEnabled)
52  , m_NumThreads(numThreads)
53  , m_InputSource(m_InputSource)
54  , m_OutputSource(m_OutputSource)
55  {}
56 
57  /// Deprecated and will be removed in future release.
58  const bool m_ImportEnabled;
59  /// Deprecated and will be removed in future release.
60  const bool m_ExportEnabled;
61 
62  const bool m_AsyncEnabled;
63  const size_t m_NumThreads;
64 
67 
68  virtual ~INetworkProperties() {}
69 };
70 
71 using namespace armnn::experimental;
72 
73 class IRuntime
74 {
75 public:
77  {
79  : m_GpuAccTunedParameters(nullptr)
80  , m_EnableGpuProfiling(false)
81  , m_DynamicBackendsPath("")
82  {}
83 
84  /// If set, uses the GpuAcc tuned parameters from the given object when executing GPU workloads.
85  /// It will also be updated with new tuned parameters if it is configured to do so.
86  std::shared_ptr<IGpuAccTunedParameters> m_GpuAccTunedParameters;
87 
88  /// Setting this flag will allow the user to obtain GPU profiling information from the runtime.
90 
91  /// Setting this value will override the paths set by the DYNAMIC_BACKEND_PATHS compiler directive
92  /// Only a single path is allowed for the override
93  std::string m_DynamicBackendsPath;
94 
96  {
98  : m_EnableProfiling(false)
99  , m_TimelineEnabled(false)
100  , m_OutgoingCaptureFile("")
101  , m_IncomingCaptureFile("")
102  , m_FileOnly(false)
103  , m_CapturePeriod(LOWEST_CAPTURE_PERIOD)
104  , m_FileFormat("binary")
105  , m_LocalPacketHandlers()
106  {}
107 
113  uint32_t m_CapturePeriod;
114  std::string m_FileFormat;
115  std::vector<armnn::profiling::ILocalPacketHandlerSharedPtr> m_LocalPacketHandlers;
116  };
118 
119  /// Pass backend specific options.
120  ///
121  /// For example, to enable GpuAcc tuning add the following
122  /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~.cpp
123  /// m_BackendOption.emplace_back(
124  /// BackendOptions{"GpuAcc",
125  /// {
126  /// {"TuningLevel", 2},
127  /// {"TuningFile", filename}
128  /// }
129  /// });
130  /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
131  /// Execute representative workloads through the runtime to generate tuning data.
132  /// The tuning file is written once the runtime is destroyed
133 
134  /// To execute with the tuning data, start up with just the tuning file specified.
135  /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~.cpp
136  /// m_BackendOption.emplace_back(
137  /// BackendOptions{"GpuAcc",
138  /// {
139  /// {"TuningFile", filename}
140  /// }
141  /// });
142  /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
143 
144  /// The following backend options are available:
145  /// GpuAcc:
146  /// "TuningLevel" : int [0..3] (0=UseOnly(default) | 1=RapidTuning | 2=NormalTuning | 3=ExhaustiveTuning)
147  /// "TuningFile" : string [filenameString]
148  /// "KernelProfilingEnabled" : bool [true | false]
149  std::vector<BackendOptions> m_BackendOptions;
150  };
151 
152  static IRuntime* CreateRaw(const CreationOptions& options);
153  static IRuntimePtr Create(const CreationOptions& options);
154  static void Destroy(IRuntime* runtime);
155 
156  /// Loads a complete network into the IRuntime.
157  /// @param [out] networkIdOut - Unique identifier for the network is returned in this reference.
158  /// @param [in] network - Complete network to load into the IRuntime.
159  /// The runtime takes ownership of the network once passed in.
160  /// @return armnn::Status
161  Status LoadNetwork(NetworkId& networkIdOut, IOptimizedNetworkPtr network);
162 
163  /// Load a complete network into the IRuntime.
164  /// @param [out] networkIdOut Unique identifier for the network is returned in this reference.
165  /// @param [in] network Complete network to load into the IRuntime.
166  /// @param [out] errorMessage Error message if there were any errors.
167  /// The runtime takes ownership of the network once passed in.
168  /// @return armnn::Status
169  Status LoadNetwork(NetworkId& networkIdOut,
170  IOptimizedNetworkPtr network,
171  std::string& errorMessage);
172 
173  Status LoadNetwork(NetworkId& networkIdOut,
174  IOptimizedNetworkPtr network,
175  std::string& errorMessage,
176  const INetworkProperties& networkProperties);
177 
178  TensorInfo GetInputTensorInfo(NetworkId networkId, LayerBindingId layerId) const;
179  TensorInfo GetOutputTensorInfo(NetworkId networkId, LayerBindingId layerId) const;
180 
181  /// Evaluates a network using input in inputTensors and outputs filled into outputTensors
182  Status EnqueueWorkload(NetworkId networkId,
183  const InputTensors& inputTensors,
184  const OutputTensors& outputTensors);
185 
186  /// This is an experimental function.
187  /// Evaluates a network using input in inputTensors and outputs filled into outputTensors.
188  /// This function performs a thread safe execution of the network. Returns once execution is complete.
189  /// Will block until this and any other thread using the same workingMem object completes.
190  Status Execute(IWorkingMemHandle& workingMemHandle,
191  const InputTensors& inputTensors,
192  const OutputTensors& outputTensors);
193 
194  /// This is an experimental function
195  /// Schedule a thread safe execution by taking the input tensors and an execution priority for Quality of Service.
196  /// The output tensors will then be filled and the callback object will notify that the execution has either
197  /// succeeded or failed.
198  void Schedule(NetworkId networkId,
199  const InputTensors& inputTensors,
200  const OutputTensors& outputTensors,
201  const QosExecPriority priority,
202  std::shared_ptr<IAsyncExecutionCallback> callback);
203 
204  /// Unloads a network from the IRuntime.
205  /// At the moment this only removes the network from the m_Impl->m_Network.
206  /// This might need more work in the future to be AndroidNN compliant.
207  /// @param [in] networkId - Unique identifier for the network to be unloaded. Generated in LoadNetwork().
208  /// @return armnn::Status
209  Status UnloadNetwork(NetworkId networkId);
210 
211  const IDeviceSpec& GetDeviceSpec() const;
212 
213  /// Create a new unique WorkingMemHandle object. Create multiple handles if you wish to have
214  /// overlapped Execution by calling this function from different threads.
215  std::unique_ptr<IWorkingMemHandle> CreateWorkingMemHandle(NetworkId networkId);
216 
217  /// Gets the profiler corresponding to the given network id.
218  /// @param networkId The id of the network for which to get the profile.
219  /// @return A pointer to the requested profiler, or nullptr if not found.
220  const std::shared_ptr<IProfiler> GetProfiler(NetworkId networkId) const;
221 
222  /// Registers a callback function to debug layers performing custom computations on intermediate tensors.
223  /// @param networkId The id of the network to register the callback.
224  /// @param func callback function to pass to the debug layer.
225  void RegisterDebugCallback(NetworkId networkId, const DebugCallbackFunction& func);
226 
227 protected:
228  IRuntime();
229  IRuntime(const IRuntime::CreationOptions& options);
230  ~IRuntime();
231 
232  std::unique_ptr<RuntimeImpl> pRuntimeImpl;
233 };
234 
235 
236 /// The following API is replaced by the backend options API.
237 using IGpuAccTunedParametersPtr = std::shared_ptr<IGpuAccTunedParameters>;
238 
239 /// Manages a set of GpuAcc parameters which have been tuned for maximum performance.
240 /// Passes an instance of this object to the IRuntime::Create() method (via IRuntime::CreationOptions) to use it
241 /// for all GPU workload execution.
242 ///
243 /// Can be created in two modes:
244 /// - In UseTunedParameters mode, the parameters stored in this object are used to execute GPU workloads.
245 /// - In UpdateTunedParameters mode, additionally, whenever a GPU workload is executed for the first time, the
246 /// optimum parameters will be found and stored in this object. WARNING - This tuning can be slow.
247 ///
248 /// The parameters can be loaded from and saved to a file so that you can first run a slow initial read-write
249 /// execution, save the parameters for later and then run fast read-only executions using the optimised parameters.
251 {
252 public:
253  enum class Mode
254  {
255  UseTunedParameters,
256  UpdateTunedParameters
257  };
258 
259  enum class TuningLevel
260  {
261  Rapid = 1,
262  Normal = 2,
263  Exhaustive = 3
264  };
265 
266  /// Creates an IClTunedParameters with the given mode.
267  /// @{
268  static IGpuAccTunedParameters* CreateRaw(Mode mode, TuningLevel tunerMode);
269  static IGpuAccTunedParametersPtr Create(Mode mode, TuningLevel tunerMode);
270  /// @}
271  static void Destroy(IGpuAccTunedParameters* params);
272 
273  /// Loads an existing set of tuned parameters from the given file.
274  /// If there is an error loading the file, an armnn::Exception is thrown.
275  virtual void Load(const char* filename) = 0;
276 
277  /// Saves the current set of tuned parameters to the given file.
278  /// If there is an error saving to the file, an armnn::Exception is thrown.
279  virtual void Save(const char* filename) const = 0;
280 
281 protected:
283 };
284 
285 } // namespace armnn
const MemorySource m_InputSource
Definition: IRuntime.hpp:65
const size_t m_NumThreads
Definition: IRuntime.hpp:63
const bool m_ImportEnabled
Deprecated and will be removed in future release.
Definition: IRuntime.hpp:58
DataLayout::NCHW false
std::unique_ptr< IRuntime, void(*)(IRuntime *runtime)> IRuntimePtr
Definition: IRuntime.hpp:28
std::vector< std::pair< LayerBindingId, class ConstTensor > > InputTensors
Definition: Tensor.hpp:340
Copyright (c) 2021 ARM Limited and Contributors.
std::function< void(LayerGuid guid, unsigned int slotIndex, ITensorHandle *tensorHandle)> DebugCallbackFunction
Define the type of callback for the Debug layer to call.
Definition: Types.hpp:316
int LayerBindingId
Type of identifiers for bindable layers (inputs, outputs).
Definition: Types.hpp:243
std::shared_ptr< IGpuAccTunedParameters > m_GpuAccTunedParameters
If set, uses the GpuAcc tuned parameters from the given object when executing GPU workloads...
Definition: IRuntime.hpp:86
INetworkProperties(bool asyncEnabled, MemorySource m_InputSource, MemorySource m_OutputSource, size_t numThreads=0)
Definition: IRuntime.hpp:45
std::vector< BackendOptions > m_BackendOptions
Pass backend specific options.
Definition: IRuntime.hpp:149
std::vector< armnn::profiling::ILocalPacketHandlerSharedPtr > m_LocalPacketHandlers
Definition: IRuntime.hpp:115
int NetworkId
Definition: IRuntime.hpp:22
std::vector< std::pair< LayerBindingId, class Tensor > > OutputTensors
Definition: Tensor.hpp:341
Status
enumeration
Definition: Types.hpp:30
std::unique_ptr< IOptimizedNetwork, void(*)(IOptimizedNetwork *network)> IOptimizedNetworkPtr
Definition: INetwork.hpp:174
const bool m_ExportEnabled
Deprecated and will be removed in future release.
Definition: IRuntime.hpp:60
std::unique_ptr< RuntimeImpl > pRuntimeImpl
Definition: IRuntime.hpp:232
Device specific knowledge to be passed to the optimizer.
Definition: Types.hpp:233
constexpr unsigned int LOWEST_CAPTURE_PERIOD
The lowest performance data capture interval we support is 10 milliseconds.
Definition: Types.hpp:22
std::shared_ptr< IGpuAccTunedParameters > IGpuAccTunedParametersPtr
The following API is replaced by the backend options API.
Definition: IRuntime.hpp:237
std::string m_DynamicBackendsPath
Setting this value will override the paths set by the DYNAMIC_BACKEND_PATHS compiler directive Only a...
Definition: IRuntime.hpp:93
bool m_EnableGpuProfiling
Setting this flag will allow the user to obtain GPU profiling information from the runtime...
Definition: IRuntime.hpp:89
Manages a set of GpuAcc parameters which have been tuned for maximum performance. ...
Definition: IRuntime.hpp:250
MemorySource
Define the Memory Source to reduce copies.
Definition: Types.hpp:197
const MemorySource m_OutputSource
Definition: IRuntime.hpp:66
QosExecPriority
Definition: Types.hpp:60
#define ARMNN_DEPRECATED_MSG(message)
Definition: Deprecated.hpp:43
ExternalProfilingOptions m_ProfilingOptions
Definition: IRuntime.hpp:117
virtual ~INetworkProperties()
Definition: IRuntime.hpp:68