include/armnn/IRuntime.hpp


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341

//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//
#pragma once

#include "BackendOptions.hpp"
#include "INetwork.hpp"
#include "IProfiler.hpp"
#include "IWorkingMemHandle.hpp"
#include "IAsyncExecutionCallback.hpp"
#include "Tensor.hpp"
#include "Types.hpp"
#include "TypesUtils.hpp"
#include "profiling/ILocalPacketHandler.hpp"

#include <armnn/backends/ICustomAllocator.hpp>
#include <armnn/backends/IMemoryOptimizerStrategy.hpp>
#include <memory>
#include <map>

namespace armnn
{

using NetworkId = int;

class IGpuAccTunedParameters;

struct RuntimeImpl;
class IRuntime;
using IRuntimePtr = std::unique_ptr<IRuntime, void(*)(IRuntime* runtime)>;

struct INetworkProperties
{   
    INetworkProperties(bool asyncEnabled,
                       MemorySource inputSource,
                       MemorySource outputSource,
                       bool profilingEnabled = false,
                       ProfilingDetailsMethod detailsMethod = ProfilingDetailsMethod::Undefined,
                       bool externalMemoryManagementEnabled = false)
        : m_ImportEnabled(inputSource != MemorySource::Undefined),
          m_ExportEnabled(outputSource != MemorySource::Undefined),
          m_AsyncEnabled(asyncEnabled),
          m_ProfilingEnabled(profilingEnabled),
          m_OutputNetworkDetailsMethod(detailsMethod),
          m_InputSource(inputSource),
          m_OutputSource(outputSource),
          m_ExternalMemoryManagementEnabled(externalMemoryManagementEnabled)
    {}

    /// Deprecated and will be removed in future release.
    const bool m_ImportEnabled;
    /// Deprecated and will be removed in future release.
    const bool m_ExportEnabled;

    const bool m_AsyncEnabled;

    const bool m_ProfilingEnabled;

    const ProfilingDetailsMethod m_OutputNetworkDetailsMethod;

    const MemorySource m_InputSource;
    const MemorySource m_OutputSource;

    const bool m_ExternalMemoryManagementEnabled;

    virtual ~INetworkProperties() {}
};

using namespace armnn::experimental;

class IRuntime
{
public:
    struct CreationOptions
    {
        CreationOptions()
            : m_GpuAccTunedParameters(nullptr)
            , m_EnableGpuProfiling(false)
            , m_DynamicBackendsPath("")
            , m_ProtectedMode(false)
            , m_CustomAllocatorMap()
            , m_MemoryOptimizerStrategyMap()
        {}

        /// If set, uses the GpuAcc tuned parameters from the given object when executing GPU workloads.
        /// It will also be updated with new tuned parameters if it is configured to do so.
        std::shared_ptr<IGpuAccTunedParameters> m_GpuAccTunedParameters;

        /// Setting this flag will allow the user to obtain GPU profiling information from the runtime.
        bool m_EnableGpuProfiling;

        /// Setting this value will override the paths set by the DYNAMIC_BACKEND_PATHS compiler directive
        /// Only a single path is allowed for the override
        /// It defines the path to search for any [dynamic backend libraries](src/dynamic/README.md).
        std::string m_DynamicBackendsPath;

        /// Setting this flag will allow the user to create the Runtime in protected mode.
        /// It will run all the inferences on protected memory and will make sure that
        /// INetworkProperties::m_ImportEnabled set to true with MemorySource::DmaBufProtected option
        /// This requires that the backend supports Protected Memory and has an allocator capable of
        /// allocating Protected Memory associated with it.
        bool m_ProtectedMode;

        /// @brief A map to define a custom memory allocator for specific backend Ids.
        ///
        /// @details  A Custom Allocator is used for allocation of working memory in the backends.
        /// Set this if you need to take control of how memory is allocated on a backend. Required for
        /// Protected Mode in order to correctly allocate Protected Memory
        ///
        /// @note Only supported for GpuAcc
        std::map<BackendId, std::shared_ptr<ICustomAllocator>> m_CustomAllocatorMap;

        /// @brief A map to define a custom memory optimizer strategy for specific backend Ids.
        ///
        /// @details  A Memory Optimizer Strategy provides a solution to an abstract representation of
        /// a network's memory requirements. This can also be used to return a pre-computed solution
        /// for a specific network. Set this if you want to implement a Custom Memory Optimizer Strategy
        /// for a given backend.
        std::map<BackendId, std::shared_ptr<IMemoryOptimizerStrategy>> m_MemoryOptimizerStrategyMap;

        struct ExternalProfilingOptions
        {
            ExternalProfilingOptions()
                : m_EnableProfiling(false)
                , m_TimelineEnabled(false)
                , m_OutgoingCaptureFile("")
                , m_IncomingCaptureFile("")
                , m_FileOnly(false)
                , m_CapturePeriod(LOWEST_CAPTURE_PERIOD)
                , m_FileFormat("binary")
                , m_LocalPacketHandlers()
            {}

            /// Indicates whether external profiling is enabled or not.
            bool        m_EnableProfiling;
            /// Indicates whether external timeline profiling is enabled or not.
            bool        m_TimelineEnabled;
            /// Path to a file in which outgoing timeline profiling messages will be stored.
            std::string m_OutgoingCaptureFile;
            /// Path to a file in which incoming timeline profiling messages will be stored.
            std::string m_IncomingCaptureFile;
            /// Enable profiling output to file only.
            bool        m_FileOnly;
            /// The duration at which captured profiling messages will be flushed.
            uint32_t    m_CapturePeriod;
            /// The format of the file used for outputting profiling data.
            std::string m_FileFormat;
            std::vector<armnn::profiling::ILocalPacketHandlerSharedPtr> m_LocalPacketHandlers;
        };
        ExternalProfilingOptions m_ProfilingOptions;

        /// Pass backend specific options.
        ///
        /// For example, to enable GpuAcc tuning add the following
        /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~.cpp
        /// m_BackendOption.emplace_back(
        ///     BackendOptions{"GpuAcc",
        ///       {
        ///         {"TuningLevel", 2},
        ///         {"TuningFile", filename}
        ///         {"MemoryOptimizerStrategy", strategyname}
        ///       }
        ///     });
        /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        /// Execute representative workloads through the runtime to generate tuning data.
        /// The tuning file is written once the runtime is destroyed

        /// To execute with the tuning data, start up with just the tuning file specified.
        /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~.cpp
        /// m_BackendOption.emplace_back(
        ///     BackendOptions{"GpuAcc",
        ///       {
        ///         {"TuningFile", filename}
        ///       }
        ///     });
        /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

        /// The following backend options are available:
        /// AllBackends:
        ///   "MemoryOptimizerStrategy" : string [stategynameString]
        ///    (Existing Memory Optimizer Strategies: ConstantMemoryStrategy)
        /// GpuAcc:
        ///   "TuningLevel" : int [0..3] (0=UseOnly(default) | 1=RapidTuning | 2=NormalTuning | 3=ExhaustiveTuning)
        ///   "TuningFile" : string [filenameString]
        ///   "KernelProfilingEnabled" : bool [true | false]
        std::vector<BackendOptions> m_BackendOptions;
    };

    static IRuntime* CreateRaw(const CreationOptions& options);
    static IRuntimePtr Create(const CreationOptions& options);
    static void Destroy(IRuntime* runtime);

    /// Loads a complete network into the IRuntime.
    /// @param [out] networkIdOut - Unique identifier for the network is returned in this reference.
    /// @param [in] network - Complete network to load into the IRuntime.
    /// The runtime takes ownership of the network once passed in.
    /// @return armnn::Status
    Status LoadNetwork(NetworkId& networkIdOut, IOptimizedNetworkPtr network);

    /// Load a complete network into the IRuntime.
    /// @param [out] networkIdOut Unique identifier for the network is returned in this reference.
    /// @param [in] network Complete network to load into the IRuntime.
    /// @param [out] errorMessage Error message if there were any errors.
    /// The runtime takes ownership of the network once passed in.
    /// @return armnn::Status
    Status LoadNetwork(NetworkId& networkIdOut,
                       IOptimizedNetworkPtr network,
                       std::string& errorMessage);

    Status LoadNetwork(NetworkId& networkIdOut,
                       IOptimizedNetworkPtr network,
                       std::string& errorMessage,
                       const INetworkProperties& networkProperties);

    TensorInfo GetInputTensorInfo(NetworkId networkId, LayerBindingId layerId) const;
    TensorInfo GetOutputTensorInfo(NetworkId networkId, LayerBindingId layerId) const;

    /// ImportInputs separates the importing and mapping of InputTensors from network execution.
    /// Allowing for a set of InputTensors to be imported and mapped once, but used in execution many times.
    /// This function is not thread safe and must not be used while other threads are calling Execute().
    /// Only compatible with AsyncEnabled networks and aligned memory import
    std::vector<ImportedInputId> ImportInputs(NetworkId networkId, const InputTensors& inputTensors,
                                              MemorySource forceImportMemorySource = MemorySource::Undefined);

    /// ImportOutputs separates the importing and mapping of OutputTensors from network execution.
    /// Allowing for a set of OutputTensors to be imported and mapped once, but used in execution many times.
    /// This function is not thread safe and must not be used while other threads are calling Execute().
    /// Only compatible with AsyncEnabled networks and aligned memory import
    std::vector<ImportedOutputId> ImportOutputs(NetworkId networkId, const OutputTensors& outputTensors,
                                                MemorySource forceImportMemorySource = MemorySource::Undefined);

    /// Un-import and delete the imported InputTensor/s
    /// This function is not thread safe and must not be used while other threads are calling Execute().
    /// Only compatible with AsyncEnabled networks
    void ClearImportedInputs(NetworkId networkId, const std::vector<ImportedInputId> inputIds);

    /// Un-import and delete the imported OutputTensor/s
    /// This function is not thread safe and must not be used while other threads are calling Execute().
    /// Only compatible with AsyncEnabled networks
    void ClearImportedOutputs(NetworkId networkId, const std::vector<ImportedOutputId> outputIds);

    /// Evaluates a network using input in inputTensors and outputs filled into outputTensors
    Status EnqueueWorkload(NetworkId networkId,
                           const InputTensors& inputTensors,
                           const OutputTensors& outputTensors,
                           std::vector<ImportedInputId> preImportedInputIds = {},
                           std::vector<ImportedOutputId> preImportedOutputIds = {});

    /// This is an experimental function.
    /// Evaluates a network using input in inputTensors and outputs filled into outputTensors.
    /// This function performs a thread safe execution of the network. Returns once execution is complete.
    /// Will block until this and any other thread using the same workingMem object completes.
    Status Execute(IWorkingMemHandle& workingMemHandle,
                   const InputTensors& inputTensors,
                   const OutputTensors& outputTensors,
                   std::vector<ImportedInputId> preImportedInputs = {},
                   std::vector<ImportedOutputId> preImportedOutputs = {});

    /// Unloads a network from the IRuntime.
    /// At the moment this only removes the network from the m_Impl->m_Network.
    /// This might need more work in the future to be AndroidNN compliant.
    /// @param [in] networkId - Unique identifier for the network to be unloaded. Generated in LoadNetwork().
    /// @return armnn::Status
    Status UnloadNetwork(NetworkId networkId);

    const IDeviceSpec& GetDeviceSpec() const;

    /// Create a new unique WorkingMemHandle object. Create multiple handles if you wish to have
    /// overlapped Execution by calling this function from different threads.
    std::unique_ptr<IWorkingMemHandle> CreateWorkingMemHandle(NetworkId networkId);

    /// Gets the profiler corresponding to the given network id.
    /// @param networkId The id of the network for which to get the profile.
    /// @return A pointer to the requested profiler, or nullptr if not found.
    const std::shared_ptr<IProfiler> GetProfiler(NetworkId networkId) const;

    /// Registers a callback function to debug layers performing custom computations on intermediate tensors.
    /// @param networkId The id of the network to register the callback.
    /// @param func callback function to pass to the debug layer.
    void RegisterDebugCallback(NetworkId networkId, const DebugCallbackFunction& func);

protected:
    IRuntime();
    IRuntime(const IRuntime::CreationOptions& options);
    ~IRuntime();

    std::unique_ptr<RuntimeImpl> pRuntimeImpl;
};


/// The following API is replaced by the backend options API.
using IGpuAccTunedParametersPtr = std::shared_ptr<IGpuAccTunedParameters>;

/// Manages a set of GpuAcc parameters which have been tuned for maximum performance.
/// Passes an instance of this object to the IRuntime::Create() method (via IRuntime::CreationOptions) to use it
/// for all GPU workload execution.
///
/// Can be created in two modes:
///     - In UseTunedParameters mode, the parameters stored in this object are used to execute GPU workloads.
///     - In UpdateTunedParameters mode, additionally, whenever a GPU workload is executed for the first time, the
///       optimum parameters will be found and stored in this object. WARNING - This tuning can be slow.
///
/// The parameters can be loaded from and saved to a file so that you can first run a slow initial read-write
/// execution, save the parameters for later and then run fast read-only executions using the optimised parameters.
class IGpuAccTunedParameters
{
public:
    enum class Mode
    {
        UseTunedParameters,
        UpdateTunedParameters
    };

    enum class TuningLevel
    {
        Rapid = 1,
        Normal = 2,
        Exhaustive = 3
    };

    /// Creates an IClTunedParameters with the given mode.
    /// @{
    static IGpuAccTunedParameters* CreateRaw(Mode mode, TuningLevel tunerMode);
    static IGpuAccTunedParametersPtr Create(Mode mode, TuningLevel tunerMode);
    /// @}
    static void Destroy(IGpuAccTunedParameters* params);

    /// Loads an existing set of tuned parameters from the given file.
    /// If there is an error loading the file, an armnn::Exception is thrown.
    virtual void Load(const char* filename) = 0;

    /// Saves the current set of tuned parameters to the given file.
    /// If there is an error saving to the file, an armnn::Exception is thrown.
    virtual void Save(const char* filename) const = 0;

protected:
    virtual ~IGpuAccTunedParameters() {};
};

} // namespace armnn