// // Copyright © 2017 Arm Ltd. All rights reserved. // SPDX-License-Identifier: MIT // #pragma once #include "BackendOptions.hpp" #include "INetwork.hpp" #include "IProfiler.hpp" #include "IWorkingMemHandle.hpp" #include "IAsyncExecutionCallback.hpp" #include "Tensor.hpp" #include "Types.hpp" #include "TypesUtils.hpp" #include "profiling/ILocalPacketHandler.hpp" #include #include #include #include namespace armnn { using NetworkId = int; class IGpuAccTunedParameters; struct RuntimeImpl; class IRuntime; using IRuntimePtr = std::unique_ptr; struct INetworkProperties { INetworkProperties(bool asyncEnabled, MemorySource inputSource, MemorySource outputSource, bool profilingEnabled = false, ProfilingDetailsMethod detailsMethod = ProfilingDetailsMethod::Undefined, bool externalMemoryManagementEnabled = false) : m_ImportEnabled(inputSource != MemorySource::Undefined), m_ExportEnabled(outputSource != MemorySource::Undefined), m_AsyncEnabled(asyncEnabled), m_ProfilingEnabled(profilingEnabled), m_OutputNetworkDetailsMethod(detailsMethod), m_InputSource(inputSource), m_OutputSource(outputSource), m_ExternalMemoryManagementEnabled(externalMemoryManagementEnabled) {} /// Deprecated and will be removed in future release. const bool m_ImportEnabled; /// Deprecated and will be removed in future release. const bool m_ExportEnabled; const bool m_AsyncEnabled; const bool m_ProfilingEnabled; const ProfilingDetailsMethod m_OutputNetworkDetailsMethod; const MemorySource m_InputSource; const MemorySource m_OutputSource; const bool m_ExternalMemoryManagementEnabled; virtual ~INetworkProperties() {} }; using namespace armnn::experimental; class IRuntime { public: struct CreationOptions { CreationOptions() : m_GpuAccTunedParameters(nullptr) , m_EnableGpuProfiling(false) , m_DynamicBackendsPath("") , m_ProtectedMode(false) , m_CustomAllocatorMap() , m_MemoryOptimizerStrategyMap() {} /// If set, uses the GpuAcc tuned parameters from the given object when executing GPU workloads. /// It will also be updated with new tuned parameters if it is configured to do so. std::shared_ptr m_GpuAccTunedParameters; /// Setting this flag will allow the user to obtain GPU profiling information from the runtime. bool m_EnableGpuProfiling; /// Setting this value will override the paths set by the DYNAMIC_BACKEND_PATHS compiler directive /// Only a single path is allowed for the override /// It defines the path to search for any [dynamic backend libraries](src/dynamic/README.md). std::string m_DynamicBackendsPath; /// Setting this flag will allow the user to create the Runtime in protected mode. /// It will run all the inferences on protected memory and will make sure that /// INetworkProperties::m_ImportEnabled set to true with MemorySource::DmaBufProtected option /// This requires that the backend supports Protected Memory and has an allocator capable of /// allocating Protected Memory associated with it. bool m_ProtectedMode; /// @brief A map to define a custom memory allocator for specific backend Ids. /// /// @details A Custom Allocator is used for allocation of working memory in the backends. /// Set this if you need to take control of how memory is allocated on a backend. Required for /// Protected Mode in order to correctly allocate Protected Memory /// /// @note Only supported for GpuAcc std::map> m_CustomAllocatorMap; /// @brief A map to define a custom memory optimizer strategy for specific backend Ids. /// /// @details A Memory Optimizer Strategy provides a solution to an abstract representation of /// a network's memory requirements. This can also be used to return a pre-computed solution /// for a specific network. Set this if you want to implement a Custom Memory Optimizer Strategy /// for a given backend. std::map> m_MemoryOptimizerStrategyMap; struct ExternalProfilingOptions { ExternalProfilingOptions() : m_EnableProfiling(false) , m_TimelineEnabled(false) , m_OutgoingCaptureFile("") , m_IncomingCaptureFile("") , m_FileOnly(false) , m_CapturePeriod(LOWEST_CAPTURE_PERIOD) , m_FileFormat("binary") , m_LocalPacketHandlers() {} /// Indicates whether external profiling is enabled or not. bool m_EnableProfiling; /// Indicates whether external timeline profiling is enabled or not. bool m_TimelineEnabled; /// Path to a file in which outgoing timeline profiling messages will be stored. std::string m_OutgoingCaptureFile; /// Path to a file in which incoming timeline profiling messages will be stored. std::string m_IncomingCaptureFile; /// Enable profiling output to file only. bool m_FileOnly; /// The duration at which captured profiling messages will be flushed. uint32_t m_CapturePeriod; /// The format of the file used for outputting profiling data. std::string m_FileFormat; std::vector m_LocalPacketHandlers; }; ExternalProfilingOptions m_ProfilingOptions; /// Pass backend specific options. /// /// For example, to enable GpuAcc tuning add the following /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~.cpp /// m_BackendOption.emplace_back( /// BackendOptions{"GpuAcc", /// { /// {"TuningLevel", 2}, /// {"TuningFile", filename} /// {"MemoryOptimizerStrategy", strategyname} /// } /// }); /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /// Execute representative workloads through the runtime to generate tuning data. /// The tuning file is written once the runtime is destroyed /// To execute with the tuning data, start up with just the tuning file specified. /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~.cpp /// m_BackendOption.emplace_back( /// BackendOptions{"GpuAcc", /// { /// {"TuningFile", filename} /// } /// }); /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /// The following backend options are available: /// AllBackends: /// "MemoryOptimizerStrategy" : string [stategynameString] /// (Existing Memory Optimizer Strategies: ConstantMemoryStrategy) /// GpuAcc: /// "TuningLevel" : int [0..3] (0=UseOnly(default) | 1=RapidTuning | 2=NormalTuning | 3=ExhaustiveTuning) /// "TuningFile" : string [filenameString] /// "KernelProfilingEnabled" : bool [true | false] std::vector m_BackendOptions; }; static IRuntime* CreateRaw(const CreationOptions& options); static IRuntimePtr Create(const CreationOptions& options); static void Destroy(IRuntime* runtime); /// Loads a complete network into the IRuntime. /// @param [out] networkIdOut - Unique identifier for the network is returned in this reference. /// @param [in] network - Complete network to load into the IRuntime. /// The runtime takes ownership of the network once passed in. /// @return armnn::Status Status LoadNetwork(NetworkId& networkIdOut, IOptimizedNetworkPtr network); /// Load a complete network into the IRuntime. /// @param [out] networkIdOut Unique identifier for the network is returned in this reference. /// @param [in] network Complete network to load into the IRuntime. /// @param [out] errorMessage Error message if there were any errors. /// The runtime takes ownership of the network once passed in. /// @return armnn::Status Status LoadNetwork(NetworkId& networkIdOut, IOptimizedNetworkPtr network, std::string& errorMessage); Status LoadNetwork(NetworkId& networkIdOut, IOptimizedNetworkPtr network, std::string& errorMessage, const INetworkProperties& networkProperties); TensorInfo GetInputTensorInfo(NetworkId networkId, LayerBindingId layerId) const; TensorInfo GetOutputTensorInfo(NetworkId networkId, LayerBindingId layerId) const; /// ImportInputs separates the importing and mapping of InputTensors from network execution. /// Allowing for a set of InputTensors to be imported and mapped once, but used in execution many times. /// This function is not thread safe and must not be used while other threads are calling Execute(). /// Only compatible with AsyncEnabled networks and aligned memory import std::vector ImportInputs(NetworkId networkId, const InputTensors& inputTensors, MemorySource forceImportMemorySource = MemorySource::Undefined); /// ImportOutputs separates the importing and mapping of OutputTensors from network execution. /// Allowing for a set of OutputTensors to be imported and mapped once, but used in execution many times. /// This function is not thread safe and must not be used while other threads are calling Execute(). /// Only compatible with AsyncEnabled networks and aligned memory import std::vector ImportOutputs(NetworkId networkId, const OutputTensors& outputTensors, MemorySource forceImportMemorySource = MemorySource::Undefined); /// Un-import and delete the imported InputTensor/s /// This function is not thread safe and must not be used while other threads are calling Execute(). /// Only compatible with AsyncEnabled networks void ClearImportedInputs(NetworkId networkId, const std::vector inputIds); /// Un-import and delete the imported OutputTensor/s /// This function is not thread safe and must not be used while other threads are calling Execute(). /// Only compatible with AsyncEnabled networks void ClearImportedOutputs(NetworkId networkId, const std::vector outputIds); /// Evaluates a network using input in inputTensors and outputs filled into outputTensors Status EnqueueWorkload(NetworkId networkId, const InputTensors& inputTensors, const OutputTensors& outputTensors, std::vector preImportedInputIds = {}, std::vector preImportedOutputIds = {}); /// This is an experimental function. /// Evaluates a network using input in inputTensors and outputs filled into outputTensors. /// This function performs a thread safe execution of the network. Returns once execution is complete. /// Will block until this and any other thread using the same workingMem object completes. Status Execute(IWorkingMemHandle& workingMemHandle, const InputTensors& inputTensors, const OutputTensors& outputTensors, std::vector preImportedInputs = {}, std::vector preImportedOutputs = {}); /// Unloads a network from the IRuntime. /// At the moment this only removes the network from the m_Impl->m_Network. /// This might need more work in the future to be AndroidNN compliant. /// @param [in] networkId - Unique identifier for the network to be unloaded. Generated in LoadNetwork(). /// @return armnn::Status Status UnloadNetwork(NetworkId networkId); const IDeviceSpec& GetDeviceSpec() const; /// Create a new unique WorkingMemHandle object. Create multiple handles if you wish to have /// overlapped Execution by calling this function from different threads. std::unique_ptr CreateWorkingMemHandle(NetworkId networkId); /// Gets the profiler corresponding to the given network id. /// @param networkId The id of the network for which to get the profile. /// @return A pointer to the requested profiler, or nullptr if not found. const std::shared_ptr GetProfiler(NetworkId networkId) const; /// Registers a callback function to debug layers performing custom computations on intermediate tensors. /// @param networkId The id of the network to register the callback. /// @param func callback function to pass to the debug layer. void RegisterDebugCallback(NetworkId networkId, const DebugCallbackFunction& func); protected: IRuntime(); IRuntime(const IRuntime::CreationOptions& options); ~IRuntime(); std::unique_ptr pRuntimeImpl; }; /// The following API is replaced by the backend options API. using IGpuAccTunedParametersPtr = std::shared_ptr; /// Manages a set of GpuAcc parameters which have been tuned for maximum performance. /// Passes an instance of this object to the IRuntime::Create() method (via IRuntime::CreationOptions) to use it /// for all GPU workload execution. /// /// Can be created in two modes: /// - In UseTunedParameters mode, the parameters stored in this object are used to execute GPU workloads. /// - In UpdateTunedParameters mode, additionally, whenever a GPU workload is executed for the first time, the /// optimum parameters will be found and stored in this object. WARNING - This tuning can be slow. /// /// The parameters can be loaded from and saved to a file so that you can first run a slow initial read-write /// execution, save the parameters for later and then run fast read-only executions using the optimised parameters. class IGpuAccTunedParameters { public: enum class Mode { UseTunedParameters, UpdateTunedParameters }; enum class TuningLevel { Rapid = 1, Normal = 2, Exhaustive = 3 }; /// Creates an IClTunedParameters with the given mode. /// @{ static IGpuAccTunedParameters* CreateRaw(Mode mode, TuningLevel tunerMode); static IGpuAccTunedParametersPtr Create(Mode mode, TuningLevel tunerMode); /// @} static void Destroy(IGpuAccTunedParameters* params); /// Loads an existing set of tuned parameters from the given file. /// If there is an error loading the file, an armnn::Exception is thrown. virtual void Load(const char* filename) = 0; /// Saves the current set of tuned parameters to the given file. /// If there is an error saving to the file, an armnn::Exception is thrown. virtual void Save(const char* filename) const = 0; protected: virtual ~IGpuAccTunedParameters() {}; }; } // namespace armnn