ArmNN 23.05 — IRuntime.hpp
Source listing extracted from the generated documentation for this file.
1 //
2 // Copyright © 2017 Arm Ltd. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 #pragma once
6 
#include "BackendOptions.hpp"
#include "IAsyncExecutionCallback.hpp"
#include "INetwork.hpp"
#include "IProfiler.hpp"
#include "IWorkingMemHandle.hpp"
#include "Tensor.hpp"
#include "Types.hpp"
#include "TypesUtils.hpp"

// NOTE(review): the next two includes were dropped by the doc extraction; the
// file's symbol index references ICustomAllocator.hpp and
// IMemoryOptimizerStrategy.hpp — confirm the exact include paths against the
// release sources.
#include <armnn/backends/ICustomAllocator.hpp>
#include <armnn/backends/IMemoryOptimizerStrategy.hpp>

#include <map>
#include <memory>
21 
namespace arm
{

namespace pipe
{
// Forward declaration: packet handlers are supplied by the profiling pipe
// library; this header only needs a shared-ownership handle to them.
class ILocalPacketHandler;
using ILocalPacketHandlerSharedPtr = std::shared_ptr<ILocalPacketHandler>;
} // namespace pipe
} // namespace arm
31 
32 namespace armnn
33 {
34 
35 using NetworkId = int;
36 
38 
39 struct RuntimeImpl;
40 class IRuntime;
41 using IRuntimePtr = std::unique_ptr<IRuntime, void(*)(IRuntime* runtime)>;
42 
44 {
45  INetworkProperties(bool asyncEnabled,
46  MemorySource inputSource,
47  MemorySource outputSource,
48  bool profilingEnabled = false,
50  bool externalMemoryManagementEnabled = false)
51  : m_ImportEnabled(inputSource != MemorySource::Undefined),
52  m_ExportEnabled(outputSource != MemorySource::Undefined),
53  m_AsyncEnabled(asyncEnabled),
54  m_ProfilingEnabled(profilingEnabled),
55  m_OutputNetworkDetailsMethod(detailsMethod),
56  m_InputSource(inputSource),
57  m_OutputSource(outputSource),
58  m_ExternalMemoryManagementEnabled(externalMemoryManagementEnabled)
59  {}
60 
61  /// Deprecated and will be removed in future release.
62  const bool m_ImportEnabled;
63  /// Deprecated and will be removed in future release.
64  const bool m_ExportEnabled;
65 
66  const bool m_AsyncEnabled;
67 
68  const bool m_ProfilingEnabled;
69 
71 
74 
76 
77  virtual ~INetworkProperties() {}
78 };
79 
80 using namespace armnn::experimental;
81 
82 class IRuntime
83 {
84 public:
86  {
88  : m_GpuAccTunedParameters(nullptr)
89  , m_EnableGpuProfiling(false)
90  , m_DynamicBackendsPath("")
91  , m_ProtectedMode(false)
92  , m_CustomAllocatorMap()
93  , m_MemoryOptimizerStrategyMap()
94  {}
95 
96  /// If set, uses the GpuAcc tuned parameters from the given object when executing GPU workloads.
97  /// It will also be updated with new tuned parameters if it is configured to do so.
98  std::shared_ptr<IGpuAccTunedParameters> m_GpuAccTunedParameters;
99 
100  /// Setting this flag will allow the user to obtain GPU profiling information from the runtime.
102 
103  /// Setting this value will override the paths set by the DYNAMIC_BACKEND_PATHS compiler directive
104  /// Only a single path is allowed for the override
105  /// It defines the path to search for any [dynamic backend libraries](src/dynamic/README.md).
107 
108  /// Setting this flag will allow the user to create the Runtime in protected mode.
109  /// It will run all the inferences on protected memory and will make sure that
110  /// INetworkProperties::m_ImportEnabled set to true with MemorySource::DmaBufProtected option
111  /// This requires that the backend supports Protected Memory and has an allocator capable of
112  /// allocating Protected Memory associated with it.
114 
115  /// @brief A map to define a custom memory allocator for specific backend Ids.
116  ///
117  /// @details A Custom Allocator is used for allocation of working memory in the backends.
118  /// Set this if you need to take control of how memory is allocated on a backend. Required for
119  /// Protected Mode in order to correctly allocate Protected Memory
120  ///
121  /// @note Only supported for GpuAcc
122  std::map<BackendId, std::shared_ptr<ICustomAllocator>> m_CustomAllocatorMap;
123 
124  /// @brief A map to define a custom memory optimizer strategy for specific backend Ids.
125  ///
126  /// @details A Memory Optimizer Strategy provides a solution to an abstract representation of
127  /// a network's memory requirements. This can also be used to return a pre-computed solution
128  /// for a specific network. Set this if you want to implement a Custom Memory Optimizer Strategy
129  /// for a given backend.
130  std::map<BackendId, std::shared_ptr<IMemoryOptimizerStrategy>> m_MemoryOptimizerStrategyMap;
131 
133  {
135  : m_EnableProfiling(false)
136  , m_TimelineEnabled(false)
137  , m_OutgoingCaptureFile("")
138  , m_IncomingCaptureFile("")
139  , m_FileOnly(false)
140  , m_CapturePeriod(LOWEST_CAPTURE_PERIOD)
141  , m_FileFormat("binary")
142  , m_LocalPacketHandlers()
143  {}
144 
145  /// Indicates whether external profiling is enabled or not.
147  /// Indicates whether external timeline profiling is enabled or not.
149  /// Path to a file in which outgoing timeline profiling messages will be stored.
151  /// Path to a file in which incoming timeline profiling messages will be stored.
153  /// Enable profiling output to file only.
155  /// The duration at which captured profiling messages will be flushed.
156  uint32_t m_CapturePeriod;
157  /// The format of the file used for outputting profiling data.
158  std::string m_FileFormat;
159  std::vector<arm::pipe::ILocalPacketHandlerSharedPtr> m_LocalPacketHandlers;
160  };
162 
163  /// Pass backend specific options.
164  ///
165  /// For example, to enable GpuAcc tuning add the following
166  /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~.cpp
167  /// m_BackendOption.emplace_back(
168  /// BackendOptions{"GpuAcc",
169  /// {
170  /// {"TuningLevel", 2},
171  /// {"TuningFile", filename}
172  /// {"MemoryOptimizerStrategy", strategyname}
173  /// }
174  /// });
175  /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
176  /// Execute representative workloads through the runtime to generate tuning data.
177  /// The tuning file is written once the runtime is destroyed
178 
179  /// To execute with the tuning data, start up with just the tuning file specified.
180  /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~.cpp
181  /// m_BackendOption.emplace_back(
182  /// BackendOptions{"GpuAcc",
183  /// {
184  /// {"TuningFile", filename}
185  /// }
186  /// });
187  /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
188 
189  /// The following backend options are available:
190  /// AllBackends:
191  /// "MemoryOptimizerStrategy" : string [stategynameString]
192  /// (Existing Memory Optimizer Strategies: ConstantMemoryStrategy)
193  /// GpuAcc:
194  /// "TuningLevel" : int [0..3] (0=UseOnly(default) | 1=RapidTuning | 2=NormalTuning | 3=ExhaustiveTuning)
195  /// "TuningFile" : string [filenameString]
196  /// "KernelProfilingEnabled" : bool [true | false]
197  std::vector<BackendOptions> m_BackendOptions;
198  };
199 
200  static IRuntime* CreateRaw(const CreationOptions& options);
201  static IRuntimePtr Create(const CreationOptions& options);
202  static void Destroy(IRuntime* runtime);
203 
204  /// Loads a complete network into the IRuntime.
205  /// @param [out] networkIdOut - Unique identifier for the network is returned in this reference.
206  /// @param [in] network - Complete network to load into the IRuntime.
207  /// The runtime takes ownership of the network once passed in.
208  /// @return armnn::Status
209  Status LoadNetwork(NetworkId& networkIdOut, IOptimizedNetworkPtr network);
210 
211  /// Load a complete network into the IRuntime.
212  /// @param [out] networkIdOut Unique identifier for the network is returned in this reference.
213  /// @param [in] network Complete network to load into the IRuntime.
214  /// @param [out] errorMessage Error message if there were any errors.
215  /// The runtime takes ownership of the network once passed in.
216  /// @return armnn::Status
217  Status LoadNetwork(NetworkId& networkIdOut,
218  IOptimizedNetworkPtr network,
219  std::string& errorMessage);
220 
221  Status LoadNetwork(NetworkId& networkIdOut,
222  IOptimizedNetworkPtr network,
223  std::string& errorMessage,
224  const INetworkProperties& networkProperties);
225 
226  TensorInfo GetInputTensorInfo(NetworkId networkId, LayerBindingId layerId) const;
227  TensorInfo GetOutputTensorInfo(NetworkId networkId, LayerBindingId layerId) const;
228 
229  /// ImportInputs separates the importing and mapping of InputTensors from network execution.
230  /// Allowing for a set of InputTensors to be imported and mapped once, but used in execution many times.
231  /// This function is not thread safe and must not be used while other threads are calling Execute().
232  /// No exceptions are thrown for failed imports. It is the caller's responsibility to check whether
233  /// tensors have been successfully imported by comparing returned ids with those passed in the InputTensors.
234  /// Whether a tensor can be imported or not is backend specific.
235  std::vector<ImportedInputId> ImportInputs(NetworkId networkId, const InputTensors& inputTensors,
236  MemorySource forceImportMemorySource = MemorySource::Undefined);
237 
238  /// ImportOutputs separates the importing and mapping of OutputTensors from network execution.
239  /// Allowing for a set of OutputTensors to be imported and mapped once, but used in execution many times.
240  /// This function is not thread safe and must not be used while other threads are calling Execute().
241  /// No exceptions are thrown for failed imports. It is the caller's responsibility to check whether
242  /// tensors have been successfully imported by comparing returned ids with those passed in the OutputTensors.
243  /// Whether a tensor can be imported or not is backend specific.
244  std::vector<ImportedOutputId> ImportOutputs(NetworkId networkId, const OutputTensors& outputTensors,
245  MemorySource forceImportMemorySource = MemorySource::Undefined);
246 
247  /// Un-import and delete the imported InputTensor/s
248  /// This function is not thread safe and must not be used while other threads are calling Execute().
249  /// Only compatible with AsyncEnabled networks
250  void ClearImportedInputs(NetworkId networkId, const std::vector<ImportedInputId> inputIds);
251 
252  /// Un-import and delete the imported OutputTensor/s
253  /// This function is not thread safe and must not be used while other threads are calling Execute().
254  /// Only compatible with AsyncEnabled networks
255  void ClearImportedOutputs(NetworkId networkId, const std::vector<ImportedOutputId> outputIds);
256 
257  /// Evaluates a network using input in inputTensors and outputs filled into outputTensors
258  Status EnqueueWorkload(NetworkId networkId,
259  const InputTensors& inputTensors,
260  const OutputTensors& outputTensors,
261  std::vector<ImportedInputId> preImportedInputIds = {},
262  std::vector<ImportedOutputId> preImportedOutputIds = {});
263 
264  /// This is an experimental function.
265  /// Evaluates a network using input in inputTensors and outputs filled into outputTensors.
266  /// This function performs a thread safe execution of the network. Returns once execution is complete.
267  /// Will block until this and any other thread using the same workingMem object completes.
268  Status Execute(IWorkingMemHandle& workingMemHandle,
269  const InputTensors& inputTensors,
270  const OutputTensors& outputTensors,
271  std::vector<ImportedInputId> preImportedInputs = {},
272  std::vector<ImportedOutputId> preImportedOutputs = {});
273 
274  /// Unloads a network from the IRuntime.
275  /// At the moment this only removes the network from the m_Impl->m_Network.
276  /// This might need more work in the future to be AndroidNN compliant.
277  /// @param [in] networkId - Unique identifier for the network to be unloaded. Generated in LoadNetwork().
278  /// @return armnn::Status
279  Status UnloadNetwork(NetworkId networkId);
280 
281  const IDeviceSpec& GetDeviceSpec() const;
282 
283  /// Create a new unique WorkingMemHandle object. Create multiple handles if you wish to have
284  /// overlapped Execution by calling this function from different threads.
285  std::unique_ptr<IWorkingMemHandle> CreateWorkingMemHandle(NetworkId networkId);
286 
287  /// Gets the profiler corresponding to the given network id.
288  /// @param networkId The id of the network for which to get the profile.
289  /// @return A pointer to the requested profiler, or nullptr if not found.
290  const std::shared_ptr<IProfiler> GetProfiler(NetworkId networkId) const;
291 
292  /// Registers a callback function to debug layers performing custom computations on intermediate tensors.
293  /// @param networkId The id of the network to register the callback.
294  /// @param func callback function to pass to the debug layer.
295  void RegisterDebugCallback(NetworkId networkId, const DebugCallbackFunction& func);
296 
297 protected:
298  IRuntime();
299  IRuntime(const IRuntime::CreationOptions& options);
300  ~IRuntime();
301 
302  std::unique_ptr<RuntimeImpl> pRuntimeImpl;
303 };
304 
305 
306 /// The following API is replaced by the backend options API.
307 using IGpuAccTunedParametersPtr = std::shared_ptr<IGpuAccTunedParameters>;
308 
309 /// Manages a set of GpuAcc parameters which have been tuned for maximum performance.
310 /// Passes an instance of this object to the IRuntime::Create() method (via IRuntime::CreationOptions) to use it
311 /// for all GPU workload execution.
312 ///
313 /// Can be created in two modes:
314 /// - In UseTunedParameters mode, the parameters stored in this object are used to execute GPU workloads.
315 /// - In UpdateTunedParameters mode, additionally, whenever a GPU workload is executed for the first time, the
316 /// optimum parameters will be found and stored in this object. WARNING - This tuning can be slow.
317 ///
318 /// The parameters can be loaded from and saved to a file so that you can first run a slow initial read-write
319 /// execution, save the parameters for later and then run fast read-only executions using the optimised parameters.
321 {
322 public:
323  enum class Mode
324  {
325  UseTunedParameters,
326  UpdateTunedParameters
327  };
328 
329  enum class TuningLevel
330  {
331  Rapid = 1,
332  Normal = 2,
333  Exhaustive = 3
334  };
335 
336  /// Creates an IClTunedParameters with the given mode.
337  /// @{
338  static IGpuAccTunedParameters* CreateRaw(Mode mode, TuningLevel tunerMode);
339  static IGpuAccTunedParametersPtr Create(Mode mode, TuningLevel tunerMode);
340  /// @}
341  static void Destroy(IGpuAccTunedParameters* params);
342 
343  /// Loads an existing set of tuned parameters from the given file.
344  /// If there is an error loading the file, an armnn::Exception is thrown.
345  virtual void Load(const char* filename) = 0;
346 
347  /// Saves the current set of tuned parameters to the given file.
348  /// If there is an error saving to the file, an armnn::Exception is thrown.
349  virtual void Save(const char* filename) const = 0;
350 
351 protected:
353 };
354 
355 } // namespace armnn
IWorkingMemHandle.hpp
armnn::LOWEST_CAPTURE_PERIOD
constexpr unsigned int LOWEST_CAPTURE_PERIOD
The lowest performance data capture interval we support is 10 milliseconds.
Definition: Types.hpp:34
armnn::experimental::IWorkingMemHandle
Definition: IWorkingMemHandle.hpp:20
armnn::IRuntime::CreationOptions::m_GpuAccTunedParameters
std::shared_ptr< IGpuAccTunedParameters > m_GpuAccTunedParameters
If set, uses the GpuAcc tuned parameters from the given object when executing GPU workloads.
Definition: IRuntime.hpp:98
IProfiler.hpp
armnn::MemorySource::Undefined
@ Undefined
armnn::IOptimizedNetworkPtr
std::unique_ptr< IOptimizedNetwork, void(*)(IOptimizedNetwork *network)> IOptimizedNetworkPtr
Definition: INetwork.hpp:328
armnn::IGpuAccTunedParameters::~IGpuAccTunedParameters
virtual ~IGpuAccTunedParameters()
Definition: IRuntime.hpp:352
armnn::IGpuAccTunedParameters
Manages a set of GpuAcc parameters which have been tuned for maximum performance.
Definition: IRuntime.hpp:320
armnn::IRuntime::CreationOptions::ExternalProfilingOptions::m_EnableProfiling
bool m_EnableProfiling
Indicates whether external profiling is enabled or not.
Definition: IRuntime.hpp:146
armnn::IRuntime::CreationOptions::ExternalProfilingOptions::m_IncomingCaptureFile
std::string m_IncomingCaptureFile
Path to a file in which incoming timeline profiling messages will be stored.
Definition: IRuntime.hpp:152
armnn::IRuntime::CreationOptions::m_DynamicBackendsPath
std::string m_DynamicBackendsPath
Setting this value will override the paths set by the DYNAMIC_BACKEND_PATHS compiler directive Only a...
Definition: IRuntime.hpp:106
armnn::IRuntime::CreationOptions::m_ProtectedMode
bool m_ProtectedMode
Setting this flag will allow the user to create the Runtime in protected mode.
Definition: IRuntime.hpp:113
armnn::INetworkProperties::m_InputSource
const MemorySource m_InputSource
Definition: IRuntime.hpp:72
armnn::LayerBindingId
int LayerBindingId
Type of identifiers for bindable layers (inputs, outputs).
Definition: Types.hpp:301
armnn::ProfilingDetailsMethod::Undefined
@ Undefined
IAsyncExecutionCallback.hpp
armnn::IRuntime::CreationOptions::ExternalProfilingOptions::m_OutgoingCaptureFile
std::string m_OutgoingCaptureFile
Path to a file in which outgoing timeline profiling messages will be stored.
Definition: IRuntime.hpp:150
armnn::INetworkProperties::m_OutputNetworkDetailsMethod
const ProfilingDetailsMethod m_OutputNetworkDetailsMethod
Definition: IRuntime.hpp:70
armnn::TuningLevel::Rapid
@ Rapid
armnn::IRuntime::CreationOptions
Definition: IRuntime.hpp:85
armnn::IRuntime::CreationOptions::ExternalProfilingOptions::m_CapturePeriod
uint32_t m_CapturePeriod
The duration at which captured profiling messages will be flushed.
Definition: IRuntime.hpp:156
armnn::INetworkProperties::m_ProfilingEnabled
const bool m_ProfilingEnabled
Definition: IRuntime.hpp:68
armnn::DebugCallbackFunction
std::function< void(LayerGuid guid, unsigned int slotIndex, ITensorHandle *tensorHandle)> DebugCallbackFunction
Define the type of callback for the Debug layer to call.
Definition: Types.hpp:390
armnn::RuntimeImpl
Definition: Runtime.hpp:30
armnn::IRuntime::CreationOptions::ExternalProfilingOptions::m_LocalPacketHandlers
std::vector< arm::pipe::ILocalPacketHandlerSharedPtr > m_LocalPacketHandlers
Definition: IRuntime.hpp:159
armnn::IGpuAccTunedParameters::Mode
Mode
Definition: IRuntime.hpp:323
INetwork.hpp
armnn
Copyright (c) 2021 ARM Limited and Contributors.
Definition: 01_00_quick_start.dox:6
armnn::IGpuAccTunedParametersPtr
std::shared_ptr< IGpuAccTunedParameters > IGpuAccTunedParametersPtr
The following API is replaced by the backend options API.
Definition: IRuntime.hpp:307
armnn::INetworkProperties
Definition: IRuntime.hpp:43
armnn::INetworkProperties::~INetworkProperties
virtual ~INetworkProperties()
Definition: IRuntime.hpp:77
armnn::IRuntime::CreationOptions::m_EnableGpuProfiling
bool m_EnableGpuProfiling
Setting this flag will allow the user to obtain GPU profiling information from the runtime.
Definition: IRuntime.hpp:101
armnn::IRuntime::CreationOptions::m_ProfilingOptions
ExternalProfilingOptions m_ProfilingOptions
Definition: IRuntime.hpp:161
armnn::IRuntime::CreationOptions::m_BackendOptions
std::vector< BackendOptions > m_BackendOptions
Pass backend specific options.
Definition: IRuntime.hpp:197
armnn::IRuntime::CreationOptions::ExternalProfilingOptions
Definition: IRuntime.hpp:132
armnn::InputTensors
std::vector< std::pair< LayerBindingId, class ConstTensor > > InputTensors
Definition: Tensor.hpp:392
armnn::IGpuAccTunedParameters::TuningLevel
TuningLevel
Definition: IRuntime.hpp:329
armnn::INetworkProperties::m_AsyncEnabled
const bool m_AsyncEnabled
Definition: IRuntime.hpp:66
armnn::MemorySource
MemorySource
Define the Memory Source to reduce copies.
Definition: Types.hpp:241
armnn::IRuntime::CreationOptions::CreationOptions
CreationOptions()
Definition: IRuntime.hpp:87
arm
Definition: BackendRegistry.hpp:15
armnn::IRuntime::CreationOptions::ExternalProfilingOptions::m_FileFormat
std::string m_FileFormat
The format of the file used for outputting profiling data.
Definition: IRuntime.hpp:158
armnn::INetworkProperties::m_OutputSource
const MemorySource m_OutputSource
Definition: IRuntime.hpp:73
armnn::TuningLevel
TuningLevel
Definition: ArmComputeTuningUtils.hpp:18
armnn::IRuntime::CreationOptions::ExternalProfilingOptions::m_FileOnly
bool m_FileOnly
Enable profiling output to file only.
Definition: IRuntime.hpp:154
armnn::TensorInfo
Definition: Tensor.hpp:152
armnn::NetworkId
int NetworkId
Definition: IRuntime.hpp:35
Tensor.hpp
armnn::Status
Status
Definition: Types.hpp:42
armnn::INetworkProperties::INetworkProperties
INetworkProperties(bool asyncEnabled, MemorySource inputSource, MemorySource outputSource, bool profilingEnabled=false, ProfilingDetailsMethod detailsMethod=ProfilingDetailsMethod::Undefined, bool externalMemoryManagementEnabled=false)
Definition: IRuntime.hpp:45
armnn::INetworkProperties::m_ImportEnabled
const bool m_ImportEnabled
Deprecated and will be removed in future release.
Definition: IRuntime.hpp:62
armnn::Compute::Undefined
@ Undefined
arm::pipe::ILocalPacketHandlerSharedPtr
std::shared_ptr< ILocalPacketHandler > ILocalPacketHandlerSharedPtr
Definition: IRuntime.hpp:28
armnn::INetworkProperties::m_ExportEnabled
const bool m_ExportEnabled
Deprecated and will be removed in future release.
Definition: IRuntime.hpp:64
armnn::IRuntime::CreationOptions::ExternalProfilingOptions::m_TimelineEnabled
bool m_TimelineEnabled
Indicates whether external timeline profiling is enabled or not.
Definition: IRuntime.hpp:148
armnn::TuningLevel::Exhaustive
@ Exhaustive
armnn::TuningLevel::Normal
@ Normal
armnn::IRuntime::pRuntimeImpl
std::unique_ptr< RuntimeImpl > pRuntimeImpl
Definition: IRuntime.hpp:302
ICustomAllocator.hpp
armnn::IRuntime
Definition: IRuntime.hpp:82
armnn::IRuntime::CreationOptions::m_MemoryOptimizerStrategyMap
std::map< BackendId, std::shared_ptr< IMemoryOptimizerStrategy > > m_MemoryOptimizerStrategyMap
A map to define a custom memory optimizer strategy for specific backend Ids.
Definition: IRuntime.hpp:130
armnn::OutputTensors
std::vector< std::pair< LayerBindingId, class Tensor > > OutputTensors
Definition: Tensor.hpp:393
IMemoryOptimizerStrategy.hpp
armnn::experimental
Definition: IWorkload.hpp:13
armnn::ProfilingDetailsMethod
ProfilingDetailsMethod
Define the behaviour of the internal profiler when outputting network details.
Definition: Types.hpp:71
armnn::IRuntime::CreationOptions::m_CustomAllocatorMap
std::map< BackendId, std::shared_ptr< ICustomAllocator > > m_CustomAllocatorMap
A map to define a custom memory allocator for specific backend Ids.
Definition: IRuntime.hpp:122
TypesUtils.hpp
BackendOptions.hpp
Types.hpp
armnn::IRuntime::CreationOptions::ExternalProfilingOptions::ExternalProfilingOptions
ExternalProfilingOptions()
Definition: IRuntime.hpp:134
armnn::IRuntimePtr
std::unique_ptr< IRuntime, void(*)(IRuntime *runtime)> IRuntimePtr
Definition: IRuntime.hpp:41
armnn::INetworkProperties::m_ExternalMemoryManagementEnabled
const bool m_ExternalMemoryManagementEnabled
Definition: IRuntime.hpp:75