// // Copyright © 2017 Arm Ltd and Contributors. All rights reserved. // SPDX-License-Identifier: MIT // #pragma once #include "Network.hpp" #include "LayerFwd.hpp" #include "Profiling.hpp" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include namespace cl { class Context; class CommandQueue; class Device; } namespace armnn { class LoadedNetwork { public: using WorkloadQueue = std::vector>; ~LoadedNetwork() { FreeWorkingMemory(); } /// Create a new unique WorkingMemHandle object. Create multiple handles if you wish to have /// overlapped Execution by calling this function from different threads. std::unique_ptr CreateWorkingMemHandle(NetworkId networkId); TensorInfo GetInputTensorInfo(LayerBindingId layerId) const; TensorInfo GetOutputTensorInfo(LayerBindingId layerId) const; std::vector ImportInputs(const InputTensors& inputTensors, MemorySource forceImportMemorySource = MemorySource::Undefined); std::vector ImportOutputs(const OutputTensors& outputTensors, MemorySource forceImportMemorySource = MemorySource::Undefined); void ClearImportedInputs(const std::vector inputIds); void ClearImportedOutputs(const std::vector outputIds); /// Single thread execution of the loaded network Status EnqueueWorkload(const InputTensors& inputTensors, const OutputTensors& outputTensors, std::vector preImportedInputIds = {}, std::vector preImportedOutputIds = {}); /// Thread safe execution of the loaded network Status Execute(const InputTensors& inputTensors, const OutputTensors& outputTensors, IWorkingMemHandle& workingMemHandle, std::vector preImportedInputs = {}, std::vector preImportedOutputs = {}); static std::unique_ptr MakeLoadedNetwork(std::unique_ptr net, std::string& errorMessage, const INetworkProperties& networkProperties, profiling::ProfilingService& profilingService); // NOTE we return by reference as the purpose of this method is only to provide // access to the private m_Profiler and in theory we should not need to increment // the shared_ptr's reference counter const std::shared_ptr& GetProfiler() const { return m_OptimizedNetwork->GetProfiler(); } void FreeWorkingMemory(); void RegisterDebugCallback(const DebugCallbackFunction& func); void SendNetworkStructure(); bool IsAsyncEnabled() { return m_NetworkProperties.m_AsyncEnabled; } profiling::ProfilingGuid GetNetworkGuid(); private: void AllocateWorkingMemory(std::lock_guard& lock); void AllocateAndExecuteConstantWorkloads(); void AllocateAndExecuteConstantWorkloadsAsync(); std::unordered_map> m_ConstantWorkloads; std::unordered_map m_ConstantTensorHandles; std::unique_ptr m_ConstantStrategy = std::make_unique(); LoadedNetwork(std::unique_ptr net, const INetworkProperties& networkProperties, profiling::ProfilingService& profilingService); void EnqueueInput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo); void EnqueueOutput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo); void EnqueueInput(const ConstTensor& inputTensor, ITensorHandle* inputTensorHandle); void ImportOutputTensor(const Tensor& outputTensor, ITensorHandle* outputTensorHandle); bool Execute(std::unique_ptr& timelineUtils, profiling::ProfilingGuid inferenceGuid); const IWorkloadFactory& GetWorkloadFactory(const Layer& layer) const; inline LayerBindingId ValidateImportedInputID(ImportedInputId id); inline LayerBindingId ValidateImportedOutputID(ImportedOutputId id); void CreateMemoryProfile(); void CreateMemoryProfileAsync(); std::unique_ptr CreateExternalMemoryManger( std::vector, MemorySource>>& tensorMemory); using BackendPtrMap = std::unordered_map; BackendPtrMap m_Backends; std::vector m_BackendMemoryMangers; using WorkloadFactoryMap = std::unordered_map; WorkloadFactoryMap m_WorkloadFactories; std::unique_ptr m_OptimizedNetwork; WorkloadQueue m_InputQueue; WorkloadQueue m_WorkloadQueue; WorkloadQueue m_OutputQueue; mutable std::mutex m_WorkingMemMutex; bool m_IsWorkingMemAllocated = false; INetworkProperties m_NetworkProperties; TensorHandleFactoryRegistry m_TensorHandleFactoryRegistry; profiling::ProfilingService& m_ProfilingService; struct ImportedTensorHandlePin { ImportedTensorHandlePin() {} ImportedTensorHandlePin(LayerBindingId layerBindingId, std::unique_ptr tensorHandle) : m_LayerBindingId(layerBindingId) , m_TensorHandle(std::move(tensorHandle)) {} ImportedTensorHandlePin(ImportedTensorHandlePin&&) = default; ~ImportedTensorHandlePin() { if (m_TensorHandle) { m_TensorHandle->Unimport(); } } LayerBindingId m_LayerBindingId; std::unique_ptr m_TensorHandle; }; std::vector m_PreImportedInputHandles; std::vector m_PreImportedOutputHandles; ImportedInputId m_CurImportedInputId = 0; ImportedInputId m_CurImportedOutputId = 0; std::unordered_map> m_MemBlockMap; std::unordered_map> m_MemBinMap; std::vector m_Tensorhandles; std::vector, MemorySource>> m_TensorMemory; std::unique_ptr m_ExternalMemoryManager; std::unordered_map m_SupportsExternallyManagedMemory; // A set of vectors to record the workload queue indexes and their corresponding Input/Output Slot indexes // which are connected to Inputs and Outputs for the network. std::unordered_map> m_InputWorkloadSlotPairs; std::unordered_map> m_OutputWorkloadSlotPairs; }; }