From e2af6f4322a1e2b8b3c391fb721a6a80c281477f Mon Sep 17 00:00:00 2001 From: Narumol Prangnawarat Date: Fri, 28 Jan 2022 17:59:18 +0000 Subject: IVGCVSW-6552 Add support of aligned host memory * Add AllocatedData functions to OutputHandler * Enable import aligned memory in ImportInputs * Enable import aligned memory in ImportOutputs * Allow to import input and output if the memory is aligned * Implement Reconfigure function on ClConvolution2dWorkload * End-to-end test on Ref and Cl to ensure that input and output memory are imported when aligned Signed-off-by: Narumol Prangnawarat Change-Id: I9e5e4c26d1ac2f1d806803ade5f64c6479c51718 --- src/armnn/LoadedNetwork.cpp | 378 +++++++++++++++++++++++++++++++++++--------- src/armnn/LoadedNetwork.hpp | 15 +- src/armnn/OutputHandler.hpp | 5 + src/armnn/Runtime.cpp | 34 ++-- src/armnn/Runtime.hpp | 10 +- 5 files changed, 351 insertions(+), 91 deletions(-) (limited to 'src/armnn') diff --git a/src/armnn/LoadedNetwork.cpp b/src/armnn/LoadedNetwork.cpp index 1d1aae53a5..45891f7dc3 100644 --- a/src/armnn/LoadedNetwork.cpp +++ b/src/armnn/LoadedNetwork.cpp @@ -314,21 +314,22 @@ LoadedNetwork::LoadedNetwork(std::unique_ptr net, { if (layer->GetNumInputSlots() >= 1) { - unsigned int slotIndex = 0; + unsigned int inputSlotIndex = 0; for (auto& inputSlot : layer->GetInputSlots()) { if (inputSlot.GetOwningLayer().GetType() == LayerType::Input) { - m_InputWorkloadSlotPairs.push_back( - std::make_pair(m_WorkloadQueue.size(), slotIndex)); + auto inputLayer = PolymorphicDowncast(&inputSlot.GetOwningLayer()); + m_InputWorkloadSlotPairs[inputLayer->GetBindingId()] = + std::make_pair(m_WorkloadQueue.size(), inputSlotIndex); } - ++slotIndex; + ++inputSlotIndex; } } if (layer->GetNumOutputSlots() >= 1) { - unsigned int slotIndex = 0; + unsigned int outputSlotIndex = 0; for (auto& outputSlot : layer->GetOutputSlots()) { for (unsigned int i = 0; i < outputSlot.GetNumConnections(); i++) @@ -337,12 +338,14 @@ LoadedNetwork::LoadedNetwork(std::unique_ptr net, // Add its index within layer->GetOutputSlots() to m_OutputWorkloadSlotPairs if (outputSlot.GetConnection(i)->GetOwningLayer().GetType() == LayerType::Output) { - m_OutputWorkloadSlotPairs.push_back( - std::make_pair(m_WorkloadQueue.size(), slotIndex)); + auto outputLayer = PolymorphicDowncast( + &outputSlot.GetConnection(i)->GetOwningLayer()); + m_OutputWorkloadSlotPairs[outputLayer->GetBindingId()] = + std::make_pair(m_WorkloadQueue.size(), outputSlotIndex); continue; } } - ++slotIndex; + ++outputSlotIndex; } } m_WorkloadQueue.push_back(std::move(workload)); @@ -667,7 +670,9 @@ private: } Status LoadedNetwork::EnqueueWorkload(const InputTensors& inputTensors, - const OutputTensors& outputTensors) + const OutputTensors& outputTensors, + std::vector preImportedInputIds, + std::vector preImportedOutputIds) { const Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph(); @@ -691,10 +696,26 @@ Status LoadedNetwork::EnqueueWorkload(const InputTensors& inputTensors, ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "PrepareInputs"); m_InputQueue.clear(); m_InputQueue.reserve(graph.GetNumInputs()); + for (const BindableLayer* inputLayer : graph.GetInputLayers()) { - const TensorPin& pin = workloadData.GetInputTensorPin(inputLayer->GetBindingId()); - EnqueueInput(*inputLayer, pin.GetTensorHandle(), pin.GetTensorInfo()); + if (preImportedInputIds.size() != m_PreImportedInputHandles.size()) + { + throw InvalidArgumentException("Invalid number of preImportedInputIds"); + } + auto layerBindingId = inputLayer->GetBindingId(); + auto it = std::find_if(preImportedInputIds.begin(), preImportedInputIds.end(), + [=](auto preImportedInputId) + { + return m_PreImportedInputHandles[preImportedInputId].m_LayerBindingId == layerBindingId; + }); + + if (it == preImportedInputIds.end()) + { + // InputTensorHandle is not imported yet, process to enqueue input + const TensorPin& pin = workloadData.GetInputTensorPin(inputLayer->GetBindingId()); + EnqueueInput(*inputLayer, pin.GetTensorHandle(), pin.GetTensorInfo()); + } } } @@ -703,12 +724,57 @@ Status LoadedNetwork::EnqueueWorkload(const InputTensors& inputTensors, ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "PrepareOutputs"); m_OutputQueue.clear(); m_OutputQueue.reserve(graph.GetNumOutputs()); + for (const BindableLayer* outputLayer : graph.GetOutputLayers()) { + if (preImportedOutputIds.size() != m_PreImportedOutputHandles.size()) + { + throw InvalidArgumentException("Invalid number of preImportedOutputIds"); + } + auto layerBindingId = outputLayer->GetBindingId(); + auto it = std::find_if(preImportedOutputIds.begin(), preImportedOutputIds.end(), + [=](auto preImportedOutputId) + { + return m_PreImportedOutputHandles[preImportedOutputId].m_LayerBindingId == layerBindingId; + }); + const TensorPin& pin = workloadData.GetOutputTensorPin(outputLayer->GetBindingId()); - EnqueueOutput(*outputLayer, pin.GetTensorHandle(), pin.GetTensorInfo()); + + if (it == preImportedOutputIds.end()) + { + // OutputTensorHandle is not imported yet, process to enqueue Output + EnqueueOutput(*outputLayer, pin.GetTensorHandle(), pin.GetTensorInfo()); + } + else + { + // Insert synchronization workload for the imported output + OutputQueueDescriptor outputQueueDescriptor; + WorkloadInfo info; + + outputQueueDescriptor.m_Outputs.push_back(pin.GetTensorHandle()); + info.m_OutputTensorInfos.push_back(pin.GetTensorInfo()); + + // Gets the output handler from the previous node. + const OutputHandler& outputHandler = + outputLayer->GetInputSlots()[0].GetConnectedOutputSlot()->GetOutputHandler(); + + const TensorInfo& inputTensorInfo = outputHandler.GetTensorInfo(); + ITensorHandle* inputTensorHandle = outputHandler.GetData(); + ARMNN_ASSERT_MSG(inputTensorHandle != nullptr, "Data should have been allocated."); + MemSyncQueueDescriptor syncDesc; + syncDesc.m_Inputs.push_back(inputTensorHandle); + info.m_InputTensorInfos.push_back(inputTensorInfo); + auto syncWorkload = std::make_unique(syncDesc, info); + ARMNN_ASSERT_MSG(syncWorkload, "No sync workload created"); + m_OutputQueue.push_back(move(syncWorkload)); + } } } + // Clear m_PreImportedInputHandles and m_PreImportedOutputHandles + m_PreImportedInputHandles.clear(); + m_PreImportedOutputHandles.clear(); + m_CurImportedInputId = 0; + m_CurImportedOutputId = 0; std::unique_ptr timelineUtils = TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService); @@ -1120,90 +1186,260 @@ const armnn::Tensor GetOutputTensor(const LayerBindingId layerId, const OutputTe throw InvalidArgumentException("Output does not exist."); } -std::vector LoadedNetwork::ImportInputs(const InputTensors& inputTensors) +std::vector LoadedNetwork::ImportInputs(const InputTensors& inputTensors, + MemorySource forceImportMemorySource) { - if (!m_NetworkProperties.m_ImportEnabled) // Try import the input tensor - { - throw MemoryImportException("ImportInputs: Memory Import failed, NetworkProperties.m_ImportEnabled"); - } - - std::vector importedInputs; - Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort(); - - for (auto inputTensor : inputTensors) + if (!m_NetworkProperties.m_ImportEnabled) { - auto layerBindingId = inputTensor.first; - auto it = std::find_if(graph.GetInputLayers().begin(), graph.GetInputLayers().end(), [=](auto* layer) + // Cannot import if import is not enabled and forceImportMemorySource is undefined + if (forceImportMemorySource == MemorySource::Undefined) { - return layer->GetBindingId() == layerBindingId; - }); - - if (it == graph.GetInputLayers().end()) + throw MemoryImportException("ImportInputs: Memory Import failed, NetworkProperties.m_ImportEnabled"); + } + // If forceImportMemorySource is defined, try import if memory is aligned + if (inputTensors.size() != m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetNumInputs()) { - throw MemoryImportException(fmt::format("ImportInputs: Memory Import failed, unknown LayerBindingId: {}", - layerBindingId)); + throw MemoryImportException("ImportInputs: Force Import failed, incorrect number of tensors"); } - const Layer* layer = *it; - if (layer->GetType() != LayerType::Input) + std::vector importedInputs; + Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort(); + for (auto inputTensor : inputTensors) { - throw InvalidArgumentException("ImportInputs: given layer not an InputLayer"); + auto layerBindingId = inputTensor.first; + auto it = std::find_if(graph.GetInputLayers().begin(), graph.GetInputLayers().end(), [=](auto* layer) + { + return layer->GetBindingId() == layerBindingId; + }); + + if (it == graph.GetInputLayers().end()) + { + throw MemoryImportException(fmt::format( + "ImportInputs: Memory Import failed, unknown LayerBindingId: {}", layerBindingId)); + } + + const Layer* layer = *it; + if (layer->GetType() != LayerType::Input) + { + throw InvalidArgumentException("ImportInputs: given layer not an InputLayer"); + } + const OutputSlot& outputSlot = layer->GetOutputSlots()[0]; + ITensorHandleFactory::FactoryId factoryId = outputSlot.GetTensorHandleFactoryId(); + // Get matching import factory Id + ITensorHandleFactory::FactoryId importFactoryId = + m_TensorHandleFactoryRegistry.GetMatchingImportFactoryId(factoryId); + ITensorHandleFactory* importFactory = + m_TensorHandleFactoryRegistry.GetFactory(importFactoryId, forceImportMemorySource); + if (!importFactory) + { + throw MemoryImportException("ImportInputs: Force Import failed, cannot find matching Import Factory"); + } + + OutputHandler& handler = const_cast(layer->GetOutputHandler(0)); + handler.SetAllocatedData(); + handler.CreateTensorHandles(*importFactory, false); + ITensorHandle* outputTensorHandle = handler.GetData(); + std::unique_ptr passThroughTensorHandle = + std::make_unique(inputTensor.second.GetInfo(), + inputTensor.second.GetMemoryArea()); + // Check if the input memory can be imported + if (outputTensorHandle->CanBeImported(passThroughTensorHandle->Map(), forceImportMemorySource)) + { + passThroughTensorHandle->Unmap(); + if (outputTensorHandle->Import(passThroughTensorHandle->Map(), forceImportMemorySource)) + { + passThroughTensorHandle->Unmap(); + try + { + m_WorkloadQueue[m_InputWorkloadSlotPairs[layerBindingId].first].get()->ReplaceInputTensorHandle( + outputTensorHandle, m_InputWorkloadSlotPairs[layerBindingId].second); + importedInputs.push_back(m_CurImportedInputId++); + // For force import, we want OutputHandler to own the TensorHandle, + // so we do not move the TensorHandle to m_PreImportedInputHandles as in AsyncEnabled networks + ImportedTensorHandlePin importedTensorHandlePin{layerBindingId, nullptr}; + m_PreImportedInputHandles.push_back(std::move(importedTensorHandlePin)); + } + catch(armnn::UnimplementedException& e) + { + IgnoreUnused(e); + // Method not implement, cannot use import tensor and have to use allocated data instead + handler.UseAllocatedData(); + } + } + } + else + { + // Cannot import, use allocated data + handler.UseAllocatedData(); + } + } - auto& backend = m_Backends.at(layer->GetBackendId()); - if (!HasCapability(BackendOptions::BackendOption{"PreImportIOTensors", true}, backend->GetCapabilities())) + return importedInputs; + } + else + { + // Import when the import of network properties is enabled + std::vector importedInputs; + Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort(); + + for (auto inputTensor : inputTensors) { - std::string er = backend->GetId(); - er += " does not have PreImportIOTensors capability"; - throw BackendCapabilityException(er); - } + auto layerBindingId = inputTensor.first; + auto it = std::find_if(graph.GetInputLayers().begin(), graph.GetInputLayers().end(), [=](auto* layer) + { + return layer->GetBindingId() == layerBindingId; + }); + + if (it == graph.GetInputLayers().end()) + { + throw MemoryImportException(fmt::format( + "ImportInputs: Memory Import failed, unknown LayerBindingId: {}", layerBindingId)); + } - const OutputSlot& outputSlot = layer->GetOutputSlots()[0]; + const Layer* layer = *it; + if (layer->GetType() != LayerType::Input) + { + throw InvalidArgumentException("ImportInputs: given layer not an InputLayer"); + } - ITensorHandleFactory::FactoryId factoryId = outputSlot.GetTensorHandleFactoryId(); - const TensorInfo& tensorInfo = outputSlot.GetTensorInfo(); + auto& backend = m_Backends.at(layer->GetBackendId()); + if (!HasCapability(BackendOptions::BackendOption{"PreImportIOTensors", true}, backend->GetCapabilities())) + { + std::string er = backend->GetId(); + er += " does not have PreImportIOTensors capability"; + throw BackendCapabilityException(er); + } - ITensorHandleFactory* handleFactory = m_TensorHandleFactoryRegistry.GetFactory(factoryId); - ARMNN_ASSERT(handleFactory); + const OutputSlot& outputSlot = layer->GetOutputSlots()[0]; - ImportedTensorHandlePin importedTensorHandlePin{layerBindingId, - handleFactory->CreateTensorHandle(tensorInfo, false)}; + ITensorHandleFactory::FactoryId factoryId = outputSlot.GetTensorHandleFactoryId(); + const TensorInfo& tensorInfo = outputSlot.GetTensorInfo(); - ITensorHandle* tensorHandle = importedTensorHandlePin.m_TensorHandle.get(); + ITensorHandleFactory* handleFactory = m_TensorHandleFactoryRegistry.GetFactory(factoryId); + ARMNN_ASSERT(handleFactory); - if (!CheckFlag(tensorHandle->GetImportFlags(), m_NetworkProperties.m_InputSource)) - { - throw MemoryImportException( - fmt::format("ImportInputs: Memory Import failed, backend: {} does not support importing from source {}" - , factoryId, m_NetworkProperties.m_InputSource)); - } + ImportedTensorHandlePin importedTensorHandlePin{layerBindingId, + handleFactory->CreateTensorHandle(tensorInfo, false)}; + + ITensorHandle* tensorHandle = importedTensorHandlePin.m_TensorHandle.get(); + + if (!CheckFlag(tensorHandle->GetImportFlags(), m_NetworkProperties.m_InputSource)) + { + throw MemoryImportException( + fmt::format("ImportInputs: Memory Import failed, backend: " + "{} does not support importing from source {}" + , factoryId, m_NetworkProperties.m_InputSource)); + } + + std::unique_ptr passThroughTensorHandle = + std::make_unique(inputTensor.second.GetInfo(), + inputTensor.second.GetMemoryArea()); + + if (tensorHandle->Import(passThroughTensorHandle->Map(), m_NetworkProperties.m_InputSource)) + { + importedInputs.push_back(m_CurImportedInputId++); + passThroughTensorHandle->Unmap(); + } + else + { + passThroughTensorHandle->Unmap(); + throw MemoryImportException("ImportInputs: Memory Import failed"); + } - std::unique_ptr passThroughTensorHandle = - std::make_unique(inputTensor.second.GetInfo(), - inputTensor.second.GetMemoryArea()); + m_PreImportedInputHandles.push_back(std::move(importedTensorHandlePin)); + } + return importedInputs; + } +} - if (tensorHandle->Import(passThroughTensorHandle->Map(), m_NetworkProperties.m_InputSource)) +std::vector LoadedNetwork::ImportOutputs(const OutputTensors& outputTensors, + MemorySource forceImportMemorySource) +{ + if (!m_NetworkProperties.m_ExportEnabled) + { + // Cannot import if import is not enabled and forceImportMemorySource is undefined + if (forceImportMemorySource == MemorySource::Undefined) { - importedInputs.push_back(m_CurImportedInputId++); - passThroughTensorHandle->Unmap(); + throw MemoryImportException("ImportOutputs: Memory Import failed, NetworkProperties.m_ImportEnabled"); } - else + // If forceImportMemorySource is defined, try import if memory is aligned + if (outputTensors.size() != m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetNumOutputs()) { - passThroughTensorHandle->Unmap(); - throw MemoryImportException("ImportInputs: Memory Import failed"); + throw MemoryImportException("ImportOutputs: Force Import failed, incorrect number of tensors"); } + std::vector importedOutputs; + Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort(); + for (auto outputTensor : outputTensors) + { + auto layerBindingId = outputTensor.first; + auto it = std::find_if(graph.GetOutputLayers().begin(), graph.GetOutputLayers().end(), [=](auto* layer) + { + return layer->GetBindingId() == layerBindingId; + }); - m_PreImportedInputHandles.push_back(std::move(importedTensorHandlePin)); - } + if (it == graph.GetOutputLayers().end()) + { + throw MemoryImportException(fmt::format("ImportOutputs: Memory Import failed, " + "unknown LayerBindingId: {}", + layerBindingId)); + } - return importedInputs; -} + const Layer* layer = *it; + if (layer->GetType() != LayerType::Output) + { + throw InvalidArgumentException("ImportOutputs: given layer not an OutputLayer"); + } -std::vector LoadedNetwork::ImportOutputs(const OutputTensors& outputTensors) -{ - if (!m_NetworkProperties.m_ExportEnabled) // Try import the output tensor - { - throw MemoryImportException("ImportOutputs: Memory Import failed, NetworkProperties.m_ImportEnabled"); + const OutputSlot* outputSlot = layer->GetInputSlots()[0].GetConnectedOutputSlot(); + ITensorHandleFactory::FactoryId factoryId = outputSlot->GetTensorHandleFactoryId(); + ITensorHandleFactory::FactoryId importFactoryId = + m_TensorHandleFactoryRegistry.GetMatchingImportFactoryId(factoryId); + ITensorHandleFactory* importFactory = + m_TensorHandleFactoryRegistry.GetFactory(importFactoryId, forceImportMemorySource); + if (!importFactory) + { + throw MemoryImportException("ImportOutputs: Force Import failed, cannot find matching Import Factory"); + } + + OutputHandler& outputHandler = + const_cast(layer->GetInputSlots()[0].GetConnectedOutputSlot()->GetOutputHandler()); + outputHandler.SetAllocatedData(); + ITensorHandle* inputTensorHandle = outputHandler.GetData(); + outputHandler.CreateTensorHandles(*importFactory, false); + inputTensorHandle = outputHandler.GetData(); + + // Check if the output memory can be imported + if (inputTensorHandle->CanBeImported(outputTensor.second.GetMemoryArea(), forceImportMemorySource)) + { + if (inputTensorHandle->Import(outputTensor.second.GetMemoryArea(), forceImportMemorySource)) + { + try + { + m_WorkloadQueue[m_OutputWorkloadSlotPairs[layerBindingId].first].get()-> + ReplaceOutputTensorHandle(inputTensorHandle, + m_OutputWorkloadSlotPairs[layerBindingId].second); + importedOutputs.push_back(m_CurImportedOutputId++); + // For force import, we want OutputHandler to own the TensorHandle, + // so we do not move the TensorHandle to m_PreImportedOutputHandles as in AsyncEnabled networks + ImportedTensorHandlePin importedTensorHandlePin{layerBindingId, nullptr}; + m_PreImportedOutputHandles.push_back(std::move(importedTensorHandlePin)); + } + catch(armnn::UnimplementedException& e) + { + IgnoreUnused(e); + // Method not implement, cannot use import tensor and have to use allocated data instead + outputHandler.UseAllocatedData(); + } + } + } + else + { + // Cannot import, use allocated memory + outputHandler.UseAllocatedData(); + } + } + return importedOutputs; } std::vector importedOutputs; diff --git a/src/armnn/LoadedNetwork.hpp b/src/armnn/LoadedNetwork.hpp index 9de6307938..f637dec8eb 100644 --- a/src/armnn/LoadedNetwork.hpp +++ b/src/armnn/LoadedNetwork.hpp @@ -55,14 +55,18 @@ public: TensorInfo GetInputTensorInfo(LayerBindingId layerId) const; TensorInfo GetOutputTensorInfo(LayerBindingId layerId) const; - std::vector ImportInputs(const InputTensors& inputTensors); - std::vector ImportOutputs(const OutputTensors& outputTensors); + std::vector ImportInputs(const InputTensors& inputTensors, + MemorySource forceImportMemorySource = MemorySource::Undefined); + std::vector ImportOutputs(const OutputTensors& outputTensors, + MemorySource forceImportMemorySource = MemorySource::Undefined); void ClearImportedInputs(const std::vector inputIds); void ClearImportedOutputs(const std::vector outputIds); /// Single thread execution of the loaded network - Status EnqueueWorkload(const InputTensors& inputTensors, const OutputTensors& outputTensors); + Status EnqueueWorkload(const InputTensors& inputTensors, const OutputTensors& outputTensors, + std::vector preImportedInputIds = {}, + std::vector preImportedOutputIds = {}); /// Thread safe execution of the loaded network Status Execute(const InputTensors& inputTensors, @@ -200,8 +204,9 @@ private: // A set of vectors to record the workload queue indexes and their corresponding Input/Output Slot indexes // which are connected to Inputs and Outputs for the network. - std::vector> m_InputWorkloadSlotPairs; - std::vector> m_OutputWorkloadSlotPairs; + std::unordered_map> m_InputWorkloadSlotPairs; + std::unordered_map> m_OutputWorkloadSlotPairs; + }; } diff --git a/src/armnn/OutputHandler.hpp b/src/armnn/OutputHandler.hpp index 41a49af031..3fd2519ed5 100644 --- a/src/armnn/OutputHandler.hpp +++ b/src/armnn/OutputHandler.hpp @@ -50,10 +50,15 @@ public: void SetData(std::unique_ptr data) { m_TensorHandle = std::move(data); } + void SetAllocatedData() { m_AllocatedTensorHandle = std::move(m_TensorHandle); } + + void UseAllocatedData() { m_TensorHandle = std::move(m_AllocatedTensorHandle); } + /// @brief Returns true if SetTensorInfo() has been called at least once on this. bool IsTensorInfoSet() const { return m_bTensorInfoSet; } private: std::unique_ptr m_TensorHandle; + std::unique_ptr m_AllocatedTensorHandle; TensorInfo m_TensorInfo; bool m_bTensorInfoSet = false; }; diff --git a/src/armnn/Runtime.cpp b/src/armnn/Runtime.cpp index 2752e7209c..95fb8a3abb 100644 --- a/src/armnn/Runtime.cpp +++ b/src/armnn/Runtime.cpp @@ -77,14 +77,16 @@ armnn::TensorInfo IRuntime::GetOutputTensorInfo(NetworkId networkId, LayerBindin return pRuntimeImpl->GetOutputTensorInfo(networkId, layerId); } -std::vector IRuntime::ImportInputs(NetworkId networkId, const InputTensors& inputTensors) +std::vector IRuntime::ImportInputs(NetworkId networkId, const InputTensors& inputTensors, + MemorySource forceImportMemorySource) { - return pRuntimeImpl->ImportInputs(networkId, inputTensors); + return pRuntimeImpl->ImportInputs(networkId, inputTensors, forceImportMemorySource); } -std::vector IRuntime::ImportOutputs(NetworkId networkId, const OutputTensors& outputTensors) +std::vector IRuntime::ImportOutputs(NetworkId networkId, const OutputTensors& outputTensors, + MemorySource forceImportMemorySource) { - return pRuntimeImpl->ImportOutputs(networkId, outputTensors); + return pRuntimeImpl->ImportOutputs(networkId, outputTensors, forceImportMemorySource); } void IRuntime::ClearImportedInputs(NetworkId networkId, const std::vector inputIds) @@ -98,9 +100,12 @@ void IRuntime::ClearImportedOutputs(NetworkId networkId, const std::vector preImportedInputIds, + std::vector preImportedOutputIds) { - return pRuntimeImpl->EnqueueWorkload(networkId, inputTensors, outputTensors); + return pRuntimeImpl->EnqueueWorkload(networkId, inputTensors, outputTensors, + preImportedInputIds, preImportedOutputIds); } Status IRuntime::Execute(IWorkingMemHandle& workingMemHandle, @@ -566,14 +571,16 @@ TensorInfo RuntimeImpl::GetOutputTensorInfo(NetworkId networkId, LayerBindingId return GetLoadedNetworkPtr(networkId)->GetOutputTensorInfo(layerId); } -std::vector RuntimeImpl::ImportInputs(NetworkId networkId, const InputTensors& inputTensors) +std::vector RuntimeImpl::ImportInputs(NetworkId networkId, const InputTensors& inputTensors, + MemorySource forceImportMemorySource) { - return GetLoadedNetworkPtr(networkId)->ImportInputs(inputTensors); + return GetLoadedNetworkPtr(networkId)->ImportInputs(inputTensors, forceImportMemorySource); } -std::vector RuntimeImpl::ImportOutputs(NetworkId networkId, const OutputTensors& outputTensors) +std::vector RuntimeImpl::ImportOutputs(NetworkId networkId, const OutputTensors& outputTensors, + MemorySource forceImportMemorySource) { - return GetLoadedNetworkPtr(networkId)->ImportOutputs(outputTensors); + return GetLoadedNetworkPtr(networkId)->ImportOutputs(outputTensors, forceImportMemorySource); } void RuntimeImpl::ClearImportedInputs(NetworkId networkId, const std::vector inputIds) @@ -587,7 +594,9 @@ void RuntimeImpl::ClearImportedOutputs(NetworkId networkId, const std::vector preImportedInputIds, + std::vector preImportedOutputIds) { const auto startTime = armnn::GetTimeNow(); @@ -617,7 +626,8 @@ Status RuntimeImpl::EnqueueWorkload(NetworkId networkId, } lastId=networkId; - auto status = loadedNetwork->EnqueueWorkload(inputTensors, outputTensors); + auto status = loadedNetwork->EnqueueWorkload(inputTensors, outputTensors, + preImportedInputIds, preImportedOutputIds); ARMNN_LOG(info) << "Execution time: " << std::setprecision(2) << std::fixed << armnn::GetTimeDuration(startTime).count() << " ms."; diff --git a/src/armnn/Runtime.hpp b/src/armnn/Runtime.hpp index 4052bb6d3a..bd37013ad0 100644 --- a/src/armnn/Runtime.hpp +++ b/src/armnn/Runtime.hpp @@ -55,8 +55,10 @@ public: armnn::TensorInfo GetInputTensorInfo(NetworkId networkId, LayerBindingId layerId) const; armnn::TensorInfo GetOutputTensorInfo(NetworkId networkId, LayerBindingId layerId) const; - std::vector ImportInputs(NetworkId networkId, const InputTensors& inputTensors); - std::vector ImportOutputs(NetworkId networkId, const OutputTensors& outputTensors); + std::vector ImportInputs(NetworkId networkId, const InputTensors& inputTensors, + MemorySource forceImportMemorySource = MemorySource::Undefined); + std::vector ImportOutputs(NetworkId networkId, const OutputTensors& outputTensors, + MemorySource forceImportMemorySource = MemorySource::Undefined); void ClearImportedInputs(NetworkId networkId, const std::vector inputIds); void ClearImportedOutputs(NetworkId networkId, const std::vector outputIds); @@ -64,7 +66,9 @@ public: // Evaluates network using input in inputTensors, outputs filled into outputTensors. Status EnqueueWorkload(NetworkId networkId, const InputTensors& inputTensors, - const OutputTensors& outputTensors); + const OutputTensors& outputTensors, + std::vector preImportedInputIds = {}, + std::vector preImportedOutputIds = {}); /// This is an experimental function. /// Evaluates a network using input in inputTensors and outputs filled into outputTensors. -- cgit v1.2.1