aboutsummaryrefslogtreecommitdiff
path: root/src/armnn/LoadedNetwork.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/armnn/LoadedNetwork.cpp')
-rw-r--r--src/armnn/LoadedNetwork.cpp378
1 files changed, 307 insertions, 71 deletions
diff --git a/src/armnn/LoadedNetwork.cpp b/src/armnn/LoadedNetwork.cpp
index 1d1aae53a5..45891f7dc3 100644
--- a/src/armnn/LoadedNetwork.cpp
+++ b/src/armnn/LoadedNetwork.cpp
@@ -314,21 +314,22 @@ LoadedNetwork::LoadedNetwork(std::unique_ptr<IOptimizedNetwork> net,
{
if (layer->GetNumInputSlots() >= 1)
{
- unsigned int slotIndex = 0;
+ unsigned int inputSlotIndex = 0;
for (auto& inputSlot : layer->GetInputSlots())
{
if (inputSlot.GetOwningLayer().GetType() == LayerType::Input)
{
- m_InputWorkloadSlotPairs.push_back(
- std::make_pair(m_WorkloadQueue.size(), slotIndex));
+ auto inputLayer = PolymorphicDowncast<InputLayer*>(&inputSlot.GetOwningLayer());
+ m_InputWorkloadSlotPairs[inputLayer->GetBindingId()] =
+ std::make_pair(m_WorkloadQueue.size(), inputSlotIndex);
}
- ++slotIndex;
+ ++inputSlotIndex;
}
}
if (layer->GetNumOutputSlots() >= 1)
{
- unsigned int slotIndex = 0;
+ unsigned int outputSlotIndex = 0;
for (auto& outputSlot : layer->GetOutputSlots())
{
for (unsigned int i = 0; i < outputSlot.GetNumConnections(); i++)
@@ -337,12 +338,14 @@ LoadedNetwork::LoadedNetwork(std::unique_ptr<IOptimizedNetwork> net,
// Add its index within layer->GetOutputSlots() to m_OutputWorkloadSlotPairs
if (outputSlot.GetConnection(i)->GetOwningLayer().GetType() == LayerType::Output)
{
- m_OutputWorkloadSlotPairs.push_back(
- std::make_pair(m_WorkloadQueue.size(), slotIndex));
+ auto outputLayer = PolymorphicDowncast<OutputLayer*>(
+ &outputSlot.GetConnection(i)->GetOwningLayer());
+ m_OutputWorkloadSlotPairs[outputLayer->GetBindingId()] =
+ std::make_pair(m_WorkloadQueue.size(), outputSlotIndex);
continue;
}
}
- ++slotIndex;
+ ++outputSlotIndex;
}
}
m_WorkloadQueue.push_back(std::move(workload));
@@ -667,7 +670,9 @@ private:
}
Status LoadedNetwork::EnqueueWorkload(const InputTensors& inputTensors,
- const OutputTensors& outputTensors)
+ const OutputTensors& outputTensors,
+ std::vector<ImportedInputId> preImportedInputIds,
+ std::vector<ImportedOutputId> preImportedOutputIds)
{
const Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
@@ -691,10 +696,26 @@ Status LoadedNetwork::EnqueueWorkload(const InputTensors& inputTensors,
ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "PrepareInputs");
m_InputQueue.clear();
m_InputQueue.reserve(graph.GetNumInputs());
+
for (const BindableLayer* inputLayer : graph.GetInputLayers())
{
- const TensorPin& pin = workloadData.GetInputTensorPin(inputLayer->GetBindingId());
- EnqueueInput(*inputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
+ if (preImportedInputIds.size() != m_PreImportedInputHandles.size())
+ {
+ throw InvalidArgumentException("Invalid number of preImportedInputIds");
+ }
+ auto layerBindingId = inputLayer->GetBindingId();
+ auto it = std::find_if(preImportedInputIds.begin(), preImportedInputIds.end(),
+ [=](auto preImportedInputId)
+ {
+ return m_PreImportedInputHandles[preImportedInputId].m_LayerBindingId == layerBindingId;
+ });
+
+ if (it == preImportedInputIds.end())
+ {
+ // InputTensorHandle is not imported yet, proceed to enqueue the input
+ const TensorPin& pin = workloadData.GetInputTensorPin(inputLayer->GetBindingId());
+ EnqueueInput(*inputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
+ }
}
}
@@ -703,12 +724,57 @@ Status LoadedNetwork::EnqueueWorkload(const InputTensors& inputTensors,
ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "PrepareOutputs");
m_OutputQueue.clear();
m_OutputQueue.reserve(graph.GetNumOutputs());
+
for (const BindableLayer* outputLayer : graph.GetOutputLayers())
{
+ if (preImportedOutputIds.size() != m_PreImportedOutputHandles.size())
+ {
+ throw InvalidArgumentException("Invalid number of preImportedOutputIds");
+ }
+ auto layerBindingId = outputLayer->GetBindingId();
+ auto it = std::find_if(preImportedOutputIds.begin(), preImportedOutputIds.end(),
+ [=](auto preImportedOutputId)
+ {
+ return m_PreImportedOutputHandles[preImportedOutputId].m_LayerBindingId == layerBindingId;
+ });
+
const TensorPin& pin = workloadData.GetOutputTensorPin(outputLayer->GetBindingId());
- EnqueueOutput(*outputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
+
+ if (it == preImportedOutputIds.end())
+ {
+ // OutputTensorHandle is not imported yet, proceed to enqueue the output
+ EnqueueOutput(*outputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
+ }
+ else
+ {
+ // Insert synchronization workload for the imported output
+ OutputQueueDescriptor outputQueueDescriptor;
+ WorkloadInfo info;
+
+ outputQueueDescriptor.m_Outputs.push_back(pin.GetTensorHandle());
+ info.m_OutputTensorInfos.push_back(pin.GetTensorInfo());
+
+ // Gets the output handler from the previous node.
+ const OutputHandler& outputHandler =
+ outputLayer->GetInputSlots()[0].GetConnectedOutputSlot()->GetOutputHandler();
+
+ const TensorInfo& inputTensorInfo = outputHandler.GetTensorInfo();
+ ITensorHandle* inputTensorHandle = outputHandler.GetData();
+ ARMNN_ASSERT_MSG(inputTensorHandle != nullptr, "Data should have been allocated.");
+ MemSyncQueueDescriptor syncDesc;
+ syncDesc.m_Inputs.push_back(inputTensorHandle);
+ info.m_InputTensorInfos.push_back(inputTensorInfo);
+ auto syncWorkload = std::make_unique<SyncMemGenericWorkload>(syncDesc, info);
+ ARMNN_ASSERT_MSG(syncWorkload, "No sync workload created");
+ m_OutputQueue.push_back(move(syncWorkload));
+ }
}
}
+ // Clear m_PreImportedInputHandles and m_PreImportedOutputHandles
+ m_PreImportedInputHandles.clear();
+ m_PreImportedOutputHandles.clear();
+ m_CurImportedInputId = 0;
+ m_CurImportedOutputId = 0;
std::unique_ptr<TimelineUtilityMethods> timelineUtils =
TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService);
@@ -1120,90 +1186,260 @@ const armnn::Tensor GetOutputTensor(const LayerBindingId layerId, const OutputTe
throw InvalidArgumentException("Output does not exist.");
}
-std::vector<ImportedInputId> LoadedNetwork::ImportInputs(const InputTensors& inputTensors)
+std::vector<ImportedInputId> LoadedNetwork::ImportInputs(const InputTensors& inputTensors,
+ MemorySource forceImportMemorySource)
{
- if (!m_NetworkProperties.m_ImportEnabled) // Try import the input tensor
- {
- throw MemoryImportException("ImportInputs: Memory Import failed, NetworkProperties.m_ImportEnabled");
- }
-
- std::vector<ImportedInputId> importedInputs;
- Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
-
- for (auto inputTensor : inputTensors)
+ if (!m_NetworkProperties.m_ImportEnabled)
{
- auto layerBindingId = inputTensor.first;
- auto it = std::find_if(graph.GetInputLayers().begin(), graph.GetInputLayers().end(), [=](auto* layer)
+ // Cannot import if import is not enabled and forceImportMemorySource is undefined
+ if (forceImportMemorySource == MemorySource::Undefined)
{
- return layer->GetBindingId() == layerBindingId;
- });
-
- if (it == graph.GetInputLayers().end())
+ throw MemoryImportException("ImportInputs: Memory Import failed, NetworkProperties.m_ImportEnabled");
+ }
+ // If forceImportMemorySource is defined, try import if memory is aligned
+ if (inputTensors.size() != m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetNumInputs())
{
- throw MemoryImportException(fmt::format("ImportInputs: Memory Import failed, unknown LayerBindingId: {}",
- layerBindingId));
+ throw MemoryImportException("ImportInputs: Force Import failed, incorrect number of tensors");
}
- const Layer* layer = *it;
- if (layer->GetType() != LayerType::Input)
+ std::vector<ImportedInputId> importedInputs;
+ Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
+ for (auto inputTensor : inputTensors)
{
- throw InvalidArgumentException("ImportInputs: given layer not an InputLayer");
+ auto layerBindingId = inputTensor.first;
+ auto it = std::find_if(graph.GetInputLayers().begin(), graph.GetInputLayers().end(), [=](auto* layer)
+ {
+ return layer->GetBindingId() == layerBindingId;
+ });
+
+ if (it == graph.GetInputLayers().end())
+ {
+ throw MemoryImportException(fmt::format(
+ "ImportInputs: Memory Import failed, unknown LayerBindingId: {}", layerBindingId));
+ }
+
+ const Layer* layer = *it;
+ if (layer->GetType() != LayerType::Input)
+ {
+ throw InvalidArgumentException("ImportInputs: given layer not an InputLayer");
+ }
+ const OutputSlot& outputSlot = layer->GetOutputSlots()[0];
+ ITensorHandleFactory::FactoryId factoryId = outputSlot.GetTensorHandleFactoryId();
+ // Get matching import factory Id
+ ITensorHandleFactory::FactoryId importFactoryId =
+ m_TensorHandleFactoryRegistry.GetMatchingImportFactoryId(factoryId);
+ ITensorHandleFactory* importFactory =
+ m_TensorHandleFactoryRegistry.GetFactory(importFactoryId, forceImportMemorySource);
+ if (!importFactory)
+ {
+ throw MemoryImportException("ImportInputs: Force Import failed, cannot find matching Import Factory");
+ }
+
+ OutputHandler& handler = const_cast<OutputHandler&>(layer->GetOutputHandler(0));
+ handler.SetAllocatedData();
+ handler.CreateTensorHandles(*importFactory, false);
+ ITensorHandle* outputTensorHandle = handler.GetData();
+ std::unique_ptr<ITensorHandle> passThroughTensorHandle =
+ std::make_unique<ConstPassthroughTensorHandle>(inputTensor.second.GetInfo(),
+ inputTensor.second.GetMemoryArea());
+ // Check if the input memory can be imported
+ if (outputTensorHandle->CanBeImported(passThroughTensorHandle->Map(), forceImportMemorySource))
+ {
+ passThroughTensorHandle->Unmap();
+ if (outputTensorHandle->Import(passThroughTensorHandle->Map(), forceImportMemorySource))
+ {
+ passThroughTensorHandle->Unmap();
+ try
+ {
+ m_WorkloadQueue[m_InputWorkloadSlotPairs[layerBindingId].first].get()->ReplaceInputTensorHandle(
+ outputTensorHandle, m_InputWorkloadSlotPairs[layerBindingId].second);
+ importedInputs.push_back(m_CurImportedInputId++);
+ // For force import, we want OutputHandler to own the TensorHandle,
+ // so we do not move the TensorHandle to m_PreImportedInputHandles as in AsyncEnabled networks
+ ImportedTensorHandlePin importedTensorHandlePin{layerBindingId, nullptr};
+ m_PreImportedInputHandles.push_back(std::move(importedTensorHandlePin));
+ }
+ catch(armnn::UnimplementedException& e)
+ {
+ IgnoreUnused(e);
+ // Method not implemented; cannot use the imported tensor, so fall back to the allocated data instead
+ handler.UseAllocatedData();
+ }
+ }
+ }
+ else
+ {
+ // Cannot import, use allocated data
+ handler.UseAllocatedData();
+ }
+
}
- auto& backend = m_Backends.at(layer->GetBackendId());
- if (!HasCapability(BackendOptions::BackendOption{"PreImportIOTensors", true}, backend->GetCapabilities()))
+ return importedInputs;
+ }
+ else
+ {
+ // Import when the import of network properties is enabled
+ std::vector<ImportedInputId> importedInputs;
+ Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
+
+ for (auto inputTensor : inputTensors)
{
- std::string er = backend->GetId();
- er += " does not have PreImportIOTensors capability";
- throw BackendCapabilityException(er);
- }
+ auto layerBindingId = inputTensor.first;
+ auto it = std::find_if(graph.GetInputLayers().begin(), graph.GetInputLayers().end(), [=](auto* layer)
+ {
+ return layer->GetBindingId() == layerBindingId;
+ });
+
+ if (it == graph.GetInputLayers().end())
+ {
+ throw MemoryImportException(fmt::format(
+ "ImportInputs: Memory Import failed, unknown LayerBindingId: {}", layerBindingId));
+ }
- const OutputSlot& outputSlot = layer->GetOutputSlots()[0];
+ const Layer* layer = *it;
+ if (layer->GetType() != LayerType::Input)
+ {
+ throw InvalidArgumentException("ImportInputs: given layer not an InputLayer");
+ }
- ITensorHandleFactory::FactoryId factoryId = outputSlot.GetTensorHandleFactoryId();
- const TensorInfo& tensorInfo = outputSlot.GetTensorInfo();
+ auto& backend = m_Backends.at(layer->GetBackendId());
+ if (!HasCapability(BackendOptions::BackendOption{"PreImportIOTensors", true}, backend->GetCapabilities()))
+ {
+ std::string er = backend->GetId();
+ er += " does not have PreImportIOTensors capability";
+ throw BackendCapabilityException(er);
+ }
- ITensorHandleFactory* handleFactory = m_TensorHandleFactoryRegistry.GetFactory(factoryId);
- ARMNN_ASSERT(handleFactory);
+ const OutputSlot& outputSlot = layer->GetOutputSlots()[0];
- ImportedTensorHandlePin importedTensorHandlePin{layerBindingId,
- handleFactory->CreateTensorHandle(tensorInfo, false)};
+ ITensorHandleFactory::FactoryId factoryId = outputSlot.GetTensorHandleFactoryId();
+ const TensorInfo& tensorInfo = outputSlot.GetTensorInfo();
- ITensorHandle* tensorHandle = importedTensorHandlePin.m_TensorHandle.get();
+ ITensorHandleFactory* handleFactory = m_TensorHandleFactoryRegistry.GetFactory(factoryId);
+ ARMNN_ASSERT(handleFactory);
- if (!CheckFlag(tensorHandle->GetImportFlags(), m_NetworkProperties.m_InputSource))
- {
- throw MemoryImportException(
- fmt::format("ImportInputs: Memory Import failed, backend: {} does not support importing from source {}"
- , factoryId, m_NetworkProperties.m_InputSource));
- }
+ ImportedTensorHandlePin importedTensorHandlePin{layerBindingId,
+ handleFactory->CreateTensorHandle(tensorInfo, false)};
+
+ ITensorHandle* tensorHandle = importedTensorHandlePin.m_TensorHandle.get();
+
+ if (!CheckFlag(tensorHandle->GetImportFlags(), m_NetworkProperties.m_InputSource))
+ {
+ throw MemoryImportException(
+ fmt::format("ImportInputs: Memory Import failed, backend: "
+ "{} does not support importing from source {}"
+ , factoryId, m_NetworkProperties.m_InputSource));
+ }
+
+ std::unique_ptr<ITensorHandle> passThroughTensorHandle =
+ std::make_unique<ConstPassthroughTensorHandle>(inputTensor.second.GetInfo(),
+ inputTensor.second.GetMemoryArea());
+
+ if (tensorHandle->Import(passThroughTensorHandle->Map(), m_NetworkProperties.m_InputSource))
+ {
+ importedInputs.push_back(m_CurImportedInputId++);
+ passThroughTensorHandle->Unmap();
+ }
+ else
+ {
+ passThroughTensorHandle->Unmap();
+ throw MemoryImportException("ImportInputs: Memory Import failed");
+ }
- std::unique_ptr<ITensorHandle> passThroughTensorHandle =
- std::make_unique<ConstPassthroughTensorHandle>(inputTensor.second.GetInfo(),
- inputTensor.second.GetMemoryArea());
+ m_PreImportedInputHandles.push_back(std::move(importedTensorHandlePin));
+ }
+ return importedInputs;
+ }
+}
- if (tensorHandle->Import(passThroughTensorHandle->Map(), m_NetworkProperties.m_InputSource))
+std::vector<ImportedOutputId> LoadedNetwork::ImportOutputs(const OutputTensors& outputTensors,
+ MemorySource forceImportMemorySource)
+{
+ if (!m_NetworkProperties.m_ExportEnabled)
+ {
+ // Cannot import if import is not enabled and forceImportMemorySource is undefined
+ if (forceImportMemorySource == MemorySource::Undefined)
{
- importedInputs.push_back(m_CurImportedInputId++);
- passThroughTensorHandle->Unmap();
+ throw MemoryImportException("ImportOutputs: Memory Import failed, NetworkProperties.m_ImportEnabled");
}
- else
+ // If forceImportMemorySource is defined, try import if memory is aligned
+ if (outputTensors.size() != m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetNumOutputs())
{
- passThroughTensorHandle->Unmap();
- throw MemoryImportException("ImportInputs: Memory Import failed");
+ throw MemoryImportException("ImportOutputs: Force Import failed, incorrect number of tensors");
}
+ std::vector<ImportedInputId> importedOutputs;
+ Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
+ for (auto outputTensor : outputTensors)
+ {
+ auto layerBindingId = outputTensor.first;
+ auto it = std::find_if(graph.GetOutputLayers().begin(), graph.GetOutputLayers().end(), [=](auto* layer)
+ {
+ return layer->GetBindingId() == layerBindingId;
+ });
- m_PreImportedInputHandles.push_back(std::move(importedTensorHandlePin));
- }
+ if (it == graph.GetOutputLayers().end())
+ {
+ throw MemoryImportException(fmt::format("ImportOutputs: Memory Import failed, "
+ "unknown LayerBindingId: {}",
+ layerBindingId));
+ }
- return importedInputs;
-}
+ const Layer* layer = *it;
+ if (layer->GetType() != LayerType::Output)
+ {
+ throw InvalidArgumentException("ImportOutputs: given layer not an OutputLayer");
+ }
-std::vector<ImportedOutputId> LoadedNetwork::ImportOutputs(const OutputTensors& outputTensors)
-{
- if (!m_NetworkProperties.m_ExportEnabled) // Try import the output tensor
- {
- throw MemoryImportException("ImportOutputs: Memory Import failed, NetworkProperties.m_ImportEnabled");
+ const OutputSlot* outputSlot = layer->GetInputSlots()[0].GetConnectedOutputSlot();
+ ITensorHandleFactory::FactoryId factoryId = outputSlot->GetTensorHandleFactoryId();
+ ITensorHandleFactory::FactoryId importFactoryId =
+ m_TensorHandleFactoryRegistry.GetMatchingImportFactoryId(factoryId);
+ ITensorHandleFactory* importFactory =
+ m_TensorHandleFactoryRegistry.GetFactory(importFactoryId, forceImportMemorySource);
+ if (!importFactory)
+ {
+ throw MemoryImportException("ImportOutputs: Force Import failed, cannot find matching Import Factory");
+ }
+
+ OutputHandler& outputHandler =
+ const_cast<OutputHandler&>(layer->GetInputSlots()[0].GetConnectedOutputSlot()->GetOutputHandler());
+ outputHandler.SetAllocatedData();
+ ITensorHandle* inputTensorHandle = outputHandler.GetData();
+ outputHandler.CreateTensorHandles(*importFactory, false);
+ inputTensorHandle = outputHandler.GetData();
+
+ // Check if the output memory can be imported
+ if (inputTensorHandle->CanBeImported(outputTensor.second.GetMemoryArea(), forceImportMemorySource))
+ {
+ if (inputTensorHandle->Import(outputTensor.second.GetMemoryArea(), forceImportMemorySource))
+ {
+ try
+ {
+ m_WorkloadQueue[m_OutputWorkloadSlotPairs[layerBindingId].first].get()->
+ ReplaceOutputTensorHandle(inputTensorHandle,
+ m_OutputWorkloadSlotPairs[layerBindingId].second);
+ importedOutputs.push_back(m_CurImportedOutputId++);
+ // For force import, we want OutputHandler to own the TensorHandle,
+ // so we do not move the TensorHandle to m_PreImportedOutputHandles as in AsyncEnabled networks
+ ImportedTensorHandlePin importedTensorHandlePin{layerBindingId, nullptr};
+ m_PreImportedOutputHandles.push_back(std::move(importedTensorHandlePin));
+ }
+ catch(armnn::UnimplementedException& e)
+ {
+ IgnoreUnused(e);
+ // Method not implemented; cannot use the imported tensor, so fall back to the allocated data instead
+ outputHandler.UseAllocatedData();
+ }
+ }
+ }
+ else
+ {
+ // Cannot import, use allocated memory
+ outputHandler.UseAllocatedData();
+ }
+ }
+ return importedOutputs;
}
std::vector<ImportedOutputId> importedOutputs;