From 8636bc705cc33fd869f64ebf24b14836d5a40b29 Mon Sep 17 00:00:00 2001 From: Finn Williams Date: Sat, 2 Oct 2021 15:06:39 +0100 Subject: IVGCVSW-6313 Support pre-importing outputs * Add ClearImportedInputs/Outputs function to IRuntime * Add UnImport function to ITensorHandle * Remove mutex from IWorkingMemHandle Change-Id: I34c9b6e1618755e10f3b4597afa1d9a9ea97e5fe Signed-off-by: Finn Williams --- include/armnn/IRuntime.hpp | 18 +- include/armnn/IWorkingMemHandle.hpp | 15 +- include/armnn/Types.hpp | 2 + include/armnn/backends/ITensorHandle.hpp | 3 + src/armnn/LoadedNetwork.cpp | 563 ++++++++++++++------- src/armnn/LoadedNetwork.hpp | 35 +- src/armnn/Runtime.cpp | 39 +- src/armnn/Runtime.hpp | 7 +- src/armnn/WorkingMemHandle.cpp | 144 +++++- src/armnn/WorkingMemHandle.hpp | 64 ++- src/armnn/test/RuntimeTests.cpp | 246 +++++++-- .../backendsCommon/test/CommonTestUtils.hpp | 2 +- .../backendsCommon/test/CompatibilityTests.cpp | 6 +- src/backends/reference/RefBackend.hpp | 2 +- 14 files changed, 860 insertions(+), 286 deletions(-) diff --git a/include/armnn/IRuntime.hpp b/include/armnn/IRuntime.hpp index ca9a0ceec2..47bfef588a 100644 --- a/include/armnn/IRuntime.hpp +++ b/include/armnn/IRuntime.hpp @@ -242,6 +242,21 @@ public: /// Only compatible with AsyncEnabled networks std::vector ImportInputs(NetworkId networkId, const InputTensors& inputTensors); + /// ImportOutputs separates the importing and mapping of OutputTensors from network execution. + /// Allowing for a set of OutputTensors to be imported and mapped once, but used in execution many times. + /// This function is not thread safe and must not be used while other threads are calling Execute(). + /// Only compatible with AsyncEnabled networks + std::vector ImportOutputs(NetworkId networkId, const OutputTensors& outputTensors); + + /// Un-import and delete the imported InputTensor/s + /// This function is not thread safe and must not be used while other threads are calling Execute(). 
+ /// Only compatible with AsyncEnabled networks + void ClearImportedInputs(NetworkId networkId, const std::vector inputIds); + + /// Un-import and delete the imported OutputTensor/s + /// This function is not thread safe and must not be used while other threads are calling Execute(). + /// Only compatible with AsyncEnabled networks + void ClearImportedOutputs(NetworkId networkId, const std::vector outputIds); /// Evaluates a network using input in inputTensors and outputs filled into outputTensors Status EnqueueWorkload(NetworkId networkId, @@ -255,7 +270,8 @@ public: Status Execute(IWorkingMemHandle& workingMemHandle, const InputTensors& inputTensors, const OutputTensors& outputTensors, - std::vector preImportedInputs = {}); + std::vector preImportedInputs = {}, + std::vector preImportedOutputs = {}); /// Unloads a network from the IRuntime. /// At the moment this only removes the network from the m_Impl->m_Network. diff --git a/include/armnn/IWorkingMemHandle.hpp b/include/armnn/IWorkingMemHandle.hpp index 171fa3d81c..bbc4913c59 100644 --- a/include/armnn/IWorkingMemHandle.hpp +++ b/include/armnn/IWorkingMemHandle.hpp @@ -5,8 +5,6 @@ #pragma once -#include - namespace armnn { @@ -26,23 +24,20 @@ public: virtual NetworkId GetNetworkId() = 0; /// Allocate the backing memory required for execution. If this is not called, then allocation will be - /// deferred to execution time. The mutex must be locked. + /// deferred to execution time. virtual void Allocate() = 0; - /// Free the backing memory required for execution. The mutex must be locked. + /// Free the backing memory required for execution. virtual void Free() = 0; - /// IsAllocated returns true if the backing memory is currently allocated. The mutex must be locked. + /// IsAllocated returns true if the backing memory is currently allocated. virtual bool IsAllocated() = 0; - /// Get a mutex which can be used for synchronizing access to the WorkingMemHandle object. 
- virtual std::mutex& GetMutex() = 0; - - /// Get the WorkingMemDescriptor for a Layer. The mutex must be locked. + /// Get the WorkingMemDescriptor for a Layer. virtual WorkingMemDescriptor& GetWorkingMemDescriptor(LayerGuid id) = 0; /// Get the WorkingMemDescriptor at an index. The WorkingMemDescriptors are stored in the same order as - /// the Workloads in a topologically sorted graph. The mutex must be locked. + /// the Workloads in a topologically sorted graph. virtual WorkingMemDescriptor& GetWorkingMemDescriptorAt(unsigned int id) = 0; }; diff --git a/include/armnn/Types.hpp b/include/armnn/Types.hpp index 02f265c6e3..7f2e192102 100644 --- a/include/armnn/Types.hpp +++ b/include/armnn/Types.hpp @@ -263,6 +263,8 @@ public: /// Type of identifiers for bindable layers (inputs, outputs). using LayerBindingId = int; using ImportedInputId = unsigned int; +using ImportedOutputId = unsigned int; + class PermutationVector { diff --git a/include/armnn/backends/ITensorHandle.hpp b/include/armnn/backends/ITensorHandle.hpp index 82f5ac6c75..d07909972c 100644 --- a/include/armnn/backends/ITensorHandle.hpp +++ b/include/armnn/backends/ITensorHandle.hpp @@ -77,6 +77,9 @@ public: IgnoreUnused(memory, source); return false; }; + /// Unimport externally allocated memory + virtual void Unimport() + {}; }; } diff --git a/src/armnn/LoadedNetwork.cpp b/src/armnn/LoadedNetwork.cpp index ac63c9aefc..d25575bd93 100644 --- a/src/armnn/LoadedNetwork.cpp +++ b/src/armnn/LoadedNetwork.cpp @@ -858,12 +858,11 @@ bool LoadedNetwork::Execute(std::unique_ptr& timelineUti return success; } -void LoadedNetwork::EnqueueInput(const ConstTensor& inputTensor, - ITensorHandle* inputTensorHandle) +void LoadedNetwork::EnqueueInput(const ConstTensor& inputTensor, ITensorHandle* inputTensorHandle) { - MemorySourceFlags importFlags = inputTensorHandle->GetImportFlags(); if (m_NetworkProperties.m_ImportEnabled) // Try import the input tensor { + MemorySourceFlags importFlags = 
inputTensorHandle->GetImportFlags(); if (CheckFlag(importFlags, m_NetworkProperties.m_InputSource) ) { std::unique_ptr tensorHandle = @@ -898,77 +897,50 @@ void LoadedNetwork::EnqueueInput(const ConstTensor& inputTensor, } } -void LoadedNetwork::EnqueueOutput(const BindableLayer& layer, const Tensor& outputTensor, WorkingMemHandle& handle) +// Note: We can only import the output pointer if all of the following hold true: +// a) The imported pointer is aligned sufficiently +// b) The tensor has zero padding +// c) There is only one connection to the OutputSlot and it is to an OutputLayer. +// d) The output pointer is allocated via malloc. (Other types will be supported in a later release) +// e) m_IsExportEnabled must be set to true +void LoadedNetwork::ImportOutputTensor(const Tensor& outputTensor, ITensorHandle* outputTensorHandle) { - if (layer.GetType() != LayerType::Output) + ARMNN_ASSERT_MSG(outputTensorHandle != nullptr, "Data should have been allocated."); + MemorySourceFlags importFlags = outputTensorHandle->GetImportFlags(); + if (CheckFlag(importFlags, m_NetworkProperties.m_OutputSource)) { - throw InvalidArgumentException("EnqueueOutput: given layer not an OutputLayer"); - } - ARMNN_ASSERT_MSG(layer.GetNumInputSlots() == 1, "Output Layer should have exactly one input."); - - LayerGuid id = layer.GetGuid(); - WorkingMemDescriptor descriptor = handle.GetWorkingMemDescriptor(id); + std::unique_ptr tensorHandle = + std::make_unique(outputTensor.GetInfo(), + outputTensor.GetMemoryArea()); - ITensorHandle* inputTensorHandle = descriptor.m_Inputs[0]; - ARMNN_ASSERT_MSG(inputTensorHandle != nullptr, "Data should have been allocated."); + void* mem = tensorHandle->Map(false); + bool importOk = outputTensorHandle->Import(mem, m_NetworkProperties.m_OutputSource); + tensorHandle->Unmap(); - // Try import the output tensor. 
- // Note: We can only import the output pointer if all of the following hold true: - // a) The imported pointer is aligned sufficiently - // b) The tensor has zero padding - // c) There is only one connection to the OutputSlot and it is to an OutputLayer. - // d) The output pointer is allocated via malloc. (Other types will be supported in a later release) - // e) m_IsExportEnabled must be set to true - if (m_NetworkProperties.m_ExportEnabled && - (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetNumConnections() == 1)) - { - if (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOwningLayer().GetType() != LayerType::Input) + if (!importOk) { - MemorySourceFlags importFlags = inputTensorHandle->GetImportFlags(); - if (CheckFlag(importFlags, m_NetworkProperties.m_OutputSource)) - { - std::unique_ptr tensorHandle = - std::make_unique(outputTensor.GetInfo(), - outputTensor.GetMemoryArea()); - - void* mem = tensorHandle->Map(false); - bool importOk = inputTensorHandle->Import(mem, m_NetworkProperties.m_OutputSource); - tensorHandle->Unmap(); - - if (importOk) - { - ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "SyncMemGeneric_Execute"); - inputTensorHandle->Map(true); - inputTensorHandle->Unmap(); - } - else - { - throw MemoryExportException("EnqueueOutput: Memory Export failed"); - } - } - else - { - throw MemoryExportException("EnqueueOutput: Memory Export failed, backend does not support Export"); - } - } - else - { - throw MemoryExportException("EnqueueOutput: Memory Export failed, attempting to export Input Layer"); + throw MemoryExportException("ImportOutputTensor: Memory Export failed"); } } else { - auto copyFunc = [](void* dst, const void* src, size_t size) - { - memcpy(dst, src, size); - }; + throw MemoryExportException("ImportOutputTensor: Memory Export failed, backend does not support Export"); + } - std::unique_ptr tensorHandle = - std::make_unique(outputTensor.GetInfo(), - outputTensor.GetMemoryArea()); +} - 
CopyTensorContentsGeneric(inputTensorHandle, tensorHandle.get(), copyFunc); - } +void CopyToOutputTensor(const Tensor& outputTensor, ITensorHandle* outputTensorHandle) +{ + auto copyFunc = [](void* dst, const void* src, size_t size) + { + memcpy(dst, src, size); + }; + + std::unique_ptr tensorHandle = + std::make_unique(outputTensor.GetInfo(), + outputTensor.GetMemoryArea()); + + CopyTensorContentsGeneric(outputTensorHandle, tensorHandle.get(), copyFunc); } @@ -1018,7 +990,8 @@ std::vector LoadedNetwork::ImportInputs(const InputTensors& inp if (it == graph.GetInputLayers().end()) { - throw MemoryImportException("ImportInputs: Memory Import failed, backend does not support Import"); + throw MemoryImportException(fmt::format("ImportInputs: Memory Import failed, unknown LayerBindingId: {}", + layerBindingId)); } const Layer* layer = *it; @@ -1027,6 +1000,14 @@ std::vector LoadedNetwork::ImportInputs(const InputTensors& inp throw InvalidArgumentException("ImportInputs: given layer not an InputLayer"); } + auto& backend = m_Backends.at(layer->GetBackendId()); + if (!HasCapability(BackendOptions::BackendOption{"PreImportIOTensors", true}, backend->GetCapabilities())) + { + std::string er = backend->GetId(); + er += " does not have PreImportIOTensors capability"; + throw BackendCapabilityException(er); + } + const OutputSlot& outputSlot = layer->GetOutputSlots()[0]; ITensorHandleFactory::FactoryId factoryId = outputSlot.GetTensorHandleFactoryId(); @@ -1035,10 +1016,10 @@ std::vector LoadedNetwork::ImportInputs(const InputTensors& inp ITensorHandleFactory* handleFactory = m_TensorHandleFactoryRegistry.GetFactory(factoryId); ARMNN_ASSERT(handleFactory); - m_PreImportedInputHandles.emplace_back(layerBindingId, - handleFactory->CreateTensorHandle(tensorInfo, false)); + ImportedTensorHandlePin importedTensorHandlePin{layerBindingId, + handleFactory->CreateTensorHandle(tensorInfo, false)}; - ITensorHandle* tensorHandle = m_PreImportedInputHandles.back().m_TensorHandle.get(); + 
ITensorHandle* tensorHandle = importedTensorHandlePin.m_TensorHandle.get(); if (!CheckFlag(tensorHandle->GetImportFlags(), m_NetworkProperties.m_InputSource)) { @@ -1061,92 +1042,212 @@ std::vector LoadedNetwork::ImportInputs(const InputTensors& inp passThroughTensorHandle->Unmap(); throw MemoryImportException("ImportInputs: Memory Import failed"); } + + m_PreImportedInputHandles.push_back(std::move(importedTensorHandlePin)); } return importedInputs; } -Status LoadedNetwork::Execute(const InputTensors& inputTensors, - const OutputTensors& outputTensors, - IWorkingMemHandle& iWorkingMemHandle, - std::vector preImportedInputs) +std::vector LoadedNetwork::ImportOutputs(const OutputTensors& outputTensors) { - const Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph(); - - // Walk graph to determine the order of execution. - if (graph.GetNumLayers() < 2) + if (!m_NetworkProperties.m_ExportEnabled) // Try import the output tensor { - ARMNN_LOG(warning) << "IRuntime::EnqueueWorkload()::Less than two nodes in graph"; - return Status::Failure; + throw MemoryImportException("ImportOutputs: Memory Import failed, NetworkProperties.m_ExportEnabled"); } - if (inputTensors.size() + preImportedInputs.size() != graph.GetNumInputs() ) + std::vector importedOutputs; + Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort(); + + for (const auto& outputTensor : outputTensors) { - if (preImportedInputs.empty()) + auto layerBindingId = outputTensor.first; + auto it = std::find_if(graph.GetOutputLayers().begin(), graph.GetOutputLayers().end(), [=](auto* layer) + { + return layer->GetBindingId() == layerBindingId; + }); + + if (it == graph.GetOutputLayers().end()) + { + throw MemoryImportException(fmt::format("ImportOutputs: Memory Import failed, unknown LayerBindingId: {}", + layerBindingId)); + } + + const Layer* layer = *it; + if (layer->GetType() != LayerType::Output) + { + throw InvalidArgumentException("ImportOutputs: given layer not an 
OutputLayer"); + } + + auto& backend = m_Backends.at(layer->GetBackendId()); + if (!HasCapability(BackendOptions::BackendOption{"PreImportIOTensors", true}, backend->GetCapabilities())) { - throw InvalidArgumentException("Number of inputs provided does not match network."); + std::string er = backend->GetId(); + er += " does not have PreImportIOTensors capability"; + throw BackendCapabilityException(er); + } + + const InputSlot& inputSlot = layer->GetInputSlots()[0]; + ITensorHandleFactory::FactoryId factoryId = inputSlot.GetConnectedOutputSlot()->GetTensorHandleFactoryId(); + const TensorInfo& tensorInfo = inputSlot.GetConnectedOutputSlot()->GetTensorInfo(); + + ITensorHandleFactory* handleFactory = m_TensorHandleFactoryRegistry.GetFactory(factoryId); + ARMNN_ASSERT(handleFactory); + + ImportedTensorHandlePin importedTensorHandlePin{layerBindingId, + handleFactory->CreateTensorHandle(tensorInfo, false)}; + + ITensorHandle* tensorHandle = importedTensorHandlePin.m_TensorHandle.get(); + + if (!CheckFlag(tensorHandle->GetImportFlags(), m_NetworkProperties.m_OutputSource)) + { + throw MemoryImportException(fmt::format("ImportOutputs: Memory Import failed, backend: " + "{} does not support importing from source {}" + , factoryId, m_NetworkProperties.m_OutputSource)); + } + + if (tensorHandle->Import(outputTensor.second.GetMemoryArea(), m_NetworkProperties.m_OutputSource)) + { + importedOutputs.push_back(m_CurImportedOutputId++); } else { - throw InvalidArgumentException("Number of inputs + preImportedInputs provided does not match network."); + throw MemoryImportException("ImportOutputs: Memory Import failed"); } + + m_PreImportedOutputHandles.push_back(std::move(importedTensorHandlePin)); } - WorkingMemHandle& workingMemHandle = dynamic_cast(iWorkingMemHandle); + return importedOutputs; +} - // This map is a quick way to check for duplicate or non-existing LayerBindingIds - std::unordered_map validationMap = workingMemHandle.GetValidationMap(); - for (auto pair : 
inputTensors) +void LoadedNetwork::ClearImportedInputs(const std::vector inputIds) +{ + for (auto id : inputIds) { - const LayerBindingId layerBindingId = pair.first; + if (id >= m_PreImportedInputHandles.size()) + { + throw InvalidArgumentException(fmt::format("ClearImportedInputs::Unknown ImportedInputId: {}", id)); + } - try + auto& importedTensorHandle = m_PreImportedInputHandles[id].m_TensorHandle; + if (!importedTensorHandle) { - bool& previouslyUsed = validationMap.at(pair.first); - if (previouslyUsed) - { - throw InvalidArgumentException(fmt::format("Duplicate LayerbindingId: {} ", layerBindingId)); - } - else - { - previouslyUsed = true; - } + throw InvalidArgumentException( + fmt::format("ClearImportedInputs::ImportedInput with id: {} has already been deleted", id)); } - catch (const std::out_of_range& error) + // Call Unimport then destroy the tensorHandle + importedTensorHandle->Unimport(); + importedTensorHandle = {}; + } +} + +void LoadedNetwork::ClearImportedOutputs(const std::vector outputIds) +{ + for (auto id : outputIds) + { + if (id >= m_PreImportedOutputHandles.size()) { - throw InvalidArgumentException(fmt::format("Unknown LayerBindingId id: {}", layerBindingId)); + throw InvalidArgumentException(fmt::format("ClearImportedOutputs::Unknown ImportedOutputId: {}", id)); } + + auto& importedTensorHandle = m_PreImportedOutputHandles[id].m_TensorHandle; + if (!importedTensorHandle) + { + throw InvalidArgumentException( + fmt::format("ClearImportedOutputs::ImportedOutput with id: {} has already been deleted", id)); + } + // Call Unimport then destroy the tensorHandle + importedTensorHandle->Unimport(); + importedTensorHandle = {}; } +} - if (!preImportedInputs.empty()) +Status LoadedNetwork::Execute(const InputTensors& inputTensors, + const OutputTensors& outputTensors, + IWorkingMemHandle& iWorkingMemHandle, + std::vector preImportedInputs, + std::vector preImportedOutputs) +{ + const Graph& graph = 
m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph(); + + if (inputTensors.size() + preImportedInputs.size() != graph.GetNumInputs()) { - const unsigned int maxPreImportedId = *std::max_element(preImportedInputs.begin(), preImportedInputs.end()); - if (maxPreImportedId > m_CurImportedInputId) + if (preImportedInputs.empty()) { - throw InvalidArgumentException(fmt::format("Invalid ImportedInputId: {}", maxPreImportedId)); + throw InvalidArgumentException("LoadedNetwork::Execute: Number of inputs provided does not match network."); } - for (ImportedInputId id : preImportedInputs) + else + { + throw InvalidArgumentException("LoadedNetwork::Execute: " + "Number of inputs + preImportedInputs provided does not match network."); + } + } + + if (outputTensors.size() + preImportedOutputs.size() != graph.GetNumOutputs()) + { + if (preImportedOutputs.empty()) + { + throw InvalidArgumentException("LoadedNetwork::Execute: " + "Number of outputs provided does not match network."); + } + else + { + throw InvalidArgumentException("LoadedNetwork::Execute: " + "Number of outputs + preImportedOutputs provided does not match network."); + } + } + + WorkingMemHandle& workingMemHandle = dynamic_cast(iWorkingMemHandle); + // Collect all the given LayerBindingIds and check them for duplicates and unknowns. 
+ std::vector& bindingIds = workingMemHandle.GetBindingIdVector(); + unsigned int index = 0; + for (auto pair : inputTensors) + { + bindingIds[index++] = pair.first; + } + for (ImportedInputId id : preImportedInputs) + { + bindingIds[index++] = ValidateImportedInputID(id); + } + for (auto pair : outputTensors) + { + bindingIds[index++] = pair.first; + } + for (ImportedOutputId id : preImportedOutputs) + { + bindingIds[index++] = ValidateImportedOutputID(id); + } + + workingMemHandle.ValidateBindingIds(); + + auto resetMemHandle = [&]() + { + for (ImportedInputId id: preImportedInputs) { const LayerBindingId layerBindingId = m_PreImportedInputHandles[id].m_LayerBindingId; - try + auto inputHandle = workingMemHandle.GetInputHandle(layerBindingId); + auto inputConnections = workingMemHandle.GetInputConnections(layerBindingId); + for (auto it : inputConnections) { - bool& previouslyUsed = validationMap.at(layerBindingId); - if (previouslyUsed) - { - throw InvalidArgumentException(fmt::format("Duplicate LayerbindingId: {} ", layerBindingId)); - } - else - { - previouslyUsed = true; - } + *it = inputHandle; } - catch (const std::out_of_range& error) + } + + for (ImportedOutputId id: preImportedOutputs) + { + const LayerBindingId layerBindingId = m_PreImportedOutputHandles[id].m_LayerBindingId; + + auto outputHandle = workingMemHandle.GetOutputHandle(layerBindingId); + auto outputConnections = workingMemHandle.GetOutputConnection(layerBindingId); + + for (auto it : outputConnections) { - throw InvalidArgumentException(fmt::format("Unknown LayerBindingId id: {}", layerBindingId)); + *it = outputHandle; } } - } + }; std::unique_ptr timelineUtils = profiling::TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService); @@ -1179,23 +1280,47 @@ Status LoadedNetwork::Execute(const InputTensors& inputTensors, { ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "PrepareInputs"); + for (auto pair : inputTensors) + { + EnqueueInput(pair.second, 
workingMemHandle.GetInputHandle(pair.first)); + } + // Swap in the pre-imported inputs if any for (ImportedInputId id : preImportedInputs) { - const ImportedInputHandlePin& importedInputPin = m_PreImportedInputHandles[id]; - + const ImportedTensorHandlePin& importedInputPin = m_PreImportedInputHandles[id]; const LayerBindingId layerBindingId = m_PreImportedInputHandles[id].m_LayerBindingId; - ITensorHandle* preimportedHandle = importedInputPin.m_TensorHandle.get(); + const auto& preimportedHandle = importedInputPin.m_TensorHandle; + auto inputConnections = workingMemHandle.GetInputConnections(layerBindingId); for (auto it : inputConnections) { - *it = preimportedHandle; + *it = preimportedHandle.get(); + } + } + } + { + ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "PrepareOutputs"); + if (m_NetworkProperties.m_ExportEnabled) + { + for (auto pair: outputTensors) + { + ImportOutputTensor(pair.second, workingMemHandle.GetOutputHandle(pair.first)); } } - for (auto pair : inputTensors) + for (ImportedOutputId id : preImportedOutputs) { - EnqueueInput(pair.second, workingMemHandle.GetInputHandle(pair.first)); + const ImportedTensorHandlePin& importedOutputPin = m_PreImportedOutputHandles[id]; + const LayerBindingId layerBindingId = m_PreImportedOutputHandles[id].m_LayerBindingId; + const auto& preimportedHandle = importedOutputPin.m_TensorHandle; + + auto outputConnections = workingMemHandle.GetOutputConnection(layerBindingId); + + for (auto it : outputConnections) + { + *it = preimportedHandle.get(); + } } } @@ -1226,33 +1351,34 @@ Status LoadedNetwork::Execute(const InputTensors& inputTensors, } catch (const RuntimeException& error) { + resetMemHandle(); Fail(error); } catch (const std::runtime_error& error) { + resetMemHandle(); Fail(error); } - // For each output to the network, call EnqueueOutput with the data passed by the user. + catch (...) 
{ - ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "PrepareOutputs"); - for (const BindableLayer *outputLayer : graph.GetOutputLayers()) - { - EnqueueOutput(*outputLayer, GetOutputTensor(outputLayer->GetBindingId(), outputTensors), workingMemHandle); - } + resetMemHandle(); + throw; } - // Restore the workingMemHandle to its original state - for (ImportedInputId id : preImportedInputs) + if (!m_NetworkProperties.m_ExportEnabled) { - const LayerBindingId layerBindingId = m_PreImportedInputHandles[id].m_LayerBindingId; - - auto inputHandle = workingMemHandle.GetInputHandle(layerBindingId); - auto inputConnections = workingMemHandle.GetInputConnections(layerBindingId); - for (auto it : inputConnections) + for (auto pair: outputTensors) { - *it = inputHandle; + CopyToOutputTensor(pair.second, workingMemHandle.GetOutputHandle(pair.first)); } } + else + { + ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "SyncMemGeneric_Execute"); + workingMemHandle.MemSyncOutputs(); + } + + resetMemHandle(); return executionSucceeded ? 
Status::Success : Status::Failure; } @@ -1315,48 +1441,69 @@ std::unique_ptr LoadedNetwork::CreateWorkingMemHandle(Network struct HandleInfo { unsigned int m_ReferenceCount = 0; - bool isInputLayer = false; - bool isOutputLayer = false; - LayerBindingId m_LayerBindingId = -1; - }; + bool isInputLayerHandle = false; + bool isOutputLayerHandle = false; - std::vector inputConnections; - std::vector> inputIndexes; + WorkingMemHandle::InputMemDescriptorCoords m_InputMemDescriptorCoords; + WorkingMemHandle::OutputMemDescriptorCoords m_OutputMemDescriptorCoords; + }; std::unordered_map handleReferenceCounts; - - unsigned int workingMemDescriptorIndex = 0; + unsigned int layerIndex = 0; for (auto&& layer : order) { - WorkingMemDescriptor workingMemDescriptor; - // Constant layers execution and management is handled during loaded network construction if (layer->GetType() == LayerType::Constant) { continue; } + + WorkingMemDescriptor workingMemDescriptor; + bool isMemoryManaged = true; bool isInputLayer = false; - // Look for the layer with 1 OutputSlot which has 1 connection and that connection is an Output Layer - // If Export is enabled disable memory management so we can export, otherwise we do a copy - if ((layer->GetNumOutputSlots() == 1) && - (layer->GetOutputSlots()[0].GetNumConnections() == 1) && - (layer->GetOutputSlots()[0].GetConnection(0)->GetOwningLayer().GetType() == LayerType::Output)) - { - isMemoryManaged = !m_NetworkProperties.m_ExportEnabled; - } - else if (layer->GetType() == LayerType::Input || layer->GetType() == LayerType::MemImport) + bool isOutputLayer = false; + bool isConnectedToOutputLayer = false; + + if (layer->GetType() == LayerType::Input || layer->GetType() == LayerType::MemImport) { // Input layers/workloads will not be executed so the descriptor is not added to workingMemDescriptors // However we will still need to manage the tensorHandle isInputLayer = true; isMemoryManaged = !m_NetworkProperties.m_ImportEnabled; } + else if 
(layer->GetType() == LayerType::Output) + { + isOutputLayer = true; + } + unsigned int slotIndex = 0; // Create a tensor handle for each output slot of a layer // Once we create it, we start managing its lifetime for (auto& slot : layer->GetOutputSlots()) { + for (unsigned int i = 0; i < slot.GetNumConnections(); ++i) + { + if ((slot.GetConnection(i)->GetOwningLayer().GetType() == LayerType::Output)) + { + if (!isConnectedToOutputLayer) + { + isConnectedToOutputLayer = true; + // If Export is enabled disable memory management, so we can export, otherwise we do a copy + isMemoryManaged = !m_NetworkProperties.m_ExportEnabled; + } + else + { + // Importing in this case would likely cause unexpected behaviour, so we disallow it. + ARMNN_LOG(warning) << + fmt::format("Layer name: '{0}' guid: '{1}' has two or more OutputLayers connected to it. " + "This will prevent importing on the connected OutputLayers.", + layer->GetName(), layer->GetGuid()); + isMemoryManaged = true; + } + } + } + tensorHandleMap[layer->GetGuid()].emplace_back(GetTensorHandle(layer, slot, isMemoryManaged)); ITensorHandle* tensorHandle = tensorHandleMap[layer->GetGuid()].back().get(); @@ -1365,22 +1512,28 @@ std::unique_ptr LoadedNetwork::CreateWorkingMemHandle(Network unsigned int numConnections = slot.GetNumConnections(); ARMNN_ASSERT(numConnections != 0); - handleReferenceCounts[tensorHandle].m_ReferenceCount = numConnections; + HandleInfo& handleInfo = handleReferenceCounts[tensorHandle]; + handleInfo.m_ReferenceCount = numConnections; + // Store the coordinates of the current layer's OutputSlot that is connected to the OutputLayer + if (isConnectedToOutputLayer) + { + handleInfo.isOutputLayerHandle = true; + handleInfo.m_OutputMemDescriptorCoords.m_OutputSlotCoords = {layerIndex, slotIndex}; + } + // Store the LayerBindingId of the InputLayer if (isInputLayer) { - handleReferenceCounts[tensorHandle].isInputLayer = true; + handleInfo.isInputLayerHandle = true; LayerBindingId bindingId = 
static_cast(layer)->GetBindingId(); - - handleReferenceCounts[tensorHandle].m_LayerBindingId = bindingId; - - inputIndexes.emplace_back(std::make_pair(bindingId, layer->GetGuid())); + handleInfo.m_InputMemDescriptorCoords.m_LayerBindingId = bindingId; } + slotIndex++; } // Loop through the input slots in the same layer and decrement the reference counter associated // to each tensor handle we encounter. - // Once it reaches zero, the lifetime of the tensor handle has ended, and we mark it's memory as available - // so that the next tensor handle with a non overlapping lifetime can share it's memory. + // Once it reaches zero, the lifetime of the tensor handle has ended, and we mark its memory as available + // so that the next tensor handle with a non overlapping lifetime can share its memory. for (auto& slot : layer->GetInputSlots()) { ARMNN_ASSERT(slot.GetConnection()); @@ -1402,11 +1555,26 @@ std::unique_ptr LoadedNetwork::CreateWorkingMemHandle(Network HandleInfo& handleInfo = handleReferenceCounts.at(inputTensorHandle); - // Store the iterator to the - if (handleInfo.isInputLayer) + // Store the LayerBindingId of the OutputLayer + if (isOutputLayer) + { + LayerBindingId bindingId = static_cast(layer)->GetBindingId(); + handleInfo.m_OutputMemDescriptorCoords.m_LayerBindingIds.push_back(bindingId); + handleInfo.m_OutputMemDescriptorCoords.m_InputSlotCoords.push_back({layerIndex, slot.GetSlotIndex()}); + } + // In this case the layer is not an Output Layer but shares its input tensorhandle with an OutputLayer + // It will need to be updated as well, if we swap out the tensorhandle + else if (handleInfo.isOutputLayerHandle) + { + handleInfo.m_OutputMemDescriptorCoords.m_InputSlotCoords.push_back({layerIndex, slot.GetSlotIndex()}); + } + + // Store the coordinates of the InputSlots connected to the InputLayer + // There can be more than one InputSlot connected to an InputLayer, so we use a vector + if (handleInfo.isInputLayerHandle) { - 
inputConnections.emplace_back(WorkingMemHandle::InputConnectionInfo{ - handleInfo.m_LayerBindingId, workingMemDescriptorIndex, slot.GetSlotIndex()}); + std::pair connectionLocation{layerIndex, slot.GetSlotIndex()}; + handleInfo.m_InputMemDescriptorCoords.m_InputSlotCoords.emplace_back(connectionLocation); } --handleInfo.m_ReferenceCount; @@ -1414,23 +1582,38 @@ std::unique_ptr LoadedNetwork::CreateWorkingMemHandle(Network { // Stop managing lifetime of tensor handle inputTensorHandle->Allocate(); - handleReferenceCounts.erase(inputTensorHandle); } } workingMemDescriptorMap.insert({layer->GetGuid(), workingMemDescriptor}); - // Input layers/workloads will not be executed, so the descriptor is not added to workingMemDescriptors + // Input/Output layers/workloads will not be executed, so the descriptor is not added to workingMemDescriptors // However we will still need to manage the tensorHandle if (!isInputLayer) { workingMemDescriptors.push_back(workingMemDescriptor); - workingMemDescriptorIndex++; + layerIndex++; + } + } + + std::vector inputConnectionsInfo; + std::vector outputConnectionsInfo; + + for (const auto& handleInfo: handleReferenceCounts) + { + if (handleInfo.second.isOutputLayerHandle) + { + outputConnectionsInfo.emplace_back(handleInfo.second.m_OutputMemDescriptorCoords); + } + + if (handleInfo.second.isInputLayerHandle) + { + inputConnectionsInfo.emplace_back(handleInfo.second.m_InputMemDescriptorCoords); } } return std::make_unique(networkId, - inputIndexes, - inputConnections, + inputConnectionsInfo, + outputConnectionsInfo, workingMemDescriptors, workingMemDescriptorMap, memoryManagers, @@ -1445,4 +1628,40 @@ void LoadedNetwork::RegisterDebugCallback(const DebugCallbackFunction& func) } } +LayerBindingId LoadedNetwork::ValidateImportedInputID(ImportedInputId id) +{ + try + { + const auto& importedTensorHandlePin = m_PreImportedInputHandles.at(id); + if (!importedTensorHandlePin.m_TensorHandle) + { + throw 
InvalidArgumentException(fmt::format("LoadedNetwork::Execute: " + "PreImportedInput: {} has been deleted", id)); + } + return importedTensorHandlePin.m_LayerBindingId; + } + catch (const std::out_of_range&) + { + throw InvalidArgumentException(fmt::format("LoadedNetwork::Execute: Unknown ImportedInputId: {}", id)); + } +} + +LayerBindingId LoadedNetwork::ValidateImportedOutputID(ImportedOutputId id) +{ + try + { + const auto& importedTensorHandlePin = m_PreImportedOutputHandles.at(id); + if (!importedTensorHandlePin.m_TensorHandle) + { + throw InvalidArgumentException(fmt::format("LoadedNetwork::Execute: " + "PreImportedOutput: {} has been deleted", id)); + } + return importedTensorHandlePin.m_LayerBindingId; + } + catch (const std::out_of_range&) + { + throw InvalidArgumentException(fmt::format("LoadedNetwork::Execute: Unknown ImportedOutputId: {}", id)); + } +} + } diff --git a/src/armnn/LoadedNetwork.hpp b/src/armnn/LoadedNetwork.hpp index e713be215a..99dac556ae 100644 --- a/src/armnn/LoadedNetwork.hpp +++ b/src/armnn/LoadedNetwork.hpp @@ -50,6 +50,10 @@ public: TensorInfo GetOutputTensorInfo(LayerBindingId layerId) const; std::vector ImportInputs(const InputTensors& inputTensors); + std::vector ImportOutputs(const OutputTensors& outputTensors); + + void ClearImportedInputs(const std::vector inputIds); + void ClearImportedOutputs(const std::vector outputIds); /// Single thread execution of the loaded network Status EnqueueWorkload(const InputTensors& inputTensors, const OutputTensors& outputTensors); @@ -58,7 +62,8 @@ public: Status Execute(const InputTensors& inputTensors, const OutputTensors& outputTensors, IWorkingMemHandle& workingMemHandle, - std::vector preImportedInputs = {}); + std::vector preImportedInputs = {}, + std::vector preImportedOutputs = {}); static std::unique_ptr MakeLoadedNetwork(std::unique_ptr net, std::string& errorMessage, @@ -105,13 +110,16 @@ private: void EnqueueInput(const ConstTensor& inputTensor, ITensorHandle* inputTensorHandle); - 
void EnqueueOutput(const BindableLayer& layer, const Tensor& outputTensor, WorkingMemHandle& handle); + void ImportOutputTensor(const Tensor& outputTensor, ITensorHandle* outputTensorHandle); bool Execute(std::unique_ptr& timelineUtils, profiling::ProfilingGuid inferenceGuid); const IWorkloadFactory& GetWorkloadFactory(const Layer& layer) const; + inline LayerBindingId ValidateImportedInputID(ImportedInputId id); + inline LayerBindingId ValidateImportedOutputID(ImportedOutputId id); + using BackendPtrMap = std::unordered_map; BackendPtrMap m_Backends; @@ -134,21 +142,36 @@ private: profiling::ProfilingService& m_ProfilingService; - struct ImportedInputHandlePin + struct ImportedTensorHandlePin { - ImportedInputHandlePin(LayerBindingId layerBindingId, - std::unique_ptr tensorHandle) + ImportedTensorHandlePin() + {} + + ImportedTensorHandlePin(LayerBindingId layerBindingId, + std::unique_ptr tensorHandle) : m_LayerBindingId(layerBindingId) , m_TensorHandle(std::move(tensorHandle)) {} + ImportedTensorHandlePin(ImportedTensorHandlePin&&) = default; + + ~ImportedTensorHandlePin() + { + if (m_TensorHandle) + { + m_TensorHandle->Unimport(); + } + } + LayerBindingId m_LayerBindingId; std::unique_ptr m_TensorHandle; }; - std::vector m_PreImportedInputHandles; + std::vector m_PreImportedInputHandles; + std::vector m_PreImportedOutputHandles; ImportedInputId m_CurImportedInputId = 0; + ImportedInputId m_CurImportedOutputId = 0; }; } diff --git a/src/armnn/Runtime.cpp b/src/armnn/Runtime.cpp index 50068ebe36..a54b71225d 100644 --- a/src/armnn/Runtime.cpp +++ b/src/armnn/Runtime.cpp @@ -82,6 +82,19 @@ std::vector IRuntime::ImportInputs(NetworkId networkId, const I return pRuntimeImpl->ImportInputs(networkId, inputTensors); } +std::vector IRuntime::ImportOutputs(NetworkId networkId, const OutputTensors& outputTensors) +{ + return pRuntimeImpl->ImportOutputs(networkId, outputTensors); +} + +void IRuntime::ClearImportedInputs(NetworkId networkId, const std::vector inputIds) +{ + 
return pRuntimeImpl->ClearImportedInputs(networkId, inputIds); +} +void IRuntime::ClearImportedOutputs(NetworkId networkId, const std::vector outputIds) +{ + return pRuntimeImpl->ClearImportedOutputs(networkId, outputIds); +} Status IRuntime::EnqueueWorkload(NetworkId networkId, const InputTensors& inputTensors, @@ -93,9 +106,10 @@ Status IRuntime::EnqueueWorkload(NetworkId networkId, Status IRuntime::Execute(IWorkingMemHandle& workingMemHandle, const InputTensors& inputTensors, const OutputTensors& outputTensors, - std::vector preImportedInputs) + std::vector preImportedInputs, + std::vector preImportedOutputs) { - return pRuntimeImpl->Execute(workingMemHandle, inputTensors, outputTensors, preImportedInputs); + return pRuntimeImpl->Execute(workingMemHandle, inputTensors, outputTensors, preImportedInputs, preImportedOutputs); } Status IRuntime::UnloadNetwork(NetworkId networkId) @@ -528,7 +542,19 @@ std::vector RuntimeImpl::ImportInputs(NetworkId networkId, cons return GetLoadedNetworkPtr(networkId)->ImportInputs(inputTensors); } +std::vector RuntimeImpl::ImportOutputs(NetworkId networkId, const OutputTensors& outputTensors) +{ + return GetLoadedNetworkPtr(networkId)->ImportOutputs(outputTensors); +} +void RuntimeImpl::ClearImportedInputs(NetworkId networkId, const std::vector inputIds) +{ + return GetLoadedNetworkPtr(networkId)->ClearImportedInputs(inputIds); +} +void RuntimeImpl::ClearImportedOutputs(NetworkId networkId, const std::vector outputIds) +{ + return GetLoadedNetworkPtr(networkId)->ClearImportedOutputs(outputIds); +} Status RuntimeImpl::EnqueueWorkload(NetworkId networkId, const InputTensors& inputTensors, @@ -566,7 +592,8 @@ Status RuntimeImpl::EnqueueWorkload(NetworkId networkId, Status RuntimeImpl::Execute(IWorkingMemHandle& iWorkingMemHandle, const InputTensors& inputTensors, const OutputTensors& outputTensors, - std::vector preImportedInputs) + std::vector preImportedInputs, + std::vector preImportedOutputs) { NetworkId networkId = 
iWorkingMemHandle.GetNetworkId(); LoadedNetwork* loadedNetwork = GetLoadedNetworkPtr(networkId); @@ -585,7 +612,11 @@ Status RuntimeImpl::Execute(IWorkingMemHandle& iWorkingMemHandle, ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Execute"); - return loadedNetwork->Execute(inputTensors, outputTensors, iWorkingMemHandle, preImportedInputs); + return loadedNetwork->Execute(inputTensors, + outputTensors, + iWorkingMemHandle, + preImportedInputs, + preImportedOutputs); } /// Create a new unique WorkingMemHandle object. Create multiple handles if you wish to have diff --git a/src/armnn/Runtime.hpp b/src/armnn/Runtime.hpp index e947dceb52..05de372eee 100644 --- a/src/armnn/Runtime.hpp +++ b/src/armnn/Runtime.hpp @@ -56,6 +56,10 @@ public: TensorInfo GetOutputTensorInfo(NetworkId networkId, LayerBindingId layerId) const; std::vector ImportInputs(NetworkId networkId, const InputTensors& inputTensors); + std::vector ImportOutputs(NetworkId networkId, const OutputTensors& outputTensors); + + void ClearImportedInputs(NetworkId networkId, const std::vector inputIds); + void ClearImportedOutputs(NetworkId networkId, const std::vector outputIds); // Evaluates network using input in inputTensors, outputs filled into outputTensors. Status EnqueueWorkload(NetworkId networkId, @@ -69,7 +73,8 @@ public: Status Execute(IWorkingMemHandle& workingMemHandle, const InputTensors& inputTensors, const OutputTensors& outputTensors, - std::vector preImportedInputs); + std::vector preImportedInputs, + std::vector preImportedOutputs); /// Unloads a network from the Runtime. /// At the moment this only removes the network from the m_Impl->m_Network. 
diff --git a/src/armnn/WorkingMemHandle.cpp b/src/armnn/WorkingMemHandle.cpp index e402684bb8..7dde67372f 100644 --- a/src/armnn/WorkingMemHandle.cpp +++ b/src/armnn/WorkingMemHandle.cpp @@ -7,6 +7,7 @@ #include "WorkingMemHandle.hpp" #include "Network.hpp" #include +#include namespace armnn { @@ -15,8 +16,8 @@ namespace experimental { WorkingMemHandle::WorkingMemHandle(NetworkId networkId, - std::vector> inputHandles, - std::vector inputConnections, + std::vector inputLayerInfo, + std::vector ouputLayerInfo, std::vector workingMemDescriptors, std::unordered_map workingMemDescriptorMap, std::vector> memoryManagers, @@ -26,39 +27,66 @@ WorkingMemHandle::WorkingMemHandle(NetworkId networkId, , m_WorkingMemDescriptorMap(workingMemDescriptorMap) , m_MemoryManagers(memoryManagers) , m_OwnedTensorHandles(std::move(ownedTensorHandles)) + , m_BindingIdVec(inputLayerInfo.size() + ouputLayerInfo.size()) + , m_InputSize(numeric_cast(inputLayerInfo.size())) , m_IsAllocated(false) - , m_Mutex() { - unsigned int maxInputBindingId = 0; - for (auto pair : inputHandles) + for (const auto& inputInfo : inputLayerInfo) { - unsigned int bindingId = numeric_cast(pair.first); - if (maxInputBindingId < bindingId) + m_InputValidationMap[inputInfo.m_LayerBindingId] = false; + + // Map the LayerBindingIds to the corresponding input ITensorHandle* + auto memDesc = m_WorkingMemDescriptors.at(inputInfo.m_InputSlotCoords[0].first); + ITensorHandle* inputTensorHandle = memDesc.m_Inputs[inputInfo.m_InputSlotCoords[0].second]; + m_InputHandleMap[inputInfo.m_LayerBindingId] = inputTensorHandle; + + // For every input we need to store all locations from which that input's ITensorHandle* is read. + // So we can, at a later point, swap in and out the ITensorHandle* at that location. 
+ for (auto inputSlot : inputInfo.m_InputSlotCoords) { - maxInputBindingId = bindingId; - } + WorkingMemDescriptor& workingMemDescriptor = m_WorkingMemDescriptors.at(inputSlot.first); - } + auto inputPos = workingMemDescriptor.m_Inputs.begin(); - // Create a map of LayerBindingIds to the corresponding input ITensorHandle* - for (auto pair : inputHandles) - { - m_InputHandleMap[pair.first] = m_WorkingMemDescriptorMap.at(pair.second).m_Outputs[0]; - m_ValidationMap[pair.first] = false; + // The DifferenceType of a vector can be unsigned int or signed int depending on the std implementation + // This cast removes any conversion warnings + inputPos += numeric_cast(inputSlot.second); + m_InputConnectionMap[inputInfo.m_LayerBindingId].push_back(inputPos); + } } - // For every input we need to store all locations from which that input's ITensorHandle* is read. - // So we can, at a later point, swap in and out the ITensorHandle* at that location. - for (auto inputConnectionInfo : inputConnections) + for (const auto& outputInfo : ouputLayerInfo) { - WorkingMemDescriptor& workingMemDescriptor = m_WorkingMemDescriptors[inputConnectionInfo.m_DescriptorIndex]; + for (auto bindingId : outputInfo.m_LayerBindingIds) + { + m_OutputValidationMap[bindingId] = false; + + // Store the outputSlot position of the tensorhandle + auto outputPos = m_WorkingMemDescriptors.at(outputInfo.m_OutputSlotCoords.first).m_Outputs.begin(); + outputPos += numeric_cast(outputInfo.m_OutputSlotCoords.second); + + m_OutputHandleMap[bindingId] = *outputPos; + } + + // More than one layerBinding id means the tensorhandle is connected to more than one OutputLayer. + // Importing in this case would likely cause unexpected behaviour, so we disallow it. 
+ if (outputInfo.m_LayerBindingIds.size() != 1) + { + continue; + } + + // Store the inputSlot positions of the tensorhandle + for (auto outputSlot : outputInfo.m_InputSlotCoords) + { + WorkingMemDescriptor& workingMemDescriptor = m_WorkingMemDescriptors.at(outputSlot.first); - auto pos = workingMemDescriptor.m_Inputs.begin(); - // The difference_type of a vector can be unsigned int or signed int depending on the std implementation - // This cast removes any conversion warnings - pos += numeric_cast::difference_type>(inputConnectionInfo.m_InputIndex); + auto inputPos = workingMemDescriptor.m_Inputs.begin(); - m_InputConnectionMap[inputConnectionInfo.m_LayerBindingId].push_back(pos); + // The DifferenceType of a vector can be unsigned int or signed int depending on the std implementation + // This cast removes any conversion warnings + inputPos += numeric_cast(outputSlot.second); + m_OutputConnectionMap[outputInfo.m_LayerBindingIds[0]].push_back(inputPos); + } } } @@ -90,6 +118,74 @@ void WorkingMemHandle::Free() } } +void WorkingMemHandle::MemSyncOutputs() +{ + for (auto output : m_OutputConnectionMap) + { + (*output.second[0])->Map(true); + (*output.second[0])->Unmap(); + } +} + +void WorkingMemHandle::ValidateBindingIds() +{ + auto resetInputValidationMap = [&]() + { + for (auto& pair: m_InputValidationMap) + { + pair.second = false; + } + }; + + auto resetOutputValidationMap = [&]() + { + for (auto& pair: m_OutputValidationMap) + { + pair.second = false; + } + }; + + std::for_each(m_BindingIdVec.begin(), m_BindingIdVec.begin() + m_InputSize, [&](LayerBindingId id) + { + try + { + bool& isUsed = m_InputValidationMap.at(id); + if (isUsed) + { + resetInputValidationMap(); + throw InvalidArgumentException(fmt::format("Duplicate Input LayerBindingId: {}", id)); + } + isUsed = true; + } + catch (const std::out_of_range&) + { + resetInputValidationMap(); + throw InvalidArgumentException(fmt::format("Unknown Input LayerBindingId: {}", id)); + } + }); + 
resetInputValidationMap(); + + std::for_each(m_BindingIdVec.begin() + m_InputSize, m_BindingIdVec.end(), [&](LayerBindingId id) + { + try + { + bool& isUsed = m_OutputValidationMap.at(id); + if (isUsed) + { + resetOutputValidationMap(); + throw InvalidArgumentException(fmt::format("Duplicate Output LayerBindingId: {}", id)); + } + isUsed = true; + } + catch (const std::out_of_range&) + { + resetOutputValidationMap(); + throw InvalidArgumentException(fmt::format("Unknown Output LayerBindingId: {}", id)); + } + }); + resetOutputValidationMap(); +} + } // end experimental namespace } // end armnn namespace diff --git a/src/armnn/WorkingMemHandle.hpp b/src/armnn/WorkingMemHandle.hpp index 676d04288b..aaa9d593ee 100644 --- a/src/armnn/WorkingMemHandle.hpp +++ b/src/armnn/WorkingMemHandle.hpp @@ -26,18 +26,26 @@ class WorkingMemHandle final : public IWorkingMemHandle { public: - struct InputConnectionInfo + struct InputMemDescriptorCoords { LayerBindingId m_LayerBindingId; - unsigned int m_DescriptorIndex; - unsigned int m_InputIndex; + + std::vector> m_InputSlotCoords; + }; + + struct OutputMemDescriptorCoords + { + std::vector m_LayerBindingIds; + + std::pair m_OutputSlotCoords; + std::vector> m_InputSlotCoords; }; WorkingMemHandle(NetworkId networkId) : m_NetworkId(networkId){} WorkingMemHandle(NetworkId networkId, - std::vector> inputHandles, - std::vector inputConnections, + std::vector inputLayerInfo, + std::vector ouputLayerInfo, std::vector workingMemDescriptors, std::unordered_map workingMemDescriptorMap, std::vector> memoryManagers, @@ -52,25 +60,19 @@ public: } /// Allocate the backing memory required for execution. If this is not called, then allocation will be - /// deferred to execution time. The mutex must be locked. + /// deferred to execution time. void Allocate() override; - /// Free the backing memory required for execution. The mutex must be locked. + /// Free the backing memory required for execution. 
void Free() override; - /// IsAllocated returns true if the backing memory is currently allocated. The mutex must be locked. + /// IsAllocated returns true if the backing memory is currently allocated. bool IsAllocated() override { return m_IsAllocated; } - /// Get a mutex which can be used for synchronizing access to the WorkingMemHandle object. - std::mutex& GetMutex() override - { - return m_Mutex; - } - - /// Get the WorkingMemDescriptor for a Layer. The mutex must be locked. + /// Get the WorkingMemDescriptor for a Layer. WorkingMemDescriptor& GetWorkingMemDescriptor(LayerGuid id) override { auto result = m_WorkingMemDescriptorMap.find(id); @@ -79,7 +81,7 @@ public: } /// Get the WorkingMemDescriptor at an index. The WorkingMemDescriptors are stored in the same order as - /// the Workloads in a topologically sorted graph. The mutex must be locked. + /// the Workloads in a topologically sorted graph. WorkingMemDescriptor& GetWorkingMemDescriptorAt(unsigned int id) override { return m_WorkingMemDescriptors[id]; @@ -90,22 +92,39 @@ public: return m_InputHandleMap.at(layerBindingId); }; + ITensorHandle* GetOutputHandle(LayerBindingId layerBindingId) const + { + return m_OutputHandleMap.at(layerBindingId); + }; + const std::vector::iterator>& GetInputConnections(LayerBindingId layerBindingId) const { return m_InputConnectionMap.at(layerBindingId); }; - std::unordered_map GetValidationMap() const + const std::vector::iterator>& GetOutputConnection(LayerBindingId layerBindingId) const + { + return m_OutputConnectionMap.at(layerBindingId); + }; + + void MemSyncOutputs(); + + std::vector& GetBindingIdVector() { - return m_ValidationMap; + return m_BindingIdVec; }; + void ValidateBindingIds(); + private: + using DifferenceType = std::vector::difference_type; NetworkId m_NetworkId; std::shared_ptr m_Profiler; std::unordered_map m_InputHandleMap; + std::unordered_map m_OutputHandleMap; std::unordered_map::iterator>> m_InputConnectionMap; + std::unordered_map::iterator>> 
m_OutputConnectionMap; std::vector m_WorkingMemDescriptors; std::unordered_map m_WorkingMemDescriptorMap; @@ -116,9 +135,14 @@ private: // constant tensor's can be shared by multiple WorkingMemHandles and so will not be stored here std::unordered_map > > m_OwnedTensorHandles; - std::unordered_map m_ValidationMap; + std::unordered_map m_InputValidationMap; + std::unordered_map m_OutputValidationMap; + + std::vector m_BindingIdVec; + + DifferenceType m_InputSize; + bool m_IsAllocated; - std::mutex m_Mutex; }; } // end experimental namespace diff --git a/src/armnn/test/RuntimeTests.cpp b/src/armnn/test/RuntimeTests.cpp index abf13f5dc6..397a545878 100644 --- a/src/armnn/test/RuntimeTests.cpp +++ b/src/armnn/test/RuntimeTests.cpp @@ -64,11 +64,10 @@ TEST_CASE("RuntimeUnloadNetwork") TEST_CASE("RuntimePreImportInputs") { armnn::IRuntime::CreationOptions options; - armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options)); - - armnn::NetworkId networkIdentifier1 = 1; - + armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options)); + armnn::NetworkId networkId = 1; armnn::INetworkPtr testNetwork(armnn::INetwork::Create()); + auto inputLayer1 = testNetwork->AddInputLayer(0, "input 1 layer"); auto inputLayer2 = testNetwork->AddInputLayer(1, "input 2 layer"); auto addLayer = testNetwork->AddAdditionLayer("add layer"); @@ -78,17 +77,18 @@ TEST_CASE("RuntimePreImportInputs") inputLayer1->GetOutputSlot(0).Connect(addLayer->GetInputSlot(0)); inputLayer1->GetOutputSlot(0).SetTensorInfo(tensorInfo); + inputLayer2->GetOutputSlot(0).Connect(addLayer->GetInputSlot(1)); inputLayer2->GetOutputSlot(0).SetTensorInfo(tensorInfo); addLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0)); addLayer->GetOutputSlot(0).SetTensorInfo(tensorInfo); - std::vector backends = { armnn::Compute::CpuRef }; + std::vector backends = {armnn::Compute::CpuRef}; std::string er; armnn::INetworkProperties networkProperties(true, MemorySource::Malloc, MemorySource::Undefined); - 
runtime->LoadNetwork(networkIdentifier1, + runtime->LoadNetwork(networkId, Optimize(*testNetwork, backends, runtime->GetDeviceSpec()), er, networkProperties); @@ -99,73 +99,233 @@ TEST_CASE("RuntimePreImportInputs") ConstTensor inputTensor1({{4}, armnn::DataType::Signed32}, inputData1.data()); ConstTensor inputTensor2({{4}, armnn::DataType::Signed32}, inputData2.data()); - Tensor outputTensor({{4}, armnn::DataType::Signed32}, output.data()); - auto importedInputVec1 = runtime->ImportInputs(networkIdentifier1, {{0, inputTensor1}}); + auto importedInputVec1 = runtime->ImportInputs(networkId, {{0, inputTensor1}}); CHECK(importedInputVec1.size() == 1); CHECK(importedInputVec1[0] == 0); - auto memHandle = runtime->CreateWorkingMemHandle(networkIdentifier1); + auto memHandle = runtime->CreateWorkingMemHandle(networkId); runtime->Execute(*memHandle.get(), {{1, inputTensor2}}, {{2, outputTensor}}, {0 /* pre-imported id */}); - for (auto val : output) - { + for (auto val: output) { CHECK(val == 30); } - auto importedInputVec2 = runtime->ImportInputs(networkIdentifier1, {{1, inputTensor2}}); + auto importedInputVec2 = runtime->ImportInputs(networkId, {{1, inputTensor2}}); CHECK(importedInputVec2.size() == 1); CHECK(importedInputVec2[0] == 1); runtime->Execute(*memHandle.get(), {{0, inputTensor1}}, {{2, outputTensor}}, {1 /* pre-imported id */}); - for (auto val : output) - { + for (auto val: output) { CHECK(val == 30); } runtime->Execute(*memHandle.get(), {}, {{2, outputTensor}}, {0, 1}); - for (auto val : output) - { + for (auto val: output) { CHECK(val == 30); } - // Duplicate ImportedInputId and LayerBindingId - CHECK_THROWS_AS(runtime->Execute(*memHandle.get(),{},{{2, outputTensor}},{0, 0}); - , armnn::InvalidArgumentException); - + CHECK_THROWS_AS(runtime->Execute(*memHandle.get(), {}, {{2, outputTensor}}, {0, 0});, + armnn::InvalidArgumentException); // Duplicate LayerBindingId - CHECK_THROWS_AS(runtime->Execute(*memHandle.get(), {{1, inputTensor2}}, {{2, 
outputTensor}},{1}); - , armnn::InvalidArgumentException); - + CHECK_THROWS_AS(runtime->Execute(*memHandle.get(), {{1, inputTensor2}}, {{2, outputTensor}}, {1});, + armnn::InvalidArgumentException); // Incorrect ImportedInputId - CHECK_THROWS_AS(runtime->Execute(*memHandle.get(), {{1, inputTensor2}}, {{2, outputTensor}},{10}); - , armnn::InvalidArgumentException); - + CHECK_THROWS_AS(runtime->Execute(*memHandle.get(), {{1, inputTensor2}}, {{2, outputTensor}}, {10});, + armnn::InvalidArgumentException); // Incorrect LayerBindingId - CHECK_THROWS_AS(runtime->Execute(*memHandle.get(), {{-2, inputTensor2}}, {{2, outputTensor}},{1}); - , armnn::InvalidArgumentException); - + CHECK_THROWS_AS(runtime->Execute(*memHandle.get(), {{-2, inputTensor2}}, {{2, outputTensor}}, {1});, + armnn::InvalidArgumentException); // Incorrect layer binding id and ImportedInputId - CHECK_THROWS_AS(runtime->Execute(*memHandle.get(), {{-2, inputTensor2}}, {{2, outputTensor}},{10}); - , armnn::InvalidArgumentException); - - - auto importedInputVec3 = runtime->ImportInputs(networkIdentifier1, {{1, inputTensor2}}); + CHECK_THROWS_AS(runtime->Execute(*memHandle.get(), {{-2, inputTensor2}}, {{2, outputTensor}}, {10});, + armnn::InvalidArgumentException); + auto importedInputVec3 = runtime->ImportInputs(networkId, {{1, inputTensor2}}); CHECK(importedInputVec3[0] == 2); // Too many ImportedInputIds - CHECK_THROWS_AS(runtime->Execute(*memHandle.get(), {}, {{2, outputTensor}},{0, 1, 2}); - , armnn::InvalidArgumentException); - + CHECK_THROWS_AS(runtime->Execute(*memHandle.get(), {}, {{2, outputTensor}}, {0, 1, 2});, + armnn::InvalidArgumentException); // Too many InputTensors CHECK_THROWS_AS(runtime->Execute(*memHandle.get(), - {{0, inputTensor2}, {1, inputTensor2}, {2, inputTensor2}}, - {{2, outputTensor}}); - , armnn::InvalidArgumentException); - + {{0, inputTensor2}, + {1, inputTensor2}, + {2, inputTensor2}}, + {{2, outputTensor}});, armnn::InvalidArgumentException); // Too few ImportedInputIds - 
CHECK_THROWS_AS(runtime->Execute(*memHandle.get(), {}, {{2, outputTensor}},{0}); - , armnn::InvalidArgumentException); + CHECK_THROWS_AS(runtime->Execute(*memHandle.get(), {}, {{2, outputTensor}}, {0});, + armnn::InvalidArgumentException); + runtime->ClearImportedInputs(networkId, {1}); + runtime->Execute(*memHandle.get(), {{1, inputTensor2}}, {{2, outputTensor}}, {0}, {}); + for (auto val: output) { + CHECK(val == 30); + } + // Using deleted pre-imported input + CHECK_THROWS_AS(runtime->Execute(*memHandle.get(), {}, {{2, outputTensor}}, {0, 1}, {});, + armnn::InvalidArgumentException); + + // Trying to delete deleted pre-imported tensor + CHECK_THROWS_AS(runtime->ClearImportedInputs(networkId, {1});, armnn::InvalidArgumentException); + + // Trying to delete unknown pre-imported tensor + CHECK_THROWS_AS(runtime->ClearImportedInputs(networkId, {10});, armnn::InvalidArgumentException); +} +TEST_CASE("RuntimePreImportOutputs") +{ + armnn::IRuntime::CreationOptions options; + armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options)); + + armnn::NetworkId networkId = 1; + + armnn::INetworkPtr testNetwork(armnn::INetwork::Create()); + TensorInfo tensorInfo{{4}, armnn::DataType::Float32}; + + auto inputLayer1 = testNetwork->AddInputLayer(0, "input 1 layer"); + inputLayer1->GetOutputSlot(0).SetTensorInfo(tensorInfo); + + ActivationDescriptor activationDescriptor; + activationDescriptor.m_Function = ActivationFunction::BoundedReLu; + activationDescriptor.m_A = 2.0f; + activationDescriptor.m_B = 0.0f; + auto activationLayer1 = testNetwork->AddActivationLayer(activationDescriptor, "add layer"); + auto outputLayer1 = testNetwork->AddOutputLayer(2, "output layer"); + + inputLayer1->GetOutputSlot(0).Connect(activationLayer1->GetInputSlot(0)); + + activationLayer1->GetOutputSlot(0).Connect(outputLayer1->GetInputSlot(0)); + activationLayer1->GetOutputSlot(0).SetTensorInfo(tensorInfo); + + auto inputLayer2 = testNetwork->AddInputLayer(1, "input 1 layer"); + + 
activationDescriptor.m_A = 4.0f; + activationDescriptor.m_B = 2.0f; + auto activationLayer2 = testNetwork->AddActivationLayer(activationDescriptor, "add layer"); + auto outputLayer2 = testNetwork->AddOutputLayer(3, "output layer"); + + inputLayer2->GetOutputSlot(0).Connect(activationLayer2->GetInputSlot(0)); + inputLayer2->GetOutputSlot(0).SetTensorInfo(tensorInfo); + + activationLayer2->GetOutputSlot(0).Connect(outputLayer2->GetInputSlot(0)); + activationLayer2->GetOutputSlot(0).SetTensorInfo(tensorInfo); + + std::vector backends = { armnn::Compute::CpuRef }; + + std::string er; + armnn::INetworkProperties networkProperties(true, MemorySource::Malloc, MemorySource::Malloc); + runtime->LoadNetwork(networkId, + Optimize(*testNetwork, backends, runtime->GetDeviceSpec()), + er, + networkProperties); + + std::vector inputData1(4, 1.0f); + std::vector inputData2(4, 3.0f); + + std::vector outputData1(4); + std::vector outputData2(4); + + ConstTensor inputTensor1(tensorInfo, inputData1.data()); + ConstTensor inputTensor2(tensorInfo, inputData2.data()); + + Tensor outputTensor1{tensorInfo, outputData1.data()}; + Tensor outputTensor2{tensorInfo, outputData2.data()}; + + InputTensors inputTensors = {{0, inputTensor1}, {1, inputTensor2}}; + + std::pair output1{2, outputTensor1}; + std::pair output2{3, outputTensor2}; + + auto testOutputs = [&]() + { + for (auto val : outputData1) + { + CHECK(val == 1.0f); + } + + for (auto val : outputData2) + { + CHECK(val == 3.0f); + } + }; + + auto memHandle = runtime->CreateWorkingMemHandle(networkId); + + runtime->Execute(*memHandle.get(),inputTensors, {output1, output2}); + testOutputs(); + + auto importedOutputVec = runtime->ImportOutputs(networkId, {output1, output2 }); + CHECK(importedOutputVec.size() == 2); + CHECK(importedOutputVec[0] == 0); + CHECK(importedOutputVec[1] == 1); + + runtime->Execute(*memHandle.get(), inputTensors, {}, {}, importedOutputVec); + testOutputs(); + + runtime->Execute(*memHandle.get(), inputTensors, 
{output1}, {}, {1}); + testOutputs(); + + runtime->Execute(*memHandle.get(), inputTensors, {output2}, {}, {0}); + testOutputs(); + + auto importedInputVec = runtime->ImportInputs(networkId, inputTensors); + CHECK(importedInputVec.size() == 2); + CHECK(importedInputVec[0] == 0); + CHECK(importedInputVec[1] == 1); + + runtime->Execute(*memHandle.get(), {}, {}, importedInputVec, importedOutputVec); + testOutputs(); + + runtime->Execute(*memHandle.get(), {{0, inputTensor1}}, {output2}, {1}, {0}); + testOutputs(); + + // Too many ids + CHECK_THROWS_AS(runtime->Execute(*memHandle.get(), inputTensors, {output1, output2}, {}, {0, 1});, + armnn::InvalidArgumentException); + + // Duplicate ids + CHECK_THROWS_AS(runtime->Execute(*memHandle.get(), inputTensors, {output2}, {}, {1});, + armnn::InvalidArgumentException); + + // Duplicate ids + CHECK_THROWS_AS(runtime->Execute(*memHandle.get(), inputTensors, {output1, output1}, {}, {});, + armnn::InvalidArgumentException); + + // Duplicate ids + CHECK_THROWS_AS(runtime->Execute(*memHandle.get(), inputTensors, {}, {}, {0, 0}), + armnn::InvalidArgumentException); + + // Unknown id + CHECK_THROWS_AS(runtime->Execute(*memHandle.get(), inputTensors, {output1}, {}, {3});, + armnn::InvalidArgumentException); + + // Unknown id + CHECK_THROWS_AS(runtime->Execute(*memHandle.get(), inputTensors, {{4, outputTensor2}}, {}, {1});, + armnn::InvalidArgumentException); + + // Input id for output + CHECK_THROWS_AS(runtime->Execute(*memHandle.get(), inputTensors, {{0, outputTensor2}}, {}, {1});, + armnn::InvalidArgumentException); + + // Input id for output + CHECK_THROWS_AS(runtime->Execute(*memHandle.get(), inputTensors, {{0, outputTensor2}}, {}, {1});, + armnn::InvalidArgumentException); + + // Output id for input + CHECK_THROWS_AS(runtime->Execute(*memHandle.get(), {{2, inputTensor1}}, {{0, outputTensor2}}, {1}, {1, 0});, + armnn::InvalidArgumentException); + + runtime->ClearImportedOutputs(networkId, {1}); + + runtime->Execute(*memHandle.get(), 
inputTensors, {output2}, {}, {0}); + testOutputs(); + + // Trying to use deleted pre-imported tensor + CHECK_THROWS_AS(runtime->Execute(*memHandle.get(), inputTensors, {}, {}, importedOutputVec), + armnn::InvalidArgumentException); + + // Trying to delete deleted pre-imported tensor + CHECK_THROWS_AS(runtime->ClearImportedOutputs(networkId, {1});, armnn::InvalidArgumentException); + + // Trying to delete unknown pre-imported tensor + CHECK_THROWS_AS(runtime->ClearImportedOutputs(networkId, {10});, armnn::InvalidArgumentException); } // Note: the current builds we don't do valgrind and gperftools based leak checking at the same diff --git a/src/backends/backendsCommon/test/CommonTestUtils.hpp b/src/backends/backendsCommon/test/CommonTestUtils.hpp index c7537f1eed..07523d73c4 100644 --- a/src/backends/backendsCommon/test/CommonTestUtils.hpp +++ b/src/backends/backendsCommon/test/CommonTestUtils.hpp @@ -56,7 +56,7 @@ bool Contains(const MapType& map, const typename MapType::key_type& key) // Utility template for comparing tensor elements template> -bool Compare(T a, T b, float tolerance = 0.000001f) +inline bool Compare(T a, T b, float tolerance = 0.000001f) { if (ArmnnType == armnn::DataType::Boolean) { diff --git a/src/backends/backendsCommon/test/CompatibilityTests.cpp b/src/backends/backendsCommon/test/CompatibilityTests.cpp index e1462e0d2e..d18a8fbb6c 100644 --- a/src/backends/backendsCommon/test/CompatibilityTests.cpp +++ b/src/backends/backendsCommon/test/CompatibilityTests.cpp @@ -166,8 +166,8 @@ TEST_CASE("Ref_Backends_Unknown_Capability_Test") CHECK(!armnn::HasCapability(AsyncExecutionString, refCapabilities)); CHECK(!armnn::HasCapability("Telekinesis", refCapabilities)); - armnn::BackendOptions::BackendOption unkownCapability{"Telekinesis", true}; - CHECK(!armnn::HasCapability(unkownCapability, refCapabilities)); + armnn::BackendOptions::BackendOption unknownCapability{"Telekinesis", true}; + CHECK(!armnn::HasCapability(unknownCapability, 
refCapabilities)); } TEST_CASE ("Ref_Backends_Capability_Test") @@ -180,7 +180,7 @@ TEST_CASE ("Ref_Backends_Capability_Test") {"AsyncExecution", true}, {"ProtectedContentAllocation", false}, {"ConstantTensorsAsInputs", true}, - {"PreImportIOTensors", false}, + {"PreImportIOTensors", true}, {"ExternallyManagedMemory", false}, {"MultiAxisPacking", false}}); } diff --git a/src/backends/reference/RefBackend.hpp b/src/backends/reference/RefBackend.hpp index 93a1cf4844..4d4aba980b 100644 --- a/src/backends/reference/RefBackend.hpp +++ b/src/backends/reference/RefBackend.hpp @@ -15,7 +15,7 @@ const BackendCapabilities cpuRefCapabilities("CpuRef", {"AsyncExecution", true}, {"ProtectedContentAllocation", false}, {"ConstantTensorsAsInputs", true}, - {"PreImportIOTensors", false}, + {"PreImportIOTensors", true}, {"ExternallyManagedMemory", false}, {"MultiAxisPacking", false}, {"SingleAxisPacking", true} -- cgit v1.2.1