diff options
author | Mike Kelly <mike.kelly@arm.com> | 2021-04-07 20:10:49 +0100 |
---|---|---|
committer | finn.williams <finn.williams@arm.com> | 2021-04-08 11:23:47 +0000 |
commit | 55a8ffda24fff5515803df10fb4863d46a1effdf (patch) | |
tree | e314dea48f22ae88d452527b2decaca61df108ad /src/armnn/AsyncNetwork.cpp | |
parent | b76eaed55a89330b3b448c4f4522b3fc94a4f38d (diff) | |
download | armnn-55a8ffda24fff5515803df10fb4863d46a1effdf.tar.gz |
IVGCVSW-5823 Refactor Async Network API
* Moved IAsyncNetwork into IRuntime.
* All LoadedNetworks can be executed Asynchronously.
Signed-off-by: Mike Kelly <mike.kelly@arm.com>
Change-Id: Ibbc901ab9110dc2f881425b75489bccf9ad54169
Diffstat (limited to 'src/armnn/AsyncNetwork.cpp')
-rw-r--r-- | src/armnn/AsyncNetwork.cpp | 665 |
1 files changed, 0 insertions, 665 deletions
diff --git a/src/armnn/AsyncNetwork.cpp b/src/armnn/AsyncNetwork.cpp deleted file mode 100644 index 230346a0c3..0000000000 --- a/src/armnn/AsyncNetwork.cpp +++ /dev/null @@ -1,665 +0,0 @@ -// -// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "AsyncNetwork.hpp" -#include "Graph.hpp" -#include "Layer.hpp" -#include "Profiling.hpp" - -#include <armnn/BackendHelper.hpp> -#include <armnn/BackendRegistry.hpp> -#include <armnn/Logging.hpp> -#include <armnn/utility/Assert.hpp> - -#include <armnn/backends/IMemoryManager.hpp> -#include <backendsCommon/CpuTensorHandle.hpp> -#include <backendsCommon/WorkloadData.hpp> -#include <backendsCommon/MemCopyWorkload.hpp> -#include <LabelsAndEventClasses.hpp> - -#include <fmt/format.h> - -namespace armnn -{ - -namespace experimental -{ - -IAsyncNetwork::IAsyncNetwork(std::unique_ptr<IOptimizedNetwork> net, - const INetworkProperties& networkProperties, - profiling::ProfilingService& profilingService) - : pAsyncNetworkImpl( new AsyncNetworkImpl(std::move(net), networkProperties, profilingService)) {}; - -IAsyncNetwork::~IAsyncNetwork() = default; - -TensorInfo IAsyncNetwork::GetInputTensorInfo(LayerBindingId layerId) const -{ - return pAsyncNetworkImpl->GetInputTensorInfo(layerId); -} - -TensorInfo IAsyncNetwork::GetOutputTensorInfo(LayerBindingId layerId) const -{ - return pAsyncNetworkImpl->GetOutputTensorInfo(layerId); -} - -Status IAsyncNetwork::Execute(const InputTensors& inputTensors, - const OutputTensors& outputTensors, - IWorkingMemHandle& workingMemHandle) -{ - return pAsyncNetworkImpl->Execute(inputTensors, outputTensors, workingMemHandle); -} - -std::unique_ptr<IWorkingMemHandle> IAsyncNetwork::CreateWorkingMemHandle() -{ - return pAsyncNetworkImpl->CreateWorkingMemHandle(); -} - -std::shared_ptr<IProfiler> IAsyncNetwork::GetProfiler() const -{ - return pAsyncNetworkImpl->GetProfiler(); -} - -void IAsyncNetwork::RegisterDebugCallback(const DebugCallbackFunction& func) -{ - pAsyncNetworkImpl->RegisterDebugCallback(func); -} - -void AddLayerStructure(std::unique_ptr<profiling::TimelineUtilityMethods>& timelineUtils, - const Layer& layer, - profiling::ProfilingGuid networkGuid) -{ - // Add layer to the post-optimisation network structure - std::string layerName = layer.GetNameStr().empty() ? "<Unnamed>" : layer.GetNameStr(); - timelineUtils->CreateNamedTypedChildEntity(layer.GetGuid(), - networkGuid, - layerName, - profiling::LabelsAndEventClasses::LAYER_GUID); - for (auto&& input : layer.GetInputSlots()) - { - const IOutputSlot* source = input.GetConnectedOutputSlot(); - ARMNN_ASSERT(source != NULL); - timelineUtils->CreateConnectionRelationship(profiling::ProfilingRelationshipType::RetentionLink, - source->GetOwningLayerGuid(), - layer.GetGuid()); - } -} - -void AddWorkloadStructure(std::unique_ptr<profiling::TimelineUtilityMethods>& timelineUtils, - std::unique_ptr<IWorkload>& workload, - const Layer& layer) -{ - // Add workload to the post-optimisation network structure - timelineUtils->CreateTypedEntity(workload->GetGuid(), profiling::LabelsAndEventClasses::WORKLOAD_GUID); - timelineUtils->MarkEntityWithLabel(workload->GetGuid(), - layer.GetBackendId().Get(), - profiling::LabelsAndEventClasses::BACKENDID_GUID); - - // Link the workload to the layer - timelineUtils->CreateRelationship(profiling::ProfilingRelationshipType::RetentionLink, - layer.GetGuid(), - workload->GetGuid(), - profiling::LabelsAndEventClasses::CHILD_GUID); -} - -TensorInfo AsyncNetworkImpl::GetInputTensorInfo(LayerBindingId layerId) const -{ - for (auto&& inputLayer : m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetInputLayers()) - { - ARMNN_ASSERT_MSG(inputLayer->GetNumOutputSlots() == 1, "Input layer should have exactly 1 output slot"); - if (inputLayer->GetBindingId() == layerId) - { - return inputLayer->GetOutputSlot(0).GetTensorInfo(); - } - } - - throw InvalidArgumentException(fmt::format("No input layer is associated with id {0}}", layerId)); -} - -TensorInfo AsyncNetworkImpl::GetOutputTensorInfo(LayerBindingId layerId) const -{ - for (auto&& outputLayer : m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetOutputLayers()) - { - ARMNN_ASSERT_MSG(outputLayer->GetNumInputSlots() == 1, "Output layer should have exactly 1 input slot"); - ARMNN_ASSERT_MSG(outputLayer->GetInputSlot(0).GetConnection(), "Input slot on Output layer must be connected"); - if (outputLayer->GetBindingId() == layerId) - { - return outputLayer->GetInputSlot(0).GetConnection()->GetTensorInfo(); - } - } - - throw InvalidArgumentException(fmt::format("No output layer is associated with id {0}}", layerId)); -} - -// Need something like the collectors to get the correct tensors for the inputs -void AsyncNetworkImpl::CollectInputTensorHandles( - std::unordered_map<LayerGuid, std::vector<ITensorHandle*> >& tensorHandles, - std::vector<ITensorHandle*>& inputs, - const armnn::Layer* layer, - const TensorHandleFactoryRegistry& registry, - const bool isMemoryManaged) -{ - for (auto&& inputSlot : layer->GetInputSlots()) - { - // The graph must be well-formed at this point. - ARMNN_ASSERT(inputSlot.GetConnection()); - auto outputSlot = inputSlot.GetConnectedOutputSlot(); - auto key = outputSlot->GetOwningLayer().GetGuid(); - auto search = tensorHandles.find(key); - - if (search == tensorHandles.end()) - { - ITensorHandleFactory::FactoryId factoryId = outputSlot->GetTensorHandleFactoryId(); - const TensorInfo& tensorInfo = outputSlot->GetTensorInfo(); - - ARMNN_ASSERT(factoryId != ITensorHandleFactory::LegacyFactoryId); - ITensorHandleFactory* handleFactory = registry.GetFactory(factoryId); - ARMNN_ASSERT(handleFactory); - std::unique_ptr<ITensorHandle> tensor = handleFactory->CreateTensorHandle(tensorInfo, isMemoryManaged); - ITensorHandle* tensorPtr = tensor.release(); - inputs.push_back(tensorPtr); - } - else - { - unsigned int index = outputSlot->CalculateIndexOnOwner(); - inputs.push_back(search->second[index]); - } - } -} - -void AsyncNetworkImpl::CreateOutputTensorHandles( - std::unordered_map<LayerGuid, std::vector<ITensorHandle*> >& tensorHandles, - std::vector<ITensorHandle*>& outputs, - const armnn::Layer* layer, - const TensorHandleFactoryRegistry& registry, - const bool isMemoryManaged) -{ - auto guid = layer->GetGuid(); - std::vector<ITensorHandle*> tensorHandleVectors; - tensorHandleVectors.reserve(layer->GetNumOutputSlots()); - - for (unsigned int idx=0; idx < layer->GetNumOutputSlots(); idx++) - { - const OutputSlot& slot = layer->GetOutputSlot(idx); - ITensorHandleFactory::FactoryId factoryId = slot.GetTensorHandleFactoryId(); - const TensorInfo& tensorInfo = slot.GetTensorInfo(); - - ARMNN_ASSERT(factoryId != ITensorHandleFactory::LegacyFactoryId); - ITensorHandleFactory* handleFactory = registry.GetFactory(factoryId); - ARMNN_ASSERT(handleFactory); - std::unique_ptr<ITensorHandle> tensor = handleFactory->CreateTensorHandle(tensorInfo, isMemoryManaged); - ITensorHandle* tensorPtr = tensor.release(); - outputs.push_back(tensorPtr); - tensorHandleVectors.push_back(tensorPtr); - } - tensorHandles.insert({guid, tensorHandleVectors}); -} - -const IWorkloadFactory& AsyncNetworkImpl::GetWorkloadFactory(const Layer& layer) const -{ - const IWorkloadFactory* workloadFactory = nullptr; - - auto it = m_WorkloadFactories.find(layer.GetBackendId()); - if (it == m_WorkloadFactories.end()) - { - throw RuntimeException( - fmt::format("No workload factory for {0} to be used for layer: {1}}", - layer.GetBackendId().Get(), - layer.GetNameStr()), - CHECK_LOCATION()); - } - - workloadFactory = it->second.first.get(); - - ARMNN_ASSERT_MSG(workloadFactory, "No workload factory"); - - std::string reasonIfUnsupported; - ARMNN_ASSERT_MSG(IWorkloadFactory::IsLayerSupported(layer, {}, reasonIfUnsupported), - "Factory does not support layer"); - IgnoreUnused(reasonIfUnsupported); - return *workloadFactory; -} - -void AsyncNetworkImpl::EnqueueInput(const BindableLayer& layer, - const ConstTensor& inputTensor, - WorkingMemHandle& context) -{ - if (layer.GetType() != LayerType::Input) - { - throw InvalidArgumentException("EnqueueInput: given layer not an InputLayer"); - } - LayerGuid id = layer.GetOutputSlot(0).GetConnection(0)->GetOwningLayer().GetGuid(); - WorkingMemDescriptor descriptor = context.GetWorkingMemDescriptor(id); - ARMNN_ASSERT_MSG(descriptor.m_Outputs.size() == 1, "Can only handle Input Layer with one output"); - - MemorySourceFlags importFlags = descriptor.m_Outputs[0]->GetImportFlags(); - if (m_NetworkProperties.m_ImportEnabled) // Try import the input tensor - { - if (CheckFlag(importFlags, MemorySource::Malloc) ) - { - // This assumes a CPU Tensor handle - std::unique_ptr<ITensorHandle> tensorHandle = - std::make_unique<ConstPassthroughCpuTensorHandle>(inputTensor.GetInfo(), - inputTensor.GetMemoryArea()); - - void* mem = tensorHandle->Map(false); - if (descriptor.m_Outputs[0]->Import(mem, MemorySource::Malloc)) - { - tensorHandle->Unmap(); - return; - } - tensorHandle->Unmap(); - throw MemoryImportException("EnqueueInput: Memory Import failed"); - } - else - { - throw MemoryImportException("EnqueueInput: Memory Import failed, backend does not support Import"); - } - } - else - { - std::unique_ptr<ITensorHandle> tensorHandle = - std::make_unique<ConstPassthroughCpuTensorHandle>(inputTensor.GetInfo(), inputTensor.GetMemoryArea()); - - auto copyFunc = [](void* dst, const void* src, size_t size) - { - memcpy(dst, src, size); - }; - - for (const auto& input : descriptor.m_Inputs) - { - CopyTensorContentsGeneric(tensorHandle.get(), input, copyFunc); - } - } -} - -void AsyncNetworkImpl::EnqueueOutput(const BindableLayer& layer, const Tensor& outputTensor, WorkingMemHandle& handle) -{ - if (layer.GetType() != LayerType::Output) - { - throw InvalidArgumentException("EnqueueOutput: given layer not an OutputLayer"); - } - ARMNN_ASSERT_MSG(layer.GetNumInputSlots() == 1, "Output Layer should have exactly one input."); - - LayerGuid id = layer.GetInputSlot(0).GetConnectedOutputSlot()->GetOwningLayerGuid(); - WorkingMemDescriptor descriptor = handle.GetWorkingMemDescriptor(id); - - ITensorHandle* inputTensorHandle = descriptor.m_Inputs[0]; - ARMNN_ASSERT_MSG(inputTensorHandle != nullptr, "Data should have been allocated."); - - // Try import the output tensor. - // Note: We can only import the output pointer if all of the following hold true: - // a) The imported pointer is aligned sufficiently - // b) The tensor has zero padding - // c) There is only one connection to the OutputSlot and it is to an OutputLayer. - // d) The output pointer is allocated via malloc. (Other types will be supported in a later release) - // e) m_IsExportEnabled must be set to true - if (m_NetworkProperties.m_ExportEnabled && - (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetNumConnections() == 1)) - { - if (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOwningLayer().GetType() != LayerType::Input) - { - MemorySourceFlags importFlags = inputTensorHandle->GetImportFlags(); - if (CheckFlag(importFlags, MemorySource::Malloc)) - { - std::unique_ptr<ITensorHandle> tensorHandle = - std::make_unique<PassthroughCpuTensorHandle>(outputTensor.GetInfo(), - outputTensor.GetMemoryArea()); - - void* mem = tensorHandle->Map(false); - bool importOk = inputTensorHandle->Import(mem, MemorySource::Malloc); - tensorHandle->Unmap(); - - if (importOk) - { - ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "SyncMemGeneric_Execute"); - descriptor.m_Inputs[0]->Map(true); - descriptor.m_Inputs[0]->Unmap(); - } - else - { - throw MemoryExportException("EnqueueOutput: Memory Export failed"); - } - } - else - { - throw MemoryExportException("EnqueueOutput: Memory Export failed, backend does not support Export"); - } - } - else - { - throw MemoryExportException("EnqueueOutput: Memory Export failed, attempting to export Input Layer"); - } - } - else - { - auto copyFunc = [](void* dst, const void* src, size_t size) - { - memcpy(dst, src, size); - }; - - std::unique_ptr<ITensorHandle> tensorHandle = - std::make_unique<PassthroughCpuTensorHandle>(outputTensor.GetInfo(), outputTensor.GetMemoryArea()); - - CopyTensorContentsGeneric(descriptor.m_Outputs[0], tensorHandle.get(), copyFunc); - } -} - -AsyncNetworkImpl::AsyncNetworkImpl(std::unique_ptr<IOptimizedNetwork> net, - const INetworkProperties& networkProperties, - profiling::ProfilingService& profilingService) : - m_OptimizedNetwork(std::move(net)), - m_NetworkProperties(networkProperties), - m_ProfilingService(profilingService) -{ - // Create a profiler and register it for the current thread. - m_Profiler = std::make_shared<IProfiler>(); - ProfilerManager::GetInstance().RegisterProfiler(m_Profiler.get()); - - Graph &order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort(); - - //First create tensor handlers, backends and workload factories. - //Handlers are created before workloads are. - //Because workload creation can modify some of the handlers, - //(for example the splitter and concat layers). - for (auto &&layer : order) - { - auto const &backendId = layer->GetBackendId(); - if (m_Backends.count(backendId) == 0) - { - auto createBackend = BackendRegistryInstance().GetFactory(backendId); - auto it = m_Backends.emplace(std::make_pair(backendId, createBackend())); - - IBackendInternal* backend = it.first->second.get(); - - if (backend->SupportsTensorAllocatorAPI()) - { - backend->RegisterTensorHandleFactories(m_TensorHandleFactoryRegistry); - - auto workloadFactory = backend->CreateWorkloadFactory(m_TensorHandleFactoryRegistry); - m_WorkloadFactories.emplace( - std::make_pair(backendId, std::make_pair(std::move(workloadFactory), nullptr))); - } - else - { - IBackendInternal::IMemoryManagerSharedPtr memoryManager = backend->CreateMemoryManager(); - auto workloadFactory = backend->CreateWorkloadFactory(memoryManager); - - m_WorkloadFactories.emplace( - std::make_pair(backendId, std::make_pair(std::move(workloadFactory), memoryManager))); - } - } - } - - // Check backends support BackendCapability::AsyncExecution - for (auto const& backend : m_Backends) - { - if (!IsCapabilitySupported(backend.first, BackendCapability::AsyncExecution)) - { - ARMNN_LOG(warning) << fmt::format("AsyncNetworkImpl() Backend: '{0}' does not support Async Execution. " - "Will fall back to default implementation.", - backend.first.Get()); - } - - } - - profiling::ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid(); - std::unique_ptr<profiling::TimelineUtilityMethods> timelineUtils = - profiling::TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService); - if (timelineUtils) - { - timelineUtils->CreateTypedEntity(networkGuid, profiling::LabelsAndEventClasses::NETWORK_GUID); - } - - //Then create workloads. - for (auto &&layer : order) - { - if (timelineUtils) - { - // Add layer to the post-optimisation network structure - AddLayerStructure(timelineUtils, *layer, networkGuid); - } - - const IWorkloadFactory &workloadFactory = GetWorkloadFactory(*layer); - - switch (layer->GetType()) - { - case LayerType::Input: - case LayerType::Output: - { - // Inputs and outputs are treated in a special way - see EnqueueInput() and EnqueueOutput(). - break; - } - default: - { - auto workload = layer->CreateWorkload(workloadFactory); - - if (!workload) - { - const char* const layerName = - layer->GetNameStr().length() != 0 ? layer->GetName() : "<Unnamed>"; - throw InvalidArgumentException( - fmt::format("No workload created for layer (name: '{0}' type: '{1}') (compute '{2}')", - layerName, - static_cast<int>(layer->GetType()), - layer->GetBackendId().Get() - )); - } - - if (timelineUtils) - { - // Add workload to the post-optimisation network structure - AddWorkloadStructure(timelineUtils, workload, *layer); - } - - m_WorkloadQueue.push_back(move(workload)); - // release the constant data in the layer.. - layer->ReleaseConstantData(); - break; - } - } - } - - if (timelineUtils) - { - // Commit to send the post-optimisation network structure - timelineUtils->Commit(); - } - - // Now that the intermediate tensor memory has been set-up, do any post allocation configuration for each workload. - // PostAllocationConfiguure will now need to be handled in the ExecuteOn(WorkingMemDescriptor) - for (auto &workload : m_WorkloadQueue) - { - workload->PostAllocationConfigure(); - } -} - -Status AsyncNetworkImpl::Execute(const InputTensors& inputTensors, - const OutputTensors& outputTensors, - IWorkingMemHandle& iWorkingMemHandle) -{ - const Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph(); - - // Walk graph to determine the order of execution. - if (graph.GetNumLayers() < 2) - { - ARMNN_LOG(warning) << "IRuntime::EnqueueWorkload()::Less than two nodes in graph"; - return Status::Failure; - } - - if (graph.GetNumInputs() != inputTensors.size()) - { - throw InvalidArgumentException("Number of inputs provided does not match network."); - } - - std::unique_ptr<profiling::TimelineUtilityMethods> timelineUtils = - profiling::TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService); - profiling::ProfilingGuid inferenceGuid = m_ProfilingService.GetNextGuid(); - if (timelineUtils) - { - // Add inference timeline trace if profiling is enabled. - profiling::ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid(); - timelineUtils->CreateTypedEntity(inferenceGuid, profiling::LabelsAndEventClasses::INFERENCE_GUID); - timelineUtils->CreateRelationship(profiling::ProfilingRelationshipType::RetentionLink, - networkGuid, - inferenceGuid, - profiling::LabelsAndEventClasses::EXECUTION_OF_GUID); - timelineUtils->RecordEvent(inferenceGuid, profiling::LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS); - } - - bool executionSucceeded = true; - - if (timelineUtils) - { - // Add end of life of the inference timeline if profiling is enabled. - timelineUtils->RecordEvent(inferenceGuid, profiling::LabelsAndEventClasses::ARMNN_PROFILING_EOL_EVENT_CLASS); - timelineUtils->Commit(); - } - WorkingMemHandle& workingMemHandle = dynamic_cast<WorkingMemHandle&>(iWorkingMemHandle); - std::lock_guard<std::mutex> lockGuard(workingMemHandle.GetMutex()); - - if (!workingMemHandle.IsAllocated()) - { - workingMemHandle.Allocate(); - } - - { - ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "PrepareInputs"); - unsigned int i = 0; - - for (const BindableLayer* inputLayer : graph.GetInputLayers()) - { - EnqueueInput(*inputLayer, inputTensors[i].second, workingMemHandle); - ++i; - } - } - - auto Fail = [&](const std::exception& error) - { - ARMNN_LOG(error) << "An error occurred attempting to execute a workload: " << error.what(); - executionSucceeded = false; - }; - profiling::ProfilingDynamicGuid workloadInferenceID(0); - - try - { - for (unsigned int i = 0; i < m_WorkloadQueue.size(); ++i) - { - auto& workload = m_WorkloadQueue[i]; - if (timelineUtils) - { - workloadInferenceID = timelineUtils->RecordWorkloadInferenceAndStartOfLifeEvent(workload->GetGuid(), - inferenceGuid); - } - workload->ExecuteAsync(workingMemHandle.GetWorkingMemDescriptorAt(i)); - - if (timelineUtils) - { - timelineUtils->RecordEndOfLifeEvent(workloadInferenceID); - } - } - } - catch (const RuntimeException& error) - { - Fail(error); - } - catch (const std::runtime_error& error) - { - Fail(error); - } - // For each output to the network, call EnqueueOutput with the data passed by the user. - { - ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "PrepareOutputs"); - unsigned int i = static_cast<unsigned int>(m_WorkloadQueue.size() - graph.GetNumOutputs()); - - for (const BindableLayer* outputLayer : graph.GetOutputLayers()) - { - EnqueueOutput(*outputLayer, outputTensors[i].second, workingMemHandle); - ++i; - } - } - return executionSucceeded ? Status::Success : Status::Failure; -} - -/// Get the profiler used for this network -std::shared_ptr<IProfiler> AsyncNetworkImpl::GetProfiler() const -{ - return m_Profiler; -} - -void AsyncNetworkImpl::RegisterDebugCallback(const DebugCallbackFunction& func) -{ - for (auto&& workloadPtr: m_WorkloadQueue) - { - workloadPtr.get()->RegisterDebugCallback(func); - } -} - -/// Create a new unique WorkingMemHandle object. Create multiple handles if you wish to have -/// overlapped Execution by calling this function from different threads. -std::unique_ptr<IWorkingMemHandle> AsyncNetworkImpl::CreateWorkingMemHandle() -{ - Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph(); - std::unordered_map<LayerGuid, std::vector<ITensorHandle*> > tensorHandles; - std::vector<WorkingMemDescriptor> workingMemDescriptors; - std::unordered_map<LayerGuid, WorkingMemDescriptor> workingMemDescriptorMap; - - for (auto&& layer : order) - { - if (layer->GetType() == LayerType::Input || layer->GetType() == LayerType::Output) - { - continue; - } - WorkingMemDescriptor workingMemDescriptor; - // Look for the layer with 1 OutputSlot which has 1 connection and that connection is an Output Layer - // If Export is enabled disable memory management so we can export, otherwise we do a copy - if((layer->GetNumOutputSlots() == 1) && - (layer->GetOutputSlots()[0].GetNumConnections() == 1) && - (layer->GetOutputSlots()[0].GetConnection(0)->GetOwningLayer().GetType() == LayerType::Output)) - { - CollectInputTensorHandles(tensorHandles, - workingMemDescriptor.m_Inputs, - layer, - m_TensorHandleFactoryRegistry, - !m_NetworkProperties.m_ExportEnabled); - CreateOutputTensorHandles(tensorHandles, - workingMemDescriptor.m_Outputs, - layer, - m_TensorHandleFactoryRegistry, - !m_NetworkProperties.m_ExportEnabled); - } - else - { - CollectInputTensorHandles(tensorHandles, - workingMemDescriptor.m_Inputs, - layer, - m_TensorHandleFactoryRegistry); - CreateOutputTensorHandles(tensorHandles, - workingMemDescriptor.m_Outputs, - layer, - m_TensorHandleFactoryRegistry); - } - workingMemDescriptorMap.insert({layer->GetGuid(), workingMemDescriptor}); - workingMemDescriptors.push_back(workingMemDescriptor); - } - return std::make_unique<WorkingMemHandle>(workingMemDescriptors, workingMemDescriptorMap); -} - -void AsyncNetworkImpl::FreeWorkingMemory() -{ - // Informs the memory managers to release memory in it's respective memory group - for (auto&& workloadFactory : m_WorkloadFactories) - { - IBackendInternal::IMemoryManagerSharedPtr memoryManager = workloadFactory.second.second; - if (memoryManager) - { - memoryManager->Release(); - } - } - m_TensorHandleFactoryRegistry.ReleaseMemory(); -} - -} // end experimental namespace - -} // end armnn namespace |