ArmNN 22.08
LoadedNetwork.cpp
//
// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "LoadedNetwork.hpp"
#include "Layer.hpp"
#include "Graph.hpp"
#include "Profiling.hpp"
#include "HeapProfiling.hpp"
#include "WorkingMemHandle.hpp"
#include "ExecutionData.hpp"

#include <armnn/BackendHelper.hpp>
#include <armnn/BackendRegistry.hpp>
#include <armnn/Logging.hpp>

#include <armnn/backends/TensorHandle.hpp>
#include <armnn/backends/IBackendInternal.hpp>
#include <armnn/backends/IMemoryManager.hpp>
#include <armnn/backends/MemCopyWorkload.hpp>

#include <armnn/profiling/ArmNNProfiling.hpp>

#include <armnn/utility/Assert.hpp>

#include <backendsCommon/MemSyncWorkload.hpp>

#include <common/include/Processes.hpp>

#include <fmt/format.h>

namespace armnn
{

using namespace std;
using namespace arm::pipe;

namespace
{

template <typename ExceptionType>
std::string ToErrorMessage(const char* prefix, const ExceptionType& error)
{
    std::stringstream ss;
    ss << prefix << " " << error.what();
    return ss.str();
}

void AddLayerStructure(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
                       const Layer& layer,
                       ProfilingGuid networkGuid)
{
    // Add layer to the post-optimisation network structure
    std::string layerName = layer.GetNameStr().empty() ? "<Unnamed>" : layer.GetNameStr();
    timelineUtils->CreateNamedTypedChildEntity(layer.GetGuid(),
                                               networkGuid,
                                               layerName,
                                               LabelsAndEventClasses::LAYER_GUID);
    for (auto&& input : layer.GetInputSlots())
    {
        const IOutputSlot* source = input.GetConnectedOutputSlot();
        ARMNN_ASSERT(source != nullptr);
        timelineUtils->CreateConnectionRelationship(ProfilingRelationshipType::RetentionLink,
                                                    source->GetOwningLayerGuid(),
                                                    layer.GetGuid());
    }
}

void AddWorkloadStructure(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
                          std::unique_ptr<IWorkload>& workload,
                          const Layer& layer)
{
    // Add workload to the post-optimisation network structure
    timelineUtils->CreateTypedEntity(workload->GetGuid(), LabelsAndEventClasses::WORKLOAD_GUID);
    timelineUtils->MarkEntityWithLabel(workload->GetGuid(),
                                       layer.GetBackendId().Get(),
                                       LabelsAndEventClasses::BACKENDID_GUID);

    // Link the workload to the layer
    timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
                                      layer.GetGuid(),
                                      workload->GetGuid(),
                                      LabelsAndEventClasses::CHILD_GUID);
}

} // anonymous namespace

/**
 * This function performs a sanity check to ensure that the combination of input and output memory sources matches the
 * values for importEnabled and exportEnabled that were specified during optimization. During optimization the tensor
 * handle factories are chosen based on whether import and export are enabled. If the user then specifies something
 * incompatible here it can lead to problems.
 *
 * @param optimizedOptions The backend options captured on the optimized network during Optimize().
 * @param networkProperties The network properties supplied when loading the network.
 */
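// Illustrative sketch (not part of the original file): a configuration that fails this
// check. If the network was optimized with import disabled (the OptimizerOptions default)
// but is then loaded with an input source other than Undefined, loading throws.
// 'optimizerOptions' and 'props' are hypothetical local variables.
//
//     OptimizerOptions optimizerOptions;                // m_ImportEnabled defaults to false
//     // ... Optimize(network, backends, deviceSpec, optimizerOptions) ...
//     INetworkProperties props(/*asyncEnabled=*/false,
//                              MemorySource::Malloc,    // implies import: mismatch!
//                              MemorySource::Undefined);
//     // Loading with 'props' reaches this function, which throws InvalidArgumentException.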
void ValidateSourcesMatchOptimizedNetwork(std::vector<BackendOptions> optimizedOptions,
                                          const INetworkProperties& networkProperties)
{
    // Find the "Global" backend options. During the optimize phase the values of importEnabled and exportEnabled are
    // added as backend options.
    const vector<BackendOptions>::iterator& backendItr =
        find_if(optimizedOptions.begin(), optimizedOptions.end(), [](const BackendOptions& backend) {
            return backend.GetBackendId().Get() == "Global";
        });
    bool importEnabled = false;
    bool exportEnabled = false;
    if (backendItr != optimizedOptions.end())
    {
        // Find the importEnabled and exportEnabled values.
        for (size_t i = 0; i < backendItr->GetOptionCount(); i++)
        {
            const BackendOptions::BackendOption& option = backendItr->GetOption(i);
            if (option.GetName() == "ImportEnabled")
            {
                importEnabled = option.GetValue().AsBool();
            }
            if (option.GetName() == "ExportEnabled")
            {
                exportEnabled = option.GetValue().AsBool();
            }
        }
    }

    // Now that we have values for import and export compare them to the MemorySource variables.
    // Any value of MemorySource that's not "Undefined" implies that we need to do an import of some kind.
    if ((networkProperties.m_InputSource == MemorySource::Undefined && importEnabled) ||
        (networkProperties.m_InputSource != MemorySource::Undefined && !importEnabled))
    {
        auto message = fmt::format("The input memory source specified, '{0}',", networkProperties.m_InputSource);
        if (!importEnabled)
        {
            message.append(" requires that memory import be enabled. However, "
                           "it was disabled when this network was optimized.");
        }
        else
        {
            message.append(" requires that memory import be disabled. However, "
                           "it was enabled when this network was optimized.");
        }
        throw InvalidArgumentException(message);
    }

    if ((networkProperties.m_OutputSource == MemorySource::Undefined && exportEnabled) ||
        (networkProperties.m_OutputSource != MemorySource::Undefined && !exportEnabled))
    {
        auto message = fmt::format("The output memory source specified, '{0}',", networkProperties.m_OutputSource);
        if (!exportEnabled)
        {
            message.append(" requires that memory export be enabled. However, "
                           "it was disabled when this network was optimized.");
        }
        else
        {
            message.append(" requires that memory export be disabled. However, "
                           "it was enabled when this network was optimized.");
        }
        throw InvalidArgumentException(message);
    }
}

std::unique_ptr<LoadedNetwork> LoadedNetwork::MakeLoadedNetwork(std::unique_ptr<IOptimizedNetwork> net,
                                                                std::string& errorMessage,
                                                                const INetworkProperties& networkProperties,
                                                                arm::pipe::IProfilingService* profilingService)
{
    std::unique_ptr<LoadedNetwork> loadedNetwork;

    auto Fail = [&](const std::exception& error) -> std::unique_ptr<LoadedNetwork>
    {
        errorMessage = ToErrorMessage("An error occurred when preparing the network workloads: ", error);
        ARMNN_LOG(error) << errorMessage;

        return std::unique_ptr<LoadedNetwork>();
    };

    try
    {
        loadedNetwork.reset(new LoadedNetwork(std::move(net), networkProperties, profilingService));
    }
    catch (const armnn::RuntimeException& error)
    {
        return Fail(error);
    }
    catch (const armnn::Exception& error)
    {
        return Fail(error);
    }
    catch (const std::runtime_error& error)
    {
        return Fail(error);
    }

    return loadedNetwork;
}
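
// Usage sketch (not part of the original file): applications normally reach
// MakeLoadedNetwork() indirectly through IRuntime::LoadNetwork(). The runtime and
// optimized network below are hypothetical.
//
//     armnn::NetworkId networkId;
//     std::string errorMessage;
//     armnn::INetworkProperties props(/*asyncEnabled=*/false,
//                                     armnn::MemorySource::Undefined,
//                                     armnn::MemorySource::Undefined);
//     runtime->LoadNetwork(networkId, std::move(optimizedNet), errorMessage, props);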

LoadedNetwork::LoadedNetwork(std::unique_ptr<IOptimizedNetwork> net,
                             const INetworkProperties& networkProperties,
                             arm::pipe::IProfilingService* profilingService) :
                             m_OptimizedNetwork(std::move(net)),
                             m_NetworkProperties(networkProperties),
                             m_TensorHandleFactoryRegistry(),
                             m_ProfilingService(profilingService)
{
    // Get the profiler and register it for the current thread.
    const std::shared_ptr<IProfiler>& profiler = m_OptimizedNetwork->GetProfiler();
    ProfilerManager::GetInstance().RegisterProfiler(profiler.get());

    profiler->EnableProfiling(networkProperties.m_ProfilingEnabled);

    profiler->EnableNetworkDetailsToStdOut(networkProperties.m_OutputNetworkDetailsMethod);

    // We need to check that the memory sources match up with the values of import and export specified during the
    // optimize phase. If they don't this will throw an exception.
    ValidateSourcesMatchOptimizedNetwork(m_OptimizedNetwork.get()->pOptimizedNetworkImpl->GetModelOptions(),
                                         m_NetworkProperties);

    // First create tensor handlers, backends and workload factories.
    // Handlers are created before workloads because workload creation can modify
    // some of the handlers (for example the splitter and concat layers).

    bool useExternalMemoryManager = false;
    bool useInternalMemoryManager = false;
    Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
    // Ensure topological order
    order.SetLayersOutOfOrder();
    order.TopologicalSort();

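    // For synchronous execution, track per input/output whether the corresponding
    // tensor handle currently points at user-imported memory (see EnqueueWorkload()).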
    if (!networkProperties.m_AsyncEnabled)
    {
        m_IsInputImported = std::vector<bool>(order.GetNumInputs(), false);
        m_IsOutputImported = std::vector<bool>(order.GetNumOutputs(), false);
    }

    for (auto&& layer : order)
    {
        auto const& backendId = layer->GetBackendId();
        if (m_Backends.count(backendId) == 0)
        {
            auto createBackend = BackendRegistryInstance().GetFactory(backendId);
            auto it = m_Backends.emplace(std::make_pair(backendId, createBackend()));

            IBackendInternal* backend = it.first->second.get();

            // If we're doing async execution verify that the backend supports it and ExternallyManagedMemory.
            if (networkProperties.m_AsyncEnabled)
            {
                if (!HasCapability(BackendOptions::BackendOption{"AsyncExecution", true}, backend->GetCapabilities()))
                {
                    std::string er = backend->GetId();
                    er += " does not support AsyncExecution";
                    throw BackendCapabilityException(er);
                }
                if (!HasCapability(BackendOptions::BackendOption{"ExternallyManagedMemory", true},
                                   backend->GetCapabilities()))
                {
                    std::string er = backend->GetId();
                    er += " does not support ExternallyManagedMemory\n";
                    er += "AsyncEnabled networks require all backends to support ExternallyManagedMemory";
                    throw BackendCapabilityException(er);
                }
                m_SupportsExternallyManagedMemory[backend->GetId()] = true;
                useExternalMemoryManager = true;
            }
            else
            {
                m_SupportsExternallyManagedMemory[backend->GetId()] = false;
                useInternalMemoryManager = true;
            }

            IBackendInternal::IWorkloadFactoryPtr workloadFactory;
            if (backend->SupportsTensorAllocatorAPI())
            {
                workloadFactory = backend->CreateWorkloadFactory(
                    m_TensorHandleFactoryRegistry,
                    m_OptimizedNetwork->pOptimizedNetworkImpl->GetModelOptions(),
                    static_cast<MemorySourceFlags>(m_NetworkProperties.m_InputSource),
                    static_cast<MemorySourceFlags>(m_NetworkProperties.m_OutputSource));
            }
            else
            {
                m_BackendMemoryMangers.emplace_back(backend->CreateMemoryManager());
                workloadFactory = backend->CreateWorkloadFactory(
                    m_BackendMemoryMangers.back(), m_OptimizedNetwork->pOptimizedNetworkImpl->GetModelOptions());
            }
            m_WorkloadFactories[backendId] = std::move(workloadFactory);
        }
    }

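    // Tensor handles are memory-managed by their backend unless the memory is either
    // externally managed (async networks) or about to be imported/exported, in which
    // case the handles must be able to wrap user memory rather than own an allocation.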
    if (!networkProperties.m_AsyncEnabled)
    {
        for (auto&& layer : order)
        {
            auto& workloadFactory = GetWorkloadFactory(*layer);
            bool supportsExternalManager = m_SupportsExternallyManagedMemory[layer->GetBackendId()];

            switch (layer->GetType())
            {
                case LayerType::Input:
                case LayerType::MemImport:
                {
                    // If IsImportEnabled is true then we need to set IsMemoryManaged
                    // to false when creating TensorHandles
                    layer->CreateTensorHandles(m_TensorHandleFactoryRegistry,
                                               workloadFactory,
                                               !supportsExternalManager && !m_NetworkProperties.m_ImportEnabled);
                    break;
                }
                case LayerType::Constant:
                {
                    layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory, true);
                    break;
                }
                default:
                {
                    // Look for a layer with 1 OutputSlot which has 1 connection and that connection is an Output Layer.
                    // If Export is enabled disable memory management so we can export, otherwise we do a copy.
                    if ((layer->GetNumOutputSlots() == 1) &&
                        (layer->GetOutputSlots()[0].GetNumConnections() == 1) &&
                        (layer->GetOutputSlots()[0].GetConnection(0)->GetOwningLayer().GetType() == LayerType::Output))
                    {
                        layer->CreateTensorHandles(m_TensorHandleFactoryRegistry,
                                                   workloadFactory,
                                                   !supportsExternalManager && !m_NetworkProperties.m_ExportEnabled);
                    }
                    else
                    {
                        layer->CreateTensorHandles(m_TensorHandleFactoryRegistry,
                                                   workloadFactory,
                                                   !supportsExternalManager);
                    }
                }
            }
        }
    }

346 
347  ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
348  std::unique_ptr<TimelineUtilityMethods> timelineUtils =
349  TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
350  if (timelineUtils)
351  {
352  timelineUtils->CreateTypedEntity(networkGuid, LabelsAndEventClasses::NETWORK_GUID);
353  // Mark the network with a start of life event
354  timelineUtils->RecordEvent(networkGuid, LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
355  // and with the process ID
356  int processID = arm::pipe::GetCurrentProcessId();
357  std::stringstream ss;
358  ss << processID;
359  timelineUtils->MarkEntityWithLabel(networkGuid, ss.str(), LabelsAndEventClasses::PROCESS_ID_GUID);
360  }

    std::vector<IWorkload*> ConstWorkloads;

    // Then create workloads.
    {
        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "LoadNetwork_CreateWorkloads");
        for (auto&& layer : order)
        {
            if (timelineUtils)
            {
                // Add layer to the post-optimisation network structure
                AddLayerStructure(timelineUtils, *layer, networkGuid);
            }

            const IWorkloadFactory& workloadFactory = GetWorkloadFactory(*layer);

            switch (layer->GetType())
            {
                case LayerType::Input:
                case LayerType::Output:
                {
                    // Inputs and outputs are treated in a special way - see EnqueueInput() and EnqueueOutput().
                    break;
                }
                default:
                {
                    auto workload = layer->CreateWorkload(workloadFactory);

                    if (!workload)
                    {
                        const char* const layerName =
                            layer->GetNameStr().length() != 0 ? layer->GetName() : "<Unnamed>";
                        throw InvalidArgumentException(
                            fmt::format("No workload created for layer (name: '{0}' type: '{1}') (compute '{2}')",
                                        layerName, static_cast<int>(layer->GetType()), layer->GetBackendId().Get()
                        ));
                    }

                    if (timelineUtils)
                    {
                        // Add workload to the post-optimisation network structure
                        AddWorkloadStructure(timelineUtils, workload, *layer);
                    }

                    // For async networks ConstantWorkloads are managed exclusively by LoadedNetwork
                    // and are separated out from the other workloads
                    if ((networkProperties.m_AsyncEnabled || useExternalMemoryManager) &&
                        layer->GetType() == LayerType::Constant)
                    {
                        m_ConstantTensorHandles[layer->GetGuid()] =
                            layer->GetOutputSlot(0).GetOutputHandler().GetData();
                        m_ConstantWorkloads[layer->GetGuid()] = std::move(workload);
                    }
                    else
                    {
                        m_WorkloadQueue.push_back(std::move(workload));

                        if (layer->GetType() == LayerType::Constant)
                        {
                            // Place the Constant Workloads into a queue so that they can be executed first
                            ConstWorkloads.push_back(m_WorkloadQueue.back().get());
                        }
                    }
                    // Release the constant data in the layer.
                    layer->ReleaseConstantData();
                    break;
                }
            }
        }
    }

    // Gather information about workloads for inputs & outputs
    if (!networkProperties.m_AsyncEnabled && m_WorkloadQueue.size() != 0)
    {
        const int noOfInputs = armnn::numeric_cast<int>(order.GetNumInputs());

        // Get indices of all workloads connected to each input and
        // check if they support tensor handle replacement
        for (const BindableLayer* layer : order.GetInputLayers())
        {
            const auto bindingId = layer->GetBindingId();

            bool supportsReplacement = true;

            for (const auto inputSlot : layer->GetOutputSlot(0).GetConnections())
            {
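                // Input layers produce no workloads, so a layer's position in the
                // topologically sorted graph minus the number of inputs gives its
                // index into m_WorkloadQueue.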
                auto workloadIndex = std::distance(order.begin(), order.GetPosInGraph(inputSlot->GetOwningLayer()));
                workloadIndex -= noOfInputs;

                m_InputWorkloadSlotPairs[bindingId].emplace_back(WorkloadIndices{
                    armnn::numeric_cast<unsigned int>(workloadIndex), inputSlot->GetSlotIndex()});

                auto workload = m_WorkloadQueue[m_InputWorkloadSlotPairs[bindingId].back().m_WorkloadIndex].get();
                supportsReplacement &= workload->SupportsTensorHandleReplacement();
            }

            ITensorHandleFactory::FactoryId factoryId = layer->GetOutputSlot(0).GetTensorHandleFactoryId();
            // Get matching import factory Id
            ITensorHandleFactory::FactoryId importFactoryId =
                m_TensorHandleFactoryRegistry.GetMatchingImportFactoryId(factoryId);

            ITensorHandleFactory* importFactory = m_TensorHandleFactoryRegistry.GetFactory(importFactoryId);

            if (supportsReplacement && importFactory)
            {
                m_PreImportedInputHandles.emplace_back(
                    bindingId, importFactory->CreateTensorHandle(layer->GetOutputSlot(0).GetTensorInfo(), false));
            }
            else
            {
                m_PreImportedInputHandles.emplace_back(bindingId, nullptr);
            }
        }

        // Get indices of all workloads connected to each output and
        // check if they support tensor handle replacement
        for (const BindableLayer* layer : order.GetOutputLayers())
        {
            const auto bindingId = layer->GetBindingId();

            const auto outputSlot = layer->GetInputSlot(0).GetConnectedOutputSlot();
            auto& indices = m_OutputWorkloadSlotPairs[bindingId];

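            // Same graph-position-to-workload-index mapping as for the inputs above.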
            auto workloadIndex = std::distance(order.begin(), order.GetPosInGraph(outputSlot->GetOwningLayer()));
            workloadIndex -= noOfInputs;

            indices.m_OutputSlotIndices = WorkloadIndices{numeric_cast<unsigned int>(workloadIndex),
                                                          outputSlot->CalculateIndexOnOwner()};

            bool supportsReplacement = true;
            auto outputWorkload = m_WorkloadQueue[indices.m_OutputSlotIndices.m_WorkloadIndex].get();
            supportsReplacement &= outputWorkload->SupportsTensorHandleReplacement();

            for (auto& inputSlot : outputSlot->GetConnections())
            {
                if (inputSlot->GetOwningLayer().GetType() != LayerType::Output)
                {
                    auto inWorkloadIndex = std::distance(order.begin(),
                                                         order.GetPosInGraph(inputSlot->GetOwningLayer()));
                    inWorkloadIndex -= noOfInputs;
                    indices.m_InputSlotIndices.emplace_back(
                        WorkloadIndices{numeric_cast<unsigned int>(inWorkloadIndex), inputSlot->GetSlotIndex()});
                    auto inputWorkload = m_WorkloadQueue[indices.m_InputSlotIndices.back().m_WorkloadIndex].get();
                    supportsReplacement &= inputWorkload->SupportsTensorHandleReplacement();
                }
            }

            ITensorHandleFactory::FactoryId factoryId = outputSlot->GetTensorHandleFactoryId();
            // Get matching import factory Id
            ITensorHandleFactory::FactoryId importFactoryId =
                m_TensorHandleFactoryRegistry.GetMatchingImportFactoryId(factoryId);
            ITensorHandleFactory* importFactory = m_TensorHandleFactoryRegistry.GetFactory(importFactoryId);

            if (supportsReplacement && importFactory)
            {
                m_PreImportedOutputHandles.emplace_back(
                    bindingId, importFactory->CreateTensorHandle(outputSlot->GetTensorInfo(), false));
            }
            else
            {
                m_PreImportedOutputHandles.emplace_back(bindingId, nullptr);
            }
        }
    }

    for (auto&& workloadFactory : m_WorkloadFactories)
    {
        workloadFactory.second->AfterWorkloadsCreated();
    }

    if (timelineUtils)
    {
        // Commit to send the post-optimisation network structure
        timelineUtils->Commit();
    }

    if (useExternalMemoryManager)
    {
        if (networkProperties.m_AsyncEnabled)
        {
            CreateMemoryProfileAsync();
        }
        else
        {
            CreateMemoryProfile();
        }

        auto backendStrategyMap = BackendRegistryInstance().GetMemoryOptimizerStrategies();
        for (auto& backendMemoryProfile : m_MemBlockMap)
        {
            const BackendId& backendId = backendMemoryProfile.first;
            if (backendStrategyMap.find(backendId) != backendStrategyMap.end())
            {
                m_MemBinMap[backendId] = backendStrategyMap[backendId]->Optimize(backendMemoryProfile.second);
            }
            else
            {
                m_MemBinMap[backendId] = m_ConstantStrategy->Optimize(backendMemoryProfile.second);
            }
        }

        if (!networkProperties.m_AsyncEnabled)
        {
            m_ExternalMemoryManager = CreateExternalMemoryManger(m_TensorMemory);

            // Sort m_TensorMemory, so its order matches m_Tensorhandles
            std::sort(m_TensorMemory.begin(), m_TensorMemory.end(),
                      [](const std::pair<std::shared_ptr<TensorMemory>, MemorySource>& lhs,
                         const std::pair<std::shared_ptr<TensorMemory>, MemorySource>& rhs)
                      {
                          return lhs.first->m_OutputSlotId < rhs.first->m_OutputSlotId;
                      });
        }
    }

    // Now that the intermediate tensor memory has been set up,
    // do any post allocation configuration for each workload.
    if (!networkProperties.m_AsyncEnabled)
    {
        if (useInternalMemoryManager)
        {
            // Set up memory.
            m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().AllocateDynamicBuffers();
        }

        for (auto& workload : m_WorkloadQueue)
        {
            workload->PostAllocationConfigure();
        }
    }

    if (useExternalMemoryManager)
    {
        if (!networkProperties.m_AsyncEnabled)
        {
            AllocateAndExecuteConstantWorkloads();
        }
        else
        {
            AllocateAndExecuteConstantWorkloadsAsync();
        }
    }
    // If synchronous, execute all constant layer workloads
    if (!networkProperties.m_AsyncEnabled)
    {
        for (auto workload : ConstWorkloads)
        {
            workload->Execute();
        }
    }
}

void LoadedNetwork::AllocateAndExecuteConstantWorkloads()
{
    ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "LoadNetwork_AllocateAndExecuteConstants");
    for (auto& pair : m_ConstantWorkloads)
    {
        auto tensorHandle = m_ConstantTensorHandles[pair.first];
        tensorHandle->Allocate();
        pair.second->Execute();
    }
}

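// Async variant: constant layers are given fresh, backend-created tensor handles and
// their workloads are run once via ExecuteAsync(), using per-backend ExecutionData.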
void LoadedNetwork::AllocateAndExecuteConstantWorkloadsAsync()
{
    ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "LoadNetwork_AllocateAndExecuteConstants");
    Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
    for (auto&& layer : order)
    {
        if (layer->GetType() == LayerType::Constant)
        {
            const auto& outSlot = layer->GetOutputSlots()[0];
            const auto factoryId = outSlot.GetTensorHandleFactoryId();
            ARMNN_ASSERT(factoryId != ITensorHandleFactory::LegacyFactoryId);
            auto& workloadFactory = GetWorkloadFactory(*layer);

            layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory);
            ITensorHandle* tensorHandle = outSlot.GetOutputHandler().GetData();

            m_ConstantTensorHandles[layer->GetGuid()] = tensorHandle;
            tensorHandle->Allocate();

            auto& backend = m_Backends.at(layer->GetBackendId());

            WorkingMemDescriptor memDesc;
            memDesc.m_Outputs.push_back(tensorHandle);

            ExecutionData executionData = backend->CreateExecutionData(memDesc);
            m_ConstantWorkloads[layer->GetGuid()]->ExecuteAsync(executionData);
        }
    }
}

void LoadedNetwork::SendNetworkStructure(arm::pipe::IProfilingService& profilingService)
{
    ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "LoadNetwork_SendNetworkStructure");
    Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
    ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();

    std::unique_ptr<TimelineUtilityMethods> timelineUtils =
        TimelineUtilityMethods::GetTimelineUtils(profilingService);

    timelineUtils->CreateTypedEntity(networkGuid, LabelsAndEventClasses::NETWORK_GUID);

    for (auto&& layer : order)
    {
        // Add layer to the post-optimisation network structure
        AddLayerStructure(timelineUtils, *layer, networkGuid);
        switch (layer->GetType())
        {
            case LayerType::Input:
            case LayerType::Output:
            {
                // Inputs and outputs are treated in a special way - see EnqueueInput() and EnqueueOutput().
                break;
            }
            default:
            {
                for (auto& workload : m_WorkloadQueue)
                {
                    // Add workload to the post-optimisation network structure
                    AddWorkloadStructure(timelineUtils, workload, *layer);
                }
                break;
            }
        }
    }
    // Commit to send the post-optimisation network structure
    timelineUtils->Commit();
}

ProfilingGuid LoadedNetwork::GetNetworkGuid()
{
    return m_OptimizedNetwork->GetGuid();
}

TensorInfo LoadedNetwork::GetInputTensorInfo(LayerBindingId layerId) const
{
    for (auto&& inputLayer : m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetInputLayers())
    {
        ARMNN_ASSERT_MSG(inputLayer->GetNumOutputSlots() == 1, "Input layer should have exactly 1 output slot");
        if (inputLayer->GetBindingId() == layerId)
        {
            return inputLayer->GetOutputSlot(0).GetTensorInfo();
        }
    }

    throw InvalidArgumentException(fmt::format("No input layer is associated with id {}", layerId));
}

TensorInfo LoadedNetwork::GetOutputTensorInfo(LayerBindingId layerId) const
{
    for (auto&& outputLayer : m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetOutputLayers())
    {
        ARMNN_ASSERT_MSG(outputLayer->GetNumInputSlots() == 1, "Output layer should have exactly 1 input slot");
        ARMNN_ASSERT_MSG(outputLayer->GetInputSlot(0).GetConnection(), "Input slot on Output layer must be connected");
        if (outputLayer->GetBindingId() == layerId)
        {
            return outputLayer->GetInputSlot(0).GetConnection()->GetTensorInfo();
        }
    }

    throw InvalidArgumentException(fmt::format("No output layer is associated with id {}", layerId));
}

const IWorkloadFactory& LoadedNetwork::GetWorkloadFactory(const Layer& layer) const
{
    const IWorkloadFactory* workloadFactory = nullptr;

    auto it = m_WorkloadFactories.find(layer.GetBackendId());
    if (it == m_WorkloadFactories.end())
    {
        throw RuntimeException(fmt::format("No workload factory for {0} to be used for layer: {1}",
                                           layer.GetBackendId().Get(),
                                           layer.GetNameStr()),
                               CHECK_LOCATION());
    }

    workloadFactory = it->second.get();

    ARMNN_ASSERT_MSG(workloadFactory, "No workload factory");

    std::string reasonIfUnsupported;
    ARMNN_ASSERT_MSG(IWorkloadFactory::IsLayerSupported(layer,
                                                        {},
                                                        reasonIfUnsupported,
                                                        m_OptimizedNetwork->pOptimizedNetworkImpl->GetModelOptions()),
                     "Factory does not support layer");
    IgnoreUnused(reasonIfUnsupported);
    return *workloadFactory;
}

namespace {

// Non-copyable class owning accelerator-specific tensor data.
class TensorPin
{
public:
    TensorPin(std::unique_ptr<ITensorHandle> handle, const TensorInfo& info, LayerBindingId id)
        : m_TensorHandle(std::move(handle))
        , m_TensorInfo(info)
        , m_Id(id)
    {
    }

    ITensorHandle* GetTensorHandle() const { return m_TensorHandle.get(); }
    const TensorInfo& GetTensorInfo() const { return m_TensorInfo; }
    LayerBindingId GetBindingId() const { return m_Id; }

private:
    std::unique_ptr<ITensorHandle> m_TensorHandle;
    TensorInfo m_TensorInfo;
    LayerBindingId m_Id;
};

static const TensorPin& GetTensorPin(LayerBindingId id,
                                     const std::vector<TensorPin>& pins,
                                     char const* bindingPointDesc)
{
    auto it = std::find_if(pins.begin(), pins.end(),
                           [id](const TensorPin& pin)
                           {
                               return pin.GetBindingId() == id;
                           });

    if (it != pins.end())
    {
        return *it;
    }
    else
    {
        throw InvalidArgumentException(fmt::format("No tensor supplied for {0} {1}", bindingPointDesc, id));
    }
}

// Stores data that needs to be kept accessible for the entire execution of a workload.
class WorkloadData
{
public:
    WorkloadData(const InputTensors& inputTensors, const OutputTensors& outputTensors)
    {
        m_InputTensorPins.reserve(inputTensors.size());
        m_OutputTensorPins.reserve(outputTensors.size());

        for (auto inputTensorPair : inputTensors)
        {
            auto inputTensor = inputTensorPair.second;

            std::unique_ptr<ITensorHandle> tensorHandle =
                std::make_unique<ConstPassthroughTensorHandle>(inputTensor.GetInfo(), inputTensor.GetMemoryArea());
            LayerBindingId layerId = inputTensorPair.first;

            m_InputTensorPins.emplace_back(std::move(tensorHandle), inputTensor.GetInfo(), layerId);
        }

        for (auto outputTensorPair : outputTensors)
        {
            auto outputTensor = outputTensorPair.second;

            std::unique_ptr<ITensorHandle> tensorHandle =
                std::make_unique<PassthroughTensorHandle>(outputTensor.GetInfo(), outputTensor.GetMemoryArea());
            LayerBindingId layerId = outputTensorPair.first;

            m_OutputTensorPins.emplace_back(std::move(tensorHandle), outputTensor.GetInfo(), layerId);
        }
    }

    const TensorPin& GetInputTensorPin(LayerBindingId id) const
    {
        return GetTensorPin(id, m_InputTensorPins, "input");
    }

    const TensorPin& GetOutputTensorPin(LayerBindingId id) const
    {
        return GetTensorPin(id, m_OutputTensorPins, "output");
    }

private:
    std::vector<TensorPin> m_InputTensorPins;
    std::vector<TensorPin> m_OutputTensorPins;
};

} // anonymous namespace

Status LoadedNetwork::EnqueueWorkload(const InputTensors& inputTensors,
                                      const OutputTensors& outputTensors,
                                      std::vector<ImportedInputId> preImportedInputIds,
                                      std::vector<ImportedOutputId> preImportedOutputIds)
{
    const Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();

    // Walk graph to determine the order of execution.
    if (graph.GetNumLayers() < 2)
    {
        ARMNN_LOG(warning) << "IRuntime::EnqueueWorkload()::Less than two nodes in graph";
        return Status::Failure;
    }

    // Data that must be kept alive for the entire execution of the workload.
    WorkloadData workloadData(inputTensors, outputTensors);

    // Input tensors can be provided as parameters or pre-imported. Either way the number of
    // tensors should match the number of inputs.
    if (graph.GetNumInputs() != (inputTensors.size() + preImportedInputIds.size()))
    {
        throw InvalidArgumentException("Number of inputs provided does not match network.");
    }

    // For each input to the network, call EnqueueInput with the data passed by the user.
    {
        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "PrepareInputs");
        m_InputQueue.clear();
        m_InputQueue.reserve(graph.GetNumInputs());

        unsigned int inputIndex = 0;
        unsigned int importedInputIdIndex = 0;
        std::sort(preImportedInputIds.begin(), preImportedInputIds.end());
        for (const BindableLayer* inputLayer : graph.GetInputLayers())
        {
            if (importedInputIdIndex < preImportedInputIds.size() &&
                inputIndex == preImportedInputIds[importedInputIdIndex])
            {
                // Only replace tensor handles if they have not already been replaced
                if (!m_IsInputImported[inputIndex])
                {
                    auto outputTensorHandle = m_PreImportedInputHandles[inputIndex].m_TensorHandle.get();

                    for (const auto& workloadInfo : m_InputWorkloadSlotPairs[inputLayer->GetBindingId()])
                    {
                        auto workload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
                        workload->ReplaceInputTensorHandle(outputTensorHandle, workloadInfo.m_SlotIndex);
                    }
                    m_IsInputImported[inputIndex] = true;
                }
                importedInputIdIndex++;
            }
            else
            {
                if (m_IsInputImported[inputIndex])
                {
                    OutputHandler& handler = const_cast<OutputHandler&>(inputLayer->GetOutputHandler(0));

                    for (const auto& workloadInfo : m_InputWorkloadSlotPairs[inputLayer->GetBindingId()])
                    {
                        auto workload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
                        workload->ReplaceInputTensorHandle(handler.GetData(), workloadInfo.m_SlotIndex);
                    }

                    m_IsInputImported[inputIndex] = false;
                }

                // InputTensorHandle is not imported yet, proceed to enqueue the input
                const TensorPin& pin = workloadData.GetInputTensorPin(inputLayer->GetBindingId());
                EnqueueInput(*inputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
            }
            inputIndex++;
        }
    }
    // For each output of the network, call EnqueueOutput with the data passed by the user.
    {
        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "PrepareOutputs");
        m_OutputQueue.clear();
        m_OutputQueue.reserve(graph.GetNumOutputs());

        if (preImportedOutputIds.size() > graph.GetNumOutputs())
        {
            throw InvalidArgumentException("Invalid number of preImportedOutputIds");
        }

        unsigned int outputIndex = 0;
        unsigned int importedOutputIdIndex = 0;
        std::sort(preImportedOutputIds.begin(), preImportedOutputIds.end());
        for (const BindableLayer* outputLayer : graph.GetOutputLayers())
        {
            if (importedOutputIdIndex < preImportedOutputIds.size() &&
                outputIndex == preImportedOutputIds[importedOutputIdIndex])
            {
                ITensorHandle* inputTensorHandle = m_PreImportedOutputHandles[outputIndex].m_TensorHandle.get();

                // Only replace tensor handles if they have not already been replaced
                if (!m_IsOutputImported[outputIndex])
                {
                    const auto bindingId = outputLayer->GetBindingId();
                    const auto& indices = m_OutputWorkloadSlotPairs[bindingId];

                    auto outputWorkload = m_WorkloadQueue[indices.m_OutputSlotIndices.m_WorkloadIndex].get();

                    outputWorkload->ReplaceOutputTensorHandle(inputTensorHandle,
                                                              indices.m_OutputSlotIndices.m_SlotIndex);

                    for (const auto& workloadInfo : indices.m_InputSlotIndices)
                    {
                        auto inputWorkload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
                        inputWorkload->ReplaceInputTensorHandle(inputTensorHandle, workloadInfo.m_SlotIndex);
                    }
                    m_IsOutputImported[outputIndex] = true;
                }

                ARMNN_ASSERT_MSG(inputTensorHandle != nullptr, "Data should have been allocated.");
                MemSyncQueueDescriptor syncDesc;
                syncDesc.m_Inputs.push_back(inputTensorHandle);
                WorkloadInfo info;
                info.m_InputTensorInfos.push_back(
                    outputLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo());
                auto syncWorkload = std::make_unique<SyncMemGenericWorkload>(syncDesc, info);
                ARMNN_ASSERT_MSG(syncWorkload, "No sync workload created");
                m_OutputQueue.push_back(std::move(syncWorkload));
                importedOutputIdIndex++;
            }
            else
            {
                if (m_IsOutputImported[outputIndex])
                {
                    const auto bindingId = outputLayer->GetBindingId();
                    const auto& indices = m_OutputWorkloadSlotPairs[bindingId];

                    auto outputWorkload = m_WorkloadQueue[indices.m_OutputSlotIndices.m_WorkloadIndex].get();
                    const OutputHandler& outputHandler =
                        outputLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetOutputHandler();

                    outputWorkload->ReplaceOutputTensorHandle(
                        outputHandler.GetData(), indices.m_OutputSlotIndices.m_SlotIndex);

                    for (const auto& workloadInfo : indices.m_InputSlotIndices)
                    {
                        auto inputWorkload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
                        inputWorkload->ReplaceInputTensorHandle(outputHandler.GetData(), workloadInfo.m_SlotIndex);
                    }
                    m_IsOutputImported[outputIndex] = false;
                }

                // OutputTensorHandle is not imported yet, proceed to enqueue the output
                const TensorPin& pin = workloadData.GetOutputTensorPin(outputLayer->GetBindingId());
                EnqueueOutput(*outputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
            }
            outputIndex++;
        }
    }

    std::unique_ptr<TimelineUtilityMethods> timelineUtils =
        TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
    ProfilingGuid inferenceGuid = m_ProfilingService->GetNextGuid();
    if (timelineUtils)
    {
        // Add inference timeline trace if profiling is enabled.
        ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
        timelineUtils->CreateTypedEntity(inferenceGuid, LabelsAndEventClasses::INFERENCE_GUID);
        timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
                                          networkGuid,
                                          inferenceGuid,
                                          LabelsAndEventClasses::EXECUTION_OF_GUID);
        timelineUtils->RecordEvent(inferenceGuid, LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
    }

    bool executionSucceeded = true;

    {
        if (m_ProfilingService->IsProfilingEnabled())
        {
            m_ProfilingService->IncrementCounterValue(INFERENCES_RUN);
        }
        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Execute");
        ARMNN_SCOPED_HEAP_PROFILING("Executing");
        executionSucceeded = Execute(timelineUtils, inferenceGuid);
    }

    if (timelineUtils)
    {
        // Add end of life of the inference timeline if profiling is enabled.
        timelineUtils->RecordEvent(inferenceGuid, LabelsAndEventClasses::ARMNN_PROFILING_EOL_EVENT_CLASS);
        timelineUtils->Commit();
    }

    return executionSucceeded ? Status::Success : Status::Failure;
}
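
// Illustrative sketch (not part of the original file): building the InputTensors and
// OutputTensors containers that EnqueueWorkload() consumes. The binding ids (0) and the
// 'inputInfo'/'outputInfo'/data buffers are hypothetical; note the input TensorInfo must
// have IsConstant() == true for ConstTensor construction.
//
//     armnn::InputTensors inputs{ {0, armnn::ConstTensor(inputInfo, inputData.data())} };
//     armnn::OutputTensors outputs{ {0, armnn::Tensor(outputInfo, outputData.data())} };
//     loadedNetwork->EnqueueWorkload(inputs, outputs, {}, {});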

void LoadedNetwork::EnqueueInput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo)
{
    if (layer.GetType() != LayerType::Input)
    {
        throw InvalidArgumentException("EnqueueInput: given layer not an InputLayer");
    }

    if (tensorHandle == nullptr)
    {
        throw InvalidArgumentException("EnqueueInput: tensorHandle must not be NULL");
    }

    InputQueueDescriptor inputQueueDescriptor;
    WorkloadInfo info;

    inputQueueDescriptor.m_Inputs.push_back(tensorHandle);
    info.m_InputTensorInfos.push_back(tensorInfo);

    ARMNN_ASSERT_MSG(layer.GetNumOutputSlots() == 1, "Can only handle Input Layer with one output");
    const OutputHandler& handler = layer.GetOutputHandler();
    const TensorInfo& outputTensorInfo = handler.GetTensorInfo();
    ITensorHandle* outputTensorHandle = handler.GetData();
    ARMNN_ASSERT_MSG(outputTensorHandle != nullptr,
                     "Data should have been allocated.");
    inputQueueDescriptor.m_Outputs.push_back(outputTensorHandle);
    info.m_OutputTensorInfos.push_back(outputTensorInfo);

    MemorySourceFlags importFlags = outputTensorHandle->GetImportFlags();
    bool needMemCopy = true;
    if (m_NetworkProperties.m_ImportEnabled) // Try to import the input tensor
    {
        if (CheckFlag(importFlags, m_NetworkProperties.m_InputSource))
        {
            needMemCopy = false;
            // This assumes a CPU Tensor handle
            void* mem = tensorHandle->Map(false);
            if (outputTensorHandle->Import(mem, m_NetworkProperties.m_InputSource))
            {
                tensorHandle->Unmap();
                return; // No need for a workload since the import has been done.
            }
            tensorHandle->Unmap();
            throw MemoryImportException("EnqueueInput: Memory Import failed");
        }
    }
    if (needMemCopy)
    {
        // Create a mem copy workload for input since we did not import
        std::unique_ptr<IWorkload> inputWorkload = std::make_unique<CopyMemGenericWorkload>(inputQueueDescriptor, info);

        ARMNN_ASSERT_MSG(inputWorkload, "No input workload created");

        std::unique_ptr<TimelineUtilityMethods> timelineUtils =
            TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
        if (timelineUtils)
        {
            // Add Input Workload to the post-optimisation network structure
            AddWorkloadStructure(timelineUtils, inputWorkload, layer);
            timelineUtils->Commit();
        }

        m_InputQueue.push_back(std::move(inputWorkload));
    }
}

void LoadedNetwork::EnqueueOutput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo)
{
    if (layer.GetType() != LayerType::Output)
    {
        throw InvalidArgumentException("EnqueueOutput: given layer not an OutputLayer");
    }

    if (tensorHandle == nullptr)
    {
        throw InvalidArgumentException("EnqueueOutput: tensorHandle must not be NULL");
    }

    OutputQueueDescriptor outputQueueDescriptor;
    WorkloadInfo info;

    outputQueueDescriptor.m_Outputs.push_back(tensorHandle);
    info.m_OutputTensorInfos.push_back(tensorInfo);

    ARMNN_ASSERT_MSG(layer.GetNumInputSlots() == 1, "Output Layer should have exactly one input.");

    // Gets the output handler from the previous node.
    const OutputHandler& outputHandler = layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOutputHandler();

    const TensorInfo& inputTensorInfo = outputHandler.GetTensorInfo();
    ITensorHandle* inputTensorHandle = outputHandler.GetData();
    ARMNN_ASSERT_MSG(inputTensorHandle != nullptr, "Data should have been allocated.");

    // Try to import the output tensor.
    // Note: We can only import the output pointer if all of the following hold true:
    // a) The imported pointer is aligned sufficiently
    // b) The tensor has zero padding
    // c) There is only one connection to the OutputSlot and it is to an OutputLayer.
    // d) The output pointer is allocated via malloc. (Other types will be supported in a later release)
    // e) m_IsExportEnabled must be set to true
    bool needMemCopy = true;
    if (m_NetworkProperties.m_ExportEnabled &&
        (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetNumConnections() == 1))
    {
        if (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOwningLayer().GetType() != LayerType::Input)
        {
            MemorySourceFlags importFlags = inputTensorHandle->GetImportFlags();
            if (CheckFlag(importFlags, m_NetworkProperties.m_OutputSource))
            {
                needMemCopy = false;
                void* mem = tensorHandle->Map(false);
                bool importOk = inputTensorHandle->Import(mem, m_NetworkProperties.m_OutputSource);
                tensorHandle->Unmap();

                if (importOk)
                {
                    // Insert synchronization workload
                    MemSyncQueueDescriptor syncDesc;
                    syncDesc.m_Inputs.push_back(inputTensorHandle);
                    info.m_InputTensorInfos.push_back(inputTensorInfo);
                    auto syncWorkload = std::make_unique<SyncMemGenericWorkload>(syncDesc, info);
                    ARMNN_ASSERT_MSG(syncWorkload, "No sync workload created");
                    m_OutputQueue.push_back(std::move(syncWorkload));
                }
                else
                {
                    throw MemoryExportException("EnqueueOutput: Memory Export failed");
                }
            }
        }
    }
    if (needMemCopy)
    {
        // If we got here then we didn't export the memory, so add an output workload which performs a memcopy.
        outputQueueDescriptor.m_Inputs.push_back(inputTensorHandle);
        info.m_InputTensorInfos.push_back(inputTensorInfo);

        std::unique_ptr<IWorkload> outputWorkload =
            std::make_unique<CopyMemGenericWorkload>(outputQueueDescriptor, info);
        ARMNN_ASSERT_MSG(outputWorkload, "No output workload created");

        std::unique_ptr<TimelineUtilityMethods> timelineUtils =
            TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
        if (timelineUtils)
        {
            // Add Output Workload to the post-optimisation network structure
            AddWorkloadStructure(timelineUtils, outputWorkload, layer);
            timelineUtils->Commit();
        }

        m_OutputQueue.push_back(std::move(outputWorkload));
    }
}

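// Working memory (intermediate tensors and backend scratch memory) is acquired lazily on
// the first Execute() and held until FreeWorkingMemory() is called or the network unloads.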
void LoadedNetwork::AllocateWorkingMemory(
#if !defined(ARMNN_DISABLE_THREADS)
    std::lock_guard<std::mutex>& lock
#endif
    )
{
    ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Working Memory Allocation");

#if !defined(ARMNN_DISABLE_THREADS)
    // This otherwise-unused parameter ensures this function can only be called while holding a valid lock.
    IgnoreUnused(lock);
#endif
    if (m_IsWorkingMemAllocated)
    {
        return;
    }

    if (m_ExternalMemoryManager)
    {
        m_ExternalMemoryManager->Allocate();

        for (unsigned int i = 0; i < m_TensorMemory.size(); ++i)
        {
            m_Tensorhandles[i]->Import(m_TensorMemory[i].first->m_Data, m_TensorMemory[i].second);
        }
    }

    for (auto&& memoryManager : m_BackendMemoryMangers)
    {
        if (memoryManager)
        {
            memoryManager->Acquire();
        }
    }
    m_TensorHandleFactoryRegistry.AquireMemory();
    m_IsWorkingMemAllocated = true;
}

void LoadedNetwork::FreeWorkingMemory()
{
#if !defined(ARMNN_DISABLE_THREADS)
    std::lock_guard<std::mutex> lockGuard(m_WorkingMemMutex);
#endif

    if (!m_IsWorkingMemAllocated)
    {
        return;
    }

    if (m_ExternalMemoryManager)
    {
        m_ExternalMemoryManager->Deallocate();
    }

    // Informs the memory managers to release memory in their respective memory groups
    for (auto&& memoryManager : m_BackendMemoryMangers)
    {
        if (memoryManager)
        {
            memoryManager->Release();
        }
    }
    m_TensorHandleFactoryRegistry.ReleaseMemory();
    m_IsWorkingMemAllocated = false;
}

bool LoadedNetwork::Execute(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
                            ProfilingGuid inferenceGuid)
{
    bool success = true;

    auto Fail = [&](const std::exception& error)
    {
        ARMNN_LOG(error) << "An error occurred attempting to execute a workload: " << error.what();
        success = false;
    };

    try
    {
#if !defined(ARMNN_DISABLE_THREADS)
        std::lock_guard<std::mutex> lockGuard(m_WorkingMemMutex);
        AllocateWorkingMemory(lockGuard);
#else
        AllocateWorkingMemory();
#endif

        ProfilingDynamicGuid workloadInferenceID(0);
        auto ExecuteQueue = [&timelineUtils, &workloadInferenceID, &inferenceGuid](WorkloadQueue& queue)
        {
            for (auto& workload : queue)
            {
                if (timelineUtils)
                {
                    workloadInferenceID = timelineUtils->RecordWorkloadInferenceAndStartOfLifeEvent(workload->GetGuid(),
                                                                                                    inferenceGuid);
                }
                workload->Execute();
                if (timelineUtils)
                {
                    timelineUtils->RecordEndOfLifeEvent(workloadInferenceID);
                }
            }
        };

        ExecuteQueue(m_InputQueue);
        ExecuteQueue(m_WorkloadQueue);
        ExecuteQueue(m_OutputQueue);
    }
    catch (const RuntimeException& error)
    {
        Fail(error);
    }
    catch (const std::runtime_error& error)
    {
        Fail(error);
    }

    return success;
}

void LoadedNetwork::EnqueueInput(const ConstTensor& inputTensor, ITensorHandle* inputTensorHandle)
{
    if (m_NetworkProperties.m_ImportEnabled) // Try to import the input tensor
    {
        MemorySourceFlags importFlags = inputTensorHandle->GetImportFlags();
        if (CheckFlag(importFlags, m_NetworkProperties.m_InputSource))
        {
            std::unique_ptr<ITensorHandle> tensorHandle =
                std::make_unique<ConstPassthroughTensorHandle>(inputTensor.GetInfo(),
                                                               inputTensor.GetMemoryArea());
            void* mem = tensorHandle->Map(false);

            if (inputTensorHandle->Import(mem, m_NetworkProperties.m_InputSource))
            {
                tensorHandle->Unmap();
                return;
            }
            tensorHandle->Unmap();
            throw MemoryImportException("EnqueueInput: Memory Import failed");
        }
        else
        {
            throw MemoryImportException("EnqueueInput: Memory Import failed, backend does not support Import");
        }
    }
    else
    {
        std::unique_ptr<ITensorHandle> tensorHandle =
            std::make_unique<ConstPassthroughTensorHandle>(inputTensor.GetInfo(), inputTensor.GetMemoryArea());

        auto copyFunc = [](void* dst, const void* src, size_t size)
        {
            memcpy(dst, src, size);
        };

        CopyTensorContentsGeneric(tensorHandle.get(), inputTensorHandle, copyFunc);
    }
}

// Note: We can only import the output pointer if all of the following hold true:
// a) The imported pointer is aligned sufficiently
// b) The tensor has zero padding
// c) There is only one connection to the OutputSlot and it is to an OutputLayer.
// d) The output pointer is allocated via malloc. (Other types will be supported in a later release)
// e) m_IsExportEnabled must be set to true
void LoadedNetwork::ImportOutputTensor(const Tensor& outputTensor, ITensorHandle* outputTensorHandle)
{
    ARMNN_ASSERT_MSG(outputTensorHandle != nullptr, "Data should have been allocated.");
    MemorySourceFlags importFlags = outputTensorHandle->GetImportFlags();
    if (CheckFlag(importFlags, m_NetworkProperties.m_OutputSource))
    {
        std::unique_ptr<ITensorHandle> tensorHandle =
            std::make_unique<PassthroughTensorHandle>(outputTensor.GetInfo(),
                                                      outputTensor.GetMemoryArea());

        void* mem = tensorHandle->Map(false);
        bool importOk = outputTensorHandle->Import(mem, m_NetworkProperties.m_OutputSource);
        tensorHandle->Unmap();

        if (!importOk)
        {
            throw MemoryExportException("ImportOutputTensor: Memory Export failed");
        }
    }
    else
    {
        throw MemoryExportException("ImportOutputTensor: Memory Export failed, attempting to export Input Layer");
    }
}

void CopyToOutputTensor(const Tensor& outputTensor, ITensorHandle* outputTensorHandle)
{
    auto copyFunc = [](void* dst, const void* src, size_t size)
    {
        memcpy(dst, src, size);
    };

    std::unique_ptr<ITensorHandle> tensorHandle =
        std::make_unique<PassthroughTensorHandle>(outputTensor.GetInfo(),
                                                  outputTensor.GetMemoryArea());

    CopyTensorContentsGeneric(outputTensorHandle, tensorHandle.get(), copyFunc);
}

const armnn::ConstTensor GetInputTensor(const LayerBindingId layerId, const InputTensors& inputTensors)
{
    for (auto inputTensorPair : inputTensors)
    {
        LayerBindingId id = inputTensorPair.first;
        if (id == layerId)
        {
            return inputTensorPair.second;
        }
    }
    throw InvalidArgumentException("Input does not exist.");
}

const armnn::Tensor GetOutputTensor(const LayerBindingId layerId, const OutputTensors& outputTensors)
{
    for (auto outputTensorPair : outputTensors)
    {
        LayerBindingId id = outputTensorPair.first;
        if (id == layerId)
        {
            return outputTensorPair.second;
        }
    }
    throw InvalidArgumentException("Output does not exist.");
}

std::vector<ImportedInputId> LoadedNetwork::ImportInputs(const InputTensors& inputTensors,
                                                         MemorySource forceImportMemorySource)
{
    if (!m_NetworkProperties.m_AsyncEnabled)
    {
        // Cannot import if import is not enabled and forceImportMemorySource is undefined
        if (forceImportMemorySource == MemorySource::Undefined)
        {
            throw MemoryImportException("ImportInputs: Memory Import failed, "
                                        "NetworkProperties.m_ImportEnabled is false and no "
                                        "forceImportMemorySource was specified");
        }
        // The number of pre-imported tensors should not exceed the number of inputs.
        if (inputTensors.size() > m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetNumInputs())
        {
            throw MemoryImportException("ImportInputs: The number of tensors provided exceeds the number of inputs.");
        }

        std::vector<ImportedInputId> importedInputs;
        Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
        unsigned int inputIndex = 0;
        for (const BindableLayer* inputLayer : graph.GetInputLayers())
        {
            auto outputTensorHandle = m_PreImportedInputHandles[inputIndex].m_TensorHandle.get();

            if (!outputTensorHandle)
            {
                inputIndex++;
                continue;
            }

            auto layerBindingId = inputLayer->GetBindingId();
            auto it = std::find_if(inputTensors.begin(), inputTensors.end(), [=](const auto& inputTensor)
            {
                return inputTensor.first == layerBindingId;
            });

            if (it == inputTensors.end())
            {
                inputIndex++;
                continue;
            }

            const auto& inputTensor = *it;
            std::unique_ptr<ITensorHandle> passThroughTensorHandle =
                std::make_unique<ConstPassthroughTensorHandle>(inputTensor.second.GetInfo(),
                                                               inputTensor.second.GetMemoryArea());

            try
            {
                if (outputTensorHandle->CanBeImported(passThroughTensorHandle->Map(), forceImportMemorySource)
                    && (outputTensorHandle->Import(passThroughTensorHandle->Map(), forceImportMemorySource)))
                {
                    importedInputs.push_back(inputIndex);
                }
                passThroughTensorHandle->Unmap();
            }
            catch (const MemoryImportException& exception)
            {
                ARMNN_LOG(error) << "An error occurred attempting to import input_"
                                 << inputIndex << " : " << exception.what();
                passThroughTensorHandle->Unmap();
            }
            inputIndex++;
        }

        return importedInputs;
    }
    else
    {
        // The network was loaded for async execution with import enabled, so import the
        // given tensors directly through the backend's tensor handle factory.
        std::vector<ImportedInputId> importedInputs;
        Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();

        for (auto inputTensor : inputTensors)
        {
            auto layerBindingId = inputTensor.first;
            auto it = std::find_if(graph.GetInputLayers().begin(), graph.GetInputLayers().end(), [=](auto* layer)
            {
                return layer->GetBindingId() == layerBindingId;
            });

            if (it == graph.GetInputLayers().end())
            {
                throw MemoryImportException(fmt::format(
                    "ImportInputs: Memory Import failed, unknown LayerBindingId: {}", layerBindingId));
            }

            const Layer* layer = *it;
            if (layer->GetType() != LayerType::Input)
            {
                throw InvalidArgumentException("ImportInputs: given layer not an InputLayer");
            }

            auto& backend = m_Backends.at(layer->GetBackendId());
            if (!HasCapability(BackendOptions::BackendOption{"PreImportIOTensors", true}, backend->GetCapabilities()))
            {
                std::string er = backend->GetId();
                er += " does not have PreImportIOTensors capability";
                throw BackendCapabilityException(er);
            }

            const OutputSlot& outputSlot = layer->GetOutputSlots()[0];

            ITensorHandleFactory::FactoryId factoryId = outputSlot.GetTensorHandleFactoryId();
            const TensorInfo& tensorInfo = outputSlot.GetTensorInfo();

            ITensorHandleFactory* handleFactory = m_TensorHandleFactoryRegistry.GetFactory(factoryId);
            ARMNN_ASSERT(handleFactory);

            ImportedTensorHandlePin importedTensorHandlePin{layerBindingId,
                                                            handleFactory->CreateTensorHandle(tensorInfo, false)};

            ITensorHandle* tensorHandle = importedTensorHandlePin.m_TensorHandle.get();

            if (!CheckFlag(tensorHandle->GetImportFlags(), forceImportMemorySource))
            {
                throw MemoryImportException(
                    fmt::format("ImportInputs: Memory Import failed, backend: "
                                "{} does not support importing from source {}",
                                factoryId, forceImportMemorySource));
            }

            std::unique_ptr<ITensorHandle> passThroughTensorHandle =
                std::make_unique<ConstPassthroughTensorHandle>(inputTensor.second.GetInfo(),
                                                               inputTensor.second.GetMemoryArea());

            if (tensorHandle->Import(passThroughTensorHandle->Map(), forceImportMemorySource))
            {
                importedInputs.push_back(m_CurImportedInputId++);
                passThroughTensorHandle->Unmap();
            }
            else
            {
                passThroughTensorHandle->Unmap();
                throw MemoryImportException("ImportInputs: Memory Import failed");
            }

            m_PreImportedInputHandles.push_back(std::move(importedTensorHandlePin));
        }
        return importedInputs;
    }
}
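
// Illustrative sketch (not part of the original file): pre-importing an input buffer and
// then running inference without a per-inference copy. The runtime, network id, binding
// id (0) and 'inputInfo'/'buffer' names are hypothetical; the usual entry point is
// IRuntime::ImportInputs() followed by IRuntime::EnqueueWorkload().
//
//     armnn::InputTensors inputs{ {0, armnn::ConstTensor(inputInfo, buffer)} };
//     std::vector<armnn::ImportedInputId> importedIds =
//         runtime->ImportInputs(networkId, inputs, armnn::MemorySource::Malloc);
//     // Pass the imported ids instead of the input tensors on subsequent runs:
//     runtime->EnqueueWorkload(networkId, {}, outputs, importedIds);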

std::vector<ImportedOutputId> LoadedNetwork::ImportOutputs(const OutputTensors& outputTensors,
                                                           MemorySource forceImportMemorySource)
{
    if (!m_NetworkProperties.m_AsyncEnabled)
    {
        // Cannot import if import is not enabled and forceImportMemorySource is undefined
        if (forceImportMemorySource == MemorySource::Undefined)
        {
            throw MemoryImportException("ImportOutputs: Memory Import failed, "
                                        "NetworkProperties.m_ImportEnabled is false and no "
                                        "forceImportMemorySource was specified");
        }
        // If forceImportMemorySource is defined, try to import if the memory is aligned
        if (outputTensors.size() != m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetNumOutputs())
        {
            throw MemoryImportException("ImportOutputs: Force Import failed, incorrect number of tensors");
        }
        std::vector<ImportedOutputId> importedOutputs;
        Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();

        unsigned int outputIndex = 0;
        for (const BindableLayer* const outputLayer : graph.GetOutputLayers())
        {
            auto inputTensorHandle = m_PreImportedOutputHandles[outputIndex].m_TensorHandle.get();
            if (!inputTensorHandle)
            {
                outputIndex++;
                continue;
            }

            auto layerBindingId = outputLayer->GetBindingId();
            auto it = std::find_if(outputTensors.begin(), outputTensors.end(), [=](const auto& outputTensor)
            {
                return outputTensor.first == layerBindingId;
            });

            if (it == outputTensors.end())
            {
                outputIndex++;
                continue;
            }

            const auto outputTensor = *it;
            try
            {
                // Check if the output memory can be imported
                if (inputTensorHandle->CanBeImported(outputTensor.second.GetMemoryArea(), forceImportMemorySource)
                    && inputTensorHandle->Import(outputTensor.second.GetMemoryArea(), forceImportMemorySource))
                {
                    importedOutputs.push_back(outputIndex);
                }
            }
            catch (const MemoryImportException& exception)
            {
                ARMNN_LOG(error) << "An error occurred attempting to import output_"
                                 << outputIndex << " : " << exception.what();
            }
            outputIndex++;
        }
        return importedOutputs;
    }
1624 
1625  std::vector<ImportedOutputId> importedOutputs;
1626  Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
1627 
1628  for (const auto& outputTensor : outputTensors)
1629  {
1630  auto layerBindingId = outputTensor.first;
1631  auto it = std::find_if(graph.GetOutputLayers().begin(), graph.GetOutputLayers().end(), [=](auto* layer)
1632  {
1633  return layer->GetBindingId() == layerBindingId;
1634  });
1635 
1636  if (it == graph.GetOutputLayers().end())
1637  {
1638  throw MemoryImportException(fmt::format("ImportOutputs: Memory Import failed, unknown LayerBindingId: {}",
1639  layerBindingId));
1640  }
1641 
1642  const Layer* layer = *it;
1643  if (layer->GetType() != LayerType::Output)
1644  {
1645  throw InvalidArgumentException("ImportOutputs: given layer is not an OutputLayer");
1646  }
1647 
1648  auto& backend = m_Backends.at(layer->GetBackendId());
1649  if (!HasCapability(BackendOptions::BackendOption{"PreImportIOTensors", true}, backend->GetCapabilities()))
1650  {
1651  std::string er = backend->GetId();
1652  er += " does not have PreImportIOTensors capability";
1653  throw BackendCapabilityException(er);
1654  }
1655 
1656  const InputSlot& inputSlot = layer->GetInputSlots()[0];
1657  ITensorHandleFactory::FactoryId factoryId = inputSlot.GetConnectedOutputSlot()->GetTensorHandleFactoryId();
1658  const TensorInfo& tensorInfo = inputSlot.GetConnectedOutputSlot()->GetTensorInfo();
1659 
1660  ITensorHandleFactory* handleFactory = m_TensorHandleFactoryRegistry.GetFactory(factoryId);
1661  ARMNN_ASSERT(handleFactory);
1662 
1663  ImportedTensorHandlePin importedTensorHandlePin{layerBindingId,
1664  handleFactory->CreateTensorHandle(tensorInfo, false)};
1665 
1666  ITensorHandle* tensorHandle = importedTensorHandlePin.m_TensorHandle.get();
1667 
1668  if (!CheckFlag(tensorHandle->GetImportFlags(), forceImportMemorySource))
1669  {
1670  throw MemoryImportException(fmt::format("ImportOutputs: Memory Import failed, backend: "
1671  "{} does not support importing from source {}"
1672  , factoryId, forceImportMemorySource));
1673  }
1674 
1675  if (tensorHandle->Import(outputTensor.second.GetMemoryArea(), forceImportMemorySource))
1676  {
1677  importedOutputs.push_back(m_CurImportedOutputId++);
1678  }
1679  else
1680  {
1681  throw MemoryImportException("ImportOutputs: Memory Import failed");
1682  }
1683 
1684  m_PreImportedOutputHandles.push_back(std::move(importedTensorHandlePin));
1685  }
1686 
1687  return importedOutputs;
1688 }
1689 
1690 void LoadedNetwork::ClearImportedInputs(const std::vector<ImportedInputId> inputIds)
1691 {
1692  for (auto id : inputIds)
1693  {
1694  if (id >= m_PreImportedInputHandles.size())
1695  {
1696  throw InvalidArgumentException(fmt::format("ClearImportedInputs::Unknown ImportedInputId: {}", id));
1697  }
1698 
1699  auto& importedTensorHandle = m_PreImportedInputHandles[id].m_TensorHandle;
1700  if (!importedTensorHandle)
1701  {
1702  throw InvalidArgumentException(
1703  fmt::format("ClearImportedInputs::ImportedInput with id: {} has already been deleted", id));
1704  }
1705  // Call Unimport then destroy the tensorHandle
1706  importedTensorHandle->Unimport();
1707  importedTensorHandle = {};
1708  }
1709 }
1710 
1711 void LoadedNetwork::ClearImportedOutputs(const std::vector<ImportedOutputId> outputIds)
1712 {
1713  for (auto id : outputIds)
1714  {
1715  if (id >= m_PreImportedOutputHandles.size())
1716  {
1717  throw InvalidArgumentException(fmt::format("ClearImportedOutputs::Unknown ImportedOutputId: {}", id));
1718  }
1719 
1720  auto& importedTensorHandle = m_PreImportedOutputHandles[id].m_TensorHandle;
1721  if (!importedTensorHandle)
1722  {
1723  throw InvalidArgumentException(
1724  fmt::format("ClearImportedOutputs::ImportedOutput with id: {} has already been deleted", id));
1725  }
1726  // Call Unimport then destroy the tensorHandle
1727  importedTensorHandle->Unimport();
1728  importedTensorHandle = {};
1729  }
1730 }
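// Illustrative sketch (editorial addition, not part of the original source):
// imported ids stay pinned until explicitly cleared; clearing calls Unimport()
// and releases the handle, after which reusing the id throws. Assumed caller code:
static void SketchReleaseImports(IRuntime& runtime,
                                 NetworkId networkId,
                                 const std::vector<ImportedInputId>& importedInputs,
                                 const std::vector<ImportedOutputId>& importedOutputs)
{
    runtime.ClearImportedInputs(networkId, importedInputs);
    runtime.ClearImportedOutputs(networkId, importedOutputs);
}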
1731 
1732 Status LoadedNetwork::Execute(const InputTensors& inputTensors,
1733  const OutputTensors& outputTensors,
1734  IWorkingMemHandle& iWorkingMemHandle,
1735  std::vector<ImportedInputId> preImportedInputs,
1736  std::vector<ImportedOutputId> preImportedOutputs)
1737 {
1738  const Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
1739 
1740  if (inputTensors.size() + preImportedInputs.size() != graph.GetNumInputs())
1741  {
1742  if (preImportedInputs.empty())
1743  {
1744  throw InvalidArgumentException("LoadedNetwork::Execute: Number of inputs provided does not match network.");
1745  }
1746  else
1747  {
1748  throw InvalidArgumentException("LoadedNetwork::Execute: "
1749  "Number of inputs + preImportedInputs provided does not match network.");
1750  }
1751  }
1752 
1753  if (outputTensors.size() + preImportedOutputs.size() != graph.GetNumOutputs())
1754  {
1755  if (preImportedOutputs.empty())
1756  {
1757  throw InvalidArgumentException("LoadedNetwork::Execute: "
1758  "Number of outputs provided does not match network.");
1759  }
1760  else
1761  {
1762  throw InvalidArgumentException("LoadedNetwork::Execute: "
1763  "Number of outputs + preImportedOutputs provided does not match network.");
1764  }
1765  }
1766 
1767  WorkingMemHandle& workingMemHandle = dynamic_cast<WorkingMemHandle&>(iWorkingMemHandle);
1768  // Collect all the given LayerBindingIds and check them for duplicates and unknowns.
1769  std::vector<LayerBindingId>& bindingIds = workingMemHandle.GetBindingIdVector();
1770  unsigned int index = 0;
1771  for (auto pair : inputTensors)
1772  {
1773  bindingIds[index++] = pair.first;
1774  }
1775  for (ImportedInputId id : preImportedInputs)
1776  {
1777  bindingIds[index++] = ValidateImportedInputID(id);
1778  }
1779  for (auto pair : outputTensors)
1780  {
1781  bindingIds[index++] = pair.first;
1782  }
1783  for (ImportedOutputId id : preImportedOutputs)
1784  {
1785  bindingIds[index++] = ValidateImportedOutputID(id);
1786  }
1787 
1788  workingMemHandle.ValidateBindingIds();
1789 
1790  auto resetMemHandle = [&]()
1791  {
1792  for (ImportedInputId id: preImportedInputs)
1793  {
1794  const LayerBindingId layerBindingId = m_PreImportedInputHandles[id].m_LayerBindingId;
1795 
1796  auto inputHandle = workingMemHandle.GetInputHandle(layerBindingId);
1797  auto inputConnections = workingMemHandle.GetInputConnections(layerBindingId);
1798  for (auto it : inputConnections)
1799  {
1800  *it = inputHandle;
1801  }
1802  }
1803 
1804  for (ImportedOutputId id: preImportedOutputs)
1805  {
1806  const LayerBindingId layerBindingId = m_PreImportedOutputHandles[id].m_LayerBindingId;
1807 
1808  auto outputHandle = workingMemHandle.GetOutputHandle(layerBindingId);
1809  auto outputConnections = workingMemHandle.GetOutputConnection(layerBindingId);
1810 
1811  for (auto it : outputConnections)
1812  {
1813  *it = outputHandle;
1814  }
1815  }
1816  };
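 // Editorial comment: resetMemHandle rewires every connection that is pointed at
 // a pre-imported handle further down back to the WorkingMemHandle-owned handle,
 // so the handle stays reusable by later calls even if a workload throws.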
1817 
1818  std::unique_ptr<TimelineUtilityMethods> timelineUtils =
1819  TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
1820  ProfilingGuid inferenceGuid = m_ProfilingService->GetNextGuid();
1821  if (timelineUtils)
1822  {
1823  // Add inference timeline trace if profiling is enabled.
1824  ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
1825  timelineUtils->CreateTypedEntity(inferenceGuid, LabelsAndEventClasses::INFERENCE_GUID);
1826  timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
1827  networkGuid,
1828  inferenceGuid,
1829  LabelsAndEventClasses::EXECUTION_OF_GUID);
1830  timelineUtils->RecordEvent(inferenceGuid, LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
1831  }
1832 
1833  bool executionSucceeded = true;
1834 
1835  if (timelineUtils)
1836  {
1837  // Add end of life of the inference timeline if profiling is enabled.
1838  timelineUtils->RecordEvent(inferenceGuid, LabelsAndEventClasses::ARMNN_PROFILING_EOL_EVENT_CLASS);
1839  timelineUtils->Commit();
1840  }
1841 
1842  if (!workingMemHandle.IsAllocated())
1843  {
1844  workingMemHandle.Allocate();
1845  }
1846 
1847  {
1848  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "PrepareInputs");
1849  for (auto pair : inputTensors)
1850  {
1851  EnqueueInput(pair.second, workingMemHandle.GetInputHandle(pair.first));
1852  }
1853 
1854  // Swap in the pre-imported inputs if any
1855  for (ImportedInputId id : preImportedInputs)
1856  {
1857  const ImportedTensorHandlePin& importedInputPin = m_PreImportedInputHandles[id];
1858  const LayerBindingId layerBindingId = m_PreImportedInputHandles[id].m_LayerBindingId;
1859  const auto& preimportedHandle = importedInputPin.m_TensorHandle;
1860 
1861  auto inputConnections = workingMemHandle.GetInputConnections(layerBindingId);
1862  for (auto it : inputConnections)
1863  {
1864  *it = preimportedHandle.get();
1865  }
1866  }
1867  }
1868  {
1869  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "PrepareOutputs");
1870  if (m_NetworkProperties.m_ExportEnabled)
1871  {
1872  for (auto pair: outputTensors)
1873  {
1874  ImportOutputTensor(pair.second, workingMemHandle.GetOutputHandle(pair.first));
1875  }
1876  }
1877 
1878  for (ImportedOutputId id : preImportedOutputs)
1879  {
1880  const ImportedTensorHandlePin& importedOutputPin = m_PreImportedOutputHandles[id];
1881  const LayerBindingId layerBindingId = m_PreImportedOutputHandles[id].m_LayerBindingId;
1882  const auto& preimportedHandle = importedOutputPin.m_TensorHandle;
1883 
1884  auto outputConnections = workingMemHandle.GetOutputConnection(layerBindingId);
1885  for (auto it : outputConnections)
1886  {
1887  *it = preimportedHandle.get();
1888  }
1889  }
1890  }
1891 
1892  auto Fail = [&](const std::exception& error)
1893  {
1894  ARMNN_LOG(error) << "An error occurred attempting to execute a workload: " << error.what();
1895  executionSucceeded = false;
1896  };
1897  ProfilingDynamicGuid workloadInferenceID(0);
1898 
1899  try
1900  {
1901  for (unsigned int i = 0; i < m_WorkloadQueue.size(); ++i)
1902  {
1903  auto& workload = m_WorkloadQueue[i];
1904  if (timelineUtils)
1905  {
1906  workloadInferenceID = timelineUtils->RecordWorkloadInferenceAndStartOfLifeEvent(workload->GetGuid(),
1907  inferenceGuid);
1908  }
1909 
1910  workload->ExecuteAsync(workingMemHandle.GetExecutionDataAt(i).second);
1911 
1912  if (timelineUtils)
1913  {
1914  timelineUtils->RecordEndOfLifeEvent(workloadInferenceID);
1915  }
1916  }
1917  }
1918  catch (const RuntimeException& error)
1919  {
1920  resetMemHandle();
1921  Fail(error);
1922  }
1923  catch (const std::runtime_error& error)
1924  {
1925  resetMemHandle();
1926  Fail(error);
1927  }
1928  catch (...)
1929  {
1930  resetMemHandle();
1931  throw;
1932  }
1933 
1934  if (!m_NetworkProperties.m_ExportEnabled)
1935  {
1936  for (auto pair: outputTensors)
1937  {
1938  CopyToOutputTensor(pair.second, workingMemHandle.GetOutputHandle(pair.first));
1939  }
1940  }
1941  else
1942  {
1943  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "SyncMemGeneric_Execute");
1944  workingMemHandle.MemSyncOutputs();
1945  }
1946 
1947  resetMemHandle();
1948 
1949  return executionSucceeded ? Status::Success : Status::Failure;
1950 }
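// Illustrative usage sketch (editorial addition, not part of the original source):
// the minimal thread-safe invocation of Execute, assuming the network was loaded
// with async enabled and the tensors match the network's bindings.
static Status SketchExecuteOnce(IRuntime& runtime,
                                NetworkId networkId,
                                const InputTensors& inputTensors,
                                const OutputTensors& outputTensors)
{
    std::unique_ptr<IWorkingMemHandle> memHandle = runtime.CreateWorkingMemHandle(networkId);
    return runtime.Execute(*memHandle, inputTensors, outputTensors);
}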
1951 
1952 /// Create a new unique WorkingMemHandle object. Create multiple handles if you wish to have
1953 /// overlapped execution by calling this function from different threads.
1954 std::unique_ptr<IWorkingMemHandle> LoadedNetwork::CreateWorkingMemHandle(NetworkId networkId)
1955 {
1956  Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
1957 
1958  // Tensors that will need to be allocated internally within armnn
1959  std::vector<std::unique_ptr<ITensorHandle>> managedTensorHandles;
1960  // Tensors that will be allocated externally by the user
1961  std::vector<std::unique_ptr<ITensorHandle>> unmanagedTensorHandles;
1962 
1963  std::vector<WorkingMemDescriptor> workingMemDescriptors;
1964  std::vector<std::pair<BackendId, ExecutionData>> executionDataVec;
1965 
1966  auto GetTensorHandle = [&](Layer* layer, const OutputSlot& outputSlot)
1967  {
1968  ITensorHandleFactory::FactoryId factoryId = outputSlot.GetTensorHandleFactoryId();
1969  const TensorInfo& tensorInfo = outputSlot.GetTensorInfo();
1970 
1971  if (factoryId == ITensorHandleFactory::LegacyFactoryId)
1972  {
1973  BackendId id = layer->GetBackendId();
1974  ARMNN_NO_DEPRECATE_WARN_BEGIN
1975  return m_WorkloadFactories.at(id)->CreateTensorHandle(tensorInfo, false);
1976  ARMNN_NO_DEPRECATE_WARN_END
1977  }
1978  else
1979  {
1980  ITensorHandleFactory* handleFactory = m_TensorHandleFactoryRegistry.GetFactory(factoryId);
1981  ARMNN_ASSERT(handleFactory);
1982  return handleFactory->CreateTensorHandle(tensorInfo, false);
1983  }
1984  };
1985 
1986  struct HandleInfo
1987  {
1988  ITensorHandle* m_TensorHandle;
1989 
1990  bool m_IsInputLayerHandle = false;
1991  bool m_IsOutputLayerHandle = false;
1992 
1993  WorkingMemHandle::InputMemDescriptorCoords m_InputMemDescriptorCoords;
1994  WorkingMemHandle::OutputMemDescriptorCoords m_OutputMemDescriptorCoords;
1995  };
1996 
1997  std::unordered_map<const OutputSlot*, HandleInfo> outputToHandleInfoMap;
1998 
1999  unsigned int layerIndex = 0;
2000  for (auto&& layer : order)
2001  {
2002  // Constant layer execution and management is handled during loaded network construction
2003  if (layer->GetType() == LayerType::Constant)
2004  {
2005  continue;
2006  }
2007 
2008  WorkingMemDescriptor workingMemDescriptor;
2009 
2010  bool isMemoryManaged = true;
2011  bool isInputLayer = false;
2012  bool isOutputLayer = false;
2013  bool isConnectedToOutputLayer = false;
2014 
2015  if (layer->GetType() == LayerType::Input || layer->GetType() == LayerType::MemImport)
2016  {
2017  // Input layers/workloads will not be executed so the descriptor is not added to workingMemDescriptors
2018  // However we will still need to manage the tensorHandle
2019  isInputLayer = true;
2020  isMemoryManaged = !m_NetworkProperties.m_ImportEnabled;
2021  }
2022  else if (layer->GetType() == LayerType::Output)
2023  {
2024  isOutputLayer = true;
2025  }
2026 
2027  unsigned int slotIndex = 0;
2028  // Create a tensor handle for each output slot of a layer
2029  // Once we create it, we start managing its lifetime
2030  for (auto& slot : layer->GetOutputSlots())
2031  {
2032  for (unsigned int i = 0; i < slot.GetNumConnections(); ++i)
2033  {
2034  if ((slot.GetConnection(i)->GetOwningLayer().GetType() == LayerType::Output))
2035  {
2036  if (!isConnectedToOutputLayer)
2037  {
2038  isConnectedToOutputLayer = true;
2039  // If export is enabled, disable memory management so we can export; otherwise we do a copy
2040  isMemoryManaged = !m_NetworkProperties.m_ExportEnabled;
2041  }
2042  else
2043  {
2044  // Importing in this case would likely cause unexpected behaviour, so we disallow it.
2045  ARMNN_LOG(warning) <<
2046  fmt::format("Layer name: '{0}' guid: '{1}' has two or more OutputLayers connected to it. "
2047  "This will prevent importing on the connected OutputLayers.",
2048  layer->GetName(), layer->GetGuid());
2049  isMemoryManaged = true;
2050  }
2051  }
2052  }
2053 
2054  ITensorHandle* tensorHandle;
2055  if (isMemoryManaged)
2056  {
2057  managedTensorHandles.emplace_back(GetTensorHandle(layer, slot));
2058  tensorHandle = managedTensorHandles.back().get();
2059  }
2060  else
2061  {
2062  unmanagedTensorHandles.emplace_back(GetTensorHandle(layer, slot));
2063  tensorHandle = unmanagedTensorHandles.back().get();
2064  }
2065 
2066  workingMemDescriptor.m_Outputs.push_back(tensorHandle);
2067 
2068  HandleInfo& handleInfo = outputToHandleInfoMap[&slot];
2069  handleInfo.m_TensorHandle = tensorHandle;
2070 
2071  // Store the coordinates of the current layer's OutputSlot that is connected to the OutputLayer
2072  if (isConnectedToOutputLayer)
2073  {
2074  handleInfo.m_IsOutputLayerHandle = true;
2075  handleInfo.m_OutputMemDescriptorCoords.m_OutputSlotCoords = {layerIndex, slotIndex};
2076  }
2077  // Store the LayerBindingId of the InputLayer
2078  if (isInputLayer)
2079  {
2080  handleInfo.m_IsInputLayerHandle = true;
2081  LayerBindingId bindingId = static_cast<BindableLayer*>(layer)->GetBindingId();
2082  handleInfo.m_InputMemDescriptorCoords.m_LayerBindingId = bindingId;
2083  }
2084  slotIndex++;
2085  }
2086  // Loop through the input slots in the same layer and decrement the reference counter associated
2087  // with each tensor handle we encounter.
2088  // Once it reaches zero, the lifetime of the tensor handle has ended, and we mark its memory as available
2089  // so that the next tensor handle with a non-overlapping lifetime can share its memory.
2090  for (auto& slot : layer->GetInputSlots())
2091  {
2092  ARMNN_ASSERT(slot.GetConnection());
2093  auto outputSlot = slot.GetConnectedOutputSlot();
2094  auto key = outputSlot->GetOwningLayer().GetGuid();
2095 
2096  // Constant layer execution and management is handled during loaded network construction
2097  auto found = m_ConstantTensorHandles.find(key);
2098  if (found != m_ConstantTensorHandles.end())
2099  {
2100  ITensorHandle* tensorHandle = found->second;
2101  workingMemDescriptor.m_Inputs.push_back(tensorHandle);
2102 
2103  // Odd case where a constant layer is connected to an output layer
2104  // We will need to create a HandleInfo to track it
2105  if (isOutputLayer)
2106  {
2107  LayerBindingId bindingId = static_cast<BindableLayer*>(layer)->GetBindingId();
2108 
2109  HandleInfo& handleInfo = outputToHandleInfoMap[outputSlot];
2110  handleInfo.m_TensorHandle = tensorHandle;
2111  handleInfo.m_IsOutputLayerHandle = true;
2112  handleInfo.m_OutputMemDescriptorCoords.m_LayerBindingIds.push_back(bindingId);
2113  handleInfo.m_OutputMemDescriptorCoords.m_InputSlotCoords.push_back({layerIndex, 0});
2114  }
2115  continue;
2116  }
2117 
2118  HandleInfo& handleInfo = outputToHandleInfoMap.at(outputSlot);
2119 
2120  ITensorHandle* inputTensorHandle = handleInfo.m_TensorHandle;
2121  workingMemDescriptor.m_Inputs.push_back(inputTensorHandle);
2122 
2123  // Store the LayerBindingId of the OutputLayer
2124  if (isOutputLayer)
2125  {
2126  LayerBindingId bindingId = static_cast<BindableLayer*>(layer)->GetBindingId();
2127  handleInfo.m_OutputMemDescriptorCoords.m_LayerBindingIds.push_back(bindingId);
2128  handleInfo.m_OutputMemDescriptorCoords.m_InputSlotCoords.push_back({layerIndex, 0});
2129  }
2130  // In this case the layer is not an Output Layer but shares its input tensorhandle with an OutputLayer
2131  // It will need to be updated as well, if we swap out the tensorhandle
2132  else if (handleInfo.m_IsOutputLayerHandle)
2133  {
2134  handleInfo.m_OutputMemDescriptorCoords.m_InputSlotCoords.push_back({layerIndex, slot.GetSlotIndex()});
2135  }
2136 
2137  // Store the coordinates of the InputSlots connected to the InputLayer
2138  // There can be more than one InputSlot connected to an InputLayer, so we use a vector
2139  if (handleInfo.m_IsInputLayerHandle)
2140  {
2141  std::pair<LayerGuid, unsigned int> connectionLocation{layerIndex, slot.GetSlotIndex()};
2142  handleInfo.m_InputMemDescriptorCoords.m_InputSlotCoords.emplace_back(connectionLocation);
2143  }
2144  }
2145 
2146  // Input layers/workloads will not be executed, so the descriptor is not added to workingMemDescriptors
2147  // However we will still need to manage the tensorHandle
2148  if (!isInputLayer)
2149  {
2150  // Simply auto-initialise ExecutionData here, so it's added only for the layers that require execution.
2151  // The memory and data will be allocated/assigned for the void* in WorkingMemHandle::Allocate.
2152  std::pair<BackendId, ExecutionData> dataPair;
2153  dataPair.first = layer->GetBackendId();
2154 
2155  executionDataVec.push_back(dataPair);
2156  workingMemDescriptors.push_back(workingMemDescriptor);
2157 
2158  layerIndex++;
2159  }
2160  }
2161 
2162  std::vector<std::pair<std::shared_ptr<TensorMemory>, MemorySource>> tensorMemory;
2163 
2164  auto externalMemoryManager = CreateExternalMemoryManger(tensorMemory);
2165 
2166  // Sort m_TensorMemory, so its order matches the outputSlot order
2167  std::sort(tensorMemory.begin(), tensorMemory.end(),
2168  [](const std::pair<std::shared_ptr<TensorMemory>, MemorySource>& lhs,
2169  const std::pair<std::shared_ptr<TensorMemory>, MemorySource>& rhs)
2170  {
2171  return lhs.first->m_OutputSlotId < rhs.first->m_OutputSlotId;
2172  });
2173 
2174  std::vector<WorkingMemHandle::InputMemDescriptorCoords> inputConnectionsInfo;
2175  std::vector<WorkingMemHandle::OutputMemDescriptorCoords> outputConnectionsInfo;
2176 
2177  for (const auto& handleInfo: outputToHandleInfoMap)
2178  {
2179  if (handleInfo.second.m_IsOutputLayerHandle)
2180  {
2181  outputConnectionsInfo.emplace_back(handleInfo.second.m_OutputMemDescriptorCoords);
2182  }
2183 
2184  if (handleInfo.second.m_IsInputLayerHandle)
2185  {
2186  inputConnectionsInfo.emplace_back(handleInfo.second.m_InputMemDescriptorCoords);
2187  }
2188  }
2189 
2190  return std::make_unique<WorkingMemHandle>(networkId,
2191  inputConnectionsInfo,
2192  outputConnectionsInfo,
2193  workingMemDescriptors,
2194  std::move(externalMemoryManager),
2195  std::move(tensorMemory),
2196  std::move(managedTensorHandles),
2197  std::move(unmanagedTensorHandles),
2198  executionDataVec,
2199  &m_Backends);
2200 }
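// Illustrative sketch (editorial addition, not part of the original source):
// overlapped execution as described above, one WorkingMemHandle per thread; a
// single handle must never be used by two Execute calls concurrently.
// Assumes <thread> is available.
static void SketchOverlappedExecution(IRuntime& runtime, NetworkId networkId,
                                      const InputTensors& in0, const OutputTensors& out0,
                                      const InputTensors& in1, const OutputTensors& out1)
{
    auto handle0 = runtime.CreateWorkingMemHandle(networkId);
    auto handle1 = runtime.CreateWorkingMemHandle(networkId);

    std::thread t0([&] { runtime.Execute(*handle0, in0, out0); });
    std::thread t1([&] { runtime.Execute(*handle1, in1, out1); });
    t0.join();
    t1.join();
}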
2201 
2202 void LoadedNetwork::RegisterDebugCallback(const DebugCallbackFunction& func)
2203 {
2204  for (auto&& workloadPtr: m_WorkloadQueue)
2205  {
2206  workloadPtr.get()->RegisterDebugCallback(func);
2207  }
2208 }
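// Illustrative sketch (editorial addition, not part of the original source):
// a DebugCallbackFunction that logs each intermediate tensor surfaced by Debug
// layers; it only fires when the network was optimized with debug enabled.
static void SketchAttachDebugLogging(IRuntime& runtime, NetworkId networkId)
{
    runtime.RegisterDebugCallback(networkId,
        [](LayerGuid guid, unsigned int slotIndex, ITensorHandle* tensorHandle)
        {
            ARMNN_LOG(info) << "Debug output: layer guid " << guid
                            << " slot " << slotIndex
                            << " dims " << tensorHandle->GetShape().GetNumDimensions();
        });
}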
2209 
2210 
2211 void LoadedNetwork::CreateMemoryProfileAsync()
2212 {
2213  struct PartialBlock
2214  {
2215  unsigned int m_StartOfLife;
2216  unsigned int m_Lifetime;
2217 
2218  size_t m_MemSize;
2219  unsigned int m_Index;
2220 
2221  BackendId m_BackendId;
2222  };
2223 
2224  auto align = [](size_t numToAlign)
2225  {
2226  const size_t alignment = sizeof(float);
2227  return ((numToAlign + alignment - 1) / alignment) * alignment;
2228  };
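 // e.g. with alignment == sizeof(float) == 4: align(10) == 12 and align(12) == 12,
 // i.e. sizes are rounded up to the next multiple of four bytes.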
2229 
2230  std::unordered_map<const OutputSlot*, PartialBlock> memBlockTrackerMap;
2231 
2232  const bool inputImportingEnabled = m_NetworkProperties.m_InputSource != MemorySource::Undefined;
2233  const bool outputImportingEnabled = m_NetworkProperties.m_OutputSource != MemorySource::Undefined;
2234 
2235  unsigned int timestep = 0;
2236  unsigned int outputIndex = 0;
2237  Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
2238 
2239  for (auto&& layer : order)
2240  {
2241  const LayerType& layerType = layer->GetType();
2242  // Don't manage memory if importing.
2243  if (layerType == LayerType::Input && inputImportingEnabled)
2244  {
2245  continue;
2246  }
2247  // Don't manage memory if importing.
2248  if (layerType == LayerType::Output && outputImportingEnabled
2249  && layer->GetInputSlot(0).GetConnectedOutputSlot()->GetNumConnections() == 1)
2250  {
2251  continue;
2252  }
2253  // Because Constant Layer memory cannot be shared, the memory must persist for the lifetime of execution,
2254  // so its management is done separately.
2255  if (layerType == LayerType::Constant)
2256  {
2257  continue;
2258  }
2259 
2260  BackendId backendId = layer->GetBackendId();
2261  for (auto& outputSlot : layer->GetOutputSlots())
2262  {
2263  if (!m_SupportsExternallyManagedMemory[backendId])
2264  {
2265  continue;
2266  }
2267 
2268  PartialBlock partialBlock;
2269 
2270  partialBlock.m_StartOfLife = timestep;
2271 
2272  size_t alignedSize = align(outputSlot.GetOutputHandler().GetTensorInfo().GetNumBytes());
2273  partialBlock.m_MemSize = alignedSize;
2274  partialBlock.m_Index = outputIndex++;
2275  partialBlock.m_Lifetime = outputSlot.GetNumConnections();
2276  partialBlock.m_BackendId = backendId;
2277 
2278  if (partialBlock.m_Lifetime == 0)
2279  {
2280  m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
2281  partialBlock.m_StartOfLife,
2282  partialBlock.m_MemSize,
2283  0,
2284  partialBlock.m_Index);
2285  }
2286  else
2287  {
2288  memBlockTrackerMap[&outputSlot] = partialBlock;
2289  }
2290  }
2291 
2292  for (auto& inputSlot : layer->GetInputSlots())
2293  {
2294  const Layer& connectedInputLayer = inputSlot.GetConnectedOutputSlot()->GetOwningLayer();
2295  const LayerType& owningLayerType = connectedInputLayer.GetType();
2296 
2297  if (owningLayerType == LayerType::Constant)
2298  {
2299  continue;
2300  }
2301  if (inputImportingEnabled && owningLayerType == LayerType::Input)
2302  {
2303  continue;
2304  }
2305 
2306  auto outputSlot = inputSlot.GetConnectedOutputSlot();
2307 
2308  PartialBlock& partialBlock = memBlockTrackerMap.at(outputSlot);
2309 
2310  auto& lifetime = partialBlock.m_Lifetime;
2311  --lifetime;
2312 
2313  if (lifetime == 0)
2314  {
2315  m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
2316  timestep,
2317  partialBlock.m_MemSize,
2318  0,
2319  partialBlock.m_Index);
2320  }
2321  }
2322  ++timestep;
2323  }
2324 }
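// Editorial comment: CreateMemoryProfileAsync tracks lifetimes per OutputSlot (the
// async path creates one tensor handle per slot), whereas CreateMemoryProfile below
// tracks them per root ITensorHandle, tracing sub-tensor handles to their ancestor,
// because handles there may be shared. Both record (startOfLife, endOfLife, size,
// offset, index) blocks into m_MemBlockMap for the memory optimizer to bin.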
2325 
2326 void LoadedNetwork::CreateMemoryProfile()
2327 {
2328  // Finds the first TensorHandle ancestor of a SubTensorHandle. If the ITensorHandle provided
2329  // is a TensorHandle, the function just returns it
2330  auto TraceSubTensorHandleAncestry = [](ITensorHandle* const subTensorHandle)
2331  {
2332  ITensorHandle* ancestor = subTensorHandle;
2333  while (ancestor && ancestor->GetParent())
2334  {
2335  ancestor = ancestor->GetParent();
2336  }
2337  return ancestor;
2338  };
2339 
2340  struct PartialBlock
2341  {
2342  unsigned int m_StartOfLife;
2343  unsigned int m_Lifetime;
2344 
2345  size_t m_MemSize;
2346  unsigned int m_Index;
2347 
2348  BackendId m_BackendId;
2349  };
2350 
2351  auto align = [](size_t numToAlign)
2352  {
2353  const size_t alignment = sizeof(float);
2354  return ((numToAlign + alignment - 1) / alignment) * alignment;
2355  };
2356 
2357  std::unordered_map<ITensorHandle*, PartialBlock> memBlockTrackerMap;
2358 
2359  const bool inputImportingEnabled = m_NetworkProperties.m_InputSource != MemorySource::Undefined;
2360  const bool outputImportingEnabled = m_NetworkProperties.m_OutputSource != MemorySource::Undefined;
2361 
2362  unsigned int timestep = 0;
2363  unsigned int outputIndex = 0;
2364  Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
2365 
2366  for (auto&& layer : order)
2367  {
2368  const LayerType& layerType = layer->GetType();
2369  // Don't manage memory if importing.
2370  if (layerType == LayerType::Input && inputImportingEnabled)
2371  {
2372  continue;
2373  }
2374  // Don't manage memory if importing.
2375  if (layerType == LayerType::Output && outputImportingEnabled
2376  && layer->GetInputSlot(0).GetConnectedOutputSlot()->GetNumConnections() == 1)
2377  {
2378  continue;
2379  }
2380  // Because Constant Layer memory cannot be shared, the memory must persist for the lifetime of execution,
2381  // so its management is done separately.
2382  if (layerType == LayerType::Constant)
2383  {
2384  continue;
2385  }
2386 
2387  BackendId backendId = layer->GetBackendId();
2388  for (auto& outputSlot : layer->GetOutputSlots())
2389  {
2390  if (!m_SupportsExternallyManagedMemory[backendId])
2391  {
2392  continue;
2393  }
2394 
2395  ITensorHandle* tensorHandle = outputSlot.GetOutputHandler().GetData();
2396  tensorHandle = TraceSubTensorHandleAncestry(tensorHandle);
2397 
2398  if (memBlockTrackerMap.find(tensorHandle) == memBlockTrackerMap.end())
2399  {
2400  PartialBlock partialBlock;
2401 
2402  partialBlock.m_StartOfLife = timestep;
2403 
2404  size_t alignedSize = align(outputSlot.GetOutputHandler().GetTensorInfo().GetNumBytes());
2405  partialBlock.m_MemSize = alignedSize;
2406  partialBlock.m_Index = outputIndex++;
2407  partialBlock.m_Lifetime = outputSlot.GetNumConnections();
2408  partialBlock.m_BackendId = backendId;
2409 
2410  if (partialBlock.m_Lifetime == 0)
2411  {
2412  m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
2413  partialBlock.m_StartOfLife,
2414  partialBlock.m_MemSize,
2415  0,
2416  partialBlock.m_Index);
2417  }
2418  else
2419  {
2420  memBlockTrackerMap[tensorHandle] = partialBlock;
2421  }
2422  m_Tensorhandles.push_back(tensorHandle);
2423 
2424  }
2425  else
2426  {
2427  memBlockTrackerMap.at(tensorHandle).m_Lifetime += outputSlot.GetNumConnections();
2428  }
2429  }
2430 
2431  for (auto& inputSlot : layer->GetInputSlots())
2432  {
2433  const Layer& connectedInputLayer = inputSlot.GetConnectedOutputSlot()->GetOwningLayer();
2434  const LayerType& owningLayerType = connectedInputLayer.GetType();
2435 
2436  if (owningLayerType == LayerType::Constant)
2437  {
2438  continue;
2439  }
2440  if (inputImportingEnabled && owningLayerType == LayerType::Input)
2441  {
2442  continue;
2443  }
2444  if (!m_SupportsExternallyManagedMemory[connectedInputLayer.GetBackendId()])
2445  {
2446  continue;
2447  }
2448 
2449  auto outputSlot = inputSlot.GetConnectedOutputSlot();
2450 
2451  ITensorHandle* tensorHandle = outputSlot->GetOutputHandler().GetData();
2452  tensorHandle = TraceSubTensorHandleAncestry(tensorHandle);
2453 
2454  PartialBlock& partialBlock = memBlockTrackerMap.at(tensorHandle);
2455 
2456  auto& lifetime = partialBlock.m_Lifetime;
2457  --lifetime;
2458 
2459  if (lifetime == 0)
2460  {
2461  m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
2462  timestep,
2463  partialBlock.m_MemSize,
2464  0,
2465  partialBlock.m_Index);
2466  }
2467  }
2468  ++timestep;
2469  }
2470 
2471 }
2472 
2473 std::unique_ptr<MemoryManager> LoadedNetwork::CreateExternalMemoryManger(
2474  std::vector<std::pair<std::shared_ptr<TensorMemory>, MemorySource>>& tensorMemoryVec)
2475 {
2476  std::unique_ptr<MemoryManager> memoryManager = std::make_unique<MemoryManager>();
2477  auto allocatorMap = BackendRegistryInstance().GetAllocators();
2478 
2479  for (auto& backend : m_MemBinMap)
2480  {
2481  std::vector<BufferStorage> bufferStorageVec;
2482 
2483  std::shared_ptr<ICustomAllocator> backendAllocator;
2484  if (allocatorMap.find(backend.first) != allocatorMap.end())
2485  {
2486  backendAllocator = allocatorMap[backend.first];
2487  }
2488  else
2489  {
2490  backendAllocator = m_Backends[backend.first]->GetDefaultAllocator();
2491  }
2492 
2493  for (auto& memBin : backend.second)
2494  {
2495  BufferStorage bufferStorage;
2496  bufferStorage.m_BufferSize = memBin.m_MemSize;
2497  bufferStorage.m_TensorMemoryVector.reserve(memBin.m_MemBlocks.size());
2498 
2499  for (auto& memBlock : memBin.m_MemBlocks)
2500  {
2501  auto tensorMemory = std::make_shared<TensorMemory>(TensorMemory{memBlock.m_Offset, memBlock.m_Index});
2502 
2503  tensorMemoryVec.emplace_back(tensorMemory, backendAllocator->GetMemorySourceType());
2504  bufferStorage.m_TensorMemoryVector.emplace_back(tensorMemory);
2505  }
2506 
2507  bufferStorageVec.emplace_back(std::move(bufferStorage));
2508  }
2509 
2510  memoryManager->StoreMemToAllocate(bufferStorageVec, backendAllocator, 4);
2511  }
2512 
2513  return memoryManager;
2514 }
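// Editorial comment: each MemBin above becomes one BufferStorage, a single
// allocation of m_BufferSize bytes whose TensorMemory entries hold the offset and
// outputSlot index that each tensor handle later binds to; the trailing 4 passed
// to StoreMemToAllocate appears to be the byte alignment, matching the
// sizeof(float) alignment used when the memory profile was built.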
2515 
2516 LayerBindingId LoadedNetwork::ValidateImportedInputID(ImportedInputId id)
2517 {
2518  try
2519  {
2520  const auto& importedTensorHandlePin = m_PreImportedInputHandles.at(id);
2521  if (!importedTensorHandlePin.m_TensorHandle)
2522  {
2523  throw InvalidArgumentException(fmt::format("LoadedNetwork::Execute: "
2524  "PreImportedInput: {} has been deleted", id));
2525  }
2526  return importedTensorHandlePin.m_LayerBindingId;
2527  }
2528  catch (const std::out_of_range&)
2529  {
2530  throw InvalidArgumentException(fmt::format("LoadedNetwork::Execute: Unknown ImportedInputId: {}", id));
2531  }
2532 }
2533 
2534 LayerBindingId LoadedNetwork::ValidateImportedOutputID(ImportedOutputId id)
2535 {
2536  try
2537  {
2538  const auto& importedTensorHandlePin = m_PreImportedOutputHandles.at(id);
2539  if (!importedTensorHandlePin.m_TensorHandle)
2540  {
2541  throw InvalidArgumentException(fmt::format("LoadedNetwork::Execute: "
2542  "PreImportedOutput: {} has been deleted", id));
2543  }
2544  return importedTensorHandlePin.m_LayerBindingId;
2545  }
2546  catch (const std::out_of_range&)
2547  {
2548  throw InvalidArgumentException(fmt::format("LoadedNetwork::Execute: Unknown ImportedOutputId: {}", id));
2549  }
2550 }
2551 
2552 }