// NOTE(review): this chunk is an elided extraction -- interior source lines are
// missing (e.g. the declaration of the stream `ss` and the return statement),
// so only comments are added here; the code text itself is left untouched.
24 #include <fmt/format.h> 35 template <
typename ExceptionType>
// Builds a human-readable error string of the form "<prefix> <what()>".
// `prefix` is the caller-supplied context message; `error` is any exception-like
// object exposing what(). Presumably the elided lines declare a
// std::stringstream `ss` and return ss.str() -- TODO confirm against full source.
36 std::string ToErrorMessage(
const char * prefix,
const ExceptionType &
error)
39 ss << prefix <<
" " << error.what();
// Records a single graph layer in the profiling timeline (elided fragment --
// several interior lines, including the closing of the function, are missing
// from this view).
// - Registers the layer as a named, typed child entity of the network
//   (LAYER_GUID type), substituting "<Unnamed>" when the layer has no name.
// - For every input slot, records a retention-link connection relationship
//   back to the owning layer of the connected output slot.
43 void AddLayerStructure(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
45 ProfilingGuid networkGuid)
// Fall back to a placeholder so every timeline entity carries a label.
48 std::string layerName = layer.GetNameStr().empty() ?
"<Unnamed>" : layer.GetNameStr();
49 timelineUtils->CreateNamedTypedChildEntity(layer.GetGuid(),
52 LabelsAndEventClasses::LAYER_GUID);
// Walk the layer's inputs to capture graph connectivity in the timeline.
53 for (
auto&& input : layer.GetInputSlots())
55 const IOutputSlot* source = input.GetConnectedOutputSlot();
57 timelineUtils->CreateConnectionRelationship(ProfilingRelationshipType::RetentionLink,
58 source->GetOwningLayerGuid(),
// Records a workload in the profiling timeline and ties it to the layer that
// produced it (elided fragment -- interior lines are missing from this view).
// - Creates a typed entity for the workload (WORKLOAD_GUID).
// - Labels it with the backend id of the owning layer (BACKENDID_GUID).
// - Adds a retention-link child relationship; presumably layer -> workload --
//   the relationship's endpoint arguments are on elided lines, TODO confirm.
63 void AddWorkloadStructure(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
64 std::unique_ptr<IWorkload>& workload,
68 timelineUtils->CreateTypedEntity(workload->GetGuid(), LabelsAndEventClasses::WORKLOAD_GUID);
69 timelineUtils->MarkEntityWithLabel(workload->GetGuid(),
70 layer.GetBackendId().Get(),
71 LabelsAndEventClasses::BACKENDID_GUID);
74 timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
77 LabelsAndEventClasses::CHILD_GUID);
83 std::string& errorMessage,
87 std::unique_ptr<LoadedNetwork> loadedNetwork;
89 auto Fail = [&](
const std::exception&
error) -> std::unique_ptr<LoadedNetwork>
91 errorMessage = ToErrorMessage(
"An error occurred when preparing the network workloads: ", error);
94 return std::unique_ptr<LoadedNetwork>();
99 loadedNetwork.reset(
new LoadedNetwork(std::move(net), networkProperties, profilingService));
109 catch (
const std::runtime_error& error)
114 return loadedNetwork;
117 LoadedNetwork::LoadedNetwork(std::unique_ptr<IOptimizedNetwork> net,
120 m_OptimizedNetwork(std::move(net)),
121 m_NetworkProperties(networkProperties),
122 m_TensorHandleFactoryRegistry(),
123 m_ProfilingService(profilingService)
127 const std::shared_ptr<IProfiler>& profiler = m_OptimizedNetwork->GetProfiler();
139 bool useExternalMemoryManager =
false;
140 bool useInternalMemoryManager =
false;
145 m_IsInputImported = std::vector<bool>(order.
GetNumInputs(),
false);
146 m_IsOutputImported = std::vector<bool>(order.
GetNumOutputs(),
false);
149 for (
auto&& layer : order)
151 auto const& backendId = layer->GetBackendId();
152 if (m_Backends.count(backendId) == 0)
155 auto it = m_Backends.emplace(std::make_pair(backendId, createBackend()));
162 std::string er = backend->
GetId();
163 er +=
" does not support AsyncExecution";
171 std::string er = backend->
GetId();
172 er +=
" does not support ExternallyManagedMemory\n";
173 er +=
"AsyncEnabled networks require all backends to support ExternallyManagedMemory";
178 && (m_NetworkProperties.m_ExternalMemoryManagementEnabled || m_NetworkProperties.m_AsyncEnabled))
180 m_SupportsExternallyManagedMemory[backend->
GetId()] =
true;
181 useExternalMemoryManager =
true;
185 m_SupportsExternallyManagedMemory[backend->
GetId()] =
false;
186 useInternalMemoryManager =
true;
193 m_TensorHandleFactoryRegistry,
194 m_OptimizedNetwork->pOptimizedNetworkImpl->GetModelOptions(),
196 static_cast<MemorySourceFlags>(m_NetworkProperties.m_OutputSource));
202 m_BackendMemoryMangers.back(), m_OptimizedNetwork->pOptimizedNetworkImpl->GetModelOptions());
204 m_WorkloadFactories[backendId ] = std::move(workloadFactory);
210 for (
auto&& layer : order)
212 auto& workloadFactory = GetWorkloadFactory(*layer);
213 bool supportsExternalManager = m_SupportsExternallyManagedMemory[layer->GetBackendId()];
215 switch (layer->GetType())
222 layer->CreateTensorHandles(m_TensorHandleFactoryRegistry,
224 !supportsExternalManager && !m_NetworkProperties.m_ImportEnabled);
229 layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory,
true);
236 if ((layer->GetNumOutputSlots() == 1) &&
237 (layer->GetOutputSlots()[0].GetNumConnections() == 1) &&
238 (layer->GetOutputSlots()[0].GetConnection(0)->GetOwningLayer().GetType() ==
LayerType::Output))
240 layer->CreateTensorHandles(m_TensorHandleFactoryRegistry,
242 !supportsExternalManager && !m_NetworkProperties.m_ExportEnabled);
246 layer->CreateTensorHandles(m_TensorHandleFactoryRegistry,
248 !supportsExternalManager);
255 ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
256 std::unique_ptr<TimelineUtilityMethods> timelineUtils =
260 timelineUtils->CreateTypedEntity(networkGuid, LabelsAndEventClasses::NETWORK_GUID);
262 timelineUtils->RecordEvent(networkGuid, LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
265 std::stringstream ss;
267 timelineUtils->MarkEntityWithLabel(networkGuid, ss.str(), LabelsAndEventClasses::PROCESS_ID_GUID);
273 for (
auto&& layer: order)
278 AddLayerStructure(timelineUtils, *layer, networkGuid);
283 switch (layer->GetType())
293 auto workload = layer->CreateWorkload(workloadFactory);
297 const char*
const layerName =
298 layer->GetNameStr().length() != 0 ? layer->GetName() :
"<Unnamed>";
300 fmt::format(
"No workload created for layer (name: '{0}' type: '{1}') (compute '{2}')",
301 layerName, static_cast<int>(layer->GetType()), layer->GetBackendId().Get()
308 AddWorkloadStructure(timelineUtils, workload, *layer);
313 if((networkProperties.
m_AsyncEnabled || useExternalMemoryManager) &&
316 m_ConstantTensorHandles[layer->GetGuid()] =
317 layer->GetOutputSlot(0).GetOutputHandler().GetData();
318 m_ConstantWorkloads[layer->GetGuid()] = std::move(workload);
322 m_WorkloadQueue.push_back(std::move(workload));
326 layer->ReleaseConstantData();
334 if (!networkProperties.
m_AsyncEnabled && m_WorkloadQueue.size() != 0)
342 const auto bindingId = layer->GetBindingId();
344 bool supportsReplacement =
true;
346 for (
const auto inputSlot: layer->GetOutputSlot(0).GetConnections())
348 auto workloadIndex = std::distance(order.begin(), order.GetPosInGraph(inputSlot->GetOwningLayer()));
349 workloadIndex -= noOfInputs;
351 m_InputWorkloadSlotPairs[bindingId].emplace_back(WorkloadIndices{
354 auto workload = m_WorkloadQueue[m_InputWorkloadSlotPairs[bindingId].back().m_WorkloadIndex].get();
355 supportsReplacement &= workload->SupportsTensorHandleReplacement();
361 m_TensorHandleFactoryRegistry.GetMatchingImportFactoryId(factoryId);
363 ITensorHandleFactory *importFactory = m_TensorHandleFactoryRegistry.GetFactory(importFactoryId);
365 if (supportsReplacement && importFactory)
367 m_PreImportedInputHandles.emplace_back(
368 bindingId, importFactory->
CreateTensorHandle(layer->GetOutputSlot(0).GetTensorInfo(),
false));
372 m_PreImportedInputHandles.emplace_back(bindingId,
nullptr);
380 const auto bindingId = layer->GetBindingId();
382 const auto outputSlot = layer->GetInputSlot(0).GetConnectedOutputSlot();
383 auto& indices = m_OutputWorkloadSlotPairs[bindingId];
385 auto workloadIndex = std::distance(order.begin(), order.GetPosInGraph(outputSlot->GetOwningLayer()));
386 workloadIndex -= noOfInputs;
388 indices.m_OutputSlotIndices = WorkloadIndices{
numeric_cast<
unsigned int>(workloadIndex),
389 outputSlot->CalculateIndexOnOwner()};
391 bool supportsReplacement =
true;
392 auto outputWorkload = m_WorkloadQueue[indices.m_OutputSlotIndices.m_WorkloadIndex].get();
393 supportsReplacement &= outputWorkload->SupportsTensorHandleReplacement();
395 for (
auto &inputSlot: outputSlot->GetConnections())
399 auto inWorkloadIndex = std::distance(order.begin(),
400 order.GetPosInGraph(inputSlot->GetOwningLayer()));
401 inWorkloadIndex -= noOfInputs;
402 indices.m_InputSlotIndices.emplace_back(WorkloadIndices{
numeric_cast<
unsigned int>(inWorkloadIndex),
403 inputSlot->GetSlotIndex()});
404 auto inputWorkload = m_WorkloadQueue[indices.m_InputSlotIndices.back().m_WorkloadIndex].get();
405 supportsReplacement &= inputWorkload->SupportsTensorHandleReplacement();
412 m_TensorHandleFactoryRegistry.GetMatchingImportFactoryId(factoryId);
413 ITensorHandleFactory *importFactory = m_TensorHandleFactoryRegistry.GetFactory(importFactoryId);
415 if (supportsReplacement && importFactory)
417 m_PreImportedOutputHandles.emplace_back(
422 m_PreImportedOutputHandles.emplace_back(bindingId,
nullptr);
427 for (
auto&& workloadFactory : m_WorkloadFactories)
429 workloadFactory.second->AfterWorkloadsCreated();
435 timelineUtils->Commit();
438 if (useExternalMemoryManager)
442 CreateMemoryProfileAsync();
446 CreateMemoryProfile();
450 for (
auto& backendMemoryProfile : m_MemBlockMap)
452 const BackendId& backendId = backendMemoryProfile.first;
453 if (backendStrategyMap.find(backendId) != backendStrategyMap.end())
455 m_MemBinMap[backendId] = backendStrategyMap[backendId]->Optimize(backendMemoryProfile.second);
459 m_MemBinMap[backendId] = m_ConstantStrategy->Optimize(backendMemoryProfile.second);
465 m_ExternalMemoryManager = CreateExternalMemoryManger(m_TensorMemory);
468 std::sort(m_TensorMemory.begin(), m_TensorMemory.end(),
469 [](
const std::pair<std::shared_ptr<TensorMemory>,
MemorySource>& lhs,
470 const std::pair<std::shared_ptr<TensorMemory>,
MemorySource>& rhs)
472 return lhs.first->m_OutputSlotId < rhs.first->m_OutputSlotId;
481 if (useInternalMemoryManager)
484 m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().AllocateDynamicBuffers();
487 for (
auto &workload : m_WorkloadQueue)
489 workload->PostAllocationConfigure();
493 if (useExternalMemoryManager)
497 AllocateAndExecuteConstantWorkloads();
501 AllocateAndExecuteConstantWorkloadsAsync();
// Allocates the output tensor handle of every constant-value workload and then
// executes the workload once, baking the constant data into its tensor.
// (Elided fragment: braces/closing lines are missing from this view.)
506 void LoadedNetwork::AllocateAndExecuteConstantWorkloads()
509 for (
auto& pair : m_ConstantWorkloads)
// pair.first is the layer GUID; look up the handle captured at load time.
511 auto tensorHandle = m_ConstantTensorHandles[pair.first];
512 tensorHandle->Allocate();
513 pair.second->Execute();
// Async-path counterpart of AllocateAndExecuteConstantWorkloads: walks the
// optimized graph, (re)creates tensor handles for the relevant layers
// (presumably only Constant layers -- the filtering condition is on elided
// lines, TODO confirm), caches each handle by layer GUID, and executes the
// constant workload via ExecuteAsync with a working-memory descriptor.
// (Elided fragment: braces and several interior lines are missing.)
519 void LoadedNetwork::AllocateAndExecuteConstantWorkloadsAsync()
522 Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
523 for (
auto&& layer : order)
527 const auto& outSlot = layer->GetOutputSlots()[0];
528 const auto factoryId = outSlot.GetTensorHandleFactoryId();
530 auto& workloadFactory = GetWorkloadFactory(*layer);
532 layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory);
533 ITensorHandle* tensorHandle = outSlot.GetOutputHandler().GetData();
// Cache so later passes can look the constant's handle up by layer GUID.
535 m_ConstantTensorHandles[layer->GetGuid()] = tensorHandle;
539 memDesc.
m_Outputs.push_back(tensorHandle);
540 m_ConstantWorkloads[layer->GetGuid()]->ExecuteAsync(memDesc);
549 ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
551 std::unique_ptr<TimelineUtilityMethods> timelineUtils =
554 timelineUtils->CreateTypedEntity(networkGuid, LabelsAndEventClasses::NETWORK_GUID);
556 for (
auto&& layer : order)
559 AddLayerStructure(timelineUtils, *layer, networkGuid);
560 switch (layer->GetType())
570 for (
auto& workload : m_WorkloadQueue)
573 AddWorkloadStructure(timelineUtils, workload, *layer);
580 timelineUtils->Commit();
585 return m_OptimizedNetwork->GetGuid();
590 for (
auto&& inputLayer : m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetInputLayers())
592 ARMNN_ASSERT_MSG(inputLayer->GetNumOutputSlots() == 1,
"Input layer should have exactly 1 output slot");
593 if (inputLayer->GetBindingId() == layerId)
595 return inputLayer->GetOutputSlot(0).GetTensorInfo();
604 for (
auto&& outputLayer : m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetOutputLayers())
606 ARMNN_ASSERT_MSG(outputLayer->GetNumInputSlots() == 1,
"Output layer should have exactly 1 input slot");
607 ARMNN_ASSERT_MSG(outputLayer->GetInputSlot(0).GetConnection(),
"Input slot on Output layer must be connected");
608 if (outputLayer->GetBindingId() == layerId)
610 return outputLayer->GetInputSlot(0).GetConnection()->GetTensorInfo();
621 auto it = m_WorkloadFactories.find(layer.
GetBackendId());
622 if (it == m_WorkloadFactories.end())
624 throw RuntimeException(fmt::format(
"No workload factory for {0} to be used for layer: {1}",
630 workloadFactory = it->second.get();
634 std::string reasonIfUnsupported;
638 m_OptimizedNetwork->pOptimizedNetworkImpl->GetModelOptions()),
639 "Factory does not support layer");
641 return *workloadFactory;
651 : m_TensorHandle(std::move(handle))
657 ITensorHandle* GetTensorHandle()
const {
return m_TensorHandle.get(); }
662 std::unique_ptr<ITensorHandle> m_TensorHandle;
668 const std::vector<TensorPin>& pins,
669 char const* bindingPointDesc)
671 auto it = std::find_if(pins.begin(), pins.end(),
672 [id](
const TensorPin& pin)
674 return pin.GetBindingId() == id;
677 if (it != pins.end())
693 m_InputTensorPins.reserve(inputTensors.size());
694 m_OutputTensorPins.reserve(outputTensors.size());
696 for (
auto inputTensorPair : inputTensors)
698 auto inputTensor = inputTensorPair.second;
700 std::unique_ptr<ITensorHandle> tensorHandle =
701 std::make_unique<ConstPassthroughTensorHandle>(inputTensor.GetInfo(),inputTensor.GetMemoryArea());
704 m_InputTensorPins.emplace_back(std::move(tensorHandle), inputTensor.GetInfo(), layerId);
707 for (
auto outputTensorPair : outputTensors)
709 auto outputTensor = outputTensorPair.second;
711 std::unique_ptr<ITensorHandle> tensorHandle =
712 std::make_unique<PassthroughTensorHandle>(outputTensor.GetInfo(), outputTensor.GetMemoryArea());
715 m_OutputTensorPins.emplace_back(std::move(tensorHandle), outputTensor.GetInfo(), layerId);
721 return GetTensorPin(
id, m_InputTensorPins,
"input");
726 return GetTensorPin(
id, m_OutputTensorPins,
"output");
731 std::vector<TensorPin> m_InputTensorPins;
732 std::vector<TensorPin> m_OutputTensorPins;
739 std::vector<ImportedInputId> preImportedInputIds,
740 std::vector<ImportedOutputId> preImportedOutputIds)
742 const Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
747 ARMNN_LOG(
warning) <<
"IRuntime::EnqueueWorkload()::Less than two nodes in graph";
752 WorkloadData workloadData(inputTensors, outputTensors);
762 m_InputQueue.clear();
770 unsigned int inputIndex = 0;
771 unsigned int importedInputIdIndex = 0;
772 std::sort(preImportedInputIds.begin(), preImportedInputIds.end());
775 if (importedInputIdIndex < preImportedInputIds.size() &&
776 inputIndex == preImportedInputIds[importedInputIdIndex])
779 if (!m_IsInputImported[inputIndex])
781 auto outputTensorHandle = m_PreImportedInputHandles[inputIndex].m_TensorHandle.get();
783 for (
const auto& workloadInfo: m_InputWorkloadSlotPairs[inputLayer->GetBindingId()])
785 auto workload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
786 workload->ReplaceInputTensorHandle(outputTensorHandle, workloadInfo.m_SlotIndex);
788 m_IsInputImported[inputIndex] =
true;
790 importedInputIdIndex++;
794 if (m_IsInputImported[inputIndex])
798 for (
const auto& workloadInfo: m_InputWorkloadSlotPairs[inputLayer->GetBindingId()])
800 auto workload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
801 workload->ReplaceInputTensorHandle(handler.
GetData(), workloadInfo.m_SlotIndex);
804 m_IsInputImported[inputIndex] =
false;
808 const TensorPin& pin = workloadData.GetInputTensorPin(inputLayer->GetBindingId());
809 EnqueueInput(*inputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
817 m_OutputQueue.clear();
825 unsigned int outputIndex = 0;
826 unsigned int importedOutputIdIndex = 0;
827 std::sort(preImportedOutputIds.begin(), preImportedOutputIds.end());
830 if (importedOutputIdIndex < preImportedOutputIds.size() &&
831 outputIndex == preImportedOutputIds[importedOutputIdIndex])
834 ITensorHandle* inputTensorHandle = m_PreImportedOutputHandles[outputIndex].m_TensorHandle.get();
836 if (!m_IsOutputImported[outputIndex])
838 const auto bindingId = outputLayer->GetBindingId();
839 const auto& indices = m_OutputWorkloadSlotPairs[bindingId];
841 auto outputWorkload = m_WorkloadQueue[indices.m_OutputSlotIndices.m_WorkloadIndex].get();
843 outputWorkload->ReplaceOutputTensorHandle(inputTensorHandle,
844 indices.m_OutputSlotIndices.m_SlotIndex);
846 for (
const auto& workloadInfo: indices.m_InputSlotIndices)
848 auto inputWorkload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
849 inputWorkload->ReplaceInputTensorHandle(inputTensorHandle, workloadInfo.m_SlotIndex);
851 m_IsOutputImported[outputIndex] =
true;
854 ARMNN_ASSERT_MSG(inputTensorHandle !=
nullptr,
"Data should have been allocated.");
856 syncDesc.
m_Inputs.push_back(inputTensorHandle);
859 outputLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo());
860 auto syncWorkload = std::make_unique<SyncMemGenericWorkload>(syncDesc,
info);
862 m_OutputQueue.push_back(move(syncWorkload));
863 importedOutputIdIndex++;
867 if (m_IsOutputImported[outputIndex])
869 const auto bindingId = outputLayer->GetBindingId();
870 const auto& indices = m_OutputWorkloadSlotPairs[bindingId];
872 auto outputWorkload = m_WorkloadQueue[indices.m_OutputSlotIndices.m_WorkloadIndex].get();
874 outputLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetOutputHandler();
876 outputWorkload->ReplaceOutputTensorHandle(
877 outputHandler.
GetData(), indices.m_OutputSlotIndices.m_SlotIndex);
879 for (
const auto& workloadInfo: indices.m_InputSlotIndices)
881 auto inputWorkload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
882 inputWorkload->ReplaceInputTensorHandle(outputHandler.
GetData(), workloadInfo.m_SlotIndex);
884 m_IsOutputImported[outputIndex] =
false;
887 const TensorPin& pin = workloadData.GetOutputTensorPin(outputLayer->GetBindingId());
889 EnqueueOutput(*outputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
895 std::unique_ptr<TimelineUtilityMethods> timelineUtils =
897 ProfilingGuid inferenceGuid = m_ProfilingService.GetNextGuid();
901 ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
902 timelineUtils->CreateTypedEntity(inferenceGuid, LabelsAndEventClasses::INFERENCE_GUID);
903 timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
906 LabelsAndEventClasses::EXECUTION_OF_GUID);
907 timelineUtils->RecordEvent(inferenceGuid, LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
910 bool executionSucceeded =
true;
913 if (m_ProfilingService.IsProfilingEnabled())
915 m_ProfilingService.IncrementCounterValue(armnn::profiling::INFERENCES_RUN);
919 executionSucceeded = Execute(timelineUtils, inferenceGuid);
925 timelineUtils->RecordEvent(inferenceGuid, LabelsAndEventClasses::ARMNN_PROFILING_EOL_EVENT_CLASS);
926 timelineUtils->Commit();
939 if (tensorHandle ==
nullptr)
947 inputQueueDescriptor.
m_Inputs.push_back(tensorHandle);
952 const TensorInfo& outputTensorInfo = handler.GetTensorInfo();
955 "Data should have been allocated.");
956 inputQueueDescriptor.
m_Outputs.push_back(outputTensorHandle);
960 bool needMemCopy =
true;
961 if (m_NetworkProperties.m_ImportEnabled)
963 if(
CheckFlag(importFlags, m_NetworkProperties.m_InputSource))
967 void* mem = tensorHandle->
Map(
false);
968 if (outputTensorHandle->
Import(mem, m_NetworkProperties.m_InputSource))
970 tensorHandle->
Unmap();
973 tensorHandle->
Unmap();
980 std::unique_ptr<IWorkload> inputWorkload = std::make_unique<CopyMemGenericWorkload>(inputQueueDescriptor,
info);
984 std::unique_ptr<TimelineUtilityMethods> timelineUtils =
989 AddWorkloadStructure(timelineUtils, inputWorkload, layer);
990 timelineUtils->Commit();
993 m_InputQueue.push_back(move(inputWorkload));
1004 if (tensorHandle ==
nullptr)
1012 outputQueueDescriptor.
m_Outputs.push_back(tensorHandle);
1020 const TensorInfo& inputTensorInfo = outputHandler.GetTensorInfo();
1022 ARMNN_ASSERT_MSG(inputTensorHandle !=
nullptr,
"Data should have been allocated.");
1031 bool needMemCopy =
true;
1032 if (m_NetworkProperties.m_ExportEnabled &&
1033 (layer.
GetInputSlots()[0].GetConnectedOutputSlot()->GetNumConnections() == 1))
1038 if (
CheckFlag(importFlags, m_NetworkProperties.m_OutputSource))
1040 needMemCopy =
false;
1041 void *mem = tensorHandle->
Map(
false);
1042 bool importOk = inputTensorHandle->
Import(mem, m_NetworkProperties.m_OutputSource);
1043 tensorHandle->
Unmap();
1049 syncDesc.
m_Inputs.push_back(inputTensorHandle);
1051 auto syncWorkload = std::make_unique<SyncMemGenericWorkload>(syncDesc,
info);
1053 m_OutputQueue.push_back(move(syncWorkload));
1065 outputQueueDescriptor.
m_Inputs.push_back(inputTensorHandle);
1068 std::unique_ptr<IWorkload> outputWorkload =
1069 std::make_unique<CopyMemGenericWorkload>(outputQueueDescriptor,
info);
1072 std::unique_ptr<TimelineUtilityMethods> timelineUtils =
1077 AddWorkloadStructure(timelineUtils, outputWorkload, layer);
1078 timelineUtils->Commit();
1081 m_OutputQueue.push_back(move(outputWorkload));
// Acquires all working (intermediate) memory for an inference. The
// lock_guard parameter documents that the caller must already hold
// m_WorkingMemMutex -- the lock itself is taken by the caller.
// Idempotent: returns early (elided) when memory is already allocated.
// (Elided fragment: braces and early-return bodies are missing from this view.)
1085 void LoadedNetwork::AllocateWorkingMemory(std::lock_guard<std::mutex>& lock)
1092 if (m_IsWorkingMemAllocated)
// External memory manager path: allocate, then import each pre-planned
// TensorMemory block into its corresponding tensor handle.
1097 if (m_ExternalMemoryManager)
1099 m_ExternalMemoryManager->Allocate();
1101 for (
unsigned int i = 0; i < m_TensorMemory.size(); ++i)
1103 m_Tensorhandles[i]->Import(m_TensorMemory[i].first->m_Data, m_TensorMemory[i].second);
// Backend-internal memory manager path (null entries presumably skipped on
// elided lines -- TODO confirm).
1107 for (
auto&& memoryManager : m_BackendMemoryMangers)
1111 memoryManager->Acquire();
// NOTE: "AquireMemory" spelling matches the registry's API as called here.
1114 m_TensorHandleFactoryRegistry.AquireMemory();
1115 m_IsWorkingMemAllocated =
true;
1120 std::lock_guard<std::mutex> lockGuard(m_WorkingMemMutex);
1122 if (!m_IsWorkingMemAllocated)
1127 if (m_ExternalMemoryManager)
1129 m_ExternalMemoryManager->Deallocate();
1133 for (
auto&& memoryManager : m_BackendMemoryMangers)
1137 memoryManager->Release();
1140 m_TensorHandleFactoryRegistry.ReleaseMemory();
1141 m_IsWorkingMemAllocated =
false;
1145 profiling::ProfilingGuid inferenceGuid)
1147 bool success =
true;
1149 auto Fail = [&](
const std::exception&
error)
1151 ARMNN_LOG(error) <<
"An error occurred attempting to execute a workload: " << error.what();
1157 std::lock_guard<std::mutex> lockGuard(m_WorkingMemMutex);
1158 AllocateWorkingMemory(lockGuard);
1160 ProfilingDynamicGuid workloadInferenceID(0);
1161 auto ExecuteQueue = [&timelineUtils, &workloadInferenceID, &inferenceGuid](
WorkloadQueue& queue)
1163 for (
auto& workload : queue)
1167 workloadInferenceID = timelineUtils->RecordWorkloadInferenceAndStartOfLifeEvent(workload->GetGuid(),
1170 workload->Execute();
1173 timelineUtils->RecordEndOfLifeEvent(workloadInferenceID);
1178 ExecuteQueue(m_InputQueue);
1179 ExecuteQueue(m_WorkloadQueue);
1180 ExecuteQueue(m_OutputQueue);
1186 catch (
const std::runtime_error& error)
1196 if (m_NetworkProperties.m_ImportEnabled)
1199 if (
CheckFlag(importFlags, m_NetworkProperties.m_InputSource) )
1201 std::unique_ptr<ITensorHandle> tensorHandle =
1202 std::make_unique<ConstPassthroughTensorHandle>(inputTensor.
GetInfo(),
1204 void* mem = tensorHandle->
Map(
false);
1206 if (inputTensorHandle->
Import(mem, m_NetworkProperties.m_InputSource))
1208 tensorHandle->Unmap();
1211 tensorHandle->Unmap();
1221 std::unique_ptr<ITensorHandle> tensorHandle =
1222 std::make_unique<ConstPassthroughTensorHandle>(inputTensor.
GetInfo(), inputTensor.
GetMemoryArea());
1224 auto copyFunc = [](
void* dst,
const void* src,
size_t size)
1226 memcpy(dst, src, size);
// Makes the user-supplied output buffer available to the network, preferring
// zero-copy import and presumably falling back to a memcpy (the branch
// structure between the import path and the copy path is partially elided --
// TODO confirm against full source).
// - Import path: wrap the user's buffer in a PassthroughTensorHandle, map it,
//   and ask the network-side handle to import the raw pointer.
// - Copy path: copy the data out via copyFunc/memcpy.
// Throws MemoryExportException when export is attempted on an Input layer.
// (Elided fragment: braces and several interior lines are missing.)
1239 void LoadedNetwork::ImportOutputTensor(
const Tensor& outputTensor,
ITensorHandle* outputTensorHandle)
1241 ARMNN_ASSERT_MSG(outputTensorHandle !=
nullptr,
"Data should have been allocated.");
1243 if (
CheckFlag(importFlags, m_NetworkProperties.m_OutputSource))
1245 std::unique_ptr<ITensorHandle> tensorHandle =
1246 std::make_unique<PassthroughTensorHandle>(outputTensor.
GetInfo(),
// Map without blocking (false) to obtain the raw user pointer for import.
1249 void* mem = tensorHandle->
Map(
false);
1250 bool importOk = outputTensorHandle->
Import(mem, m_NetworkProperties.m_OutputSource);
1251 tensorHandle->Unmap();
1260 throw MemoryExportException(
"ImportOutputTensor: Memory Export failed, attempting to export Input Layer");
// Fallback: plain element-wise copy of the output data.
1267 auto copyFunc = [](
void* dst,
const void* src,
size_t size)
1269 memcpy(dst, src, size);
1272 std::unique_ptr<ITensorHandle> tensorHandle =
1273 std::make_unique<PassthroughTensorHandle>(outputTensor.
GetInfo(),
1282 for (
auto inputTensorPair : inputTensors)
1287 return inputTensorPair.second;
1295 for (
auto outputTensorPair : outputTensors)
1300 return outputTensorPair.second;
1309 if (!m_NetworkProperties.m_AsyncEnabled)
1314 throw MemoryImportException(
"ImportInputs: Memory Import failed, NetworkProperties.m_ImportEnabled");
1316 if (inputTensors.size() != m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetNumInputs())
1321 std::vector<ImportedInputId> importedInputs;
1323 unsigned int inputIndex = 0;
1326 auto outputTensorHandle = m_PreImportedInputHandles[inputIndex].m_TensorHandle.get();
1328 if (!outputTensorHandle)
1334 auto layerBindingId = inputLayer->GetBindingId();
1335 auto it = std::find_if(inputTensors.begin(), inputTensors.end(), [=](
const auto& inputTensor)
1337 return inputTensor.first == layerBindingId;
1340 if (it == inputTensors.end())
1346 const auto& inputTensor = *it;
1347 std::unique_ptr<ITensorHandle> passThroughTensorHandle =
1348 std::make_unique<ConstPassthroughTensorHandle>(inputTensor.second.
GetInfo(),
1351 if (outputTensorHandle->
CanBeImported(passThroughTensorHandle->Map(), forceImportMemorySource)
1352 && (outputTensorHandle->
Import(passThroughTensorHandle->Map(), forceImportMemorySource)))
1354 importedInputs.push_back(inputIndex);
1356 passThroughTensorHandle->Unmap();
1361 return importedInputs;
1366 std::vector<ImportedInputId> importedInputs;
1369 for (
auto inputTensor : inputTensors)
1371 auto layerBindingId = inputTensor.first;
1380 "ImportInputs: Memory Import failed, unknown LayerBindingId: {}", layerBindingId));
1383 const Layer* layer = *it;
1392 std::string er = backend->GetId();
1393 er +=
" does not have PreImportIOTensors capability";
1405 ImportedTensorHandlePin importedTensorHandlePin{layerBindingId,
1408 ITensorHandle* tensorHandle = importedTensorHandlePin.m_TensorHandle.get();
1413 fmt::format(
"ImportInputs: Memory Import failed, backend: " 1414 "{} does not support importing from source {}" 1415 , factoryId, m_NetworkProperties.m_InputSource));
1418 std::unique_ptr<ITensorHandle> passThroughTensorHandle =
1419 std::make_unique<ConstPassthroughTensorHandle>(inputTensor.second.
GetInfo(),
1422 if (tensorHandle->
Import(passThroughTensorHandle->Map(), m_NetworkProperties.m_InputSource))
1424 importedInputs.push_back(m_CurImportedInputId++);
1425 passThroughTensorHandle->Unmap();
1429 passThroughTensorHandle->Unmap();
1433 m_PreImportedInputHandles.push_back(std::move(importedTensorHandlePin));
1435 return importedInputs;
1442 if (!m_NetworkProperties.m_AsyncEnabled)
1447 throw MemoryImportException(
"ImportOutputs: Memory Import failed, NetworkProperties.m_ImportEnabled");
1450 if (outputTensors.size() != m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetNumOutputs())
1454 std::vector<ImportedInputId> importedOutputs;
1457 unsigned int outputIndex = 0;
1460 auto inputTensorHandle = m_PreImportedOutputHandles[outputIndex].m_TensorHandle.get();
1462 if (!inputTensorHandle)
1468 auto layerBindingId = outputLayer->GetBindingId();
1469 auto it = std::find_if(outputTensors.begin(), outputTensors.end(), [=] (
const auto& outputTensor)
1471 return outputTensor.first == layerBindingId;
1474 if (it == outputTensors.end())
1480 const auto outputTensor = *it;
1483 && inputTensorHandle->
Import(outputTensor.second.
GetMemoryArea(), forceImportMemorySource))
1485 importedOutputs.push_back(outputIndex);
1489 return importedOutputs;
1492 std::vector<ImportedOutputId> importedOutputs;
1495 for (
const auto& outputTensor : outputTensors)
1497 auto layerBindingId = outputTensor.first;
1505 throw MemoryImportException(fmt::format(
"ImportOutputs: Memory Import failed, unknown LayerBindingId: {}",
1509 const Layer* layer = *it;
1518 std::string er = backend->GetId();
1519 er +=
" does not have PreImportIOTensors capability";
1530 ImportedTensorHandlePin importedTensorHandlePin{layerBindingId,
1533 ITensorHandle* tensorHandle = importedTensorHandlePin.m_TensorHandle.get();
1538 "{} does not support importing from source {}" 1539 , factoryId, m_NetworkProperties.m_OutputSource));
1542 if (tensorHandle->
Import(outputTensor.second.
GetMemoryArea(), m_NetworkProperties.m_OutputSource))
1544 importedOutputs.push_back(m_CurImportedOutputId++);
1551 m_PreImportedOutputHandles.push_back(std::move(importedTensorHandlePin));
1554 return importedOutputs;
1559 for (
auto id : inputIds)
1561 if (
id > m_PreImportedInputHandles.size())
1566 auto& importedTensorHandle = m_PreImportedInputHandles[id].m_TensorHandle;
1567 if (!importedTensorHandle)
1570 fmt::format(
"ClearImportedInputs::ImportedInput with id: {} has already been deleted",
id));
1573 importedTensorHandle->Unimport();
1574 importedTensorHandle = {};
1580 for (
auto id : outputIds)
1582 if (
id > m_PreImportedOutputHandles.size())
1587 auto& importedTensorHandle = m_PreImportedOutputHandles[id].m_TensorHandle;
1588 if (!importedTensorHandle)
1591 fmt::format(
"ClearImportedOutputs::ImportedOutput with id: {} has already been deleted",
id));
1594 importedTensorHandle->Unimport();
1595 importedTensorHandle = {};
1602 std::vector<ImportedInputId> preImportedInputs,
1603 std::vector<ImportedOutputId> preImportedOutputs)
1605 const Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
1607 if (inputTensors.size() + preImportedInputs.size() != graph.
GetNumInputs())
1609 if (preImportedInputs.empty())
1616 "Number of inputs + preImportedInputs provided does not match network.");
1620 if (outputTensors.size() + preImportedOutputs.size() != graph.
GetNumOutputs())
1622 if (preImportedOutputs.empty())
1625 "Number of outputs provided does not match network.");
1630 "Number of outputs + preImportedOutputs provided does not match network.");
1637 unsigned int index = 0;
1638 for (
auto pair : inputTensors)
1640 bindingIds[index++] = pair.first;
1644 bindingIds[index++] = ValidateImportedInputID(
id);
1646 for (
auto pair : outputTensors)
1648 bindingIds[index++] = pair.first;
1652 bindingIds[index++] = ValidateImportedOutputID(
id);
1657 auto resetMemHandle = [&]()
1661 const LayerBindingId layerBindingId = m_PreImportedInputHandles[id].m_LayerBindingId;
1663 auto inputHandle = workingMemHandle.
GetInputHandle(layerBindingId);
1665 for (
auto it : inputConnections)
1673 const LayerBindingId layerBindingId = m_PreImportedOutputHandles[id].m_LayerBindingId;
1678 for (
auto it : outputConnections)
1685 std::unique_ptr<profiling::TimelineUtilityMethods> timelineUtils =
1687 profiling::ProfilingGuid inferenceGuid = m_ProfilingService.GetNextGuid();
1691 profiling::ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
1692 timelineUtils->CreateTypedEntity(inferenceGuid, profiling::LabelsAndEventClasses::INFERENCE_GUID);
1696 profiling::LabelsAndEventClasses::EXECUTION_OF_GUID);
1697 timelineUtils->RecordEvent(inferenceGuid, profiling::LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
1700 bool executionSucceeded =
true;
1705 timelineUtils->RecordEvent(inferenceGuid, profiling::LabelsAndEventClasses::ARMNN_PROFILING_EOL_EVENT_CLASS);
1706 timelineUtils->Commit();
1716 for (
auto pair : inputTensors)
1718 EnqueueInput(pair.second, workingMemHandle.
GetInputHandle(pair.first));
1724 const ImportedTensorHandlePin& importedInputPin = m_PreImportedInputHandles[id];
1725 const LayerBindingId layerBindingId = m_PreImportedInputHandles[id].m_LayerBindingId;
1726 const auto& preimportedHandle = importedInputPin.m_TensorHandle;
1729 for (
auto it : inputConnections)
1731 *it = preimportedHandle.get();
1737 if (m_NetworkProperties.m_ExportEnabled)
1739 for (
auto pair: outputTensors)
1741 ImportOutputTensor(pair.second, workingMemHandle.
GetOutputHandle(pair.first));
1747 const ImportedTensorHandlePin& importedOutputPin = m_PreImportedOutputHandles[id];
1748 const LayerBindingId layerBindingId = m_PreImportedOutputHandles[id].m_LayerBindingId;
1749 const auto& preimportedHandle = importedOutputPin.m_TensorHandle;
1753 for (
auto it : outputConnections)
1755 *it = preimportedHandle.get();
1760 auto Fail = [&](
const std::exception&
error)
1762 ARMNN_LOG(error) <<
"An error occurred attempting to execute a workload: " << error.what();
1763 executionSucceeded =
false;
1765 profiling::ProfilingDynamicGuid workloadInferenceID(0);
1769 for (
unsigned int i = 0; i < m_WorkloadQueue.size(); ++i)
1771 auto& workload = m_WorkloadQueue[i];
1774 workloadInferenceID = timelineUtils->RecordWorkloadInferenceAndStartOfLifeEvent(workload->GetGuid(),
1781 timelineUtils->RecordEndOfLifeEvent(workloadInferenceID);
1790 catch (
const std::runtime_error& error)
1801 if (!m_NetworkProperties.m_ExportEnabled)
1803 for (
auto pair: outputTensors)
1823 Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
1826 std::vector<std::unique_ptr<ITensorHandle>> managedTensorHandles;
1828 std::vector<std::unique_ptr<ITensorHandle>> unmanagedTensorHandles;
1830 std::vector<WorkingMemDescriptor> workingMemDescriptors;
1831 std::unordered_map<LayerGuid, WorkingMemDescriptor> workingMemDescriptorMap;
1833 auto GetTensorHandle = [&](
Layer* layer,
const OutputSlot& outputSlot)
1836 const TensorInfo& tensorInfo = outputSlot.GetTensorInfo();
1842 return m_WorkloadFactories.at(
id)->CreateTensorHandle(tensorInfo,
false);
1857 bool m_IsInputLayerHandle =
false;
1858 bool m_IsOutputLayerHandle =
false;
1864 std::unordered_map<const OutputSlot*, HandleInfo> outputToHandleInfoMap;
1866 unsigned int layerIndex = 0;
1867 for (
auto&& layer : order)
1877 bool isMemoryManaged =
true;
1878 bool isInputLayer =
false;
1879 bool isOutputLayer =
false;
1880 bool isConnectedToOutputLayer =
false;
1886 isInputLayer =
true;
1887 isMemoryManaged = !m_NetworkProperties.m_ImportEnabled;
1891 isOutputLayer =
true;
1894 unsigned int slotIndex = 0;
1899 for (
unsigned int i = 0; i < slot.GetNumConnections(); ++i)
1903 if (!isConnectedToOutputLayer)
1905 isConnectedToOutputLayer =
true;
1907 isMemoryManaged = !m_NetworkProperties.m_ExportEnabled;
1913 fmt::format(
"Layer name: '{0}' guid: '{1}' has two or more OutputLayers connected to it. " 1914 "This will prevent importing on the connected OutputLayers.",
1916 isMemoryManaged =
true;
1922 if (isMemoryManaged)
1924 managedTensorHandles.emplace_back(GetTensorHandle(layer, slot));
1925 tensorHandle = managedTensorHandles.back().get();
1929 unmanagedTensorHandles.emplace_back(GetTensorHandle(layer, slot));
1930 tensorHandle = unmanagedTensorHandles.back().get();
1933 workingMemDescriptor.
m_Outputs.push_back(tensorHandle);
1935 HandleInfo& handleInfo = outputToHandleInfoMap[&slot];
1936 handleInfo.m_TensorHandle = tensorHandle;
1939 if (isConnectedToOutputLayer)
1941 handleInfo.m_IsOutputLayerHandle =
true;
1942 handleInfo.m_OutputMemDescriptorCoords.m_OutputSlotCoords = {layerIndex, slotIndex};
1947 handleInfo.m_IsInputLayerHandle =
true;
1949 handleInfo.m_InputMemDescriptorCoords.m_LayerBindingId = bindingId;
1960 auto outputSlot = slot.GetConnectedOutputSlot();
1961 auto key = outputSlot->GetOwningLayer().GetGuid();
1964 auto found = m_ConstantTensorHandles.find(key);
1965 if (found != m_ConstantTensorHandles.end())
1968 workingMemDescriptor.
m_Inputs.push_back(tensorHandle);
1976 HandleInfo& handleInfo = outputToHandleInfoMap[outputSlot];
1977 handleInfo.m_TensorHandle = tensorHandle;
1978 handleInfo.m_IsOutputLayerHandle =
true;
1979 handleInfo.m_OutputMemDescriptorCoords.m_LayerBindingIds.push_back(bindingId);
1980 handleInfo.m_OutputMemDescriptorCoords.m_InputSlotCoords.push_back({layerIndex, 0});
1985 HandleInfo& handleInfo = outputToHandleInfoMap.at(outputSlot);
1987 ITensorHandle* inputTensorHandle = handleInfo.m_TensorHandle;
1988 workingMemDescriptor.
m_Inputs.push_back(inputTensorHandle);
1994 handleInfo.m_OutputMemDescriptorCoords.m_LayerBindingIds.push_back(bindingId);
1995 handleInfo.m_OutputMemDescriptorCoords.m_InputSlotCoords.push_back({layerIndex, 0});
1999 else if (handleInfo.m_IsOutputLayerHandle)
2001 handleInfo.m_OutputMemDescriptorCoords.m_InputSlotCoords.push_back({layerIndex, slot.GetSlotIndex()});
2006 if (handleInfo.m_IsInputLayerHandle)
2008 std::pair<LayerGuid, unsigned int> connectionLocation{layerIndex, slot.GetSlotIndex()};
2009 handleInfo.m_InputMemDescriptorCoords.m_InputSlotCoords.emplace_back(connectionLocation);
2012 workingMemDescriptorMap.insert({layer->
GetGuid(), workingMemDescriptor});
2018 workingMemDescriptors.push_back(workingMemDescriptor);
2023 std::vector<std::pair<std::shared_ptr<TensorMemory>,
MemorySource>> tensorMemory;
2025 auto externalMemoryManager = CreateExternalMemoryManger(tensorMemory);
2028 std::sort(tensorMemory.begin(), tensorMemory.end(),
2029 [](
const std::pair<std::shared_ptr<TensorMemory>,
MemorySource>& lhs,
2030 const std::pair<std::shared_ptr<TensorMemory>,
MemorySource>& rhs)
2032 return lhs.first->m_OutputSlotId < rhs.first->m_OutputSlotId;
2035 std::vector<WorkingMemHandle::InputMemDescriptorCoords> inputConnectionsInfo;
2036 std::vector<WorkingMemHandle::OutputMemDescriptorCoords> outputConnectionsInfo;
2038 for (
const auto& handleInfo: outputToHandleInfoMap)
2040 if (handleInfo.second.m_IsOutputLayerHandle)
2042 outputConnectionsInfo.emplace_back(handleInfo.second.m_OutputMemDescriptorCoords);
2045 if (handleInfo.second.m_IsInputLayerHandle)
2047 inputConnectionsInfo.emplace_back(handleInfo.second.m_InputMemDescriptorCoords);
2051 return std::make_unique<WorkingMemHandle>(networkId,
2052 inputConnectionsInfo,
2053 outputConnectionsInfo,
2054 workingMemDescriptors,
2055 workingMemDescriptorMap,
2056 std::move(externalMemoryManager),
2057 std::move(tensorMemory),
2058 std::move(managedTensorHandles),
2059 std::move(unmanagedTensorHandles));
2064 for (
auto&& workloadPtr: m_WorkloadQueue)
2066 workloadPtr.get()->RegisterDebugCallback(func);
2071 void LoadedNetwork::CreateMemoryProfileAsync()
2075 unsigned int m_StartOfLife;
2076 unsigned int m_Lifetime;
2079 unsigned int m_Index;
2084 auto align = [](
size_t numToAlign)
2086 const size_t alignment =
sizeof(float);
2087 return ((numToAlign + alignment - 1) / alignment) * alignment;
2090 std::unordered_map<const OutputSlot*, PartialBlock> memBlockTrackerMap;
2095 unsigned int timestep = 0;
2096 unsigned int outputIndex = 0;
2099 for (
auto&& layer : order)
2123 if (!m_SupportsExternallyManagedMemory[backendId])
2128 PartialBlock partialBlock;
2130 partialBlock.m_StartOfLife = timestep;
2132 size_t alignedSize = align(outputSlot.GetOutputHandler().GetTensorInfo().GetNumBytes());
2133 partialBlock.m_MemSize = alignedSize;
2134 partialBlock.m_Index = outputIndex++;
2135 partialBlock.m_Lifetime = outputSlot.GetNumConnections();
2136 partialBlock.m_BackendId = backendId;
2138 if (partialBlock.m_Lifetime == 0)
2140 m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
2141 partialBlock.m_StartOfLife,
2142 partialBlock.m_MemSize,
2144 partialBlock.m_Index);
2148 memBlockTrackerMap[&outputSlot] = partialBlock;
2154 const Layer& connectedInputLayer = inputSlot.GetConnectedOutputSlot()->GetOwningLayer();
2166 auto outputSlot = inputSlot.GetConnectedOutputSlot();
2168 PartialBlock& partialBlock = memBlockTrackerMap.at(outputSlot);
2170 auto& lifetime = partialBlock.m_Lifetime;
2175 m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
2177 partialBlock.m_MemSize,
2179 partialBlock.m_Index);
2186 void LoadedNetwork::CreateMemoryProfile()
2190 auto TraceSubTensorHandleAncestry = [](
ITensorHandle*
const subTensorHandle)
2193 while (ancestor && ancestor->
GetParent())
2202 unsigned int m_StartOfLife;
2203 unsigned int m_Lifetime;
2206 unsigned int m_Index;
2211 auto align = [](
size_t numToAlign)
2213 const size_t alignment =
sizeof(float);
2214 return ((numToAlign + alignment - 1) / alignment) * alignment;
2217 std::unordered_map<ITensorHandle*, PartialBlock> memBlockTrackerMap;
2222 unsigned int timestep = 0;
2223 unsigned int outputIndex = 0;
2226 for (
auto&& layer : order)
2250 if (!m_SupportsExternallyManagedMemory[backendId])
2255 ITensorHandle* tensorHandle = outputSlot.GetOutputHandler().GetData();
2256 tensorHandle = TraceSubTensorHandleAncestry(tensorHandle);
2258 if (memBlockTrackerMap.find(tensorHandle) == memBlockTrackerMap.end())
2260 PartialBlock partialBlock;
2262 partialBlock.m_StartOfLife = timestep;
2264 size_t alignedSize = align(outputSlot.GetOutputHandler().GetTensorInfo().GetNumBytes());
2265 partialBlock.m_MemSize = alignedSize;
2266 partialBlock.m_Index = outputIndex++;
2267 partialBlock.m_Lifetime = outputSlot.GetNumConnections();
2268 partialBlock.m_BackendId = backendId;
2270 if (partialBlock.m_Lifetime == 0)
2272 m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
2273 partialBlock.m_StartOfLife,
2274 partialBlock.m_MemSize,
2276 partialBlock.m_Index);
2280 memBlockTrackerMap[tensorHandle] = partialBlock;
2282 m_Tensorhandles.push_back(tensorHandle);
2287 memBlockTrackerMap.at(tensorHandle).m_Lifetime += outputSlot.GetNumConnections();
2293 const Layer& connectedInputLayer = inputSlot.GetConnectedOutputSlot()->GetOwningLayer();
2304 if (!m_SupportsExternallyManagedMemory[connectedInputLayer.
GetBackendId()])
2309 auto outputSlot = inputSlot.GetConnectedOutputSlot();
2311 ITensorHandle* tensorHandle = outputSlot->GetOutputHandler().GetData();
2312 tensorHandle = TraceSubTensorHandleAncestry(tensorHandle);
2314 PartialBlock& partialBlock = memBlockTrackerMap.at(tensorHandle);
2316 auto& lifetime = partialBlock.m_Lifetime;
2321 m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
2323 partialBlock.m_MemSize,
2325 partialBlock.m_Index);
2333 std::unique_ptr<MemoryManager> LoadedNetwork::CreateExternalMemoryManger(
2334 std::vector<std::pair<std::shared_ptr<TensorMemory>,
MemorySource>>& tensorMemoryVec)
2336 std::unique_ptr<MemoryManager> memoryManager = std::make_unique<MemoryManager>();
2339 for (
auto& backend : m_MemBinMap)
2341 std::vector<BufferStorage> bufferStorageVec;
2343 std::shared_ptr<ICustomAllocator> backendAllocator;
2344 if (allocatorMap.find(backend.first) != allocatorMap.end())
2346 backendAllocator = allocatorMap[backend.first];
2350 backendAllocator = m_Backends[backend.first]->GetDefaultAllocator();
2353 for (
auto& memBin : backend.second)
2359 for (
auto& memBlock : memBin.m_MemBlocks)
2361 auto tensorMemory = std::make_shared<TensorMemory>(
TensorMemory{memBlock.
m_Offset, memBlock.m_Index});
2363 tensorMemoryVec.emplace_back(tensorMemory, backendAllocator->GetMemorySourceType());
2367 bufferStorageVec.emplace_back(std::move(bufferStorage));
2370 memoryManager->StoreMemToAllocate(bufferStorageVec, backendAllocator, 4);
2373 return memoryManager;
2380 const auto& importedTensorHandlePin = m_PreImportedInputHandles.at(
id);
2381 if (!importedTensorHandlePin.m_TensorHandle)
2384 "PreImportedInput: {} has been deleted",
id));
2386 return importedTensorHandlePin.m_LayerBindingId;
2388 catch (
const std::out_of_range&)
2398 const auto& importedTensorHandlePin = m_PreImportedOutputHandles.at(
id);
2399 if (!importedTensorHandlePin.m_TensorHandle)
2402 "PreImportedOutput: {} has been deleted",
id));
2404 return importedTensorHandlePin.m_LayerBindingId;
2406 catch (
const std::out_of_range&)
Status Execute(const InputTensors &inputTensors, const OutputTensors &outputTensors, IWorkingMemHandle &workingMemHandle, std::vector< ImportedInputId > preImportedInputs={}, std::vector< ImportedOutputId > preImportedOutputs={})
Thread safe execution of the loaded network.
std::vector< std::shared_ptr< TensorMemory > > m_TensorMemoryVector
Vector of pointer to .
std::unique_ptr< IWorkingMemHandle > CreateWorkingMemHandle(NetworkId networkId)
Create a new unique WorkingMemHandle object.
bool HasCapability(const std::string &name, const BackendCapabilities &capabilities)
Convenience function to check if a capability exists in a BackendCapabilites struct.
virtual bool Import(void *memory, MemorySource source)
Import externally allocated memory.
FactoryFunction GetFactory(const BackendId &id) const
std::unique_ptr< IWorkloadFactory > IWorkloadFactoryPtr
unsigned int GetNumInputSlots() const override
Returns the number of connectable input slots.
static std::unique_ptr< TimelineUtilityMethods > GetTimelineUtils(ProfilingService &profilingService)
static ProfilerManager & GetInstance()
#define ARMNN_NO_DEPRECATE_WARN_BEGIN
virtual IMemoryManagerUniquePtr CreateMemoryManager() const
LayerBindingId GetBindingId() const
virtual unsigned int GetImportFlags() const
Get flags describing supported import sources.
const bool m_AsyncEnabled
const armnn::Tensor GetOutputTensor(const LayerBindingId layerId, const OutputTensors &outputTensors)
MemoryOptimizerStrategiesMapRef GetMemoryOptimizerStrategies()
unsigned int ImportedOutputId
WorkingMemDescriptor & GetWorkingMemDescriptorAt(unsigned int id) override
Get the WorkingMemDescriptor at an index.
size_t m_Offset
Number of bytes the value is away from the .m_Buffer.
virtual void Allocate()=0
Indicate to the memory manager that this resource is no longer active.
TensorInfo GetInputTensorInfo(LayerBindingId layerId) const
#define ARMNN_LOG(severity)
size_t m_BufferSize
Total size of the buffer.
ITensorHandle * GetOutputHandle(LayerBindingId layerBindingId) const
BackendRegistry & BackendRegistryInstance()
std::vector< std::pair< LayerBindingId, class ConstTensor > > InputTensors
const ProfilingDetailsMethod m_OutputNetworkDetailsMethod
unsigned int MemorySourceFlags
MemoryType GetMemoryArea() const
size_t GetNumOutputs() const
void CopyToOutputTensor(const Tensor &outputTensor, ITensorHandle *outputTensorHandle)
TensorInfo GetOutputTensorInfo(LayerBindingId layerId) const
Copyright (c) 2021 ARM Limited and Contributors.
void IgnoreUnused(Ts &&...)
const std::vector< InputSlot > & GetInputSlots() const
std::vector< ImportedInputId > ImportInputs(const InputTensors &inputTensors, MemorySource forceImportMemorySource=MemorySource::Undefined)
std::function< void(LayerGuid guid, unsigned int slotIndex, ITensorHandle *tensorHandle)> DebugCallbackFunction
Define the type of callback for the Debug layer to call.
unsigned int GetNumOutputSlots() const override
Returns the number of connectable output slots.
int LayerBindingId
Type of identifiers for bindable layers (inputs, outputs).
#define ARMNN_SCOPED_PROFILING_EVENT(backendId, name)
virtual const BackendId & GetId() const =0
ConstIteratorOutputs begin() const
A tensor defined by a TensorInfo (shape and data type) and a mutable backing store.
virtual IWorkloadFactoryPtr CreateWorkloadFactory(const IMemoryManagerSharedPtr &memoryManager=nullptr) const =0
std::vector< ITensorHandle * > m_Inputs
unsigned int GetNumConnections() const override
const InputSlot & GetInputSlot(unsigned int index) const override
Get a const input slot handle by slot index.
std::vector< TensorInfo > m_InputTensorInfos
const std::vector< std::vector< ITensorHandle * >::iterator > & GetOutputConnection(LayerBindingId layerBindingId) const
void ValidateBindingIds()
#define ARMNN_NO_DEPRECATE_WARN_END
#define ARMNN_ASSERT_MSG(COND, MSG)
bool SupportsTensorAllocatorAPI() const
#define ARMNN_SCOPED_HEAP_PROFILING(TAG)
A tensor defined by a TensorInfo (shape and data type) and an immutable backing store.
virtual ITensorHandle * GetParent() const =0
Get the parent tensor if this is a subtensor.
std::vector< std::pair< LayerBindingId, class Tensor > > OutputTensors
const std::string & GetNameStr() const
LayerType GetType() const override
Returns the armnn::LayerType of this layer.
const std::vector< std::vector< ITensorHandle * >::iterator > & GetInputConnections(LayerBindingId layerBindingId) const
#define ARMNN_ASSERT(COND)
void ClearImportedInputs(const std::vector< ImportedInputId > inputIds)
std::vector< TensorInfo > m_OutputTensorInfos
static bool IsLayerSupported(const BackendId &backendId, const IConnectableLayer &layer, Optional< DataType > dataType, std::string &outReasonIfUnsupported)
ITensorHandle * GetData() const
Gets the allocated tensor memory.
std::vector< std::unique_ptr< IWorkload > > WorkloadQueue
const TensorInfo & GetInfo() const
const BackendId & GetBackendId() const
void Allocate() override
Allocate the backing memory required for execution.
const std::vector< OutputSlot > & GetOutputSlots() const
virtual bool CanBeImported(void *memory, MemorySource source)
Implementations must determine if this memory block can be imported.
OutputLayersAccessor GetOutputLayers() const
Returns a wrapper object with begin(), end() methods to iterate over the output layers in a range-bas...
unsigned int ImportedInputId
Status EnqueueWorkload(const InputTensors &inputTensors, const OutputTensors &outputTensors, std::vector< ImportedInputId > preImportedInputIds={}, std::vector< ImportedOutputId > preImportedOutputIds={})
Single thread execution of the loaded network.
void RegisterProfiler(IProfiler *profiler)
virtual const void * Map(bool blocking=true) const =0
Map the tensor data for access.
std::vector< LayerBindingId > & GetBindingIdVector()
profiling::ProfilingGuid GetNetworkGuid()
std::unordered_map< BackendId, std::shared_ptr< ICustomAllocator > > GetAllocators()
virtual BackendCapabilities GetCapabilities() const
Returns a BackendCapability if the backend lists the capability The BackendCapability must then be in...
virtual void Unmap() const =0
Unmap the tensor data.
bool IsAllocated() override
IsAllocated returns true if the backing memory is currently allocated.
std::vector< ITensorHandle * > m_Outputs
Base class for all ArmNN exceptions so that users can filter to just those.
const OutputHandler & GetOutputHandler(unsigned int i=0) const
std::vector< ImportedOutputId > ImportOutputs(const OutputTensors &outputTensors, MemorySource forceImportMemorySource=MemorySource::Undefined)
MemorySource
Define the Memory Source to reduce copies.
const std::string & Get() const
void RegisterDebugCallback(const DebugCallbackFunction &func)
ConstIteratorOutputs end() const
std::vector< ITensorHandle * > m_Outputs
std::enable_if_t< std::is_unsigned< Source >::value &&std::is_unsigned< Dest >::value, Dest > numeric_cast(Source source)
Contains information about TensorInfos of a layer.
const char * GetName() const override
Returns the name of the layer.
ITensorHandleFactory::FactoryId GetTensorHandleFactoryId() const
bool CheckFlag(MemorySourceFlags flags, MemorySource source)
void CopyTensorContentsGeneric(const ITensorHandle *srcTensor, ITensorHandle *dstTensor, CopyFunc copy)
Graph & TopologicalSort()
Sorts layers in topological order and return this.
InputLayersAccessor GetInputLayers() const
Returns a wrapper object with begin(), end() methods to iterate over the input layers in a range-base...
std::vector< ITensorHandle * > m_Inputs
size_t GetNumLayers() const
const armnn::ConstTensor GetInputTensor(const LayerBindingId layerId, const InputTensors &inputTensors)
const TensorInfo & GetTensorInfo(const ITensorHandle *tensorHandle)
float32 helpers
static std::unique_ptr< LoadedNetwork > MakeLoadedNetwork(std::unique_ptr< IOptimizedNetwork > net, std::string &errorMessage, const INetworkProperties &networkProperties, profiling::ProfilingService &profilingService)
const TensorInfo & GetTensorInfo() const override
ITensorHandle * GetInputHandle(LayerBindingId layerBindingId) const
size_t GetNumInputs() const
virtual std::unique_ptr< ITensorHandle > CreateTensorHandle(const TensorInfo &tensorInfo) const =0
static const FactoryId LegacyFactoryId
const bool m_ProfilingEnabled
void ClearImportedOutputs(const std::vector< ImportedOutputId > outputIds)
LayerType
When adding a new layer, adapt also the LastLayer enum value in the enum class LayerType below...
LayerGuid GetGuid() const final
Returns the unique id of the layer.
void SendNetworkStructure()