#include <common/include/Processes.hpp>

#include <fmt/format.h>

// ...

template <typename ExceptionType>
std::string ToErrorMessage(const char* prefix, const ExceptionType& error)
{
    std::stringstream ss;
    ss << prefix << " " << error.what();
    return ss.str();
}
void AddLayerStructure(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
                       const Layer& layer,
                       ProfilingGuid networkGuid)
{
    // Add the layer to the post-optimisation network structure.
    std::string layerName = layer.GetNameStr().empty() ? "<Unnamed>" : layer.GetNameStr();
    timelineUtils->CreateNamedTypedChildEntity(layer.GetGuid(),
                                               networkGuid,
                                               layerName,
                                               LabelsAndEventClasses::LAYER_GUID);
    for (auto&& input : layer.GetInputSlots())
    {
        const IOutputSlot* source = input.GetConnectedOutputSlot();
        // ...
        timelineUtils->CreateConnectionRelationship(ProfilingRelationshipType::RetentionLink,
                                                    source->GetOwningLayerGuid(),
                                                    layer.GetGuid());
    }
}
void AddWorkloadStructure(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
                          std::unique_ptr<IWorkload>& workload,
                          const Layer& layer)
{
    // Add the workload to the post-optimisation network structure and label it with its backend.
    timelineUtils->CreateTypedEntity(workload->GetGuid(), LabelsAndEventClasses::WORKLOAD_GUID);
    timelineUtils->MarkEntityWithLabel(workload->GetGuid(),
                                       layer.GetBackendId().Get(),
                                       LabelsAndEventClasses::BACKENDID_GUID);

    // Link the workload to the layer.
    timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
                                      layer.GetGuid(),
                                      workload->GetGuid(),
                                      LabelsAndEventClasses::CHILD_GUID);
}
std::unique_ptr<LoadedNetwork> LoadedNetwork::MakeLoadedNetwork(std::unique_ptr<IOptimizedNetwork> net,
                                                                std::string& errorMessage,
                                                                const INetworkProperties& networkProperties,
                                                                arm::pipe::IProfilingService* profilingService)
{
    std::unique_ptr<LoadedNetwork> loadedNetwork;

    auto Fail = [&](const std::exception& error) -> std::unique_ptr<LoadedNetwork>
    {
        errorMessage = ToErrorMessage("An error occurred when preparing the network workloads: ", error);
        // ...
        return std::unique_ptr<LoadedNetwork>();
    };

    try
    {
        loadedNetwork.reset(new LoadedNetwork(std::move(net), networkProperties, profilingService));
    }
    // ...
    catch (const std::runtime_error& error)
    {
        return Fail(error);
    }

    return loadedNetwork;
}
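// Illustrative caller-side sketch (not in the original file): a runtime would
// typically construct a LoadedNetwork from an optimized network roughly like this;
// `optNet`, `networkProperties` and `profilingService` are hypothetical names
// standing in for the caller's own objects.
//
//     std::string errorMessage;
//     std::unique_ptr<LoadedNetwork> loaded =
//         LoadedNetwork::MakeLoadedNetwork(std::move(optNet), errorMessage, networkProperties, &profilingService);
//     if (!loaded)
//     {
//         ARMNN_LOG(error) << errorMessage; // construction failed; the reason is in errorMessage
//     }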
LoadedNetwork::LoadedNetwork(std::unique_ptr<IOptimizedNetwork> net,
                             const INetworkProperties& networkProperties,
                             arm::pipe::IProfilingService* profilingService) :
                             m_OptimizedNetwork(std::move(net)),
                             m_NetworkProperties(networkProperties),
                             m_TensorHandleFactoryRegistry(),
                             m_ProfilingService(profilingService)
{
    // ...
    const std::shared_ptr<IProfiler>& profiler = m_OptimizedNetwork->GetProfiler();
    // ...

    bool useExternalMemoryManager = false;
    bool useInternalMemoryManager = false;
    Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();

    if (!networkProperties.m_AsyncEnabled)
    {
        m_IsInputImported = std::vector<bool>(order.GetNumInputs(), false);
        m_IsOutputImported = std::vector<bool>(order.GetNumOutputs(), false);
    }
    for (auto&& layer : order)
    {
        auto const& backendId = layer->GetBackendId();
        if (m_Backends.count(backendId) == 0)
        {
            auto createBackend = BackendRegistryInstance().GetFactory(backendId);
            auto it = m_Backends.emplace(std::make_pair(backendId, createBackend()));

            IBackendInternal* backend = it.first->second.get();

            // Reject backends that cannot satisfy the requested execution mode up front.
            if (networkProperties.m_AsyncEnabled &&
                !HasCapability(BackendOptions::BackendOption{"AsyncExecution", true}, backend->GetCapabilities()))
            {
                std::string er = backend->GetId();
                er += " does not support AsyncExecution";
                throw BackendCapabilityException(er);
            }

            if (networkProperties.m_AsyncEnabled &&
                !HasCapability(BackendOptions::BackendOption{"ExternallyManagedMemory", true},
                               backend->GetCapabilities()))
            {
                std::string er = backend->GetId();
                er += " does not support ExternallyManagedMemory\n";
                er += "AsyncEnabled networks require all backends to support ExternallyManagedMemory";
                throw BackendCapabilityException(er);
            }

            if (HasCapability(BackendOptions::BackendOption{"ExternallyManagedMemory", true},
                              backend->GetCapabilities())
                && (m_NetworkProperties.m_ExternalMemoryManagementEnabled || m_NetworkProperties.m_AsyncEnabled))
            {
                m_SupportsExternallyManagedMemory[backend->GetId()] = true;
                useExternalMemoryManager = true;
            }
            else
            {
                m_SupportsExternallyManagedMemory[backend->GetId()] = false;
                useInternalMemoryManager = true;
            }

            IBackendInternal::IWorkloadFactoryPtr workloadFactory;
            if (backend->SupportsTensorAllocatorAPI())
            {
                workloadFactory = backend->CreateWorkloadFactory(
                    m_TensorHandleFactoryRegistry,
                    m_OptimizedNetwork->pOptimizedNetworkImpl->GetModelOptions(),
                    static_cast<MemorySourceFlags>(m_NetworkProperties.m_InputSource),
                    static_cast<MemorySourceFlags>(m_NetworkProperties.m_OutputSource));
            }
            else
            {
                m_BackendMemoryMangers.emplace_back(backend->CreateMemoryManager());
                workloadFactory = backend->CreateWorkloadFactory(
                    m_BackendMemoryMangers.back(), m_OptimizedNetwork->pOptimizedNetworkImpl->GetModelOptions());
            }
            m_WorkloadFactories[backendId] = std::move(workloadFactory);
        }
    }
    for (auto&& layer : order)
    {
        auto& workloadFactory = GetWorkloadFactory(*layer);
        bool supportsExternalManager = m_SupportsExternallyManagedMemory[layer->GetBackendId()];

        switch (layer->GetType())
        {
            case LayerType::Input:
            case LayerType::MemImport:
            {
                // If import is enabled the output tensor handle is not imported yet, so the call is still needed.
                layer->CreateTensorHandles(m_TensorHandleFactoryRegistry,
                                           workloadFactory,
                                           !supportsExternalManager && !m_NetworkProperties.m_ImportEnabled);
                break;
            }
            case LayerType::Constant:
            {
                layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory, true);
                break;
            }
            default:
            {
                // Look for a layer with one output slot that has a single connection to an Output layer.
                // If export is enabled, disable memory management so the tensor can be exported; otherwise copy.
                if ((layer->GetNumOutputSlots() == 1) &&
                    (layer->GetOutputSlots()[0].GetNumConnections() == 1) &&
                    (layer->GetOutputSlots()[0].GetConnection(0)->GetOwningLayer().GetType() == LayerType::Output))
                {
                    layer->CreateTensorHandles(m_TensorHandleFactoryRegistry,
                                               workloadFactory,
                                               !supportsExternalManager && !m_NetworkProperties.m_ExportEnabled);
                }
                else
                {
                    layer->CreateTensorHandles(m_TensorHandleFactoryRegistry,
                                               workloadFactory,
                                               !supportsExternalManager);
                }
            }
        }
    }
    ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
    std::unique_ptr<TimelineUtilityMethods> timelineUtils =
        TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
    if (timelineUtils)
    {
        timelineUtils->CreateTypedEntity(networkGuid, LabelsAndEventClasses::NETWORK_GUID);
        // Mark the network with a start-of-life event...
        timelineUtils->RecordEvent(networkGuid, LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
        // ...and label it with the ID of the process that loaded it.
        int processID = arm::pipe::GetCurrentProcessId();
        std::stringstream ss;
        ss << processID;
        timelineUtils->MarkEntityWithLabel(networkGuid, ss.str(), LabelsAndEventClasses::PROCESS_ID_GUID);
    }
    std::vector<IWorkload*> ConstWorkloads;

    // Then create the workloads.
    for (auto&& layer : order)
    {
        if (timelineUtils)
        {
            // Add layer to the post-optimisation network structure
            AddLayerStructure(timelineUtils, *layer, networkGuid);
        }

        auto& workloadFactory = GetWorkloadFactory(*layer);

        switch (layer->GetType())
        {
            // ... Input and Output layers are handled in EnqueueInput()/EnqueueOutput() ...
            default:
            {
                auto workload = layer->CreateWorkload(workloadFactory);
                if (!workload)
                {
                    const char* const layerName =
                        layer->GetNameStr().length() != 0 ? layer->GetName() : "<Unnamed>";
                    throw InvalidArgumentException(
                        fmt::format("No workload created for layer (name: '{0}' type: '{1}') (compute '{2}')",
                                    layerName, static_cast<int>(layer->GetType()), layer->GetBackendId().Get()));
                }

                if (timelineUtils)
                {
                    AddWorkloadStructure(timelineUtils, workload, *layer);
                }

                // Constant workloads for async or externally-managed networks are kept separately,
                // since their execution is managed exclusively by LoadedNetwork.
                if ((networkProperties.m_AsyncEnabled || useExternalMemoryManager) &&
                    layer->GetType() == LayerType::Constant)
                {
                    m_ConstantTensorHandles[layer->GetGuid()] =
                        layer->GetOutputSlot(0).GetOutputHandler().GetData();
                    m_ConstantWorkloads[layer->GetGuid()] = std::move(workload);
                }
                else
                {
                    m_WorkloadQueue.push_back(std::move(workload));
                    if (layer->GetType() == LayerType::Constant)
                    {
                        // Queue constant workloads so they can be executed first.
                        ConstWorkloads.push_back(m_WorkloadQueue.back().get());
                    }
                }
                // Release the constant data in the layer.
                layer->ReleaseConstantData();
                break;
            }
        }
    }
    // Gather information about the workloads attached to each input and output.
    if (!networkProperties.m_AsyncEnabled && m_WorkloadQueue.size() != 0)
    {
        const int noOfInputs = armnn::numeric_cast<int>(order.GetNumInputs());

        // For each input, record the workloads it feeds and check whether they all
        // support tensor handle replacement (required for pre-imported inputs).
        for (const BindableLayer* layer : order.GetInputLayers())
        {
            const auto bindingId = layer->GetBindingId();

            bool supportsReplacement = true;

            for (const auto inputSlot : layer->GetOutputSlot(0).GetConnections())
            {
                auto workloadIndex = std::distance(order.begin(), order.GetPosInGraph(inputSlot->GetOwningLayer()));
                workloadIndex -= noOfInputs;

                m_InputWorkloadSlotPairs[bindingId].emplace_back(
                    WorkloadIndices{numeric_cast<unsigned int>(workloadIndex), inputSlot->GetSlotIndex()});

                auto workload = m_WorkloadQueue[m_InputWorkloadSlotPairs[bindingId].back().m_WorkloadIndex].get();
                supportsReplacement &= workload->SupportsTensorHandleReplacement();
            }

            ITensorHandleFactory::FactoryId factoryId = layer->GetOutputSlot(0).GetTensorHandleFactoryId();
            ITensorHandleFactory::FactoryId importFactoryId =
                m_TensorHandleFactoryRegistry.GetMatchingImportFactoryId(factoryId);
            ITensorHandleFactory* importFactory = m_TensorHandleFactoryRegistry.GetFactory(importFactoryId);

            if (supportsReplacement && importFactory)
            {
                m_PreImportedInputHandles.emplace_back(
                    bindingId, importFactory->CreateTensorHandle(layer->GetOutputSlot(0).GetTensorInfo(), false));
            }
            else
            {
                m_PreImportedInputHandles.emplace_back(bindingId, nullptr);
            }
        }
        // Likewise for each output: record the producing workload and all consuming
        // workloads, and check that they support tensor handle replacement.
        for (const BindableLayer* layer : order.GetOutputLayers())
        {
            const auto bindingId = layer->GetBindingId();

            const auto outputSlot = layer->GetInputSlot(0).GetConnectedOutputSlot();
            auto& indices = m_OutputWorkloadSlotPairs[bindingId];

            auto workloadIndex = std::distance(order.begin(), order.GetPosInGraph(outputSlot->GetOwningLayer()));
            workloadIndex -= noOfInputs;

            indices.m_OutputSlotIndices = WorkloadIndices{numeric_cast<unsigned int>(workloadIndex),
                                                          outputSlot->CalculateIndexOnOwner()};

            bool supportsReplacement = true;
            auto outputWorkload = m_WorkloadQueue[indices.m_OutputSlotIndices.m_WorkloadIndex].get();
            supportsReplacement &= outputWorkload->SupportsTensorHandleReplacement();

            for (auto& inputSlot : outputSlot->GetConnections())
            {
                if (inputSlot->GetOwningLayer().GetType() != LayerType::Output)
                {
                    auto inWorkloadIndex = std::distance(order.begin(),
                                                         order.GetPosInGraph(inputSlot->GetOwningLayer()));
                    inWorkloadIndex -= noOfInputs;
                    indices.m_InputSlotIndices.emplace_back(
                        WorkloadIndices{numeric_cast<unsigned int>(inWorkloadIndex), inputSlot->GetSlotIndex()});
                    auto inputWorkload = m_WorkloadQueue[indices.m_InputSlotIndices.back().m_WorkloadIndex].get();
                    supportsReplacement &= inputWorkload->SupportsTensorHandleReplacement();
                }
            }

            ITensorHandleFactory::FactoryId importFactoryId =
                m_TensorHandleFactoryRegistry.GetMatchingImportFactoryId(outputSlot->GetTensorHandleFactoryId());
            ITensorHandleFactory* importFactory = m_TensorHandleFactoryRegistry.GetFactory(importFactoryId);

            if (supportsReplacement && importFactory)
            {
                m_PreImportedOutputHandles.emplace_back(
                    bindingId, importFactory->CreateTensorHandle(outputSlot->GetTensorInfo(), false));
            }
            else
            {
                m_PreImportedOutputHandles.emplace_back(bindingId, nullptr);
            }
        }
    }
    for (auto&& workloadFactory : m_WorkloadFactories)
    {
        workloadFactory.second->AfterWorkloadsCreated();
    }

    if (timelineUtils)
    {
        // Commit to send the post-optimisation network structure.
        timelineUtils->Commit();
    }

    if (useExternalMemoryManager)
    {
        if (networkProperties.m_AsyncEnabled)
        {
            CreateMemoryProfileAsync();
        }
        else
        {
            CreateMemoryProfile();
        }

        auto backendStrategyMap = BackendRegistryInstance().GetMemoryOptimizerStrategies();
        for (auto& backendMemoryProfile : m_MemBlockMap)
        {
            const BackendId& backendId = backendMemoryProfile.first;
            if (backendStrategyMap.find(backendId) != backendStrategyMap.end())
            {
                m_MemBinMap[backendId] = backendStrategyMap[backendId]->Optimize(backendMemoryProfile.second);
            }
            else
            {
                m_MemBinMap[backendId] = m_ConstantStrategy->Optimize(backendMemoryProfile.second);
            }
        }

        if (!networkProperties.m_AsyncEnabled)
        {
            m_ExternalMemoryManager = CreateExternalMemoryManger(m_TensorMemory);

            // Sort m_TensorMemory so its order matches the outputSlot order.
            std::sort(m_TensorMemory.begin(), m_TensorMemory.end(),
                      [](const std::pair<std::shared_ptr<TensorMemory>, MemorySource>& lhs,
                         const std::pair<std::shared_ptr<TensorMemory>, MemorySource>& rhs)
                      {
                          return lhs.first->m_OutputSlotId < rhs.first->m_OutputSlotId;
                      });
        }
    }
    // Now that the workloads exist and the intermediate memory is set up,
    // run any post-allocation configuration and execute the constant workloads.
    if (useInternalMemoryManager)
    {
        // Set up memory.
        m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().AllocateDynamicBuffers();
    }

    for (auto& workload : m_WorkloadQueue)
    {
        workload->PostAllocationConfigure();
    }

    if (useExternalMemoryManager)
    {
        if (!networkProperties.m_AsyncEnabled)
        {
            AllocateAndExecuteConstantWorkloads();
        }
        else
        {
            AllocateAndExecuteConstantWorkloadsAsync();
        }
    }

    // Execute the constant-layer workloads (ConstWorkloads is only populated for synchronous networks).
    for (auto workload : ConstWorkloads)
    {
        workload->Execute();
    }
}
void LoadedNetwork::AllocateAndExecuteConstantWorkloads()
{
    // ...
    for (auto& pair : m_ConstantWorkloads)
    {
        auto tensorHandle = m_ConstantTensorHandles[pair.first];
        tensorHandle->Allocate();
        pair.second->Execute();
    }
}
void LoadedNetwork::AllocateAndExecuteConstantWorkloadsAsync()
{
    // ...
    Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
    for (auto&& layer : order)
    {
        if (layer->GetType() == LayerType::Constant)
        {
            const auto& outSlot = layer->GetOutputSlots()[0];
            const auto factoryId = outSlot.GetTensorHandleFactoryId();
            // ...
            auto& workloadFactory = GetWorkloadFactory(*layer);

            layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory);
            ITensorHandle* tensorHandle = outSlot.GetOutputHandler().GetData();

            m_ConstantTensorHandles[layer->GetGuid()] = tensorHandle;
            tensorHandle->Allocate();

            WorkingMemDescriptor memDesc;
            memDesc.m_Outputs.push_back(tensorHandle);
            m_ConstantWorkloads[layer->GetGuid()]->ExecuteAsync(memDesc);
        }
    }
}
void LoadedNetwork::SendNetworkStructure(arm::pipe::IProfilingService& profilingService)
{
    // ...
    Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
    ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();

    std::unique_ptr<TimelineUtilityMethods> timelineUtils =
        TimelineUtilityMethods::GetTimelineUtils(profilingService);

    timelineUtils->CreateTypedEntity(networkGuid, LabelsAndEventClasses::NETWORK_GUID);

    for (auto&& layer : order)
    {
        // Add layer to the post-optimisation network structure.
        AddLayerStructure(timelineUtils, *layer, networkGuid);
        switch (layer->GetType())
        {
            // ... Input and Output layers are skipped here ...
            default:
            {
                for (auto& workload : m_WorkloadQueue)
                {
                    // Add workload to the post-optimisation network structure.
                    AddWorkloadStructure(timelineUtils, workload, *layer);
                }
                break;
            }
        }
    }
    // Commit to send the post-optimisation network structure.
    timelineUtils->Commit();
}

arm::pipe::ProfilingGuid LoadedNetwork::GetNetworkGuid()
{
    return m_OptimizedNetwork->GetGuid();
}
TensorInfo LoadedNetwork::GetInputTensorInfo(LayerBindingId layerId) const
{
    for (auto&& inputLayer : m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetInputLayers())
    {
        ARMNN_ASSERT_MSG(inputLayer->GetNumOutputSlots() == 1, "Input layer should have exactly 1 output slot");
        if (inputLayer->GetBindingId() == layerId)
        {
            return inputLayer->GetOutputSlot(0).GetTensorInfo();
        }
    }

    throw InvalidArgumentException(fmt::format("No input layer is associated with id {}", layerId));
}

TensorInfo LoadedNetwork::GetOutputTensorInfo(LayerBindingId layerId) const
{
    for (auto&& outputLayer : m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetOutputLayers())
    {
        ARMNN_ASSERT_MSG(outputLayer->GetNumInputSlots() == 1, "Output layer should have exactly 1 input slot");
        ARMNN_ASSERT_MSG(outputLayer->GetInputSlot(0).GetConnection(), "Input slot on Output layer must be connected");
        if (outputLayer->GetBindingId() == layerId)
        {
            return outputLayer->GetInputSlot(0).GetConnection()->GetTensorInfo();
        }
    }

    throw InvalidArgumentException(fmt::format("No output layer is associated with id {}", layerId));
}
const IWorkloadFactory& LoadedNetwork::GetWorkloadFactory(const Layer& layer) const
{
    const IWorkloadFactory* workloadFactory = nullptr;

    auto it = m_WorkloadFactories.find(layer.GetBackendId());
    if (it == m_WorkloadFactories.end())
    {
        throw RuntimeException(fmt::format("No workload factory for {0} to be used for layer: {1}",
                                           layer.GetBackendId().Get(),
                                           layer.GetNameStr()),
                               CHECK_LOCATION());
    }

    workloadFactory = it->second.get();

    ARMNN_ASSERT_MSG(workloadFactory, "No workload factory");

    std::string reasonIfUnsupported;
    ARMNN_ASSERT_MSG(IWorkloadFactory::IsLayerSupported(layer, {}, reasonIfUnsupported,
                                                        m_OptimizedNetwork->pOptimizedNetworkImpl->GetModelOptions()),
                     "Factory does not support layer");
    IgnoreUnused(reasonIfUnsupported);
    return *workloadFactory;
}
namespace
{

// Owns the pass-through tensor handle for one binding point.
class TensorPin
{
public:
    TensorPin(std::unique_ptr<ITensorHandle> handle, const TensorInfo& info, LayerBindingId id)
        : m_TensorHandle(std::move(handle))
        , m_TensorInfo(info)
        , m_Id(id)
    {
    }

    ITensorHandle* GetTensorHandle() const { return m_TensorHandle.get(); }
    const TensorInfo& GetTensorInfo() const { return m_TensorInfo; }
    LayerBindingId GetBindingId() const { return m_Id; }

private:
    std::unique_ptr<ITensorHandle> m_TensorHandle;
    TensorInfo m_TensorInfo;
    LayerBindingId m_Id;
};

static const TensorPin& GetTensorPin(LayerBindingId id,
                                     const std::vector<TensorPin>& pins,
                                     char const* bindingPointDesc)
{
    auto it = std::find_if(pins.begin(), pins.end(),
                           [id](const TensorPin& pin)
                           {
                               return pin.GetBindingId() == id;
                           });

    if (it != pins.end())
    {
        return *it;
    }
    else
    {
        throw InvalidArgumentException(fmt::format("No tensor supplied for {0} {1}", bindingPointDesc, id));
    }
}

// Stores data that must be kept accessible for the entire execution of a workload.
class WorkloadData
{
public:
    WorkloadData(const InputTensors& inputTensors, const OutputTensors& outputTensors)
    {
        m_InputTensorPins.reserve(inputTensors.size());
        m_OutputTensorPins.reserve(outputTensors.size());

        for (auto inputTensorPair : inputTensors)
        {
            auto inputTensor = inputTensorPair.second;
            std::unique_ptr<ITensorHandle> tensorHandle =
                std::make_unique<ConstPassthroughTensorHandle>(inputTensor.GetInfo(), inputTensor.GetMemoryArea());
            LayerBindingId layerId = inputTensorPair.first;

            m_InputTensorPins.emplace_back(std::move(tensorHandle), inputTensor.GetInfo(), layerId);
        }

        for (auto outputTensorPair : outputTensors)
        {
            auto outputTensor = outputTensorPair.second;
            std::unique_ptr<ITensorHandle> tensorHandle =
                std::make_unique<PassthroughTensorHandle>(outputTensor.GetInfo(), outputTensor.GetMemoryArea());
            LayerBindingId layerId = outputTensorPair.first;

            m_OutputTensorPins.emplace_back(std::move(tensorHandle), outputTensor.GetInfo(), layerId);
        }
    }

    const TensorPin& GetInputTensorPin(LayerBindingId id) const
    {
        return GetTensorPin(id, m_InputTensorPins, "input");
    }

    const TensorPin& GetOutputTensorPin(LayerBindingId id) const
    {
        return GetTensorPin(id, m_OutputTensorPins, "output");
    }

private:
    std::vector<TensorPin> m_InputTensorPins;
    std::vector<TensorPin> m_OutputTensorPins;
};

} // anonymous namespace
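// Illustrative sketch (not in the original file): EnqueueWorkload uses WorkloadData
// to look up the user-supplied memory for a given binding point, roughly like so
// (`workloadData` and `layerBindingId` are stand-in names):
//
//     WorkloadData workloadData(inputTensors, outputTensors);
//     const TensorPin& pin = workloadData.GetInputTensorPin(layerBindingId);
//     ITensorHandle* handle = pin.GetTensorHandle(); // pass-through handle over the caller's buffer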
Status LoadedNetwork::EnqueueWorkload(const InputTensors& inputTensors,
                                      const OutputTensors& outputTensors,
                                      std::vector<ImportedInputId> preImportedInputIds,
                                      std::vector<ImportedOutputId> preImportedOutputIds)
{
    const Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();

    // ...
    if (graph.GetNumLayers() < 2)
    {
        ARMNN_LOG(warning) << "IRuntime::EnqueueWorkload()::Less than two nodes in graph";
        return Status::Failure;
    }

    // Data that must be kept alive for the entire execution of the workload.
    WorkloadData workloadData(inputTensors, outputTensors);

    // ...

    // For each input to the network, call EnqueueInput with the data passed by the user.
    {
        m_InputQueue.clear();
        m_InputQueue.reserve(graph.GetNumInputs());

        unsigned int inputIndex = 0;
        unsigned int importedInputIdIndex = 0;
        std::sort(preImportedInputIds.begin(), preImportedInputIds.end());
        for (const BindableLayer* inputLayer : graph.GetInputLayers())
        {
            if (importedInputIdIndex < preImportedInputIds.size() &&
                inputIndex == preImportedInputIds[importedInputIdIndex])
            {
                // Only replace tensor handles if they have not already been replaced.
                if (!m_IsInputImported[inputIndex])
                {
                    auto outputTensorHandle = m_PreImportedInputHandles[inputIndex].m_TensorHandle.get();

                    for (const auto& workloadInfo : m_InputWorkloadSlotPairs[inputLayer->GetBindingId()])
                    {
                        auto workload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
                        workload->ReplaceInputTensorHandle(outputTensorHandle, workloadInfo.m_SlotIndex);
                    }
                    m_IsInputImported[inputIndex] = true;
                }
                importedInputIdIndex++;
            }
            else
            {
                if (m_IsInputImported[inputIndex])
                {
                    OutputHandler& handler = const_cast<OutputHandler&>(inputLayer->GetOutputHandler(0));

                    for (const auto& workloadInfo : m_InputWorkloadSlotPairs[inputLayer->GetBindingId()])
                    {
                        auto workload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
                        workload->ReplaceInputTensorHandle(handler.GetData(), workloadInfo.m_SlotIndex);
                    }
                    m_IsInputImported[inputIndex] = false;
                }

                // The input tensor handle has not been imported yet, so enqueue a copy.
                const TensorPin& pin = workloadData.GetInputTensorPin(inputLayer->GetBindingId());
                EnqueueInput(*inputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
            }
            inputIndex++;
        }
    }
    // For each output of the network, call EnqueueOutput with the data passed by the user.
    {
        m_OutputQueue.clear();
        m_OutputQueue.reserve(graph.GetNumOutputs());

        // ...

        unsigned int outputIndex = 0;
        unsigned int importedOutputIdIndex = 0;
        std::sort(preImportedOutputIds.begin(), preImportedOutputIds.end());
        for (const BindableLayer* outputLayer : graph.GetOutputLayers())
        {
            if (importedOutputIdIndex < preImportedOutputIds.size() &&
                outputIndex == preImportedOutputIds[importedOutputIdIndex])
            {
                ITensorHandle* inputTensorHandle = m_PreImportedOutputHandles[outputIndex].m_TensorHandle.get();

                if (!m_IsOutputImported[outputIndex])
                {
                    const auto bindingId = outputLayer->GetBindingId();
                    const auto& indices = m_OutputWorkloadSlotPairs[bindingId];

                    auto outputWorkload = m_WorkloadQueue[indices.m_OutputSlotIndices.m_WorkloadIndex].get();

                    outputWorkload->ReplaceOutputTensorHandle(inputTensorHandle,
                                                              indices.m_OutputSlotIndices.m_SlotIndex);

                    for (const auto& workloadInfo : indices.m_InputSlotIndices)
                    {
                        auto inputWorkload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
                        inputWorkload->ReplaceInputTensorHandle(inputTensorHandle, workloadInfo.m_SlotIndex);
                    }
                    m_IsOutputImported[outputIndex] = true;
                }

                ARMNN_ASSERT_MSG(inputTensorHandle != nullptr, "Data should have been allocated.");

                MemSyncQueueDescriptor syncDesc;
                syncDesc.m_Inputs.push_back(inputTensorHandle);
                WorkloadInfo info;
                info.m_InputTensorInfos.push_back(
                    outputLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo());

                auto syncWorkload = std::make_unique<SyncMemGenericWorkload>(syncDesc, info);
                m_OutputQueue.push_back(std::move(syncWorkload));
                importedOutputIdIndex++;
            }
            else
            {
                if (m_IsOutputImported[outputIndex])
                {
                    const auto bindingId = outputLayer->GetBindingId();
                    const auto& indices = m_OutputWorkloadSlotPairs[bindingId];

                    auto outputWorkload = m_WorkloadQueue[indices.m_OutputSlotIndices.m_WorkloadIndex].get();
                    const OutputHandler& outputHandler =
                        outputLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetOutputHandler();

                    outputWorkload->ReplaceOutputTensorHandle(
                        outputHandler.GetData(), indices.m_OutputSlotIndices.m_SlotIndex);

                    for (const auto& workloadInfo : indices.m_InputSlotIndices)
                    {
                        auto inputWorkload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
                        inputWorkload->ReplaceInputTensorHandle(outputHandler.GetData(), workloadInfo.m_SlotIndex);
                    }
                    m_IsOutputImported[outputIndex] = false;
                }

                // The output tensor handle is not imported; enqueue an output workload.
                const TensorPin& pin = workloadData.GetOutputTensorPin(outputLayer->GetBindingId());
                EnqueueOutput(*outputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
            }
            outputIndex++;
        }
    }
    std::unique_ptr<TimelineUtilityMethods> timelineUtils =
        TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
    ProfilingGuid inferenceGuid = m_ProfilingService->GetNextGuid();
    if (timelineUtils)
    {
        // Add an inference trace to the timeline if profiling is enabled.
        ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
        timelineUtils->CreateTypedEntity(inferenceGuid, LabelsAndEventClasses::INFERENCE_GUID);
        timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
                                          networkGuid,
                                          inferenceGuid,
                                          LabelsAndEventClasses::EXECUTION_OF_GUID);
        timelineUtils->RecordEvent(inferenceGuid, LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
    }

    bool executionSucceeded = true;

    {
        if (m_ProfilingService->IsProfilingEnabled())
        {
            m_ProfilingService->IncrementCounterValue(INFERENCES_RUN);
        }
        // ...
        executionSucceeded = Execute(timelineUtils, inferenceGuid);
    }

    if (timelineUtils)
    {
        // Mark the end of life of the inference timeline if profiling is enabled.
        timelineUtils->RecordEvent(inferenceGuid, LabelsAndEventClasses::ARMNN_PROFILING_EOL_EVENT_CLASS);
        timelineUtils->Commit();
    }

    return executionSucceeded ? Status::Success : Status::Failure;
}
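// Illustrative sketch (not in the original file): a single-threaded caller drives the
// loaded network through EnqueueWorkload; binding id 0 and the buffer names are
// hypothetical:
//
//     InputTensors inputs{{0, ConstTensor(inputInfo, inputData.data())}};
//     OutputTensors outputs{{0, Tensor(outputInfo, outputData.data())}};
//     Status status = loadedNetwork->EnqueueWorkload(inputs, outputs);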
void LoadedNetwork::EnqueueInput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo)
{
    // ...
    if (tensorHandle == nullptr)
    {
        throw InvalidArgumentException("EnqueueInput: tensorHandle must not be NULL");
    }

    InputQueueDescriptor inputQueueDescriptor;
    WorkloadInfo info;

    inputQueueDescriptor.m_Inputs.push_back(tensorHandle);
    info.m_InputTensorInfos.push_back(tensorInfo);

    // ...
    const OutputHandler& handler = layer.GetOutputHandler();
    const TensorInfo& outputTensorInfo = handler.GetTensorInfo();
    ITensorHandle* outputTensorHandle = handler.GetData();
    ARMNN_ASSERT_MSG(outputTensorHandle != nullptr, "Data should have been allocated.");
    inputQueueDescriptor.m_Outputs.push_back(outputTensorHandle);
    info.m_OutputTensorInfos.push_back(outputTensorInfo);

    MemorySourceFlags importFlags = outputTensorHandle->GetImportFlags();
    bool needMemCopy = true;
    if (m_NetworkProperties.m_ImportEnabled)  // Try to import the input tensor.
    {
        if (CheckFlag(importFlags, m_NetworkProperties.m_InputSource))
        {
            needMemCopy = false;
            // This assumes a CPU tensor handle.
            void* mem = tensorHandle->Map(false);
            if (outputTensorHandle->Import(mem, m_NetworkProperties.m_InputSource))
            {
                tensorHandle->Unmap();
                return; // No workload is needed since the import has been done.
            }
            tensorHandle->Unmap();
            throw MemoryImportException("EnqueueInput: Memory Import failed");
        }
    }
    if (needMemCopy)
    {
        // Create a memcopy workload for the input, since it was not imported.
        std::unique_ptr<IWorkload> inputWorkload = std::make_unique<CopyMemGenericWorkload>(inputQueueDescriptor, info);

        std::unique_ptr<TimelineUtilityMethods> timelineUtils =
            TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
        if (timelineUtils)
        {
            // Add the input workload to the post-optimisation network structure.
            AddWorkloadStructure(timelineUtils, inputWorkload, layer);
            timelineUtils->Commit();
        }

        m_InputQueue.push_back(std::move(inputWorkload));
    }
}
void LoadedNetwork::EnqueueOutput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo)
{
    // ...
    if (tensorHandle == nullptr)
    {
        throw InvalidArgumentException("EnqueueOutput: tensorHandle must not be NULL");
    }

    OutputQueueDescriptor outputQueueDescriptor;
    WorkloadInfo info;

    outputQueueDescriptor.m_Outputs.push_back(tensorHandle);
    info.m_OutputTensorInfos.push_back(tensorInfo);

    // ...
    const OutputHandler& outputHandler = layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOutputHandler();
    const TensorInfo& inputTensorInfo = outputHandler.GetTensorInfo();
    ITensorHandle* inputTensorHandle = outputHandler.GetData();
    ARMNN_ASSERT_MSG(inputTensorHandle != nullptr, "Data should have been allocated.");

    // Try to export the tensor directly into the user's buffer instead of copying.
    bool needMemCopy = true;
    if (m_NetworkProperties.m_ExportEnabled &&
        (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetNumConnections() == 1))
    {
        MemorySourceFlags importFlags = inputTensorHandle->GetImportFlags();
        if (CheckFlag(importFlags, m_NetworkProperties.m_OutputSource))
        {
            needMemCopy = false;
            void* mem = tensorHandle->Map(false);
            bool importOk = inputTensorHandle->Import(mem, m_NetworkProperties.m_OutputSource);
            tensorHandle->Unmap();

            if (importOk)
            {
                // Insert a synchronization workload in place of the memcopy.
                MemSyncQueueDescriptor syncDesc;
                syncDesc.m_Inputs.push_back(inputTensorHandle);
                info.m_InputTensorInfos.push_back(inputTensorInfo);
                auto syncWorkload = std::make_unique<SyncMemGenericWorkload>(syncDesc, info);
                m_OutputQueue.push_back(std::move(syncWorkload));
            }
            else
            {
                throw MemoryExportException("EnqueueOutput: Memory Export failed");
            }
        }
    }
    if (needMemCopy)
    {
        // The memory could not be exported, so add a copy workload instead.
        outputQueueDescriptor.m_Inputs.push_back(inputTensorHandle);
        info.m_InputTensorInfos.push_back(inputTensorInfo);

        std::unique_ptr<IWorkload> outputWorkload =
            std::make_unique<CopyMemGenericWorkload>(outputQueueDescriptor, info);

        std::unique_ptr<TimelineUtilityMethods> timelineUtils =
            TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
        if (timelineUtils)
        {
            // Add the output workload to the post-optimisation network structure.
            AddWorkloadStructure(timelineUtils, outputWorkload, layer);
            timelineUtils->Commit();
        }

        m_OutputQueue.push_back(std::move(outputWorkload));
    }
}
void LoadedNetwork::AllocateWorkingMemory(
#if !defined(ARMNN_DISABLE_THREADS)
    std::lock_guard<std::mutex>& lock
#endif
    )
{
    // ...
#if !defined(ARMNN_DISABLE_THREADS)
    // The unused lock parameter ensures this can only be called with the mutex held.
    IgnoreUnused(lock);
#endif
    if (m_IsWorkingMemAllocated)
    {
        return;
    }

    if (m_ExternalMemoryManager)
    {
        m_ExternalMemoryManager->Allocate();

        for (unsigned int i = 0; i < m_TensorMemory.size(); ++i)
        {
            m_Tensorhandles[i]->Import(m_TensorMemory[i].first->m_Data, m_TensorMemory[i].second);
        }
    }

    for (auto&& memoryManager : m_BackendMemoryMangers)
    {
        if (memoryManager)
        {
            memoryManager->Acquire();
        }
    }
    m_TensorHandleFactoryRegistry.AquireMemory();
    m_IsWorkingMemAllocated = true;
}

void LoadedNetwork::FreeWorkingMemory()
{
#if !defined(ARMNN_DISABLE_THREADS)
    std::lock_guard<std::mutex> lockGuard(m_WorkingMemMutex);
#endif
    if (!m_IsWorkingMemAllocated)
    {
        return;
    }

    if (m_ExternalMemoryManager)
    {
        m_ExternalMemoryManager->Deallocate();
    }

    // Inform the memory managers to release memory in their respective memory groups.
    for (auto&& memoryManager : m_BackendMemoryMangers)
    {
        if (memoryManager)
        {
            memoryManager->Release();
        }
    }
    m_TensorHandleFactoryRegistry.ReleaseMemory();
    m_IsWorkingMemAllocated = false;
}
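// Illustrative note (not in the original file): working memory is acquired lazily on
// the first execution and stays allocated until the caller explicitly releases it:
//
//     loadedNetwork->EnqueueWorkload(inputs, outputs); // allocates working memory on first use
//     loadedNetwork->FreeWorkingMemory();              // returns inter-layer memory to the managers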
bool LoadedNetwork::Execute(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
                            ProfilingGuid inferenceGuid)
{
    bool success = true;

    auto Fail = [&](const std::exception& error)
    {
        ARMNN_LOG(error) << "An error occurred attempting to execute a workload: " << error.what();
        success = false;
    };

    try
    {
#if !defined(ARMNN_DISABLE_THREADS)
        std::lock_guard<std::mutex> lockGuard(m_WorkingMemMutex);
        AllocateWorkingMemory(lockGuard);
#else
        AllocateWorkingMemory();
#endif

        ProfilingDynamicGuid workloadInferenceID(0);
        auto ExecuteQueue = [&timelineUtils, &workloadInferenceID, &inferenceGuid](WorkloadQueue& queue)
        {
            for (auto& workload : queue)
            {
                if (timelineUtils)
                {
                    workloadInferenceID = timelineUtils->RecordWorkloadInferenceAndStartOfLifeEvent(workload->GetGuid(),
                                                                                                    inferenceGuid);
                }
                workload->Execute();
                if (timelineUtils)
                {
                    timelineUtils->RecordEndOfLifeEvent(workloadInferenceID);
                }
            }
        };

        ExecuteQueue(m_InputQueue);
        ExecuteQueue(m_WorkloadQueue);
        ExecuteQueue(m_OutputQueue);
    }
    // ...
    catch (const std::runtime_error& error)
    {
        Fail(error);
    }

    return success;
}
void LoadedNetwork::EnqueueInput(const ConstTensor& inputTensor, ITensorHandle* inputTensorHandle)
{
    if (m_NetworkProperties.m_ImportEnabled)  // Try to import the input tensor.
    {
        MemorySourceFlags importFlags = inputTensorHandle->GetImportFlags();
        if (CheckFlag(importFlags, m_NetworkProperties.m_InputSource))
        {
            std::unique_ptr<ITensorHandle> tensorHandle =
                std::make_unique<ConstPassthroughTensorHandle>(inputTensor.GetInfo(),
                                                               inputTensor.GetMemoryArea());
            void* mem = tensorHandle->Map(false);
            if (inputTensorHandle->Import(mem, m_NetworkProperties.m_InputSource))
            {
                tensorHandle->Unmap();
                return;
            }
            tensorHandle->Unmap();
            throw MemoryImportException("EnqueueInput: Memory Import failed");
        }
        // ...
    }
    else
    {
        // Import is not enabled, so copy the caller's data into the input tensor handle.
        std::unique_ptr<ITensorHandle> tensorHandle =
            std::make_unique<ConstPassthroughTensorHandle>(inputTensor.GetInfo(), inputTensor.GetMemoryArea());

        auto copyFunc = [](void* dst, const void* src, size_t size)
        {
            memcpy(dst, src, size);
        };

        CopyTensorContentsGeneric(tensorHandle.get(), inputTensorHandle, copyFunc);
    }
}
void LoadedNetwork::ImportOutputTensor(const Tensor& outputTensor, ITensorHandle* outputTensorHandle)
{
    ARMNN_ASSERT_MSG(outputTensorHandle != nullptr, "Data should have been allocated.");
    MemorySourceFlags importFlags = outputTensorHandle->GetImportFlags();
    if (CheckFlag(importFlags, m_NetworkProperties.m_OutputSource))
    {
        std::unique_ptr<ITensorHandle> tensorHandle =
            std::make_unique<PassthroughTensorHandle>(outputTensor.GetInfo(),
                                                      outputTensor.GetMemoryArea());

        void* mem = tensorHandle->Map(false);
        bool importOk = outputTensorHandle->Import(mem, m_NetworkProperties.m_OutputSource);
        tensorHandle->Unmap();

        if (!importOk)
        {
            throw MemoryExportException("ImportOutputTensor: Memory Export failed");
        }
    }
    else
    {
        throw MemoryExportException("ImportOutputTensor: Memory Export failed, attempting to export Input Layer");
    }
}

void CopyToOutputTensor(const Tensor& outputTensor, ITensorHandle* outputTensorHandle)
{
    // ...
    auto copyFunc = [](void* dst, const void* src, size_t size)
    {
        memcpy(dst, src, size);
    };

    std::unique_ptr<ITensorHandle> tensorHandle =
        std::make_unique<PassthroughTensorHandle>(outputTensor.GetInfo(),
                                                  outputTensor.GetMemoryArea());

    CopyTensorContentsGeneric(outputTensorHandle, tensorHandle.get(), copyFunc);
}
const armnn::ConstTensor GetInputTensor(const LayerBindingId layerId, const InputTensors& inputTensors)
{
    for (auto inputTensorPair : inputTensors)
    {
        if (inputTensorPair.first == layerId)
        {
            return inputTensorPair.second;
        }
    }
    throw InvalidArgumentException("Input does not exist.");
}

const armnn::Tensor GetOutputTensor(const LayerBindingId layerId, const OutputTensors& outputTensors)
{
    for (auto outputTensorPair : outputTensors)
    {
        if (outputTensorPair.first == layerId)
        {
            return outputTensorPair.second;
        }
    }
    throw InvalidArgumentException("Output does not exist.");
}
std::vector<ImportedInputId> LoadedNetwork::ImportInputs(const InputTensors& inputTensors,
                                                         MemorySource forceImportMemorySource)
{
    if (!m_NetworkProperties.m_AsyncEnabled)
    {
        // Cannot import when import is not enabled and forceImportMemorySource is undefined.
        if (forceImportMemorySource == MemorySource::Undefined)
        {
            throw MemoryImportException("ImportInputs: Memory Import failed, NetworkProperties.m_ImportEnabled");
        }
        if (inputTensors.size() != m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetNumInputs())
        {
            throw MemoryImportException("ImportInputs: Force Import failed, incorrect number of tensors");
        }

        std::vector<ImportedInputId> importedInputs;
        Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
        unsigned int inputIndex = 0;
        for (const BindableLayer* inputLayer : graph.GetInputLayers())
        {
            auto outputTensorHandle = m_PreImportedInputHandles[inputIndex].m_TensorHandle.get();
            if (!outputTensorHandle)
            {
                inputIndex++;
                continue;
            }

            auto layerBindingId = inputLayer->GetBindingId();
            auto it = std::find_if(inputTensors.begin(), inputTensors.end(), [=](const auto& inputTensor)
            {
                return inputTensor.first == layerBindingId;
            });

            if (it == inputTensors.end())
            {
                inputIndex++;
                continue;
            }

            const auto& inputTensor = *it;
            std::unique_ptr<ITensorHandle> passThroughTensorHandle =
                std::make_unique<ConstPassthroughTensorHandle>(inputTensor.second.GetInfo(),
                                                               inputTensor.second.GetMemoryArea());

            if (outputTensorHandle->CanBeImported(passThroughTensorHandle->Map(), forceImportMemorySource)
                && (outputTensorHandle->Import(passThroughTensorHandle->Map(), forceImportMemorySource)))
            {
                importedInputs.push_back(inputIndex);
            }
            passThroughTensorHandle->Unmap();

            inputIndex++;
        }

        return importedInputs;
    }
    // Import is enabled on the network: create and fill a new import tensor handle per input.
    std::vector<ImportedInputId> importedInputs;
    Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();

    for (auto inputTensor : inputTensors)
    {
        auto layerBindingId = inputTensor.first;
        auto it = std::find_if(graph.GetInputLayers().begin(), graph.GetInputLayers().end(),
                               [=](auto* layer) { return layer->GetBindingId() == layerBindingId; });

        if (it == graph.GetInputLayers().end())
        {
            throw MemoryImportException(fmt::format(
                "ImportInputs: Memory Import failed, unknown LayerBindingId: {}", layerBindingId));
        }

        const Layer* layer = *it;
        // ...

        auto& backend = m_Backends.at(layer->GetBackendId());
        if (!HasCapability(BackendOptions::BackendOption{"PreImportIOTensors", true}, backend->GetCapabilities()))
        {
            std::string er = backend->GetId();
            er += " does not have PreImportIOTensors capability";
            throw BackendCapabilityException(er);
        }

        const OutputSlot& outputSlot = layer->GetOutputSlots()[0];
        ITensorHandleFactory::FactoryId factoryId = outputSlot.GetTensorHandleFactoryId();
        const TensorInfo& tensorInfo = outputSlot.GetTensorInfo();
        ITensorHandleFactory* handleFactory = m_TensorHandleFactoryRegistry.GetFactory(factoryId);
        // ...

        ImportedTensorHandlePin importedTensorHandlePin{layerBindingId,
                                                        handleFactory->CreateTensorHandle(tensorInfo, false)};
        ITensorHandle* tensorHandle = importedTensorHandlePin.m_TensorHandle.get();

        if (!CheckFlag(tensorHandle->GetImportFlags(), m_NetworkProperties.m_InputSource))
        {
            throw MemoryImportException(fmt::format("ImportInputs: Memory Import failed, backend: "
                                                    "{} does not support importing from source {}",
                                                    factoryId, m_NetworkProperties.m_InputSource));
        }

        std::unique_ptr<ITensorHandle> passThroughTensorHandle =
            std::make_unique<ConstPassthroughTensorHandle>(inputTensor.second.GetInfo(),
                                                           inputTensor.second.GetMemoryArea());

        if (tensorHandle->Import(passThroughTensorHandle->Map(), m_NetworkProperties.m_InputSource))
        {
            importedInputs.push_back(m_CurImportedInputId++);
            passThroughTensorHandle->Unmap();
        }
        else
        {
            passThroughTensorHandle->Unmap();
            throw MemoryImportException("ImportInputs: Memory Import failed");
        }

        m_PreImportedInputHandles.push_back(std::move(importedTensorHandlePin));
    }
    return importedInputs;
}
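// Illustrative sketch (not in the original file): pre-importing avoids a per-inference
// copy when the same user buffers are reused across runs. Assuming a backend with the
// PreImportIOTensors capability, a caller might do roughly:
//
//     std::vector<ImportedInputId> importedIds = loadedNetwork->ImportInputs(inputTensors);
//     // Later runs pass the ids instead of the tensors:
//     loadedNetwork->Execute({}, outputTensors, *workingMemHandle, importedIds);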
std::vector<ImportedOutputId> LoadedNetwork::ImportOutputs(const OutputTensors& outputTensors,
                                                           MemorySource forceImportMemorySource)
{
    if (!m_NetworkProperties.m_AsyncEnabled)
    {
        // Cannot import when import is not enabled and forceImportMemorySource is undefined.
        if (forceImportMemorySource == MemorySource::Undefined)
        {
            throw MemoryImportException("ImportOutputs: Memory Import failed, NetworkProperties.m_ImportEnabled");
        }
        if (outputTensors.size() != m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetNumOutputs())
        {
            throw MemoryImportException("ImportOutputs: Force Import failed, incorrect number of tensors");
        }

        std::vector<ImportedOutputId> importedOutputs;
        Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();

        unsigned int outputIndex = 0;
        for (const BindableLayer* outputLayer : graph.GetOutputLayers())
        {
            auto inputTensorHandle = m_PreImportedOutputHandles[outputIndex].m_TensorHandle.get();
            if (!inputTensorHandle)
            {
                outputIndex++;
                continue;
            }

            auto layerBindingId = outputLayer->GetBindingId();
            auto it = std::find_if(outputTensors.begin(), outputTensors.end(), [=](const auto& outputTensor)
            {
                return outputTensor.first == layerBindingId;
            });

            if (it == outputTensors.end())
            {
                outputIndex++;
                continue;
            }

            const auto outputTensor = *it;
            // Check whether the output memory can be imported, and import it if so.
            if (inputTensorHandle->CanBeImported(outputTensor.second.GetMemoryArea(), forceImportMemorySource)
                && inputTensorHandle->Import(outputTensor.second.GetMemoryArea(), forceImportMemorySource))
            {
                importedOutputs.push_back(outputIndex);
            }
            outputIndex++;
        }
        return importedOutputs;
    }
    // Import is enabled on the network: create and import a new tensor handle per output.
    std::vector<ImportedOutputId> importedOutputs;
    Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();

    for (const auto& outputTensor : outputTensors)
    {
        auto layerBindingId = outputTensor.first;
        auto it = std::find_if(graph.GetOutputLayers().begin(), graph.GetOutputLayers().end(),
                               [=](auto* layer) { return layer->GetBindingId() == layerBindingId; });

        if (it == graph.GetOutputLayers().end())
        {
            throw MemoryImportException(fmt::format("ImportOutputs: Memory Import failed, unknown LayerBindingId: {}",
                                                    layerBindingId));
        }

        const Layer* layer = *it;
        // ...

        auto& backend = m_Backends.at(layer->GetBackendId());
        if (!HasCapability(BackendOptions::BackendOption{"PreImportIOTensors", true}, backend->GetCapabilities()))
        {
            std::string er = backend->GetId();
            er += " does not have PreImportIOTensors capability";
            throw BackendCapabilityException(er);
        }

        // ... resolve the tensor handle factory and tensor info for this output ...

        ImportedTensorHandlePin importedTensorHandlePin{layerBindingId,
                                                        handleFactory->CreateTensorHandle(tensorInfo, false)};
        ITensorHandle* tensorHandle = importedTensorHandlePin.m_TensorHandle.get();

        if (!CheckFlag(tensorHandle->GetImportFlags(), m_NetworkProperties.m_OutputSource))
        {
            throw MemoryImportException(fmt::format("ImportOutputs: Memory Import failed, backend: "
                                                    "{} does not support importing from source {}",
                                                    factoryId, m_NetworkProperties.m_OutputSource));
        }

        if (tensorHandle->Import(outputTensor.second.GetMemoryArea(), m_NetworkProperties.m_OutputSource))
        {
            importedOutputs.push_back(m_CurImportedOutputId++);
        }
        else
        {
            throw MemoryImportException("ImportOutputs: Memory Import failed");
        }

        m_PreImportedOutputHandles.push_back(std::move(importedTensorHandlePin));
    }

    return importedOutputs;
}
void LoadedNetwork::ClearImportedInputs(const std::vector<ImportedInputId> inputIds)
{
    for (auto id : inputIds)
    {
        if (id > m_PreImportedInputHandles.size())
        {
            throw InvalidArgumentException(fmt::format("ClearImportedInputs::Unknown ImportedInputId: {}", id));
        }

        auto& importedTensorHandle = m_PreImportedInputHandles[id].m_TensorHandle;
        if (!importedTensorHandle)
        {
            throw InvalidArgumentException(
                fmt::format("ClearImportedInputs::ImportedInput with id: {} has already been deleted", id));
        }
        // Call Unimport, then destroy the tensor handle.
        importedTensorHandle->Unimport();
        importedTensorHandle = {};
    }
}

void LoadedNetwork::ClearImportedOutputs(const std::vector<ImportedOutputId> outputIds)
{
    for (auto id : outputIds)
    {
        if (id > m_PreImportedOutputHandles.size())
        {
            throw InvalidArgumentException(fmt::format("ClearImportedOutputs::Unknown ImportedOutputId: {}", id));
        }

        auto& importedTensorHandle = m_PreImportedOutputHandles[id].m_TensorHandle;
        if (!importedTensorHandle)
        {
            throw InvalidArgumentException(
                fmt::format("ClearImportedOutputs::ImportedOutput with id: {} has already been deleted", id));
        }
        // Call Unimport, then destroy the tensor handle.
        importedTensorHandle->Unimport();
        importedTensorHandle = {};
    }
}
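// Illustrative sketch (not in the original file): once a pre-imported buffer is no
// longer needed, its id should be released so the handle can unimport the memory:
//
//     loadedNetwork->ClearImportedInputs(importedInputIds);
//     loadedNetwork->ClearImportedOutputs(importedOutputIds);
//
// Passing a cleared id to Execute afterwards raises an InvalidArgumentException.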
Status LoadedNetwork::Execute(const InputTensors& inputTensors,
                              const OutputTensors& outputTensors,
                              IWorkingMemHandle& iWorkingMemHandle,
                              std::vector<ImportedInputId> preImportedInputs,
                              std::vector<ImportedOutputId> preImportedOutputs)
{
    const Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();

    if (inputTensors.size() + preImportedInputs.size() != graph.GetNumInputs())
    {
        if (preImportedInputs.empty())
        {
            throw InvalidArgumentException("LoadedNetwork::Execute: "
                                           "Number of inputs provided does not match network.");
        }
        else
        {
            throw InvalidArgumentException("LoadedNetwork::Execute: "
                                           "Number of inputs + preImportedInputs provided does not match network.");
        }
    }

    if (outputTensors.size() + preImportedOutputs.size() != graph.GetNumOutputs())
    {
        if (preImportedOutputs.empty())
        {
            throw InvalidArgumentException("LoadedNetwork::Execute: "
                                           "Number of outputs provided does not match network.");
        }
        else
        {
            throw InvalidArgumentException("LoadedNetwork::Execute: "
                                           "Number of outputs + preImportedOutputs provided does not match network.");
        }
    }

    WorkingMemHandle& workingMemHandle = dynamic_cast<WorkingMemHandle&>(iWorkingMemHandle);
    // Collect all the given LayerBindingIds and validate them.
    std::vector<LayerBindingId>& bindingIds = workingMemHandle.GetBindingIdVector();
    unsigned int index = 0;
    for (auto pair : inputTensors)
    {
        bindingIds[index++] = pair.first;
    }
    for (ImportedInputId id : preImportedInputs)
    {
        bindingIds[index++] = ValidateImportedInputID(id);
    }
    for (auto pair : outputTensors)
    {
        bindingIds[index++] = pair.first;
    }
    for (ImportedOutputId id : preImportedOutputs)
    {
        bindingIds[index++] = ValidateImportedOutputID(id);
    }

    workingMemHandle.ValidateBindingIds();

    // Restores the default handles on every connection that was rewired to a pre-imported tensor.
    auto resetMemHandle = [&]()
    {
        for (ImportedInputId id : preImportedInputs)
        {
            const LayerBindingId layerBindingId = m_PreImportedInputHandles[id].m_LayerBindingId;

            auto inputHandle = workingMemHandle.GetInputHandle(layerBindingId);
            auto inputConnections = workingMemHandle.GetInputConnections(layerBindingId);
            for (auto it : inputConnections)
            {
                *it = inputHandle;
            }
        }

        for (ImportedOutputId id : preImportedOutputs)
        {
            const LayerBindingId layerBindingId = m_PreImportedOutputHandles[id].m_LayerBindingId;

            auto outputHandle = workingMemHandle.GetOutputHandle(layerBindingId);
            auto outputConnections = workingMemHandle.GetOutputConnection(layerBindingId);
            for (auto it : outputConnections)
            {
                *it = outputHandle;
            }
        }
    };

    std::unique_ptr<TimelineUtilityMethods> timelineUtils =
        TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
    ProfilingGuid inferenceGuid = m_ProfilingService->GetNextGuid();
    if (timelineUtils)
    {
        // Add an inference trace to the timeline if profiling is enabled.
        ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
        timelineUtils->CreateTypedEntity(inferenceGuid, LabelsAndEventClasses::INFERENCE_GUID);
        timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
                                          networkGuid,
                                          inferenceGuid,
                                          LabelsAndEventClasses::EXECUTION_OF_GUID);
        timelineUtils->RecordEvent(inferenceGuid, LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
    }

    bool executionSucceeded = true;

    if (timelineUtils)
    {
        // Mark the end of life of the inference timeline and commit if profiling is enabled.
        timelineUtils->RecordEvent(inferenceGuid, LabelsAndEventClasses::ARMNN_PROFILING_EOL_EVENT_CLASS);
        timelineUtils->Commit();
    }
    if (!m_NetworkProperties.m_ImportEnabled)
    {
        // Copy the user-supplied data into the working-memory input handles.
        for (auto pair : inputTensors)
        {
            EnqueueInput(pair.second, workingMemHandle.GetInputHandle(pair.first));
        }

        // Swap in the pre-imported input handles.
        for (ImportedInputId id : preImportedInputs)
        {
            const ImportedTensorHandlePin& importedInputPin = m_PreImportedInputHandles[id];
            const LayerBindingId layerBindingId = m_PreImportedInputHandles[id].m_LayerBindingId;
            const auto& preimportedHandle = importedInputPin.m_TensorHandle;

            auto inputConnections = workingMemHandle.GetInputConnections(layerBindingId);
            for (auto it : inputConnections)
            {
                *it = preimportedHandle.get();
            }
        }
    }
    // ...

    if (m_NetworkProperties.m_ExportEnabled)
    {
        for (auto pair : outputTensors)
        {
            ImportOutputTensor(pair.second, workingMemHandle.GetOutputHandle(pair.first));
        }
    }

    // Swap in the pre-imported output handles.
    for (ImportedOutputId id : preImportedOutputs)
    {
        const ImportedTensorHandlePin& importedOutputPin = m_PreImportedOutputHandles[id];
        const LayerBindingId layerBindingId = m_PreImportedOutputHandles[id].m_LayerBindingId;
        const auto& preimportedHandle = importedOutputPin.m_TensorHandle;

        auto outputConnections = workingMemHandle.GetOutputConnection(layerBindingId);
        for (auto it : outputConnections)
        {
            *it = preimportedHandle.get();
        }
    }

    auto Fail = [&](const std::exception& error)
    {
        ARMNN_LOG(error) << "An error occurred attempting to execute a workload: " << error.what();
        executionSucceeded = false;
    };
    ProfilingDynamicGuid workloadInferenceID(0);

    try
    {
        for (unsigned int i = 0; i < m_WorkloadQueue.size(); ++i)
        {
            auto& workload = m_WorkloadQueue[i];
            if (timelineUtils)
            {
                workloadInferenceID = timelineUtils->RecordWorkloadInferenceAndStartOfLifeEvent(workload->GetGuid(),
                                                                                                inferenceGuid);
            }
            workload->ExecuteAsync(workingMemHandle.GetWorkingMemDescriptorAt(i));

            if (timelineUtils)
            {
                timelineUtils->RecordEndOfLifeEvent(workloadInferenceID);
            }
        }
    }
    // ...
    catch (const std::runtime_error& error)
    {
        resetMemHandle();
        Fail(error);
    }

    if (!m_NetworkProperties.m_ExportEnabled)
    {
        for (auto pair : outputTensors)
        {
            CopyToOutputTensor(pair.second, workingMemHandle.GetOutputHandle(pair.first));
        }
    }
    resetMemHandle();

    return executionSucceeded ? Status::Success : Status::Failure;
}
std::unique_ptr<IWorkingMemHandle> LoadedNetwork::CreateWorkingMemHandle(NetworkId networkId)
{
    Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();

    // Tensors that need to be allocated internally within Arm NN.
    std::vector<std::unique_ptr<ITensorHandle>> managedTensorHandles;
    // Tensors that will be allocated externally by the user.
    std::vector<std::unique_ptr<ITensorHandle>> unmanagedTensorHandles;

    std::vector<WorkingMemDescriptor> workingMemDescriptors;
    std::unordered_map<LayerGuid, WorkingMemDescriptor> workingMemDescriptorMap;

    auto GetTensorHandle = [&](Layer* layer, const OutputSlot& outputSlot)
    {
        ITensorHandleFactory::FactoryId factoryId = outputSlot.GetTensorHandleFactoryId();
        const TensorInfo& tensorInfo = outputSlot.GetTensorInfo();

        if (factoryId == ITensorHandleFactory::LegacyFactoryId)
        {
            BackendId id = layer->GetBackendId();
            ARMNN_NO_DEPRECATE_WARN_BEGIN
            return m_WorkloadFactories.at(id)->CreateTensorHandle(tensorInfo, false);
            ARMNN_NO_DEPRECATE_WARN_END
        }
        else
        {
            ITensorHandleFactory* handleFactory = m_TensorHandleFactoryRegistry.GetFactory(factoryId);
            ARMNN_ASSERT(handleFactory);
            return handleFactory->CreateTensorHandle(tensorInfo, false);
        }
    };

    struct HandleInfo
    {
        ITensorHandle* m_TensorHandle;

        bool m_IsInputLayerHandle = false;
        bool m_IsOutputLayerHandle = false;

        WorkingMemHandle::InputMemDescriptorCoords m_InputMemDescriptorCoords;
        WorkingMemHandle::OutputMemDescriptorCoords m_OutputMemDescriptorCoords;
    };

    std::unordered_map<const OutputSlot*, HandleInfo> outputToHandleInfoMap;

    unsigned int layerIndex = 0;
    for (auto&& layer : order)
    {
        // Constant layer execution and management is handled during loaded network construction.
        if (layer->GetType() == LayerType::Constant)
        {
            continue;
        }

        WorkingMemDescriptor workingMemDescriptor;

        bool isMemoryManaged = true;
        bool isInputLayer = false;
        bool isOutputLayer = false;
        bool isConnectedToOutputLayer = false;

        if (layer->GetType() == LayerType::Input || layer->GetType() == LayerType::MemImport)
        {
            // Input layers/workloads are not executed, so their descriptors are not added to
            // workingMemDescriptors; their tensor handles still need to be managed, however.
            isInputLayer = true;
            isMemoryManaged = !m_NetworkProperties.m_ImportEnabled;
        }
        else if (layer->GetType() == LayerType::Output)
        {
            isOutputLayer = true;
        }

        unsigned int slotIndex = 0;
        // Create a tensor handle for each output slot of the layer and start managing its lifetime.
        for (auto& slot : layer->GetOutputSlots())
        {
            for (unsigned int i = 0; i < slot.GetNumConnections(); ++i)
            {
                if (slot.GetConnection(i)->GetOwningLayer().GetType() == LayerType::Output)
                {
                    if (!isConnectedToOutputLayer)
                    {
                        isConnectedToOutputLayer = true;
                        // If export is enabled, disable memory management so the tensor can be exported.
                        isMemoryManaged = !m_NetworkProperties.m_ExportEnabled;
                    }
                    else
                    {
                        // Connected to more than one OutputLayer: keep memory management enabled.
                        ARMNN_LOG(warning) << fmt::format(
                            "Layer name: '{0}' guid: '{1}' has two or more OutputLayers connected to it. "
                            "This will prevent importing on the connected OutputLayers.",
                            layer->GetName(), layer->GetGuid());
                        isMemoryManaged = true;
                    }
                }
            }

            ITensorHandle* tensorHandle;
            if (isMemoryManaged)
            {
                managedTensorHandles.emplace_back(GetTensorHandle(layer, slot));
                tensorHandle = managedTensorHandles.back().get();
            }
            else
            {
                unmanagedTensorHandles.emplace_back(GetTensorHandle(layer, slot));
                tensorHandle = unmanagedTensorHandles.back().get();
            }

            workingMemDescriptor.m_Outputs.push_back(tensorHandle);

            HandleInfo& handleInfo = outputToHandleInfoMap[&slot];
            handleInfo.m_TensorHandle = tensorHandle;

            // Store the coordinates of the OutputSlot connected to the OutputLayer.
            if (isConnectedToOutputLayer)
            {
                handleInfo.m_IsOutputLayerHandle = true;
                handleInfo.m_OutputMemDescriptorCoords.m_OutputSlotCoords = {layerIndex, slotIndex};
            }
            // Store the LayerBindingId of the InputLayer.
            if (isInputLayer)
            {
                handleInfo.m_IsInputLayerHandle = true;
                LayerBindingId bindingId = static_cast<BindableLayer*>(layer)->GetBindingId();
                handleInfo.m_InputMemDescriptorCoords.m_LayerBindingId = bindingId;
            }
            slotIndex++;
        }

        // Collect the input tensor handles from the connected output slots.
        for (auto& slot : layer->GetInputSlots())
        {
            // ...
            auto outputSlot = slot.GetConnectedOutputSlot();
            auto key = outputSlot->GetOwningLayer().GetGuid();

            // Constant layer handles were created during loaded network construction.
            auto found = m_ConstantTensorHandles.find(key);
            if (found != m_ConstantTensorHandles.end())
            {
                ITensorHandle* tensorHandle = found->second;
                workingMemDescriptor.m_Inputs.push_back(tensorHandle);

                // Odd case: a constant layer connected directly to an output layer still needs a HandleInfo.
                if (isOutputLayer)
                {
                    LayerBindingId bindingId = static_cast<BindableLayer*>(layer)->GetBindingId();

                    HandleInfo& handleInfo = outputToHandleInfoMap[outputSlot];
                    handleInfo.m_TensorHandle = tensorHandle;
                    handleInfo.m_IsOutputLayerHandle = true;
                    handleInfo.m_OutputMemDescriptorCoords.m_LayerBindingIds.push_back(bindingId);
                    handleInfo.m_OutputMemDescriptorCoords.m_InputSlotCoords.push_back({layerIndex, 0});
                }
                continue;
            }

            HandleInfo& handleInfo = outputToHandleInfoMap.at(outputSlot);

            ITensorHandle* inputTensorHandle = handleInfo.m_TensorHandle;
            workingMemDescriptor.m_Inputs.push_back(inputTensorHandle);

            // Store the LayerBindingId of the OutputLayer.
            if (isOutputLayer)
            {
                LayerBindingId bindingId = static_cast<BindableLayer*>(layer)->GetBindingId();
                handleInfo.m_OutputMemDescriptorCoords.m_LayerBindingIds.push_back(bindingId);
                handleInfo.m_OutputMemDescriptorCoords.m_InputSlotCoords.push_back({layerIndex, 0});
            }
            // Not an OutputLayer itself, but shares its input tensor handle with one, so it
            // must also be updated if the handle is swapped out.
            else if (handleInfo.m_IsOutputLayerHandle)
            {
                handleInfo.m_OutputMemDescriptorCoords.m_InputSlotCoords.push_back({layerIndex, slot.GetSlotIndex()});
            }

            // Store the coordinates of InputSlots connected to the InputLayer (there can be several).
            if (handleInfo.m_IsInputLayerHandle)
            {
                std::pair<LayerGuid, unsigned int> connectionLocation{layerIndex, slot.GetSlotIndex()};
                handleInfo.m_InputMemDescriptorCoords.m_InputSlotCoords.emplace_back(connectionLocation);
            }
        }
        workingMemDescriptorMap.insert({layer->GetGuid(), workingMemDescriptor});

        // Input layers/workloads are not executed, so their descriptors are not queued.
        if (!isInputLayer)
        {
            workingMemDescriptors.push_back(workingMemDescriptor);
            layerIndex++;
        }
    }

    std::vector<std::pair<std::shared_ptr<TensorMemory>, MemorySource>> tensorMemory;

    auto externalMemoryManager = CreateExternalMemoryManger(tensorMemory);

    // Sort tensorMemory so its order matches the outputSlot order.
    std::sort(tensorMemory.begin(), tensorMemory.end(),
              [](const std::pair<std::shared_ptr<TensorMemory>, MemorySource>& lhs,
                 const std::pair<std::shared_ptr<TensorMemory>, MemorySource>& rhs)
              {
                  return lhs.first->m_OutputSlotId < rhs.first->m_OutputSlotId;
              });

    std::vector<WorkingMemHandle::InputMemDescriptorCoords> inputConnectionsInfo;
    std::vector<WorkingMemHandle::OutputMemDescriptorCoords> outputConnectionsInfo;

    for (const auto& handleInfo : outputToHandleInfoMap)
    {
        if (handleInfo.second.m_IsOutputLayerHandle)
        {
            outputConnectionsInfo.emplace_back(handleInfo.second.m_OutputMemDescriptorCoords);
        }

        if (handleInfo.second.m_IsInputLayerHandle)
        {
            inputConnectionsInfo.emplace_back(handleInfo.second.m_InputMemDescriptorCoords);
        }
    }

    return std::make_unique<WorkingMemHandle>(networkId,
                                              inputConnectionsInfo,
                                              outputConnectionsInfo,
                                              workingMemDescriptors,
                                              workingMemDescriptorMap,
                                              std::move(externalMemoryManager),
                                              std::move(tensorMemory),
                                              std::move(managedTensorHandles),
                                              std::move(unmanagedTensorHandles));
}
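// Illustrative sketch (not in the original file): for thread-safe execution each
// thread creates its own working-memory handle and passes it to Execute, e.g.:
//
//     std::unique_ptr<IWorkingMemHandle> memHandle = loadedNetwork->CreateWorkingMemHandle(networkId);
//     Status status = loadedNetwork->Execute(inputTensors, outputTensors, *memHandle);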
void LoadedNetwork::RegisterDebugCallback(const DebugCallbackFunction& func)
{
    for (auto&& workloadPtr : m_WorkloadQueue)
    {
        workloadPtr.get()->RegisterDebugCallback(func);
    }
}
void LoadedNetwork::CreateMemoryProfileAsync()
{
    // Tracks a tensor's memory block while its lifetime (number of remaining reads) is still open.
    struct PartialBlock
    {
        unsigned int m_StartOfLife;
        unsigned int m_Lifetime;

        size_t m_MemSize;
        unsigned int m_Index;

        BackendId m_BackendId;
    };

    auto align = [](size_t numToAlign)
    {
        const size_t alignment = sizeof(float);
        return ((numToAlign + alignment - 1) / alignment) * alignment;
    };
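    // Worked example (comment added for clarity, not in the original file):
    // with alignment = sizeof(float) = 4, align(10) = ((10 + 3) / 4) * 4 = 3 * 4 = 12,
    // while align(8) = ((8 + 3) / 4) * 4 = 8. Integer division rounds every size up
    // to the next multiple of 4 bytes without branching.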
    std::unordered_map<const OutputSlot*, PartialBlock> memBlockTrackerMap;

    // ...

    unsigned int timestep = 0;
    unsigned int outputIndex = 0;
    Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();

    for (auto&& layer : order)
    {
        // ...
        auto const& backendId = layer->GetBackendId();

        for (auto& outputSlot : layer->GetOutputSlots())
        {
            if (!m_SupportsExternallyManagedMemory[backendId])
            {
                continue;
            }

            PartialBlock partialBlock;

            partialBlock.m_StartOfLife = timestep;

            size_t alignedSize = align(outputSlot.GetOutputHandler().GetTensorInfo().GetNumBytes());
            partialBlock.m_MemSize = alignedSize;
            partialBlock.m_Index = outputIndex++;
            partialBlock.m_Lifetime = outputSlot.GetNumConnections();
            partialBlock.m_BackendId = backendId;

            // A lifetime of zero means the slot is never read; the block starts and ends here.
            if (partialBlock.m_Lifetime == 0)
            {
                m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
                                                                     partialBlock.m_StartOfLife,
                                                                     partialBlock.m_MemSize,
                                                                     0,
                                                                     partialBlock.m_Index);
            }
            else
            {
                memBlockTrackerMap[&outputSlot] = partialBlock;
            }
        }

        for (auto& inputSlot : layer->GetInputSlots())
        {
            const Layer& connectedInputLayer = inputSlot.GetConnectedOutputSlot()->GetOwningLayer();
            // ...

            auto outputSlot = inputSlot.GetConnectedOutputSlot();

            PartialBlock& partialBlock = memBlockTrackerMap.at(outputSlot);

            auto& lifetime = partialBlock.m_Lifetime;
            --lifetime;

            // When the last reader has been seen, close the block at the current timestep.
            if (lifetime == 0)
            {
                m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
                                                                     timestep,
                                                                     partialBlock.m_MemSize,
                                                                     0,
                                                                     partialBlock.m_Index);
            }
        }
        ++timestep;
    }
}
void LoadedNetwork::CreateMemoryProfile()
{
    // Finds the first TensorHandle ancestor of a SubTensorHandle. If the handle provided
    // is not a SubTensorHandle, the handle itself is returned.
    auto TraceSubTensorHandleAncestry = [](ITensorHandle* const subTensorHandle)
    {
        ITensorHandle* ancestor = subTensorHandle;
        while (ancestor && ancestor->GetParent())
        {
            ancestor = ancestor->GetParent();
        }
        return ancestor;
    };

    struct PartialBlock
    {
        unsigned int m_StartOfLife;
        unsigned int m_Lifetime;

        size_t m_MemSize;
        unsigned int m_Index;

        BackendId m_BackendId;
    };

    auto align = [](size_t numToAlign)
    {
        const size_t alignment = sizeof(float);
        return ((numToAlign + alignment - 1) / alignment) * alignment;
    };

    std::unordered_map<ITensorHandle*, PartialBlock> memBlockTrackerMap;

    // ...

    unsigned int timestep = 0;
    unsigned int outputIndex = 0;
    Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();

    for (auto&& layer : order)
    {
        // ...
        auto const& backendId = layer->GetBackendId();

        for (auto& outputSlot : layer->GetOutputSlots())
        {
            if (!m_SupportsExternallyManagedMemory[backendId])
            {
                continue;
            }

            ITensorHandle* tensorHandle = outputSlot.GetOutputHandler().GetData();
            tensorHandle = TraceSubTensorHandleAncestry(tensorHandle);

            if (memBlockTrackerMap.find(tensorHandle) == memBlockTrackerMap.end())
            {
                PartialBlock partialBlock;

                partialBlock.m_StartOfLife = timestep;

                size_t alignedSize = align(outputSlot.GetOutputHandler().GetTensorInfo().GetNumBytes());
                partialBlock.m_MemSize = alignedSize;
                partialBlock.m_Index = outputIndex++;
                partialBlock.m_Lifetime = outputSlot.GetNumConnections();
                partialBlock.m_BackendId = backendId;

                if (partialBlock.m_Lifetime == 0)
                {
                    m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
                                                                         partialBlock.m_StartOfLife,
                                                                         partialBlock.m_MemSize,
                                                                         0,
                                                                         partialBlock.m_Index);
                }
                else
                {
                    memBlockTrackerMap[tensorHandle] = partialBlock;
                }
                m_Tensorhandles.push_back(tensorHandle);
            }
            else
            {
                // A sub-tensor shares its ancestor's handle, so extend that block's lifetime instead.
                memBlockTrackerMap.at(tensorHandle).m_Lifetime += outputSlot.GetNumConnections();
            }
        }

        for (auto& inputSlot : layer->GetInputSlots())
        {
            const Layer& connectedInputLayer = inputSlot.GetConnectedOutputSlot()->GetOwningLayer();
            // ...
            if (!m_SupportsExternallyManagedMemory[connectedInputLayer.GetBackendId()])
            {
                continue;
            }

            auto outputSlot = inputSlot.GetConnectedOutputSlot();

            ITensorHandle* tensorHandle = outputSlot->GetOutputHandler().GetData();
            tensorHandle = TraceSubTensorHandleAncestry(tensorHandle);

            PartialBlock& partialBlock = memBlockTrackerMap.at(tensorHandle);

            auto& lifetime = partialBlock.m_Lifetime;
            --lifetime;

            if (lifetime == 0)
            {
                m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
                                                                     timestep,
                                                                     partialBlock.m_MemSize,
                                                                     0,
                                                                     partialBlock.m_Index);
            }
        }
        ++timestep;
    }
}
std::unique_ptr<MemoryManager> LoadedNetwork::CreateExternalMemoryManger(
    std::vector<std::pair<std::shared_ptr<TensorMemory>, MemorySource>>& tensorMemoryVec)
{
    std::unique_ptr<MemoryManager> memoryManager = std::make_unique<MemoryManager>();
    auto allocatorMap = BackendRegistryInstance().GetAllocators();

    for (auto& backend : m_MemBinMap)
    {
        std::vector<BufferStorage> bufferStorageVec;

        std::shared_ptr<ICustomAllocator> backendAllocator;
        if (allocatorMap.find(backend.first) != allocatorMap.end())
        {
            backendAllocator = allocatorMap[backend.first];
        }
        else
        {
            backendAllocator = m_Backends[backend.first]->GetDefaultAllocator();
        }

        for (auto& memBin : backend.second)
        {
            BufferStorage bufferStorage;
            bufferStorage.m_BufferSize = memBin.m_MemSize;
            bufferStorage.m_TensorMemoryVector.reserve(memBin.m_MemBlocks.size());

            for (auto& memBlock : memBin.m_MemBlocks)
            {
                auto tensorMemory = std::make_shared<TensorMemory>(TensorMemory{memBlock.m_Offset, memBlock.m_Index});

                tensorMemoryVec.emplace_back(tensorMemory, backendAllocator->GetMemorySourceType());
                bufferStorage.m_TensorMemoryVector.emplace_back(tensorMemory);
            }

            bufferStorageVec.emplace_back(std::move(bufferStorage));
        }

        memoryManager->StoreMemToAllocate(bufferStorageVec, backendAllocator, 4);
    }

    return memoryManager;
}
LayerBindingId LoadedNetwork::ValidateImportedInputID(ImportedInputId id)
{
    try
    {
        const auto& importedTensorHandlePin = m_PreImportedInputHandles.at(id);
        if (!importedTensorHandlePin.m_TensorHandle)
        {
            throw InvalidArgumentException(fmt::format("LoadedNetwork::Execute: "
                                                       "PreImportedInput: {} has been deleted", id));
        }
        return importedTensorHandlePin.m_LayerBindingId;
    }
    catch (const std::out_of_range&)
    {
        throw InvalidArgumentException(fmt::format("LoadedNetwork::Execute: Unknown ImportedInputId: {}", id));
    }
}

LayerBindingId LoadedNetwork::ValidateImportedOutputID(ImportedOutputId id)
{
    try
    {
        const auto& importedTensorHandlePin = m_PreImportedOutputHandles.at(id);
        if (!importedTensorHandlePin.m_TensorHandle)
        {
            throw InvalidArgumentException(fmt::format("LoadedNetwork::Execute: "
                                                       "PreImportedOutput: {} has been deleted", id));
        }
        return importedTensorHandlePin.m_LayerBindingId;
    }
    catch (const std::out_of_range&)
    {
        throw InvalidArgumentException(fmt::format("LoadedNetwork::Execute: Unknown ImportedOutputId: {}", id));
    }
}