29 #include <common/include/Processes.hpp> 31 #include <fmt/format.h> 42 template <
typename ExceptionType>
43 std::string ToErrorMessage(
const char * prefix,
const ExceptionType &
error)
46 ss << prefix <<
" " << error.what();
// Registers one optimized-network layer with the profiling timeline: creates
// a named, typed child entity for the layer under the network GUID, then adds
// a connection relationship from each input's producing layer to this layer.
// NOTE(review): this fragment is garbled — the 'layer' parameter declaration
// and several brace lines are missing from the extraction; code kept verbatim.
50 void AddLayerStructure(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
52 ProfilingGuid networkGuid)
// Unnamed layers are reported to the timeline as "<Unnamed>".
55 std::string layerName = layer.GetNameStr().empty() ?
"<Unnamed>" : layer.GetNameStr();
56 timelineUtils->CreateNamedTypedChildEntity(layer.GetGuid(),
59 LabelsAndEventClasses::LAYER_GUID);
// One retention-link connection per connected input slot.
60 for (
auto&& input : layer.GetInputSlots())
62 const IOutputSlot* source = input.GetConnectedOutputSlot();
64 timelineUtils->CreateConnectionRelationship(ProfilingRelationshipType::RetentionLink,
65 source->GetOwningLayerGuid(),
// Registers a workload with the profiling timeline: creates a typed workload
// entity, labels it with the owning layer's backend id, and (per the visible
// CreateRelationship call) links it as a child — presumably of the layer;
// the intermediate GUID arguments are missing from this extraction.
70 void AddWorkloadStructure(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
71 std::unique_ptr<IWorkload>& workload,
75 timelineUtils->CreateTypedEntity(workload->GetGuid(), LabelsAndEventClasses::WORKLOAD_GUID);
76 timelineUtils->MarkEntityWithLabel(workload->GetGuid(),
77 layer.GetBackendId().Get(),
78 LabelsAndEventClasses::BACKENDID_GUID);
81 timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
84 LabelsAndEventClasses::CHILD_GUID);
// Fragment of a validation routine (its header is lost in the extraction):
// looks up the "Global" BackendOptions entry, reads the ImportEnabled /
// ExportEnabled flags recorded at optimization time, and builds error
// messages when the requested network memory sources conflict with them.
103 const vector<BackendOptions>::iterator& backendItr =
104 find_if(optimizedOptions.begin(), optimizedOptions.end(), [](
const BackendOptions& backend) {
105 if (backend.GetBackendId().Get() ==
"Global")
// Flags default to false when no "Global" options entry exists.
114 bool importEnabled =
false;
115 bool exportEnabled =
false;
116 if (backendItr != optimizedOptions.end())
119 for (
size_t i = 0; i < backendItr->GetOptionCount(); i++)
122 if (option.
GetName() ==
"ImportEnabled")
126 if (option.
GetName() ==
"ExportEnabled")
// Input-source mismatch: message text differs depending on whether import
// was disabled (but is now required) or enabled (but is now disallowed).
138 auto message = fmt::format(
"The input memory source specified, '{0}',", networkProperties.
m_InputSource);
141 message.append(
" requires that memory import be enabled. However, " 142 "it was disabled when this network was optimized.");
146 message.append(
" requires that memory import be disabled. However, " 147 "it was enabled when this network was optimized.");
// Same check for the output memory source against the export flag.
155 auto message = fmt::format(
"The output memory source specified, '{0}',", networkProperties.
m_OutputSource);
158 message.append(
" requires that memory export be enabled. However, " 159 "it was disabled when this network was optimized.");
163 message.append(
" requires that memory export be disabled. However, " 164 "it was enabled when this network was optimized.");
// Factory fragment: constructs a LoadedNetwork from an optimized network,
// reporting failures through the out-parameter 'errorMessage' rather than
// letting exceptions escape. The enclosing signature's first parameters are
// missing from this extraction.
171 std::string& errorMessage,
173 arm::pipe::IProfilingService* profilingService)
175 std::unique_ptr<LoadedNetwork> loadedNetwork;
// On any caught exception: format the message via ToErrorMessage and return
// an empty unique_ptr to signal failure.
177 auto Fail = [&](
const std::exception&
error) -> std::unique_ptr<LoadedNetwork>
179 errorMessage = ToErrorMessage(
"An error occurred when preparing the network workloads: ", error);
182 return std::unique_ptr<LoadedNetwork>();
// Raw 'new' is required here — presumably the constructor is private and
// this factory is a friend; std::make_unique could not access it.
187 loadedNetwork.reset(
new LoadedNetwork(std::move(net), networkProperties, profilingService));
197 catch (
const std::runtime_error& error)
202 return loadedNetwork;
// LoadedNetwork constructor. From the visible code it: (1) creates backends
// and per-backend workload factories, checking AsyncExecution /
// ExternallyManagedMemory support; (2) creates tensor handles per layer,
// choosing managed vs unmanaged based on import/export settings; (3) creates
// the workload per layer and records profiling timeline structure; (4) builds
// input/output workload-slot index maps and pre-imported handle lists; and
// (5) sets up external or internal memory management. Many original lines
// are elided in this extraction; code kept verbatim, garbled numbers and all.
205 LoadedNetwork::LoadedNetwork(std::unique_ptr<IOptimizedNetwork> net,
207 arm::pipe::IProfilingService* profilingService) :
208 m_OptimizedNetwork(std::move(net)),
209 m_NetworkProperties(networkProperties),
210 m_TensorHandleFactoryRegistry(),
211 m_ProfilingService(profilingService)
215 const std::shared_ptr<IProfiler>& profiler = m_OptimizedNetwork->GetProfiler();
225 m_NetworkProperties);
232 bool useExternalMemoryManager =
false;
233 bool useInternalMemoryManager =
false;
234 Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
// Per-binding import state, initialised to "not imported".
241 m_IsInputImported = std::vector<bool>(order.
GetNumInputs(),
false);
242 m_IsOutputImported = std::vector<bool>(order.
GetNumOutputs(),
false);
// Pass 1: instantiate each distinct backend once and query its capabilities.
245 for (
auto&& layer : order)
247 auto const& backendId = layer->GetBackendId();
248 if (m_Backends.count(backendId) == 0)
251 auto it = m_Backends.emplace(std::make_pair(backendId, createBackend()));
// Async execution requires every backend to support it.
260 std::string er = backend->
GetId();
261 er +=
" does not support AsyncExecution";
267 std::string er = backend->
GetId();
268 er +=
" does not support ExternallyManagedMemory\n";
269 er +=
"AsyncEnabled networks require all backends to support ExternallyManagedMemory";
272 m_SupportsExternallyManagedMemory[backend->
GetId()] =
true;
273 useExternalMemoryManager =
true;
277 m_SupportsExternallyManagedMemory[backend->
GetId()] =
false;
278 useInternalMemoryManager =
true;
285 m_TensorHandleFactoryRegistry,
286 m_OptimizedNetwork->pOptimizedNetworkImpl->GetModelOptions(),
288 static_cast<MemorySourceFlags>(m_NetworkProperties.m_OutputSource));
294 m_BackendMemoryMangers.back(), m_OptimizedNetwork->pOptimizedNetworkImpl->GetModelOptions());
296 m_WorkloadFactories[backendId ] = std::move(workloadFactory);
// Pass 2: create tensor handles per layer. Managed memory is disabled when
// the backend manages memory externally or when import/export applies.
302 for (
auto&& layer : order)
304 auto& workloadFactory = GetWorkloadFactory(*layer);
305 bool supportsExternalManager = m_SupportsExternallyManagedMemory[layer->GetBackendId()];
307 switch (layer->GetType())
314 layer->CreateTensorHandles(m_TensorHandleFactoryRegistry,
316 !supportsExternalManager && !m_NetworkProperties.m_ImportEnabled);
321 layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory,
true);
// A layer feeding a single Output layer is treated specially so export can
// bypass the managed intermediate buffer.
328 if ((layer->GetNumOutputSlots() == 1) &&
329 (layer->GetOutputSlots()[0].GetNumConnections() == 1) &&
330 (layer->GetOutputSlots()[0].GetConnection(0)->GetOwningLayer().GetType() ==
LayerType::Output))
332 layer->CreateTensorHandles(m_TensorHandleFactoryRegistry,
334 !supportsExternalManager && !m_NetworkProperties.m_ExportEnabled);
338 layer->CreateTensorHandles(m_TensorHandleFactoryRegistry,
340 !supportsExternalManager);
// Profiling timeline: record the network entity and its start-of-life event.
347 ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
348 std::unique_ptr<TimelineUtilityMethods> timelineUtils =
349 TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
352 timelineUtils->CreateTypedEntity(networkGuid, LabelsAndEventClasses::NETWORK_GUID);
354 timelineUtils->RecordEvent(networkGuid, LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
356 int processID = arm::pipe::GetCurrentProcessId();
357 std::stringstream ss;
359 timelineUtils->MarkEntityWithLabel(networkGuid, ss.str(), LabelsAndEventClasses::PROCESS_ID_GUID);
362 std::vector<IWorkload*> ConstWorkloads;
// Pass 3: create workloads and record their timeline structure.
367 for (
auto&& layer: order)
372 AddLayerStructure(timelineUtils, *layer, networkGuid);
377 switch (layer->GetType())
387 auto workload = layer->CreateWorkload(workloadFactory);
391 const char*
const layerName =
392 layer->GetNameStr().length() != 0 ? layer->GetName() :
"<Unnamed>";
394 fmt::format(
"No workload created for layer (name: '{0}' type: '{1}') (compute '{2}')",
395 layerName, static_cast<int>(layer->GetType()), layer->GetBackendId().Get()
402 AddWorkloadStructure(timelineUtils, workload, *layer);
// Constant workloads are held separately (with their tensor handles) when
// async execution or external memory management is in use.
407 if((networkProperties.
m_AsyncEnabled || useExternalMemoryManager) &&
410 m_ConstantTensorHandles[layer->GetGuid()] =
411 layer->GetOutputSlot(0).GetOutputHandler().GetData();
412 m_ConstantWorkloads[layer->GetGuid()] = std::move(workload);
416 m_WorkloadQueue.push_back(std::move(workload));
421 ConstWorkloads.push_back(m_WorkloadQueue.back().get());
425 layer->ReleaseConstantData();
// Pass 4 (sync path only): map each input/output binding id to the workload
// indices and slot indices it touches, and pre-create importable handles
// where every affected workload supports tensor-handle replacement.
433 if (!networkProperties.
m_AsyncEnabled && m_WorkloadQueue.size() != 0)
441 const auto bindingId = layer->GetBindingId();
443 bool supportsReplacement =
true;
445 for (
const auto inputSlot: layer->GetOutputSlot(0).GetConnections())
447 auto workloadIndex = std::distance(order.begin(), order.GetPosInGraph(inputSlot->GetOwningLayer()));
448 workloadIndex -= noOfInputs;
450 m_InputWorkloadSlotPairs[bindingId].emplace_back(WorkloadIndices{
453 auto workload = m_WorkloadQueue[m_InputWorkloadSlotPairs[bindingId].back().m_WorkloadIndex].get();
454 supportsReplacement &= workload->SupportsTensorHandleReplacement();
460 m_TensorHandleFactoryRegistry.GetMatchingImportFactoryId(factoryId);
462 ITensorHandleFactory *importFactory = m_TensorHandleFactoryRegistry.GetFactory(importFactoryId);
464 if (supportsReplacement && importFactory)
466 m_PreImportedInputHandles.emplace_back(
467 bindingId, importFactory->
CreateTensorHandle(layer->GetOutputSlot(0).GetTensorInfo(),
false));
// A null handle marks a binding that cannot be pre-imported.
471 m_PreImportedInputHandles.emplace_back(bindingId,
nullptr);
479 const auto bindingId = layer->GetBindingId();
481 const auto outputSlot = layer->GetInputSlot(0).GetConnectedOutputSlot();
482 auto& indices = m_OutputWorkloadSlotPairs[bindingId];
484 auto workloadIndex = std::distance(order.begin(), order.GetPosInGraph(outputSlot->GetOwningLayer()));
485 workloadIndex -= noOfInputs;
487 indices.m_OutputSlotIndices = WorkloadIndices{
numeric_cast<
unsigned int>(workloadIndex),
488 outputSlot->CalculateIndexOnOwner()};
490 bool supportsReplacement =
true;
491 auto outputWorkload = m_WorkloadQueue[indices.m_OutputSlotIndices.m_WorkloadIndex].get();
492 supportsReplacement &= outputWorkload->SupportsTensorHandleReplacement();
494 for (
auto &inputSlot: outputSlot->GetConnections())
498 auto inWorkloadIndex = std::distance(order.begin(),
499 order.GetPosInGraph(inputSlot->GetOwningLayer()));
500 inWorkloadIndex -= noOfInputs;
501 indices.m_InputSlotIndices.emplace_back(WorkloadIndices{
numeric_cast<
unsigned int>(inWorkloadIndex),
502 inputSlot->GetSlotIndex()});
503 auto inputWorkload = m_WorkloadQueue[indices.m_InputSlotIndices.back().m_WorkloadIndex].get();
504 supportsReplacement &= inputWorkload->SupportsTensorHandleReplacement();
511 m_TensorHandleFactoryRegistry.GetMatchingImportFactoryId(factoryId);
512 ITensorHandleFactory *importFactory = m_TensorHandleFactoryRegistry.GetFactory(importFactoryId);
514 if (supportsReplacement && importFactory)
516 m_PreImportedOutputHandles.emplace_back(
521 m_PreImportedOutputHandles.emplace_back(bindingId,
nullptr);
// Give every factory a chance to finalise after workload creation.
526 for (
auto&& workloadFactory : m_WorkloadFactories)
528 workloadFactory.second->AfterWorkloadsCreated();
534 timelineUtils->Commit();
// Pass 5: memory planning — external memory manager path optimises memory
// bins per backend strategy; internal path allocates dynamic buffers.
537 if (useExternalMemoryManager)
541 CreateMemoryProfileAsync();
545 CreateMemoryProfile();
549 for (
auto& backendMemoryProfile : m_MemBlockMap)
551 const BackendId& backendId = backendMemoryProfile.first;
552 if (backendStrategyMap.find(backendId) != backendStrategyMap.end())
554 m_MemBinMap[backendId] = backendStrategyMap[backendId]->Optimize(backendMemoryProfile.second);
558 m_MemBinMap[backendId] = m_ConstantStrategy->Optimize(backendMemoryProfile.second);
564 m_ExternalMemoryManager = CreateExternalMemoryManger(m_TensorMemory);
// Sort tensor memory by output-slot id — presumably so import order below
// lines up with m_Tensorhandles; confirm against AllocateWorkingMemory.
567 std::sort(m_TensorMemory.begin(), m_TensorMemory.end(),
568 [](
const std::pair<std::shared_ptr<TensorMemory>,
MemorySource>& lhs,
569 const std::pair<std::shared_ptr<TensorMemory>,
MemorySource>& rhs)
571 return lhs.first->m_OutputSlotId < rhs.first->m_OutputSlotId;
580 if (useInternalMemoryManager)
583 m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().AllocateDynamicBuffers();
586 for (
auto &workload : m_WorkloadQueue)
588 workload->PostAllocationConfigure();
// Constant workloads are executed once up front (sync or async variant).
592 if (useExternalMemoryManager)
596 AllocateAndExecuteConstantWorkloads();
600 AllocateAndExecuteConstantWorkloadsAsync();
606 for (
auto workload: ConstWorkloads)
// Allocates the output tensor handle of every constant workload and executes
// the workload once, so constant data is materialised before inference.
613 void LoadedNetwork::AllocateAndExecuteConstantWorkloads()
616 for (
auto& pair : m_ConstantWorkloads)
// pair.first is the constant layer's GUID, keying both maps.
618 auto tensorHandle = m_ConstantTensorHandles[pair.first];
619 tensorHandle->Allocate();
620 pair.second->Execute();
// Async variant: walks the graph for constant layers, creates their tensor
// handles, and executes each constant workload through the backend's
// ExecutionData mechanism instead of a direct Execute() call.
624 void LoadedNetwork::AllocateAndExecuteConstantWorkloadsAsync()
627 Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
628 for (
auto&& layer : order)
// Constant layers have exactly one output slot (index 0 used throughout).
632 const auto& outSlot = layer->GetOutputSlots()[0];
633 const auto factoryId = outSlot.GetTensorHandleFactoryId();
635 auto& workloadFactory = GetWorkloadFactory(*layer);
637 layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory);
638 ITensorHandle* tensorHandle = outSlot.GetOutputHandler().GetData();
640 m_ConstantTensorHandles[layer->GetGuid()] = tensorHandle;
643 auto& backend = m_Backends.at(layer->GetBackendId());
646 memDesc.
m_Outputs.push_back(tensorHandle);
648 ExecutionData executionData = backend->CreateExecutionData(memDesc);
649 m_ConstantWorkloads[layer->GetGuid()]->ExecuteAsync(executionData);
// Fragment (header elided): replays the network structure to the profiling
// timeline — network entity, per-layer structure, and per-workload structure
// — then commits. Presumably LoadedNetwork::SendNetworkStructure; confirm
// against the full file.
658 ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
660 std::unique_ptr<TimelineUtilityMethods> timelineUtils =
661 TimelineUtilityMethods::GetTimelineUtils(profilingService);
663 timelineUtils->CreateTypedEntity(networkGuid, LabelsAndEventClasses::NETWORK_GUID);
665 for (
auto&& layer : order)
668 AddLayerStructure(timelineUtils, *layer, networkGuid);
669 switch (layer->GetType())
679 for (
auto& workload : m_WorkloadQueue)
682 AddWorkloadStructure(timelineUtils, workload, *layer);
689 timelineUtils->Commit();
694 return m_OptimizedNetwork->GetGuid();
// Fragment (header elided): linear search of the graph's input layers for a
// matching binding id; returns the TensorInfo of its single output slot.
699 for (
auto&& inputLayer : m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetInputLayers())
701 ARMNN_ASSERT_MSG(inputLayer->GetNumOutputSlots() == 1,
"Input layer should have exactly 1 output slot");
702 if (inputLayer->GetBindingId() == layerId)
704 return inputLayer->GetOutputSlot(0).GetTensorInfo();
// Fragment (header elided): linear search of the graph's output layers for a
// matching binding id; returns the TensorInfo from its connected input slot.
713 for (
auto&& outputLayer : m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetOutputLayers())
715 ARMNN_ASSERT_MSG(outputLayer->GetNumInputSlots() == 1,
"Output layer should have exactly 1 input slot");
716 ARMNN_ASSERT_MSG(outputLayer->GetInputSlot(0).GetConnection(),
"Input slot on Output layer must be connected");
717 if (outputLayer->GetBindingId() == layerId)
719 return outputLayer->GetInputSlot(0).GetConnection()->GetTensorInfo();
// Fragment (header elided): resolves the workload factory registered for the
// layer's backend, throwing RuntimeException when none exists, and verifying
// the factory actually supports the layer before returning it by reference.
730 auto it = m_WorkloadFactories.find(layer.
GetBackendId());
731 if (it == m_WorkloadFactories.end())
733 throw RuntimeException(fmt::format(
"No workload factory for {0} to be used for layer: {1}",
739 workloadFactory = it->second.get();
743 std::string reasonIfUnsupported;
747 m_OptimizedNetwork->pOptimizedNetworkImpl->GetModelOptions()),
748 "Factory does not support layer");
750 return *workloadFactory;
// Helper-class fragment: TensorPin owns a passthrough tensor handle plus its
// TensorInfo and binding id; WorkloadData builds pin lists for the user's
// input and output tensors and looks pins up by binding id. Class headers
// and several members are elided in this extraction.
760 : m_TensorHandle(std::move(handle))
766 ITensorHandle* GetTensorHandle()
const {
return m_TensorHandle.get(); }
771 std::unique_ptr<ITensorHandle> m_TensorHandle;
// Shared lookup used by GetInputTensorPin/GetOutputTensorPin below;
// bindingPointDesc ("input"/"output") is presumably used in the error path,
// which is elided here.
777 const std::vector<TensorPin>& pins,
778 char const* bindingPointDesc)
780 auto it = std::find_if(pins.begin(), pins.end(),
781 [id](
const TensorPin& pin)
783 return pin.GetBindingId() == id;
786 if (it != pins.end())
// Constructor body: wrap every user tensor in a (Const)PassthroughTensorHandle
// so workloads can consume caller-owned memory without copying here.
802 m_InputTensorPins.reserve(inputTensors.size());
803 m_OutputTensorPins.reserve(outputTensors.size());
805 for (
auto inputTensorPair : inputTensors)
807 auto inputTensor = inputTensorPair.second;
809 std::unique_ptr<ITensorHandle> tensorHandle =
810 std::make_unique<ConstPassthroughTensorHandle>(inputTensor.GetInfo(),inputTensor.GetMemoryArea());
813 m_InputTensorPins.emplace_back(std::move(tensorHandle), inputTensor.GetInfo(), layerId);
816 for (
auto outputTensorPair : outputTensors)
818 auto outputTensor = outputTensorPair.second;
820 std::unique_ptr<ITensorHandle> tensorHandle =
821 std::make_unique<PassthroughTensorHandle>(outputTensor.GetInfo(), outputTensor.GetMemoryArea());
824 m_OutputTensorPins.emplace_back(std::move(tensorHandle), outputTensor.GetInfo(), layerId);
830 return GetTensorPin(
id, m_InputTensorPins,
"input");
835 return GetTensorPin(
id, m_OutputTensorPins,
"output");
840 std::vector<TensorPin> m_InputTensorPins;
841 std::vector<TensorPin> m_OutputTensorPins;
// EnqueueWorkload fragment (signature start elided): validates tensor counts,
// fills the input queue (re-binding pre-imported handles where workloads
// support replacement, otherwise enqueuing copy workloads), fills the output
// queue likewise, records the inference in the profiling timeline, and runs
// Execute(). Many brace/driver lines are elided; code kept verbatim.
848 std::vector<ImportedInputId> preImportedInputIds,
849 std::vector<ImportedOutputId> preImportedOutputIds)
851 const Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
856 ARMNN_LOG(
warning) <<
"IRuntime::EnqueueWorkload()::Less than two nodes in graph";
861 WorkloadData workloadData(inputTensors, outputTensors);
// User-supplied + pre-imported inputs must exactly cover the graph inputs.
865 if (graph.
GetNumInputs() != (inputTensors.size() + preImportedInputIds.size()))
873 m_InputQueue.clear();
876 unsigned int inputIndex = 0;
877 unsigned int importedInputIdIndex = 0;
878 std::sort(preImportedInputIds.begin(), preImportedInputIds.end());
// Sorted ids let a single cursor (importedInputIdIndex) match them against
// the monotonically increasing inputIndex.
881 if (importedInputIdIndex < preImportedInputIds.size() &&
882 inputIndex == preImportedInputIds[importedInputIdIndex])
885 if (!m_IsInputImported[inputIndex])
// First use of this pre-imported input: point every consuming workload's
// input slot at the pre-imported handle.
887 auto outputTensorHandle = m_PreImportedInputHandles[inputIndex].m_TensorHandle.get();
889 for (
const auto& workloadInfo: m_InputWorkloadSlotPairs[inputLayer->GetBindingId()])
891 auto workload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
892 workload->ReplaceInputTensorHandle(outputTensorHandle, workloadInfo.m_SlotIndex);
894 m_IsInputImported[inputIndex] =
true;
896 importedInputIdIndex++;
900 if (m_IsInputImported[inputIndex])
// This input was imported on a previous call but is a regular tensor now:
// restore the original handler-owned handle before enqueuing the copy.
904 for (
const auto& workloadInfo: m_InputWorkloadSlotPairs[inputLayer->GetBindingId()])
906 auto workload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
907 workload->ReplaceInputTensorHandle(handler.
GetData(), workloadInfo.m_SlotIndex);
910 m_IsInputImported[inputIndex] =
false;
914 const TensorPin& pin = workloadData.GetInputTensorPin(inputLayer->GetBindingId());
915 EnqueueInput(*inputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
// Outputs: same pattern as inputs, plus a SyncMemGeneric workload to make
// imported output memory visible to the caller after execution.
923 m_OutputQueue.clear();
931 unsigned int outputIndex = 0;
932 unsigned int importedOutputIdIndex = 0;
933 std::sort(preImportedOutputIds.begin(), preImportedOutputIds.end());
936 if (importedOutputIdIndex < preImportedOutputIds.size() &&
937 outputIndex == preImportedOutputIds[importedOutputIdIndex])
940 ITensorHandle* inputTensorHandle = m_PreImportedOutputHandles[outputIndex].m_TensorHandle.get();
942 if (!m_IsOutputImported[outputIndex])
944 const auto bindingId = outputLayer->GetBindingId();
945 const auto& indices = m_OutputWorkloadSlotPairs[bindingId];
947 auto outputWorkload = m_WorkloadQueue[indices.m_OutputSlotIndices.m_WorkloadIndex].get();
949 outputWorkload->ReplaceOutputTensorHandle(inputTensorHandle,
950 indices.m_OutputSlotIndices.m_SlotIndex);
952 for (
const auto& workloadInfo: indices.m_InputSlotIndices)
954 auto inputWorkload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
955 inputWorkload->ReplaceInputTensorHandle(inputTensorHandle, workloadInfo.m_SlotIndex);
957 m_IsOutputImported[outputIndex] =
true;
960 ARMNN_ASSERT_MSG(inputTensorHandle !=
nullptr,
"Data should have been allocated.");
962 syncDesc.
m_Inputs.push_back(inputTensorHandle);
965 outputLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo());
966 auto syncWorkload = std::make_unique<SyncMemGenericWorkload>(syncDesc,
info);
968 m_OutputQueue.push_back(move(syncWorkload));
969 importedOutputIdIndex++;
973 if (m_IsOutputImported[outputIndex])
// Previously imported output reverting to a regular tensor: restore the
// handler-owned handle on producer and consumer workloads.
975 const auto bindingId = outputLayer->GetBindingId();
976 const auto& indices = m_OutputWorkloadSlotPairs[bindingId];
978 auto outputWorkload = m_WorkloadQueue[indices.m_OutputSlotIndices.m_WorkloadIndex].get();
980 outputLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetOutputHandler();
982 outputWorkload->ReplaceOutputTensorHandle(
983 outputHandler.
GetData(), indices.m_OutputSlotIndices.m_SlotIndex);
985 for (
const auto& workloadInfo: indices.m_InputSlotIndices)
987 auto inputWorkload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
988 inputWorkload->ReplaceInputTensorHandle(outputHandler.
GetData(), workloadInfo.m_SlotIndex);
990 m_IsOutputImported[outputIndex] =
false;
993 const TensorPin& pin = workloadData.GetOutputTensorPin(outputLayer->GetBindingId());
995 EnqueueOutput(*outputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
// Profiling: record this inference and its start/end-of-life events.
1001 std::unique_ptr<TimelineUtilityMethods> timelineUtils =
1002 TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
1003 ProfilingGuid inferenceGuid = m_ProfilingService->GetNextGuid();
1007 ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
1008 timelineUtils->CreateTypedEntity(inferenceGuid, LabelsAndEventClasses::INFERENCE_GUID);
1009 timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
1012 LabelsAndEventClasses::EXECUTION_OF_GUID);
1013 timelineUtils->RecordEvent(inferenceGuid, LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
1016 bool executionSucceeded =
true;
1019 if (m_ProfilingService->IsProfilingEnabled())
1021 m_ProfilingService->IncrementCounterValue(INFERENCES_RUN);
1025 executionSucceeded = Execute(timelineUtils, inferenceGuid);
1031 timelineUtils->RecordEvent(inferenceGuid, LabelsAndEventClasses::ARMNN_PROFILING_EOL_EVENT_CLASS);
1032 timelineUtils->Commit();
// EnqueueInput fragment (header elided): attempts zero-copy import of the
// caller's input memory when import is enabled and the flags allow it;
// otherwise queues a CopyMemGeneric workload to copy into the network's
// input tensor handle.
1045 if (tensorHandle ==
nullptr)
1053 inputQueueDescriptor.
m_Inputs.push_back(tensorHandle);
1058 const TensorInfo& outputTensorInfo = handler.GetTensorInfo();
1061 "Data should have been allocated.");
1062 inputQueueDescriptor.
m_Outputs.push_back(outputTensorHandle);
1066 bool needMemCopy =
true;
1067 if (m_NetworkProperties.m_ImportEnabled)
1069 if(
CheckFlag(importFlags, m_NetworkProperties.m_InputSource))
1071 needMemCopy =
false;
// Map the caller's memory (read-only) and try to import it directly.
1073 void* mem = tensorHandle->
Map(
false);
1074 if (outputTensorHandle->
Import(mem, m_NetworkProperties.m_InputSource))
1076 tensorHandle->
Unmap();
1079 tensorHandle->
Unmap();
1086 std::unique_ptr<IWorkload> inputWorkload = std::make_unique<CopyMemGenericWorkload>(inputQueueDescriptor,
info);
1090 std::unique_ptr<TimelineUtilityMethods> timelineUtils =
1091 TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
1095 AddWorkloadStructure(timelineUtils, inputWorkload, layer);
1096 timelineUtils->Commit();
1099 m_InputQueue.push_back(move(inputWorkload));
// EnqueueOutput fragment (header elided): mirrors EnqueueInput for outputs —
// attempts export of the caller's output memory (only when the producing
// slot has a single connection), queuing a SyncMemGeneric workload on
// success, else a CopyMemGeneric workload to copy results out.
1110 if (tensorHandle ==
nullptr)
1118 outputQueueDescriptor.
m_Outputs.push_back(tensorHandle);
1126 const TensorInfo& inputTensorInfo = outputHandler.GetTensorInfo();
1128 ARMNN_ASSERT_MSG(inputTensorHandle !=
nullptr,
"Data should have been allocated.");
1137 bool needMemCopy =
true;
// Export requires exclusive use of the producing output slot.
1138 if (m_NetworkProperties.m_ExportEnabled &&
1139 (layer.
GetInputSlots()[0].GetConnectedOutputSlot()->GetNumConnections() == 1))
1144 if (
CheckFlag(importFlags, m_NetworkProperties.m_OutputSource))
1146 needMemCopy =
false;
1147 void *mem = tensorHandle->
Map(
false);
1148 bool importOk = inputTensorHandle->
Import(mem, m_NetworkProperties.m_OutputSource);
1149 tensorHandle->
Unmap();
// Exported path: only a memory-sync workload is needed after compute.
1155 syncDesc.
m_Inputs.push_back(inputTensorHandle);
1157 auto syncWorkload = std::make_unique<SyncMemGenericWorkload>(syncDesc,
info);
1159 m_OutputQueue.push_back(move(syncWorkload));
1171 outputQueueDescriptor.
m_Inputs.push_back(inputTensorHandle);
1174 std::unique_ptr<IWorkload> outputWorkload =
1175 std::make_unique<CopyMemGenericWorkload>(outputQueueDescriptor,
info);
1178 std::unique_ptr<TimelineUtilityMethods> timelineUtils =
1179 TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
1183 AddWorkloadStructure(timelineUtils, outputWorkload, layer);
1184 timelineUtils->Commit();
1187 m_OutputQueue.push_back(move(outputWorkload));
// AllocateWorkingMemory: idempotent (guarded by m_IsWorkingMemAllocated);
// imports pre-planned external tensor memory or acquires backend-managed
// memory. Takes the caller's lock by reference when threading is enabled —
// presumably to prove the mutex is already held; confirm at call sites.
1191 void LoadedNetwork::AllocateWorkingMemory(
1192 #
if !defined(ARMNN_DISABLE_THREADS)
1193 std::lock_guard<std::mutex>& lock
1199 #if !defined(ARMNN_DISABLE_THREADS) 1203 if (m_IsWorkingMemAllocated)
1208 if (m_ExternalMemoryManager)
1210 m_ExternalMemoryManager->Allocate();
// m_Tensorhandles and m_TensorMemory are index-aligned here.
1212 for (
unsigned int i = 0; i < m_TensorMemory.size(); ++i)
1214 m_Tensorhandles[i]->Import(m_TensorMemory[i].first->m_Data, m_TensorMemory[i].second);
1218 for (
auto&& memoryManager : m_BackendMemoryMangers)
1222 memoryManager->Acquire();
1225 m_TensorHandleFactoryRegistry.AquireMemory();
1226 m_IsWorkingMemAllocated =
true;
// FreeWorkingMemory (header elided): inverse of the above — deallocates
// external memory, releases backend managers and factory-registry memory.
1231 #if !defined(ARMNN_DISABLE_THREADS) 1232 std::lock_guard<std::mutex> lockGuard(m_WorkingMemMutex);
1235 if (!m_IsWorkingMemAllocated)
1240 if (m_ExternalMemoryManager)
1242 m_ExternalMemoryManager->Deallocate();
1246 for (
auto&& memoryManager : m_BackendMemoryMangers)
1250 memoryManager->Release();
1253 m_TensorHandleFactoryRegistry.ReleaseMemory();
1254 m_IsWorkingMemAllocated =
false;
// Execute fragment (header elided): allocates working memory under the mutex,
// then runs the input, workload and output queues in order, recording
// per-workload inference events in the profiling timeline. Errors are logged
// via the Fail lambda; 'success' is presumably returned by elided code.
1258 ProfilingGuid inferenceGuid)
1260 bool success =
true;
1262 auto Fail = [&](
const std::exception&
error)
1264 ARMNN_LOG(error) <<
"An error occurred attempting to execute a workload: " << error.what();
1270 #if !defined(ARMNN_DISABLE_THREADS) 1271 std::lock_guard<std::mutex> lockGuard(m_WorkingMemMutex);
1272 AllocateWorkingMemory(lockGuard);
1274 AllocateWorkingMemory();
1277 ProfilingDynamicGuid workloadInferenceID(0);
1278 auto ExecuteQueue = [&timelineUtils, &workloadInferenceID, &inferenceGuid](
WorkloadQueue& queue)
1280 for (
auto& workload : queue)
// Bracket each workload's Execute() with start/end-of-life timeline events.
1284 workloadInferenceID = timelineUtils->RecordWorkloadInferenceAndStartOfLifeEvent(workload->GetGuid(),
1287 workload->Execute();
1290 timelineUtils->RecordEndOfLifeEvent(workloadInferenceID);
1295 ExecuteQueue(m_InputQueue);
1296 ExecuteQueue(m_WorkloadQueue);
1297 ExecuteQueue(m_OutputQueue);
1303 catch (
const std::runtime_error& error)
// Two helper fragments. First (header elided): copies or imports a caller
// input tensor into a network tensor handle — tries zero-copy import when
// enabled, falling back to a memcpy via CopyTensorContentsGeneric-style code.
1313 if (m_NetworkProperties.m_ImportEnabled)
1316 if (
CheckFlag(importFlags, m_NetworkProperties.m_InputSource) )
1318 std::unique_ptr<ITensorHandle> tensorHandle =
1319 std::make_unique<ConstPassthroughTensorHandle>(inputTensor.
GetInfo(),
1321 void* mem = tensorHandle->
Map(
false);
1323 if (inputTensorHandle->
Import(mem, m_NetworkProperties.m_InputSource))
1325 tensorHandle->Unmap();
1328 tensorHandle->Unmap();
// Fallback: plain memcpy of the caller's buffer into the network handle.
1338 std::unique_ptr<ITensorHandle> tensorHandle =
1339 std::make_unique<ConstPassthroughTensorHandle>(inputTensor.
GetInfo(), inputTensor.
GetMemoryArea());
1341 auto copyFunc = [](
void* dst,
const void* src,
size_t size)
1343 memcpy(dst, src, size);
// ImportOutputTensor: import-only (no copy fallback on the import path) —
// throws MemoryExportException when the import of caller memory fails.
1356 void LoadedNetwork::ImportOutputTensor(
const Tensor& outputTensor,
ITensorHandle* outputTensorHandle)
1358 ARMNN_ASSERT_MSG(outputTensorHandle !=
nullptr,
"Data should have been allocated.");
1360 if (
CheckFlag(importFlags, m_NetworkProperties.m_OutputSource))
1362 std::unique_ptr<ITensorHandle> tensorHandle =
1363 std::make_unique<PassthroughTensorHandle>(outputTensor.
GetInfo(),
1366 void* mem = tensorHandle->
Map(
false);
1367 bool importOk = outputTensorHandle->
Import(mem, m_NetworkProperties.m_OutputSource);
1368 tensorHandle->Unmap();
1377 throw MemoryExportException(
"ImportOutputTensor: Memory Export failed, attempting to export Input Layer");
// Elided sibling path: memcpy-based copy-out of the output tensor.
1384 auto copyFunc = [](
void* dst,
const void* src,
size_t size)
1386 memcpy(dst, src, size);
1389 std::unique_ptr<ITensorHandle> tensorHandle =
1390 std::make_unique<PassthroughTensorHandle>(outputTensor.
GetInfo(),
// Lookup fragments (headers elided): linear scans of the caller's tensor
// lists returning the tensor whose binding id matches; the id comparison
// lines are elided in this extraction.
1399 for (
auto inputTensorPair : inputTensors)
1404 return inputTensorPair.second;
1412 for (
auto outputTensorPair : outputTensors)
1417 return outputTensorPair.second;
// ImportInputs fragment: two paths. Sync path (not async-enabled): match the
// caller's tensors to pre-created import handles by binding id, import the
// memory in place, and return the input indices imported. Async path: create
// a fresh import handle per tensor via the backend factory (requires the
// PreImportIOTensors capability) and return newly allocated import ids.
1426 if (!m_NetworkProperties.m_AsyncEnabled)
1431 throw MemoryImportException(
"ImportInputs: Memory Import failed, NetworkProperties.m_ImportEnabled");
1434 if (inputTensors.size() > m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetNumInputs())
1436 throw MemoryImportException(
"ImportInputs: The number of tensors provided exceeds the number of inputs.");
1439 std::vector<ImportedInputId> importedInputs;
1441 unsigned int inputIndex = 0;
// Null handle means this binding was marked non-importable at load time.
1444 auto outputTensorHandle = m_PreImportedInputHandles[inputIndex].m_TensorHandle.get();
1446 if (!outputTensorHandle)
1452 auto layerBindingId = inputLayer->GetBindingId();
1453 auto it = std::find_if(inputTensors.begin(), inputTensors.end(), [=](
const auto& inputTensor)
1455 return inputTensor.first == layerBindingId;
1458 if (it == inputTensors.end())
1464 const auto& inputTensor = *it;
1465 std::unique_ptr<ITensorHandle> passThroughTensorHandle =
1466 std::make_unique<ConstPassthroughTensorHandle>(inputTensor.second.
GetInfo(),
// Import only when the handle explicitly reports the memory as importable.
1471 if (outputTensorHandle->
CanBeImported(passThroughTensorHandle->Map(), forceImportMemorySource)
1472 && (outputTensorHandle->
Import(passThroughTensorHandle->Map(), forceImportMemorySource)))
1474 importedInputs.push_back(inputIndex);
1476 passThroughTensorHandle->Unmap();
1480 ARMNN_LOG(error) <<
"An error occurred attempting to import input_" 1481 << inputIndex <<
" : " << exception.
what();
1482 passThroughTensorHandle->Unmap();
1487 return importedInputs;
// Async path begins here.
1492 std::vector<ImportedInputId> importedInputs;
1495 for (
auto inputTensor : inputTensors)
1497 auto layerBindingId = inputTensor.first;
1506 "ImportInputs: Memory Import failed, unknown LayerBindingId: {}", layerBindingId));
1509 const Layer* layer = *it;
1518 std::string er = backend->GetId();
1519 er +=
" does not have PreImportIOTensors capability";
1531 ImportedTensorHandlePin importedTensorHandlePin{layerBindingId,
1534 ITensorHandle* tensorHandle = importedTensorHandlePin.m_TensorHandle.get();
1539 fmt::format(
"ImportInputs: Memory Import failed, backend: " 1540 "{} does not support importing from source {}" 1541 , factoryId, m_NetworkProperties.m_InputSource));
1544 std::unique_ptr<ITensorHandle> passThroughTensorHandle =
1545 std::make_unique<ConstPassthroughTensorHandle>(inputTensor.second.
GetInfo(),
1548 if (tensorHandle->
Import(passThroughTensorHandle->Map(), forceImportMemorySource))
1550 importedInputs.push_back(m_CurImportedInputId++);
1551 passThroughTensorHandle->Unmap();
1555 passThroughTensorHandle->Unmap();
1559 m_PreImportedInputHandles.push_back(std::move(importedTensorHandlePin));
1561 return importedInputs;
// ImportOutputs fragment: mirrors ImportInputs for outputs. Sync path
// imports caller memory into the pre-created output handles; async path
// allocates new import handles per binding via the backend factory.
1568 if (!m_NetworkProperties.m_AsyncEnabled)
1573 throw MemoryImportException(
"ImportOutputs: Memory Import failed, NetworkProperties.m_ImportEnabled")
1576 if (outputTensors.size() != m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetNumOutputs())
1580 std::vector<ImportedOutputId> importedOutputs;
1583 unsigned int outputIndex = 0;
1586 auto inputTensorHandle = m_PreImportedOutputHandles[outputIndex].m_TensorHandle.get();
1587 if (!inputTensorHandle)
1593 auto layerBindingId = outputLayer->GetBindingId();
1594 auto it = std::find_if(outputTensors.begin(), outputTensors.end(), [=] (
const auto& outputTensor)
1596 return outputTensor.first == layerBindingId;
1599 if (it == outputTensors.end())
1605 const auto outputTensor = *it;
// Import straight from the caller's memory area — no passthrough mapping
// needed on the output side.
1610 && inputTensorHandle->
Import(outputTensor.second.
GetMemoryArea(), forceImportMemorySource))
1612 importedOutputs.push_back(outputIndex);
1617 ARMNN_LOG(error) <<
"An error occurred attempting to import output_" 1618 << outputIndex <<
" : " << exception.
what();
1622 return importedOutputs;
// Async path begins here.
1625 std::vector<ImportedOutputId> importedOutputs;
1628 for (
const auto& outputTensor : outputTensors)
1630 auto layerBindingId = outputTensor.first;
1638 throw MemoryImportException(fmt::format(
"ImportOutputs: Memory Import failed, unknown LayerBindingId: {}",
1642 const Layer* layer = *it;
1651 std::string er = backend->GetId();
1652 er +=
" does not have PreImportIOTensors capability";
1663 ImportedTensorHandlePin importedTensorHandlePin{layerBindingId,
1666 ITensorHandle* tensorHandle = importedTensorHandlePin.m_TensorHandle.get();
1671 "{} does not support importing from source {}" 1672 , factoryId, forceImportMemorySource));
1677 importedOutputs.push_back(m_CurImportedOutputId++);
1684 m_PreImportedOutputHandles.push_back(std::move(importedTensorHandlePin));
1687 return importedOutputs;
// ClearImportedInputs fragment: for each id, validates it is in range and
// not already cleared, then unimports the handle and resets the slot.
// NOTE(review): the visible bound check uses '>' against .size() — an
// off-by-one suspicion ('>=' would be the usual guard), but the throwing
// branch is elided here so this cannot be confirmed from the fragment.
1692 for (
auto id : inputIds)
1694 if (
id > m_PreImportedInputHandles.size())
1699 auto& importedTensorHandle = m_PreImportedInputHandles[id].m_TensorHandle;
1700 if (!importedTensorHandle)
1703 fmt::format(
"ClearImportedInputs::ImportedInput with id: {} has already been deleted",
id));
// Resetting to {} marks the slot as deleted for subsequent calls.
1706 importedTensorHandle->Unimport();
1707 importedTensorHandle = {};
// ClearImportedOutputs: identical pattern for the output handle list.
1713 for (
auto id : outputIds)
1715 if (
id > m_PreImportedOutputHandles.size())
1720 auto& importedTensorHandle = m_PreImportedOutputHandles[id].m_TensorHandle;
1721 if (!importedTensorHandle)
1724 fmt::format(
"ClearImportedOutputs::ImportedOutput with id: {} has already been deleted",
id));
1727 importedTensorHandle->Unimport();
1728 importedTensorHandle = {};
1735 std::vector<ImportedInputId> preImportedInputs,
1736 std::vector<ImportedOutputId> preImportedOutputs)
1738 const Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
1740 if (inputTensors.size() + preImportedInputs.size() != graph.
GetNumInputs())
1742 if (preImportedInputs.empty())
1749 "Number of inputs + preImportedInputs provided does not match network.");
1753 if (outputTensors.size() + preImportedOutputs.size() != graph.
GetNumOutputs())
1755 if (preImportedOutputs.empty())
1758 "Number of outputs provided does not match network.");
1763 "Number of outputs + preImportedOutputs provided does not match network.");
1770 unsigned int index = 0;
1771 for (
auto pair : inputTensors)
1773 bindingIds[index++] = pair.first;
1777 bindingIds[index++] = ValidateImportedInputID(
id);
1779 for (
auto pair : outputTensors)
1781 bindingIds[index++] = pair.first;
1785 bindingIds[index++] = ValidateImportedOutputID(
id);
1790 auto resetMemHandle = [&]()
1794 const LayerBindingId layerBindingId = m_PreImportedInputHandles[id].m_LayerBindingId;
1796 auto inputHandle = workingMemHandle.
GetInputHandle(layerBindingId);
1798 for (
auto it : inputConnections)
1806 const LayerBindingId layerBindingId = m_PreImportedOutputHandles[id].m_LayerBindingId;
1811 for (
auto it : outputConnections)
1818 std::unique_ptr<TimelineUtilityMethods> timelineUtils =
1819 TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
1820 ProfilingGuid inferenceGuid = m_ProfilingService->GetNextGuid();
1824 ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
1825 timelineUtils->CreateTypedEntity(inferenceGuid,LabelsAndEventClasses::INFERENCE_GUID);
1826 timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
1829 LabelsAndEventClasses::EXECUTION_OF_GUID);
1830 timelineUtils->RecordEvent(inferenceGuid,LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
1833 bool executionSucceeded =
true;
1838 timelineUtils->RecordEvent(inferenceGuid,LabelsAndEventClasses::ARMNN_PROFILING_EOL_EVENT_CLASS);
1839 timelineUtils->Commit();
1849 for (
auto pair : inputTensors)
1851 EnqueueInput(pair.second, workingMemHandle.
GetInputHandle(pair.first));
1857 const ImportedTensorHandlePin& importedInputPin = m_PreImportedInputHandles[id];
1858 const LayerBindingId layerBindingId = m_PreImportedInputHandles[id].m_LayerBindingId;
1859 const auto& preimportedHandle = importedInputPin.m_TensorHandle;
1862 for (
auto it : inputConnections)
1864 *it = preimportedHandle.get();
1870 if (m_NetworkProperties.m_ExportEnabled)
1872 for (
auto pair: outputTensors)
1874 ImportOutputTensor(pair.second, workingMemHandle.
GetOutputHandle(pair.first));
1880 const ImportedTensorHandlePin& importedOutputPin = m_PreImportedOutputHandles[id];
1881 const LayerBindingId layerBindingId = m_PreImportedOutputHandles[id].m_LayerBindingId;
1882 const auto& preimportedHandle = importedOutputPin.m_TensorHandle;
1885 for (
auto it : outputConnections)
1887 *it = preimportedHandle.get();
1892 auto Fail = [&](
const std::exception&
error)
1894 ARMNN_LOG(error) <<
"An error occurred attempting to execute a workload: " << error.what();
1895 executionSucceeded =
false;
1897 ProfilingDynamicGuid workloadInferenceID(0);
1901 for (
unsigned int i = 0; i < m_WorkloadQueue.size(); ++i)
1903 auto& workload = m_WorkloadQueue[i];
1906 workloadInferenceID = timelineUtils->RecordWorkloadInferenceAndStartOfLifeEvent(workload->GetGuid(),
1914 timelineUtils->RecordEndOfLifeEvent(workloadInferenceID);
1923 catch (
const std::runtime_error& error)
1934 if (!m_NetworkProperties.m_ExportEnabled)
1936 for (
auto pair: outputTensors)
1956 Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
1959 std::vector<std::unique_ptr<ITensorHandle>> managedTensorHandles;
1961 std::vector<std::unique_ptr<ITensorHandle>> unmanagedTensorHandles;
1963 std::vector<WorkingMemDescriptor> workingMemDescriptors;
1964 std::vector<std::pair<BackendId, ExecutionData>> executionDataVec;
1966 auto GetTensorHandle = [&](
Layer* layer,
const OutputSlot& outputSlot)
1969 const TensorInfo& tensorInfo = outputSlot.GetTensorInfo();
1975 return m_WorkloadFactories.at(
id)->CreateTensorHandle(tensorInfo,
false);
1990 bool m_IsInputLayerHandle =
false;
1991 bool m_IsOutputLayerHandle =
false;
1997 std::unordered_map<const OutputSlot*, HandleInfo> outputToHandleInfoMap;
1999 unsigned int layerIndex = 0;
2000 for (
auto&& layer : order)
2010 bool isMemoryManaged =
true;
2011 bool isInputLayer =
false;
2012 bool isOutputLayer =
false;
2013 bool isConnectedToOutputLayer =
false;
2019 isInputLayer =
true;
2020 isMemoryManaged = !m_NetworkProperties.m_ImportEnabled;
2024 isOutputLayer =
true;
2027 unsigned int slotIndex = 0;
2032 for (
unsigned int i = 0; i < slot.GetNumConnections(); ++i)
2036 if (!isConnectedToOutputLayer)
2038 isConnectedToOutputLayer =
true;
2040 isMemoryManaged = !m_NetworkProperties.m_ExportEnabled;
2046 fmt::format(
"Layer name: '{0}' guid: '{1}' has two or more OutputLayers connected to it. " 2047 "This will prevent importing on the connected OutputLayers.",
2049 isMemoryManaged =
true;
2055 if (isMemoryManaged)
2057 managedTensorHandles.emplace_back(GetTensorHandle(layer, slot));
2058 tensorHandle = managedTensorHandles.back().get();
2062 unmanagedTensorHandles.emplace_back(GetTensorHandle(layer, slot));
2063 tensorHandle = unmanagedTensorHandles.back().get();
2066 workingMemDescriptor.
m_Outputs.push_back(tensorHandle);
2068 HandleInfo& handleInfo = outputToHandleInfoMap[&slot];
2069 handleInfo.m_TensorHandle = tensorHandle;
2072 if (isConnectedToOutputLayer)
2074 handleInfo.m_IsOutputLayerHandle =
true;
2075 handleInfo.m_OutputMemDescriptorCoords.m_OutputSlotCoords = {layerIndex, slotIndex};
2080 handleInfo.m_IsInputLayerHandle =
true;
2082 handleInfo.m_InputMemDescriptorCoords.m_LayerBindingId = bindingId;
2093 auto outputSlot = slot.GetConnectedOutputSlot();
2094 auto key = outputSlot->GetOwningLayer().GetGuid();
2097 auto found = m_ConstantTensorHandles.find(key);
2098 if (found != m_ConstantTensorHandles.end())
2101 workingMemDescriptor.
m_Inputs.push_back(tensorHandle);
2109 HandleInfo& handleInfo = outputToHandleInfoMap[outputSlot];
2110 handleInfo.m_TensorHandle = tensorHandle;
2111 handleInfo.m_IsOutputLayerHandle =
true;
2112 handleInfo.m_OutputMemDescriptorCoords.m_LayerBindingIds.push_back(bindingId);
2113 handleInfo.m_OutputMemDescriptorCoords.m_InputSlotCoords.push_back({layerIndex, 0});
2118 HandleInfo& handleInfo = outputToHandleInfoMap.at(outputSlot);
2120 ITensorHandle* inputTensorHandle = handleInfo.m_TensorHandle;
2121 workingMemDescriptor.
m_Inputs.push_back(inputTensorHandle);
2127 handleInfo.m_OutputMemDescriptorCoords.m_LayerBindingIds.push_back(bindingId);
2128 handleInfo.m_OutputMemDescriptorCoords.m_InputSlotCoords.push_back({layerIndex, 0});
2132 else if (handleInfo.m_IsOutputLayerHandle)
2134 handleInfo.m_OutputMemDescriptorCoords.m_InputSlotCoords.push_back({layerIndex, slot.GetSlotIndex()});
2139 if (handleInfo.m_IsInputLayerHandle)
2141 std::pair<LayerGuid, unsigned int> connectionLocation{layerIndex, slot.GetSlotIndex()};
2142 handleInfo.m_InputMemDescriptorCoords.m_InputSlotCoords.emplace_back(connectionLocation);
2152 std::pair<BackendId, ExecutionData> dataPair;
2155 executionDataVec.push_back(dataPair);
2156 workingMemDescriptors.push_back(workingMemDescriptor);
2162 std::vector<std::pair<std::shared_ptr<TensorMemory>,
MemorySource>> tensorMemory;
2164 auto externalMemoryManager = CreateExternalMemoryManger(tensorMemory);
2167 std::sort(tensorMemory.begin(), tensorMemory.end(),
2168 [](
const std::pair<std::shared_ptr<TensorMemory>,
MemorySource>& lhs,
2169 const std::pair<std::shared_ptr<TensorMemory>,
MemorySource>& rhs)
2171 return lhs.first->m_OutputSlotId < rhs.first->m_OutputSlotId;
2174 std::vector<WorkingMemHandle::InputMemDescriptorCoords> inputConnectionsInfo;
2175 std::vector<WorkingMemHandle::OutputMemDescriptorCoords> outputConnectionsInfo;
2177 for (
const auto& handleInfo: outputToHandleInfoMap)
2179 if (handleInfo.second.m_IsOutputLayerHandle)
2181 outputConnectionsInfo.emplace_back(handleInfo.second.m_OutputMemDescriptorCoords);
2184 if (handleInfo.second.m_IsInputLayerHandle)
2186 inputConnectionsInfo.emplace_back(handleInfo.second.m_InputMemDescriptorCoords);
2190 return std::make_unique<WorkingMemHandle>(networkId,
2191 inputConnectionsInfo,
2192 outputConnectionsInfo,
2193 workingMemDescriptors,
2194 std::move(externalMemoryManager),
2195 std::move(tensorMemory),
2196 std::move(managedTensorHandles),
2197 std::move(unmanagedTensorHandles),
2204 for (
auto&& workloadPtr: m_WorkloadQueue)
2206 workloadPtr.get()->RegisterDebugCallback(func);
2211 void LoadedNetwork::CreateMemoryProfileAsync()
2215 unsigned int m_StartOfLife;
2216 unsigned int m_Lifetime;
2219 unsigned int m_Index;
2224 auto align = [](
size_t numToAlign)
2226 const size_t alignment =
sizeof(float);
2227 return ((numToAlign + alignment - 1) / alignment) * alignment;
2230 std::unordered_map<const OutputSlot*, PartialBlock> memBlockTrackerMap;
2235 unsigned int timestep = 0;
2236 unsigned int outputIndex = 0;
2239 for (
auto&& layer : order)
2263 if (!m_SupportsExternallyManagedMemory[backendId])
2268 PartialBlock partialBlock;
2270 partialBlock.m_StartOfLife = timestep;
2272 size_t alignedSize = align(outputSlot.GetOutputHandler().GetTensorInfo().GetNumBytes());
2273 partialBlock.m_MemSize = alignedSize;
2274 partialBlock.m_Index = outputIndex++;
2275 partialBlock.m_Lifetime = outputSlot.GetNumConnections();
2276 partialBlock.m_BackendId = backendId;
2278 if (partialBlock.m_Lifetime == 0)
2280 m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
2281 partialBlock.m_StartOfLife,
2282 partialBlock.m_MemSize,
2284 partialBlock.m_Index);
2288 memBlockTrackerMap[&outputSlot] = partialBlock;
2294 const Layer& connectedInputLayer = inputSlot.GetConnectedOutputSlot()->GetOwningLayer();
2306 auto outputSlot = inputSlot.GetConnectedOutputSlot();
2308 PartialBlock& partialBlock = memBlockTrackerMap.at(outputSlot);
2310 auto& lifetime = partialBlock.m_Lifetime;
2315 m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
2317 partialBlock.m_MemSize,
2319 partialBlock.m_Index);
2326 void LoadedNetwork::CreateMemoryProfile()
2330 auto TraceSubTensorHandleAncestry = [](
ITensorHandle*
const subTensorHandle)
2333 while (ancestor && ancestor->
GetParent())
2342 unsigned int m_StartOfLife;
2343 unsigned int m_Lifetime;
2346 unsigned int m_Index;
2351 auto align = [](
size_t numToAlign)
2353 const size_t alignment =
sizeof(float);
2354 return ((numToAlign + alignment - 1) / alignment) * alignment;
2357 std::unordered_map<ITensorHandle*, PartialBlock> memBlockTrackerMap;
2362 unsigned int timestep = 0;
2363 unsigned int outputIndex = 0;
2366 for (
auto&& layer : order)
2390 if (!m_SupportsExternallyManagedMemory[backendId])
2395 ITensorHandle* tensorHandle = outputSlot.GetOutputHandler().GetData();
2396 tensorHandle = TraceSubTensorHandleAncestry(tensorHandle);
2398 if (memBlockTrackerMap.find(tensorHandle) == memBlockTrackerMap.end())
2400 PartialBlock partialBlock;
2402 partialBlock.m_StartOfLife = timestep;
2404 size_t alignedSize = align(outputSlot.GetOutputHandler().GetTensorInfo().GetNumBytes());
2405 partialBlock.m_MemSize = alignedSize;
2406 partialBlock.m_Index = outputIndex++;
2407 partialBlock.m_Lifetime = outputSlot.GetNumConnections();
2408 partialBlock.m_BackendId = backendId;
2410 if (partialBlock.m_Lifetime == 0)
2412 m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
2413 partialBlock.m_StartOfLife,
2414 partialBlock.m_MemSize,
2416 partialBlock.m_Index);
2420 memBlockTrackerMap[tensorHandle] = partialBlock;
2422 m_Tensorhandles.push_back(tensorHandle);
2427 memBlockTrackerMap.at(tensorHandle).m_Lifetime += outputSlot.GetNumConnections();
2433 const Layer& connectedInputLayer = inputSlot.GetConnectedOutputSlot()->GetOwningLayer();
2444 if (!m_SupportsExternallyManagedMemory[connectedInputLayer.
GetBackendId()])
2449 auto outputSlot = inputSlot.GetConnectedOutputSlot();
2451 ITensorHandle* tensorHandle = outputSlot->GetOutputHandler().GetData();
2452 tensorHandle = TraceSubTensorHandleAncestry(tensorHandle);
2454 PartialBlock& partialBlock = memBlockTrackerMap.at(tensorHandle);
2456 auto& lifetime = partialBlock.m_Lifetime;
2461 m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
2463 partialBlock.m_MemSize,
2465 partialBlock.m_Index);
2473 std::unique_ptr<MemoryManager> LoadedNetwork::CreateExternalMemoryManger(
2474 std::vector<std::pair<std::shared_ptr<TensorMemory>,
MemorySource>>& tensorMemoryVec)
2476 std::unique_ptr<MemoryManager> memoryManager = std::make_unique<MemoryManager>();
2479 for (
auto& backend : m_MemBinMap)
2481 std::vector<BufferStorage> bufferStorageVec;
2483 std::shared_ptr<ICustomAllocator> backendAllocator;
2484 if (allocatorMap.find(backend.first) != allocatorMap.end())
2486 backendAllocator = allocatorMap[backend.first];
2490 backendAllocator = m_Backends[backend.first]->GetDefaultAllocator();
2493 for (
auto& memBin : backend.second)
2499 for (
auto& memBlock : memBin.m_MemBlocks)
2501 auto tensorMemory = std::make_shared<TensorMemory>(
TensorMemory{memBlock.
m_Offset, memBlock.m_Index});
2503 tensorMemoryVec.emplace_back(tensorMemory, backendAllocator->GetMemorySourceType());
2507 bufferStorageVec.emplace_back(std::move(bufferStorage));
2510 memoryManager->StoreMemToAllocate(bufferStorageVec, backendAllocator, 4);
2513 return memoryManager;
2520 const auto& importedTensorHandlePin = m_PreImportedInputHandles.at(
id);
2521 if (!importedTensorHandlePin.m_TensorHandle)
2524 "PreImportedInput: {} has been deleted",
id));
2526 return importedTensorHandlePin.m_LayerBindingId;
2528 catch (
const std::out_of_range&)
2538 const auto& importedTensorHandlePin = m_PreImportedOutputHandles.at(
id);
2539 if (!importedTensorHandlePin.m_TensorHandle)
2542 "PreImportedOutput: {} has been deleted",
id));
2544 return importedTensorHandlePin.m_LayerBindingId;
2546 catch (
const std::out_of_range&)
Status Execute(const InputTensors &inputTensors, const OutputTensors &outputTensors, IWorkingMemHandle &workingMemHandle, std::vector< ImportedInputId > preImportedInputs={}, std::vector< ImportedOutputId > preImportedOutputs={})
Thread safe execution of the loaded network.
std::vector< std::shared_ptr< TensorMemory > > m_TensorMemoryVector
Vector of pointer to .
std::unique_ptr< IWorkingMemHandle > CreateWorkingMemHandle(NetworkId networkId)
Create a new unique WorkingMemHandle object.
bool HasCapability(const std::string &name, const BackendCapabilities &capabilities)
Convenience function to check if a capability exists in a BackendCapabilites struct.
const MemorySource m_InputSource
virtual bool Import(void *memory, MemorySource source)
Import externally allocated memory.
FactoryFunction GetFactory(const BackendId &id) const
std::unique_ptr< IWorkloadFactory > IWorkloadFactoryPtr
unsigned int GetNumInputSlots() const override
Returns the number of connectable input slots.
static ProfilerManager & GetInstance()
#define ARMNN_NO_DEPRECATE_WARN_BEGIN
virtual IMemoryManagerUniquePtr CreateMemoryManager() const
LayerBindingId GetBindingId() const
virtual unsigned int GetImportFlags() const
Get flags describing supported import sources.
const bool m_AsyncEnabled
const armnn::Tensor GetOutputTensor(const LayerBindingId layerId, const OutputTensors &outputTensors)
MemoryOptimizerStrategiesMapRef GetMemoryOptimizerStrategies()
unsigned int ImportedOutputId
std::pair< BackendId, ExecutionData > & GetExecutionDataAt(unsigned int id) override
Get the ExecutionData at an index.
size_t m_Offset
Number of bytes the value is away from the .m_Buffer.
virtual void Allocate()=0
Indicate to the memory manager that this resource is no longer active.
virtual const char * what() const noexcept override
TensorInfo GetInputTensorInfo(LayerBindingId layerId) const
#define ARMNN_LOG(severity)
size_t m_BufferSize
Total size of the buffer.
ITensorHandle * GetOutputHandle(LayerBindingId layerBindingId) const
BackendRegistry & BackendRegistryInstance()
std::vector< std::pair< LayerBindingId, class ConstTensor > > InputTensors
const ProfilingDetailsMethod m_OutputNetworkDetailsMethod
unsigned int MemorySourceFlags
MemoryType GetMemoryArea() const
size_t GetNumOutputs() const
void CopyToOutputTensor(const Tensor &outputTensor, ITensorHandle *outputTensorHandle)
TensorInfo GetOutputTensorInfo(LayerBindingId layerId) const
Copyright (c) 2021 ARM Limited and Contributors.
void IgnoreUnused(Ts &&...)
const std::vector< InputSlot > & GetInputSlots() const
std::vector< ImportedInputId > ImportInputs(const InputTensors &inputTensors, MemorySource forceImportMemorySource=MemorySource::Undefined)
std::function< void(LayerGuid guid, unsigned int slotIndex, ITensorHandle *tensorHandle)> DebugCallbackFunction
Define the type of callback for the Debug layer to call.
unsigned int GetNumOutputSlots() const override
Returns the number of connectable output slots.
int LayerBindingId
Type of identifiers for bindable layers (inputs, outputs).
#define ARMNN_SCOPED_PROFILING_EVENT(backendId, name)
virtual const BackendId & GetId() const =0
ConstIteratorOutputs begin() const
A tensor defined by a TensorInfo (shape and data type) and a mutable backing store.
virtual IWorkloadFactoryPtr CreateWorkloadFactory(const IMemoryManagerSharedPtr &memoryManager=nullptr) const =0
std::vector< ITensorHandle * > m_Inputs
unsigned int GetNumConnections() const override
const InputSlot & GetInputSlot(unsigned int index) const override
Get a const input slot handle by slot index.
std::vector< TensorInfo > m_InputTensorInfos
const std::vector< std::vector< ITensorHandle * >::iterator > & GetOutputConnection(LayerBindingId layerBindingId) const
void ValidateBindingIds()
#define ARMNN_NO_DEPRECATE_WARN_END
#define ARMNN_ASSERT_MSG(COND, MSG)
bool SupportsTensorAllocatorAPI() const
#define ARMNN_SCOPED_HEAP_PROFILING(TAG)
A tensor defined by a TensorInfo (shape and data type) and an immutable backing store.
virtual ITensorHandle * GetParent() const =0
Get the parent tensor if this is a subtensor.
std::vector< std::pair< LayerBindingId, class Tensor > > OutputTensors
const std::string & GetNameStr() const
LayerType GetType() const override
Returns the armnn::LayerType of this layer.
void SendNetworkStructure(arm::pipe::IProfilingService &profilingService)
const std::vector< std::vector< ITensorHandle * >::iterator > & GetInputConnections(LayerBindingId layerBindingId) const
#define ARMNN_ASSERT(COND)
void ClearImportedInputs(const std::vector< ImportedInputId > inputIds)
std::vector< TensorInfo > m_OutputTensorInfos
void SetLayersOutOfOrder()
static bool IsLayerSupported(const BackendId &backendId, const IConnectableLayer &layer, Optional< DataType > dataType, std::string &outReasonIfUnsupported)
ITensorHandle * GetData() const
Gets the allocated tensor memory.
std::vector< std::unique_ptr< IWorkload > > WorkloadQueue
const TensorInfo & GetInfo() const
const BackendId & GetBackendId() const
void Allocate() override
Allocate the backing memory required for execution.
void ValidateSourcesMatchOptimizedNetwork(std::vector< BackendOptions > optimizedOptions, const INetworkProperties &networkProperties)
This function performs a sanity check to ensure that the combination of input and output memory sourc...
arm::pipe::ProfilingGuid GetNetworkGuid()
const std::vector< OutputSlot > & GetOutputSlots() const
virtual bool CanBeImported(void *memory, MemorySource source)
Implementations must determine if this memory block can be imported.
OutputLayersAccessor GetOutputLayers() const
Returns a wrapper object with begin(), end() methods to iterate over the output layers in a range-bas...
unsigned int ImportedInputId
Struct for the users to pass backend specific options.
Status EnqueueWorkload(const InputTensors &inputTensors, const OutputTensors &outputTensors, std::vector< ImportedInputId > preImportedInputIds={}, std::vector< ImportedOutputId > preImportedOutputIds={})
Single thread execution of the loaded network.
void RegisterProfiler(IProfiler *profiler)
virtual const void * Map(bool blocking=true) const =0
Map the tensor data for access.
std::vector< LayerBindingId > & GetBindingIdVector()
std::unordered_map< BackendId, std::shared_ptr< ICustomAllocator > > GetAllocators()
virtual BackendCapabilities GetCapabilities() const
Returns a BackendCapability if the backend lists the capability The BackendCapability must then be in...
virtual void Unmap() const =0
Unmap the tensor data.
bool AsBool() const
Value getters.
std::string GetName() const
bool IsAllocated() override
IsAllocated returns true if the backing memory is currently allocated.
std::vector< ITensorHandle * > m_Outputs
Base class for all ArmNN exceptions so that users can filter to just those.
const OutputHandler & GetOutputHandler(unsigned int i=0) const
std::vector< ImportedOutputId > ImportOutputs(const OutputTensors &outputTensors, MemorySource forceImportMemorySource=MemorySource::Undefined)
MemorySource
Define the Memory Source to reduce copies.
const std::string & Get() const
void RegisterDebugCallback(const DebugCallbackFunction &func)
ConstIteratorOutputs end() const
std::vector< ITensorHandle * > m_Outputs
std::enable_if_t< std::is_unsigned< Source >::value &&std::is_unsigned< Dest >::value, Dest > numeric_cast(Source source)
const MemorySource m_OutputSource
Contains information about TensorInfos of a layer.
const char * GetName() const override
Returns the name of the layer.
ITensorHandleFactory::FactoryId GetTensorHandleFactoryId() const
bool CheckFlag(MemorySourceFlags flags, MemorySource source)
void CopyTensorContentsGeneric(const ITensorHandle *srcTensor, ITensorHandle *dstTensor, CopyFunc copy)
Graph & TopologicalSort()
Sorts layers in topological order and return this.
InputLayersAccessor GetInputLayers() const
Returns a wrapper object with begin(), end() methods to iterate over the input layers in a range-base...
std::vector< ITensorHandle * > m_Inputs
size_t GetNumLayers() const
const armnn::ConstTensor GetInputTensor(const LayerBindingId layerId, const InputTensors &inputTensors)
const TensorInfo & GetTensorInfo(const ITensorHandle *tensorHandle)
float32 helpers
const TensorInfo & GetTensorInfo() const override
static std::unique_ptr< LoadedNetwork > MakeLoadedNetwork(std::unique_ptr< IOptimizedNetwork > net, std::string &errorMessage, const INetworkProperties &networkProperties, arm::pipe::IProfilingService *profilingService)
ITensorHandle * GetInputHandle(LayerBindingId layerBindingId) const
size_t GetNumInputs() const
virtual std::unique_ptr< ITensorHandle > CreateTensorHandle(const TensorInfo &tensorInfo) const =0
static const FactoryId LegacyFactoryId
const bool m_ProfilingEnabled
void ClearImportedOutputs(const std::vector< ImportedOutputId > outputIds)
LayerType
When adding a new layer, adapt also the LastLayer enum value in the enum class LayerType below...
LayerGuid GetGuid() const final
Returns the unique id of the layer.