29 #include <common/include/Processes.hpp> 31 #include <fmt/format.h> 42 template <
typename ExceptionType>
43 std::string ToErrorMessage(
const char * prefix,
const ExceptionType &
error)
46 ss << prefix <<
" " << error.what();
50 void AddLayerStructure(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
52 ProfilingGuid networkGuid)
55 std::string layerName = layer.GetNameStr().empty() ?
"<Unnamed>" : layer.GetNameStr();
56 timelineUtils->CreateNamedTypedChildEntity(layer.GetGuid(),
59 LabelsAndEventClasses::LAYER_GUID);
60 for (
auto&& input : layer.GetInputSlots())
62 const IOutputSlot* source = input.GetConnectedOutputSlot();
64 timelineUtils->CreateConnectionRelationship(ProfilingRelationshipType::RetentionLink,
65 source->GetOwningLayerGuid(),
70 void AddWorkloadStructure(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
71 std::unique_ptr<IWorkload>& workload,
75 timelineUtils->CreateTypedEntity(workload->GetGuid(), LabelsAndEventClasses::WORKLOAD_GUID);
76 timelineUtils->MarkEntityWithLabel(workload->GetGuid(),
77 layer.GetBackendId().Get(),
78 LabelsAndEventClasses::BACKENDID_GUID);
81 timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
84 LabelsAndEventClasses::CHILD_GUID);
103 const vector<BackendOptions>::iterator& backendItr =
104 find_if(optimizedOptions.begin(), optimizedOptions.end(), [](
const BackendOptions& backend) {
105 if (backend.GetBackendId().Get() ==
"Global")
114 bool importEnabled =
false;
115 bool exportEnabled =
false;
116 if (backendItr != optimizedOptions.end())
119 for (
size_t i = 0; i < backendItr->GetOptionCount(); i++)
122 if (option.
GetName() ==
"ImportEnabled")
126 if (option.
GetName() ==
"ExportEnabled")
138 auto message = fmt::format(
"The input memory source specified, '{0}',", networkProperties.
m_InputSource);
141 message.append(
" requires that memory import be enabled. However, " 142 "it was disabled when this network was optimized.");
146 message.append(
" requires that memory import be disabled. However, " 147 "it was enabled when this network was optimized.");
155 auto message = fmt::format(
"The output memory source specified, '{0}',", networkProperties.
m_OutputSource);
158 message.append(
" requires that memory export be enabled. However, " 159 "it was disabled when this network was optimized.");
163 message.append(
" requires that memory export be disabled. However, " 164 "it was enabled when this network was optimized.");
171 std::string& errorMessage,
173 arm::pipe::IProfilingService* profilingService)
175 std::unique_ptr<LoadedNetwork> loadedNetwork;
177 auto Fail = [&](
const std::exception&
error) -> std::unique_ptr<LoadedNetwork>
179 errorMessage = ToErrorMessage(
"An error occurred when preparing the network workloads: ", error);
182 return std::unique_ptr<LoadedNetwork>();
187 loadedNetwork.reset(
new LoadedNetwork(std::move(net), networkProperties, profilingService));
197 catch (
const std::runtime_error& error)
202 return loadedNetwork;
205 LoadedNetwork::LoadedNetwork(std::unique_ptr<IOptimizedNetwork> net,
207 arm::pipe::IProfilingService* profilingService) :
208 m_OptimizedNetwork(std::move(net)),
209 m_NetworkProperties(networkProperties),
210 m_TensorHandleFactoryRegistry(),
211 m_ProfilingService(profilingService)
215 const std::shared_ptr<IProfiler>& profiler = m_OptimizedNetwork->GetProfiler();
225 m_NetworkProperties);
232 bool useExternalMemoryManager =
false;
233 bool useInternalMemoryManager =
false;
234 Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
241 m_IsInputImported = std::vector<bool>(order.
GetNumInputs(),
false);
242 m_IsOutputImported = std::vector<bool>(order.
GetNumOutputs(),
false);
245 for (
auto&& layer : order)
247 auto const& backendId = layer->GetBackendId();
248 if (m_Backends.count(backendId) == 0)
251 auto it = m_Backends.emplace(std::make_pair(backendId, createBackend()));
260 std::string er = backend->
GetId();
261 er +=
" does not support AsyncExecution";
267 std::string er = backend->
GetId();
268 er +=
" does not support ExternallyManagedMemory\n";
269 er +=
"AsyncEnabled networks require all backends to support ExternallyManagedMemory";
272 m_SupportsExternallyManagedMemory[backend->
GetId()] =
true;
273 useExternalMemoryManager =
true;
277 m_SupportsExternallyManagedMemory[backend->
GetId()] =
false;
278 useInternalMemoryManager =
true;
285 m_TensorHandleFactoryRegistry,
286 m_OptimizedNetwork->pOptimizedNetworkImpl->GetModelOptions(),
288 static_cast<MemorySourceFlags>(m_NetworkProperties.m_OutputSource));
294 m_BackendMemoryMangers.back(), m_OptimizedNetwork->pOptimizedNetworkImpl->GetModelOptions());
296 m_WorkloadFactories[backendId ] = std::move(workloadFactory);
302 for (
auto&& layer : order)
304 auto& workloadFactory = GetWorkloadFactory(*layer);
305 bool supportsExternalManager = m_SupportsExternallyManagedMemory[layer->GetBackendId()];
307 switch (layer->GetType())
314 layer->CreateTensorHandles(m_TensorHandleFactoryRegistry,
316 !supportsExternalManager && !m_NetworkProperties.m_ImportEnabled);
321 layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory,
true);
328 if ((layer->GetNumOutputSlots() == 1) &&
329 (layer->GetOutputSlots()[0].GetNumConnections() == 1) &&
330 (layer->GetOutputSlots()[0].GetConnection(0)->GetOwningLayer().GetType() ==
LayerType::Output))
332 layer->CreateTensorHandles(m_TensorHandleFactoryRegistry,
334 !supportsExternalManager && !m_NetworkProperties.m_ExportEnabled);
338 layer->CreateTensorHandles(m_TensorHandleFactoryRegistry,
340 !supportsExternalManager);
347 ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
348 std::unique_ptr<TimelineUtilityMethods> timelineUtils =
349 TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
352 timelineUtils->CreateTypedEntity(networkGuid, LabelsAndEventClasses::NETWORK_GUID);
354 timelineUtils->RecordEvent(networkGuid, LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
356 int processID = arm::pipe::GetCurrentProcessId();
357 std::stringstream ss;
359 timelineUtils->MarkEntityWithLabel(networkGuid, ss.str(), LabelsAndEventClasses::PROCESS_ID_GUID);
362 std::vector<IWorkload*> ConstWorkloads;
367 for (
auto&& layer: order)
372 AddLayerStructure(timelineUtils, *layer, networkGuid);
377 switch (layer->GetType())
387 auto workload = layer->CreateWorkload(workloadFactory);
391 const char*
const layerName =
392 layer->GetNameStr().length() != 0 ? layer->GetName() :
"<Unnamed>";
394 fmt::format(
"No workload created for layer (name: '{0}' type: '{1}') (compute '{2}')",
395 layerName, static_cast<int>(layer->GetType()), layer->GetBackendId().Get()
402 AddWorkloadStructure(timelineUtils, workload, *layer);
407 if((networkProperties.
m_AsyncEnabled || useExternalMemoryManager) &&
410 m_ConstantTensorHandles[layer->GetGuid()] =
411 layer->GetOutputSlot(0).GetOutputHandler().GetData();
412 m_ConstantWorkloads[layer->GetGuid()] = std::move(workload);
416 m_WorkloadQueue.push_back(std::move(workload));
421 ConstWorkloads.push_back(m_WorkloadQueue.back().get());
425 layer->ReleaseConstantData();
433 if (!networkProperties.
m_AsyncEnabled && m_WorkloadQueue.size() != 0)
441 const auto bindingId = layer->GetBindingId();
443 bool supportsReplacement =
true;
445 for (
const auto inputSlot: layer->GetOutputSlot(0).GetConnections())
447 auto workloadIndex = std::distance(order.begin(), order.GetPosInGraph(inputSlot->GetOwningLayer()));
448 workloadIndex -= noOfInputs;
450 m_InputWorkloadSlotPairs[bindingId].emplace_back(WorkloadIndices{
453 auto workload = m_WorkloadQueue[m_InputWorkloadSlotPairs[bindingId].back().m_WorkloadIndex].get();
454 supportsReplacement &= workload->SupportsTensorHandleReplacement();
460 m_TensorHandleFactoryRegistry.GetMatchingImportFactoryId(factoryId);
462 ITensorHandleFactory *importFactory = m_TensorHandleFactoryRegistry.GetFactory(importFactoryId);
464 if (supportsReplacement && importFactory)
466 m_PreImportedInputHandles.emplace_back(
467 bindingId, importFactory->
CreateTensorHandle(layer->GetOutputSlot(0).GetTensorInfo(),
false));
471 m_PreImportedInputHandles.emplace_back(bindingId,
nullptr);
479 const auto bindingId = layer->GetBindingId();
481 const auto outputSlot = layer->GetInputSlot(0).GetConnectedOutputSlot();
482 auto& indices = m_OutputWorkloadSlotPairs[bindingId];
484 auto workloadIndex = std::distance(order.begin(), order.GetPosInGraph(outputSlot->GetOwningLayer()));
485 workloadIndex -= noOfInputs;
487 indices.m_OutputSlotIndices = WorkloadIndices{
numeric_cast<
unsigned int>(workloadIndex),
488 outputSlot->CalculateIndexOnOwner()};
490 bool supportsReplacement =
true;
491 auto outputWorkload = m_WorkloadQueue[indices.m_OutputSlotIndices.m_WorkloadIndex].get();
492 supportsReplacement &= outputWorkload->SupportsTensorHandleReplacement();
494 for (
auto &inputSlot: outputSlot->GetConnections())
498 auto inWorkloadIndex = std::distance(order.begin(),
499 order.GetPosInGraph(inputSlot->GetOwningLayer()));
500 inWorkloadIndex -= noOfInputs;
501 indices.m_InputSlotIndices.emplace_back(WorkloadIndices{
numeric_cast<
unsigned int>(inWorkloadIndex),
502 inputSlot->GetSlotIndex()});
503 auto inputWorkload = m_WorkloadQueue[indices.m_InputSlotIndices.back().m_WorkloadIndex].get();
504 supportsReplacement &= inputWorkload->SupportsTensorHandleReplacement();
511 m_TensorHandleFactoryRegistry.GetMatchingImportFactoryId(factoryId);
512 ITensorHandleFactory *importFactory = m_TensorHandleFactoryRegistry.GetFactory(importFactoryId);
514 if (supportsReplacement && importFactory)
516 m_PreImportedOutputHandles.emplace_back(
521 m_PreImportedOutputHandles.emplace_back(bindingId,
nullptr);
526 for (
auto&& workloadFactory : m_WorkloadFactories)
528 workloadFactory.second->AfterWorkloadsCreated();
534 timelineUtils->Commit();
537 if (useExternalMemoryManager)
541 CreateMemoryProfileAsync();
545 CreateMemoryProfile();
549 for (
auto& backendMemoryProfile : m_MemBlockMap)
551 const BackendId& backendId = backendMemoryProfile.first;
552 if (backendStrategyMap.find(backendId) != backendStrategyMap.end())
554 m_MemBinMap[backendId] = backendStrategyMap[backendId]->Optimize(backendMemoryProfile.second);
558 m_MemBinMap[backendId] = m_ConstantStrategy->Optimize(backendMemoryProfile.second);
564 m_ExternalMemoryManager = CreateExternalMemoryManger(m_TensorMemory);
567 std::sort(m_TensorMemory.begin(), m_TensorMemory.end(),
568 [](
const std::pair<std::shared_ptr<TensorMemory>,
MemorySource>& lhs,
569 const std::pair<std::shared_ptr<TensorMemory>,
MemorySource>& rhs)
571 return lhs.first->m_OutputSlotId < rhs.first->m_OutputSlotId;
580 if (useInternalMemoryManager)
583 m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().AllocateDynamicBuffers();
586 for (
auto &workload : m_WorkloadQueue)
588 workload->PostAllocationConfigure();
592 if (useExternalMemoryManager)
596 AllocateAndExecuteConstantWorkloads();
600 AllocateAndExecuteConstantWorkloadsAsync();
606 for (
auto workload: ConstWorkloads)
613 void LoadedNetwork::AllocateAndExecuteConstantWorkloads()
616 for (
auto& pair : m_ConstantWorkloads)
618 auto tensorHandle = m_ConstantTensorHandles[pair.first];
619 tensorHandle->Allocate();
620 pair.second->Execute();
624 void LoadedNetwork::AllocateAndExecuteConstantWorkloadsAsync()
627 Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
628 for (
auto&& layer : order)
632 const auto& outSlot = layer->GetOutputSlots()[0];
633 const auto factoryId = outSlot.GetTensorHandleFactoryId();
635 auto& workloadFactory = GetWorkloadFactory(*layer);
637 layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory);
638 ITensorHandle* tensorHandle = outSlot.GetOutputHandler().GetData();
640 m_ConstantTensorHandles[layer->GetGuid()] = tensorHandle;
643 auto& backend = m_Backends.at(layer->GetBackendId());
646 memDesc.
m_Outputs.push_back(tensorHandle);
648 ExecutionData executionData = backend->CreateExecutionData(memDesc);
649 m_ConstantWorkloads[layer->GetGuid()]->ExecuteAsync(executionData);
658 ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
660 std::unique_ptr<TimelineUtilityMethods> timelineUtils =
661 TimelineUtilityMethods::GetTimelineUtils(profilingService);
663 timelineUtils->CreateTypedEntity(networkGuid, LabelsAndEventClasses::NETWORK_GUID);
665 for (
auto&& layer : order)
668 AddLayerStructure(timelineUtils, *layer, networkGuid);
669 switch (layer->GetType())
679 for (
auto& workload : m_WorkloadQueue)
682 AddWorkloadStructure(timelineUtils, workload, *layer);
689 timelineUtils->Commit();
694 return m_OptimizedNetwork->GetGuid();
699 for (
auto&& inputLayer : m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetInputLayers())
701 ARMNN_ASSERT_MSG(inputLayer->GetNumOutputSlots() == 1,
"Input layer should have exactly 1 output slot");
702 if (inputLayer->GetBindingId() == layerId)
704 return inputLayer->GetOutputSlot(0).GetTensorInfo();
713 for (
auto&& outputLayer : m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetOutputLayers())
715 ARMNN_ASSERT_MSG(outputLayer->GetNumInputSlots() == 1,
"Output layer should have exactly 1 input slot");
716 ARMNN_ASSERT_MSG(outputLayer->GetInputSlot(0).GetConnection(),
"Input slot on Output layer must be connected");
717 if (outputLayer->GetBindingId() == layerId)
719 return outputLayer->GetInputSlot(0).GetConnection()->GetTensorInfo();
730 auto it = m_WorkloadFactories.find(layer.
GetBackendId());
731 if (it == m_WorkloadFactories.end())
733 throw RuntimeException(fmt::format(
"No workload factory for {0} to be used for layer: {1}",
739 workloadFactory = it->second.get();
743 return *workloadFactory;
753 : m_TensorHandle(std::move(handle))
759 ITensorHandle* GetTensorHandle()
const {
return m_TensorHandle.get(); }
764 std::unique_ptr<ITensorHandle> m_TensorHandle;
770 const std::vector<TensorPin>& pins,
771 char const* bindingPointDesc)
773 auto it = std::find_if(pins.begin(), pins.end(),
774 [id](
const TensorPin& pin)
776 return pin.GetBindingId() == id;
779 if (it != pins.end())
795 m_InputTensorPins.reserve(inputTensors.size());
796 m_OutputTensorPins.reserve(outputTensors.size());
798 for (
auto inputTensorPair : inputTensors)
800 auto inputTensor = inputTensorPair.second;
802 std::unique_ptr<ITensorHandle> tensorHandle =
803 std::make_unique<ConstPassthroughTensorHandle>(inputTensor.GetInfo(),inputTensor.GetMemoryArea());
806 m_InputTensorPins.emplace_back(std::move(tensorHandle), inputTensor.GetInfo(), layerId);
809 for (
auto outputTensorPair : outputTensors)
811 auto outputTensor = outputTensorPair.second;
813 std::unique_ptr<ITensorHandle> tensorHandle =
814 std::make_unique<PassthroughTensorHandle>(outputTensor.GetInfo(), outputTensor.GetMemoryArea());
817 m_OutputTensorPins.emplace_back(std::move(tensorHandle), outputTensor.GetInfo(), layerId);
823 return GetTensorPin(
id, m_InputTensorPins,
"input");
828 return GetTensorPin(
id, m_OutputTensorPins,
"output");
833 std::vector<TensorPin> m_InputTensorPins;
834 std::vector<TensorPin> m_OutputTensorPins;
841 std::vector<ImportedInputId> preImportedInputIds,
842 std::vector<ImportedOutputId> preImportedOutputIds)
844 const Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
849 ARMNN_LOG(
warning) <<
"IRuntime::EnqueueWorkload()::Less than two nodes in graph";
854 WorkloadData workloadData(inputTensors, outputTensors);
858 if (graph.
GetNumInputs() != (inputTensors.size() + preImportedInputIds.size()))
866 m_InputQueue.clear();
869 unsigned int inputIndex = 0;
870 unsigned int importedInputIdIndex = 0;
871 std::sort(preImportedInputIds.begin(), preImportedInputIds.end());
874 if (importedInputIdIndex < preImportedInputIds.size() &&
875 inputIndex == preImportedInputIds[importedInputIdIndex])
878 if (!m_IsInputImported[inputIndex])
880 auto outputTensorHandle = m_PreImportedInputHandles[inputIndex].m_TensorHandle.get();
882 for (
const auto& workloadInfo: m_InputWorkloadSlotPairs[inputLayer->GetBindingId()])
884 auto workload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
885 workload->ReplaceInputTensorHandle(outputTensorHandle, workloadInfo.m_SlotIndex);
887 m_IsInputImported[inputIndex] =
true;
889 importedInputIdIndex++;
893 if (m_IsInputImported[inputIndex])
897 for (
const auto& workloadInfo: m_InputWorkloadSlotPairs[inputLayer->GetBindingId()])
899 auto workload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
900 workload->ReplaceInputTensorHandle(handler.
GetData(), workloadInfo.m_SlotIndex);
903 m_IsInputImported[inputIndex] =
false;
907 const TensorPin& pin = workloadData.GetInputTensorPin(inputLayer->GetBindingId());
908 EnqueueInput(*inputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
916 m_OutputQueue.clear();
924 unsigned int outputIndex = 0;
925 unsigned int importedOutputIdIndex = 0;
926 std::sort(preImportedOutputIds.begin(), preImportedOutputIds.end());
929 if (importedOutputIdIndex < preImportedOutputIds.size() &&
930 outputIndex == preImportedOutputIds[importedOutputIdIndex])
933 ITensorHandle* inputTensorHandle = m_PreImportedOutputHandles[outputIndex].m_TensorHandle.get();
935 if (!m_IsOutputImported[outputIndex])
937 const auto bindingId = outputLayer->GetBindingId();
938 const auto& indices = m_OutputWorkloadSlotPairs[bindingId];
940 auto outputWorkload = m_WorkloadQueue[indices.m_OutputSlotIndices.m_WorkloadIndex].get();
942 outputWorkload->ReplaceOutputTensorHandle(inputTensorHandle,
943 indices.m_OutputSlotIndices.m_SlotIndex);
945 for (
const auto& workloadInfo: indices.m_InputSlotIndices)
947 auto inputWorkload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
948 inputWorkload->ReplaceInputTensorHandle(inputTensorHandle, workloadInfo.m_SlotIndex);
950 m_IsOutputImported[outputIndex] =
true;
953 ARMNN_ASSERT_MSG(inputTensorHandle !=
nullptr,
"Data should have been allocated.");
955 syncDesc.
m_Inputs.push_back(inputTensorHandle);
958 outputLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo());
959 auto syncWorkload = std::make_unique<SyncMemGenericWorkload>(syncDesc,
info);
961 m_OutputQueue.push_back(move(syncWorkload));
962 importedOutputIdIndex++;
966 if (m_IsOutputImported[outputIndex])
968 const auto bindingId = outputLayer->GetBindingId();
969 const auto& indices = m_OutputWorkloadSlotPairs[bindingId];
971 auto outputWorkload = m_WorkloadQueue[indices.m_OutputSlotIndices.m_WorkloadIndex].get();
973 outputLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetOutputHandler();
975 outputWorkload->ReplaceOutputTensorHandle(
976 outputHandler.
GetData(), indices.m_OutputSlotIndices.m_SlotIndex);
978 for (
const auto& workloadInfo: indices.m_InputSlotIndices)
980 auto inputWorkload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
981 inputWorkload->ReplaceInputTensorHandle(outputHandler.
GetData(), workloadInfo.m_SlotIndex);
983 m_IsOutputImported[outputIndex] =
false;
986 const TensorPin& pin = workloadData.GetOutputTensorPin(outputLayer->GetBindingId());
988 EnqueueOutput(*outputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
994 std::unique_ptr<TimelineUtilityMethods> timelineUtils =
995 TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
996 ProfilingGuid inferenceGuid = m_ProfilingService->GetNextGuid();
1000 ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
1001 timelineUtils->CreateTypedEntity(inferenceGuid, LabelsAndEventClasses::INFERENCE_GUID);
1002 timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
1005 LabelsAndEventClasses::EXECUTION_OF_GUID);
1006 timelineUtils->RecordEvent(inferenceGuid, LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
1009 bool executionSucceeded =
true;
1012 if (m_ProfilingService->IsProfilingEnabled())
1014 m_ProfilingService->IncrementCounterValue(INFERENCES_RUN);
1018 executionSucceeded = Execute(timelineUtils, inferenceGuid);
1024 timelineUtils->RecordEvent(inferenceGuid, LabelsAndEventClasses::ARMNN_PROFILING_EOL_EVENT_CLASS);
1025 timelineUtils->Commit();
1038 if (tensorHandle ==
nullptr)
1046 inputQueueDescriptor.
m_Inputs.push_back(tensorHandle);
1051 const TensorInfo& outputTensorInfo = handler.GetTensorInfo();
1054 "Data should have been allocated.");
1055 inputQueueDescriptor.
m_Outputs.push_back(outputTensorHandle);
1059 bool needMemCopy =
true;
1060 if (m_NetworkProperties.m_ImportEnabled)
1062 if(
CheckFlag(importFlags, m_NetworkProperties.m_InputSource))
1064 needMemCopy =
false;
1066 void* mem = tensorHandle->
Map(
false);
1067 if (outputTensorHandle->
Import(mem, m_NetworkProperties.m_InputSource))
1069 tensorHandle->
Unmap();
1072 tensorHandle->
Unmap();
1079 std::unique_ptr<IWorkload> inputWorkload = std::make_unique<CopyMemGenericWorkload>(inputQueueDescriptor,
info);
1083 std::unique_ptr<TimelineUtilityMethods> timelineUtils =
1084 TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
1088 AddWorkloadStructure(timelineUtils, inputWorkload, layer);
1089 timelineUtils->Commit();
1092 m_InputQueue.push_back(move(inputWorkload));
1103 if (tensorHandle ==
nullptr)
1111 outputQueueDescriptor.
m_Outputs.push_back(tensorHandle);
1119 const TensorInfo& inputTensorInfo = outputHandler.GetTensorInfo();
1121 ARMNN_ASSERT_MSG(inputTensorHandle !=
nullptr,
"Data should have been allocated.");
1130 bool needMemCopy =
true;
1131 if (m_NetworkProperties.m_ExportEnabled &&
1132 (layer.
GetInputSlots()[0].GetConnectedOutputSlot()->GetNumConnections() == 1))
1137 if (
CheckFlag(importFlags, m_NetworkProperties.m_OutputSource))
1139 needMemCopy =
false;
1140 void *mem = tensorHandle->
Map(
false);
1141 bool importOk = inputTensorHandle->
Import(mem, m_NetworkProperties.m_OutputSource);
1142 tensorHandle->
Unmap();
1148 syncDesc.
m_Inputs.push_back(inputTensorHandle);
1150 auto syncWorkload = std::make_unique<SyncMemGenericWorkload>(syncDesc,
info);
1152 m_OutputQueue.push_back(move(syncWorkload));
1164 outputQueueDescriptor.
m_Inputs.push_back(inputTensorHandle);
1167 std::unique_ptr<IWorkload> outputWorkload =
1168 std::make_unique<CopyMemGenericWorkload>(outputQueueDescriptor,
info);
1171 std::unique_ptr<TimelineUtilityMethods> timelineUtils =
1172 TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
1176 AddWorkloadStructure(timelineUtils, outputWorkload, layer);
1177 timelineUtils->Commit();
1180 m_OutputQueue.push_back(move(outputWorkload));
1184 void LoadedNetwork::AllocateWorkingMemory(
1185 #
if !defined(ARMNN_DISABLE_THREADS)
1186 std::lock_guard<std::mutex>& lock
1192 #if !defined(ARMNN_DISABLE_THREADS) 1196 if (m_IsWorkingMemAllocated)
1201 if (m_ExternalMemoryManager)
1203 m_ExternalMemoryManager->Allocate();
1205 for (
unsigned int i = 0; i < m_TensorMemory.size(); ++i)
1207 m_Tensorhandles[i]->Import(m_TensorMemory[i].first->m_Data, m_TensorMemory[i].second);
1211 for (
auto&& memoryManager : m_BackendMemoryMangers)
1215 memoryManager->Acquire();
1218 m_TensorHandleFactoryRegistry.AquireMemory();
1219 m_IsWorkingMemAllocated =
true;
1224 #if !defined(ARMNN_DISABLE_THREADS) 1225 std::lock_guard<std::mutex> lockGuard(m_WorkingMemMutex);
1228 if (!m_IsWorkingMemAllocated)
1233 if (m_ExternalMemoryManager)
1235 m_ExternalMemoryManager->Deallocate();
1239 for (
auto&& memoryManager : m_BackendMemoryMangers)
1243 memoryManager->Release();
1246 m_TensorHandleFactoryRegistry.ReleaseMemory();
1247 m_IsWorkingMemAllocated =
false;
1251 ProfilingGuid inferenceGuid)
1253 bool success =
true;
1255 auto Fail = [&](
const std::exception&
error)
1257 ARMNN_LOG(error) <<
"An error occurred attempting to execute a workload: " << error.what();
1263 #if !defined(ARMNN_DISABLE_THREADS) 1264 std::lock_guard<std::mutex> lockGuard(m_WorkingMemMutex);
1265 AllocateWorkingMemory(lockGuard);
1267 AllocateWorkingMemory();
1270 ProfilingDynamicGuid workloadInferenceID(0);
1271 auto ExecuteQueue = [&timelineUtils, &workloadInferenceID, &inferenceGuid](
WorkloadQueue& queue)
1273 for (
auto& workload : queue)
1277 workloadInferenceID = timelineUtils->RecordWorkloadInferenceAndStartOfLifeEvent(workload->GetGuid(),
1280 workload->Execute();
1283 timelineUtils->RecordEndOfLifeEvent(workloadInferenceID);
1288 ExecuteQueue(m_InputQueue);
1289 ExecuteQueue(m_WorkloadQueue);
1290 ExecuteQueue(m_OutputQueue);
1296 catch (
const std::runtime_error& error)
1306 if (m_NetworkProperties.m_ImportEnabled)
1309 if (
CheckFlag(importFlags, m_NetworkProperties.m_InputSource) )
1311 std::unique_ptr<ITensorHandle> tensorHandle =
1312 std::make_unique<ConstPassthroughTensorHandle>(inputTensor.
GetInfo(),
1314 void* mem = tensorHandle->
Map(
false);
1316 if (inputTensorHandle->
Import(mem, m_NetworkProperties.m_InputSource))
1318 tensorHandle->Unmap();
1321 tensorHandle->Unmap();
1331 std::unique_ptr<ITensorHandle> tensorHandle =
1332 std::make_unique<ConstPassthroughTensorHandle>(inputTensor.
GetInfo(), inputTensor.
GetMemoryArea());
1334 auto copyFunc = [](
void* dst,
const void* src,
size_t size)
1336 memcpy(dst, src, size);
1349 void LoadedNetwork::ImportOutputTensor(
const Tensor& outputTensor,
ITensorHandle* outputTensorHandle)
1351 ARMNN_ASSERT_MSG(outputTensorHandle !=
nullptr,
"Data should have been allocated.");
1353 if (
CheckFlag(importFlags, m_NetworkProperties.m_OutputSource))
1355 std::unique_ptr<ITensorHandle> tensorHandle =
1356 std::make_unique<PassthroughTensorHandle>(outputTensor.
GetInfo(),
1359 void* mem = tensorHandle->
Map(
false);
1360 bool importOk = outputTensorHandle->
Import(mem, m_NetworkProperties.m_OutputSource);
1361 tensorHandle->Unmap();
1370 throw MemoryExportException(
"ImportOutputTensor: Memory Export failed, attempting to export Input Layer");
1377 auto copyFunc = [](
void* dst,
const void* src,
size_t size)
1379 memcpy(dst, src, size);
1382 std::unique_ptr<ITensorHandle> tensorHandle =
1383 std::make_unique<PassthroughTensorHandle>(outputTensor.
GetInfo(),
1392 for (
auto inputTensorPair : inputTensors)
1397 return inputTensorPair.second;
1405 for (
auto outputTensorPair : outputTensors)
1410 return outputTensorPair.second;
1419 if (!m_NetworkProperties.m_AsyncEnabled)
1424 throw MemoryImportException(
"ImportInputs: Memory Import failed, NetworkProperties.m_ImportEnabled");
1427 if (inputTensors.size() > m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetNumInputs())
1429 throw MemoryImportException(
"ImportInputs: The number of tensors provided exceeds the number of inputs.");
1432 std::vector<ImportedInputId> importedInputs;
1434 unsigned int inputIndex = 0;
1437 auto outputTensorHandle = m_PreImportedInputHandles[inputIndex].m_TensorHandle.get();
1439 if (!outputTensorHandle)
1445 auto layerBindingId = inputLayer->GetBindingId();
1446 auto it = std::find_if(inputTensors.begin(), inputTensors.end(), [=](
const auto& inputTensor)
1448 return inputTensor.first == layerBindingId;
1451 if (it == inputTensors.end())
1457 const auto& inputTensor = *it;
1458 std::unique_ptr<ITensorHandle> passThroughTensorHandle =
1459 std::make_unique<ConstPassthroughTensorHandle>(inputTensor.second.
GetInfo(),
1464 if (outputTensorHandle->
CanBeImported(passThroughTensorHandle->Map(), forceImportMemorySource)
1465 && (outputTensorHandle->
Import(passThroughTensorHandle->Map(), forceImportMemorySource)))
1467 importedInputs.push_back(inputIndex);
1469 passThroughTensorHandle->Unmap();
1473 ARMNN_LOG(error) <<
"An error occurred attempting to import input_" 1474 << inputIndex <<
" : " << exception.
what();
1475 passThroughTensorHandle->Unmap();
1480 return importedInputs;
1485 std::vector<ImportedInputId> importedInputs;
1488 for (
auto inputTensor : inputTensors)
1490 auto layerBindingId = inputTensor.first;
1499 "ImportInputs: Memory Import failed, unknown LayerBindingId: {}", layerBindingId));
1502 const Layer* layer = *it;
1511 std::string er = backend->GetId();
1512 er +=
" does not have PreImportIOTensors capability";
1524 ImportedTensorHandlePin importedTensorHandlePin{layerBindingId,
1527 ITensorHandle* tensorHandle = importedTensorHandlePin.m_TensorHandle.get();
1532 fmt::format(
"ImportInputs: Memory Import failed, backend: " 1533 "{} does not support importing from source {}" 1534 , factoryId, m_NetworkProperties.m_InputSource));
1537 std::unique_ptr<ITensorHandle> passThroughTensorHandle =
1538 std::make_unique<ConstPassthroughTensorHandle>(inputTensor.second.
GetInfo(),
1541 if (tensorHandle->
Import(passThroughTensorHandle->Map(), forceImportMemorySource))
1543 importedInputs.push_back(m_CurImportedInputId++);
1544 passThroughTensorHandle->Unmap();
1548 passThroughTensorHandle->Unmap();
1552 m_PreImportedInputHandles.push_back(std::move(importedTensorHandlePin));
1554 return importedInputs;
1561 if (!m_NetworkProperties.m_AsyncEnabled)
1566 throw MemoryImportException(
"ImportOutputs: Memory Import failed, NetworkProperties.m_ImportEnabled");
1569 if (outputTensors.size() != m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetNumOutputs())
1573 std::vector<ImportedOutputId> importedOutputs;
1576 unsigned int outputIndex = 0;
1579 auto inputTensorHandle = m_PreImportedOutputHandles[outputIndex].m_TensorHandle.get();
1580 if (!inputTensorHandle)
1586 auto layerBindingId = outputLayer->GetBindingId();
1587 auto it = std::find_if(outputTensors.begin(), outputTensors.end(), [=] (
const auto& outputTensor)
1589 return outputTensor.first == layerBindingId;
1592 if (it == outputTensors.end())
1598 const auto outputTensor = *it;
1603 && inputTensorHandle->
Import(outputTensor.second.
GetMemoryArea(), forceImportMemorySource))
1605 importedOutputs.push_back(outputIndex);
1610 ARMNN_LOG(error) <<
"An error occurred attempting to import output_" 1611 << outputIndex <<
" : " << exception.
what();
1615 return importedOutputs;
1618 std::vector<ImportedOutputId> importedOutputs;
1621 for (
const auto& outputTensor : outputTensors)
1623 auto layerBindingId = outputTensor.first;
1631 throw MemoryImportException(fmt::format(
"ImportOutputs: Memory Import failed, unknown LayerBindingId: {}",
1635 const Layer* layer = *it;
1644 std::string er = backend->GetId();
1645 er +=
" does not have PreImportIOTensors capability";
1656 ImportedTensorHandlePin importedTensorHandlePin{layerBindingId,
1659 ITensorHandle* tensorHandle = importedTensorHandlePin.m_TensorHandle.get();
1664 "{} does not support importing from source {}" 1665 , factoryId, forceImportMemorySource));
1670 importedOutputs.push_back(m_CurImportedOutputId++);
1677 m_PreImportedOutputHandles.push_back(std::move(importedTensorHandlePin));
1680 return importedOutputs;
1685 for (
auto id : inputIds)
1687 if (
id > m_PreImportedInputHandles.size())
1692 auto& importedTensorHandle = m_PreImportedInputHandles[id].m_TensorHandle;
1693 if (!importedTensorHandle)
1696 fmt::format(
"ClearImportedInputs::ImportedInput with id: {} has already been deleted",
id));
1699 importedTensorHandle->Unimport();
1700 importedTensorHandle = {};
1706 for (
auto id : outputIds)
1708 if (
id > m_PreImportedOutputHandles.size())
1713 auto& importedTensorHandle = m_PreImportedOutputHandles[id].m_TensorHandle;
1714 if (!importedTensorHandle)
1717 fmt::format(
"ClearImportedOutputs::ImportedOutput with id: {} has already been deleted",
id));
1720 importedTensorHandle->Unimport();
1721 importedTensorHandle = {};
1728 std::vector<ImportedInputId> preImportedInputs,
1729 std::vector<ImportedOutputId> preImportedOutputs)
1731 const Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
1733 if (inputTensors.size() + preImportedInputs.size() != graph.
GetNumInputs())
1735 if (preImportedInputs.empty())
1742 "Number of inputs + preImportedInputs provided does not match network.");
1746 if (outputTensors.size() + preImportedOutputs.size() != graph.
GetNumOutputs())
1748 if (preImportedOutputs.empty())
1751 "Number of outputs provided does not match network.");
1756 "Number of outputs + preImportedOutputs provided does not match network.");
1763 unsigned int index = 0;
1764 for (
auto pair : inputTensors)
1766 bindingIds[index++] = pair.first;
1770 bindingIds[index++] = ValidateImportedInputID(
id);
1772 for (
auto pair : outputTensors)
1774 bindingIds[index++] = pair.first;
1778 bindingIds[index++] = ValidateImportedOutputID(
id);
1783 auto resetMemHandle = [&]()
1787 const LayerBindingId layerBindingId = m_PreImportedInputHandles[id].m_LayerBindingId;
1789 auto inputHandle = workingMemHandle.
GetInputHandle(layerBindingId);
1791 for (
auto it : inputConnections)
1799 const LayerBindingId layerBindingId = m_PreImportedOutputHandles[id].m_LayerBindingId;
1804 for (
auto it : outputConnections)
1811 std::unique_ptr<TimelineUtilityMethods> timelineUtils =
1812 TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
1813 ProfilingGuid inferenceGuid = m_ProfilingService->GetNextGuid();
1817 ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
1818 timelineUtils->CreateTypedEntity(inferenceGuid,LabelsAndEventClasses::INFERENCE_GUID);
1819 timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
1822 LabelsAndEventClasses::EXECUTION_OF_GUID);
1823 timelineUtils->RecordEvent(inferenceGuid,LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
1826 bool executionSucceeded =
true;
1831 timelineUtils->RecordEvent(inferenceGuid,LabelsAndEventClasses::ARMNN_PROFILING_EOL_EVENT_CLASS);
1832 timelineUtils->Commit();
1842 for (
auto pair : inputTensors)
1844 EnqueueInput(pair.second, workingMemHandle.
GetInputHandle(pair.first));
1850 const ImportedTensorHandlePin& importedInputPin = m_PreImportedInputHandles[id];
1851 const LayerBindingId layerBindingId = m_PreImportedInputHandles[id].m_LayerBindingId;
1852 const auto& preimportedHandle = importedInputPin.m_TensorHandle;
1855 for (
auto it : inputConnections)
1857 *it = preimportedHandle.get();
1863 if (m_NetworkProperties.m_ExportEnabled)
1865 for (
auto pair: outputTensors)
1867 ImportOutputTensor(pair.second, workingMemHandle.
GetOutputHandle(pair.first));
1873 const ImportedTensorHandlePin& importedOutputPin = m_PreImportedOutputHandles[id];
1874 const LayerBindingId layerBindingId = m_PreImportedOutputHandles[id].m_LayerBindingId;
1875 const auto& preimportedHandle = importedOutputPin.m_TensorHandle;
1878 for (
auto it : outputConnections)
1880 *it = preimportedHandle.get();
1885 auto Fail = [&](
const std::exception&
error)
1887 ARMNN_LOG(error) <<
"An error occurred attempting to execute a workload: " << error.what();
1888 executionSucceeded =
false;
1890 ProfilingDynamicGuid workloadInferenceID(0);
1894 for (
unsigned int i = 0; i < m_WorkloadQueue.size(); ++i)
1896 auto& workload = m_WorkloadQueue[i];
1899 workloadInferenceID = timelineUtils->RecordWorkloadInferenceAndStartOfLifeEvent(workload->GetGuid(),
1907 timelineUtils->RecordEndOfLifeEvent(workloadInferenceID);
1916 catch (
const std::runtime_error& error)
1927 if (!m_NetworkProperties.m_ExportEnabled)
1929 for (
auto pair: outputTensors)
1949 Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
1952 std::vector<std::unique_ptr<ITensorHandle>> managedTensorHandles;
1954 std::vector<std::unique_ptr<ITensorHandle>> unmanagedTensorHandles;
1956 std::vector<WorkingMemDescriptor> workingMemDescriptors;
1957 std::vector<std::pair<BackendId, ExecutionData>> executionDataVec;
1959 auto GetTensorHandle = [&](
Layer* layer,
const OutputSlot& outputSlot)
1962 const TensorInfo& tensorInfo = outputSlot.GetTensorInfo();
1968 return m_WorkloadFactories.at(
id)->CreateTensorHandle(tensorInfo,
false);
1983 bool m_IsInputLayerHandle =
false;
1984 bool m_IsOutputLayerHandle =
false;
1990 std::unordered_map<const OutputSlot*, HandleInfo> outputToHandleInfoMap;
1992 unsigned int layerIndex = 0;
1993 for (
auto&& layer : order)
2003 bool isMemoryManaged =
true;
2004 bool isInputLayer =
false;
2005 bool isOutputLayer =
false;
2006 bool isConnectedToOutputLayer =
false;
2012 isInputLayer =
true;
2013 isMemoryManaged = !m_NetworkProperties.m_ImportEnabled;
2017 isOutputLayer =
true;
2020 unsigned int slotIndex = 0;
2025 for (
unsigned int i = 0; i < slot.GetNumConnections(); ++i)
2029 if (!isConnectedToOutputLayer)
2031 isConnectedToOutputLayer =
true;
2033 isMemoryManaged = !m_NetworkProperties.m_ExportEnabled;
2039 fmt::format(
"Layer name: '{0}' guid: '{1}' has two or more OutputLayers connected to it. " 2040 "This will prevent importing on the connected OutputLayers.",
2042 isMemoryManaged =
true;
2048 if (isMemoryManaged)
2050 managedTensorHandles.emplace_back(GetTensorHandle(layer, slot));
2051 tensorHandle = managedTensorHandles.back().get();
2055 unmanagedTensorHandles.emplace_back(GetTensorHandle(layer, slot));
2056 tensorHandle = unmanagedTensorHandles.back().get();
2059 workingMemDescriptor.
m_Outputs.push_back(tensorHandle);
2061 HandleInfo& handleInfo = outputToHandleInfoMap[&slot];
2062 handleInfo.m_TensorHandle = tensorHandle;
2065 if (isConnectedToOutputLayer)
2067 handleInfo.m_IsOutputLayerHandle =
true;
2068 handleInfo.m_OutputMemDescriptorCoords.m_OutputSlotCoords = {layerIndex, slotIndex};
2073 handleInfo.m_IsInputLayerHandle =
true;
2075 handleInfo.m_InputMemDescriptorCoords.m_LayerBindingId = bindingId;
2086 auto outputSlot = slot.GetConnectedOutputSlot();
2087 auto key = outputSlot->GetOwningLayer().GetGuid();
2090 auto found = m_ConstantTensorHandles.find(key);
2091 if (found != m_ConstantTensorHandles.end())
2094 workingMemDescriptor.
m_Inputs.push_back(tensorHandle);
2102 HandleInfo& handleInfo = outputToHandleInfoMap[outputSlot];
2103 handleInfo.m_TensorHandle = tensorHandle;
2104 handleInfo.m_IsOutputLayerHandle =
true;
2105 handleInfo.m_OutputMemDescriptorCoords.m_LayerBindingIds.push_back(bindingId);
2106 handleInfo.m_OutputMemDescriptorCoords.m_InputSlotCoords.push_back({layerIndex, 0});
2111 HandleInfo& handleInfo = outputToHandleInfoMap.at(outputSlot);
2113 ITensorHandle* inputTensorHandle = handleInfo.m_TensorHandle;
2114 workingMemDescriptor.
m_Inputs.push_back(inputTensorHandle);
2120 handleInfo.m_OutputMemDescriptorCoords.m_LayerBindingIds.push_back(bindingId);
2121 handleInfo.m_OutputMemDescriptorCoords.m_InputSlotCoords.push_back({layerIndex, 0});
2125 else if (handleInfo.m_IsOutputLayerHandle)
2127 handleInfo.m_OutputMemDescriptorCoords.m_InputSlotCoords.push_back({layerIndex, slot.GetSlotIndex()});
2132 if (handleInfo.m_IsInputLayerHandle)
2134 std::pair<LayerGuid, unsigned int> connectionLocation{layerIndex, slot.GetSlotIndex()};
2135 handleInfo.m_InputMemDescriptorCoords.m_InputSlotCoords.emplace_back(connectionLocation);
2145 std::pair<BackendId, ExecutionData> dataPair;
2148 executionDataVec.push_back(dataPair);
2149 workingMemDescriptors.push_back(workingMemDescriptor);
2155 std::vector<std::pair<std::shared_ptr<TensorMemory>,
MemorySource>> tensorMemory;
2157 auto externalMemoryManager = CreateExternalMemoryManger(tensorMemory);
2160 std::sort(tensorMemory.begin(), tensorMemory.end(),
2161 [](
const std::pair<std::shared_ptr<TensorMemory>,
MemorySource>& lhs,
2162 const std::pair<std::shared_ptr<TensorMemory>,
MemorySource>& rhs)
2164 return lhs.first->m_OutputSlotId < rhs.first->m_OutputSlotId;
2167 std::vector<WorkingMemHandle::InputMemDescriptorCoords> inputConnectionsInfo;
2168 std::vector<WorkingMemHandle::OutputMemDescriptorCoords> outputConnectionsInfo;
2170 for (
const auto& handleInfo: outputToHandleInfoMap)
2172 if (handleInfo.second.m_IsOutputLayerHandle)
2174 outputConnectionsInfo.emplace_back(handleInfo.second.m_OutputMemDescriptorCoords);
2177 if (handleInfo.second.m_IsInputLayerHandle)
2179 inputConnectionsInfo.emplace_back(handleInfo.second.m_InputMemDescriptorCoords);
2183 return std::make_unique<WorkingMemHandle>(networkId,
2184 inputConnectionsInfo,
2185 outputConnectionsInfo,
2186 workingMemDescriptors,
2187 std::move(externalMemoryManager),
2188 std::move(tensorMemory),
2189 std::move(managedTensorHandles),
2190 std::move(unmanagedTensorHandles),
2197 for (
auto&& workloadPtr: m_WorkloadQueue)
2199 workloadPtr.get()->RegisterDebugCallback(func);
2204 void LoadedNetwork::CreateMemoryProfileAsync()
2208 unsigned int m_StartOfLife;
2209 unsigned int m_Lifetime;
2212 unsigned int m_Index;
2217 auto align = [](
size_t numToAlign)
2219 const size_t alignment =
sizeof(float);
2220 return ((numToAlign + alignment - 1) / alignment) * alignment;
2223 std::unordered_map<const OutputSlot*, PartialBlock> memBlockTrackerMap;
2228 unsigned int timestep = 0;
2229 unsigned int outputIndex = 0;
2232 for (
auto&& layer : order)
2256 if (!m_SupportsExternallyManagedMemory[backendId])
2261 PartialBlock partialBlock;
2263 partialBlock.m_StartOfLife = timestep;
2265 size_t alignedSize = align(outputSlot.GetOutputHandler().GetTensorInfo().GetNumBytes());
2266 partialBlock.m_MemSize = alignedSize;
2267 partialBlock.m_Index = outputIndex++;
2268 partialBlock.m_Lifetime = outputSlot.GetNumConnections();
2269 partialBlock.m_BackendId = backendId;
2271 if (partialBlock.m_Lifetime == 0)
2273 m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
2274 partialBlock.m_StartOfLife,
2275 partialBlock.m_MemSize,
2277 partialBlock.m_Index);
2281 memBlockTrackerMap[&outputSlot] = partialBlock;
2287 const Layer& connectedInputLayer = inputSlot.GetConnectedOutputSlot()->GetOwningLayer();
2299 auto outputSlot = inputSlot.GetConnectedOutputSlot();
2301 PartialBlock& partialBlock = memBlockTrackerMap.at(outputSlot);
2303 auto& lifetime = partialBlock.m_Lifetime;
2308 m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
2310 partialBlock.m_MemSize,
2312 partialBlock.m_Index);
2319 void LoadedNetwork::CreateMemoryProfile()
2323 auto TraceSubTensorHandleAncestry = [](
ITensorHandle*
const subTensorHandle)
2326 while (ancestor && ancestor->
GetParent())
2335 unsigned int m_StartOfLife;
2336 unsigned int m_Lifetime;
2339 unsigned int m_Index;
2344 auto align = [](
size_t numToAlign)
2346 const size_t alignment =
sizeof(float);
2347 return ((numToAlign + alignment - 1) / alignment) * alignment;
2350 std::unordered_map<ITensorHandle*, PartialBlock> memBlockTrackerMap;
2355 unsigned int timestep = 0;
2356 unsigned int outputIndex = 0;
2359 for (
auto&& layer : order)
2383 if (!m_SupportsExternallyManagedMemory[backendId])
2388 ITensorHandle* tensorHandle = outputSlot.GetOutputHandler().GetData();
2389 tensorHandle = TraceSubTensorHandleAncestry(tensorHandle);
2391 if (memBlockTrackerMap.find(tensorHandle) == memBlockTrackerMap.end())
2393 PartialBlock partialBlock;
2395 partialBlock.m_StartOfLife = timestep;
2397 size_t alignedSize = align(outputSlot.GetOutputHandler().GetTensorInfo().GetNumBytes());
2398 partialBlock.m_MemSize = alignedSize;
2399 partialBlock.m_Index = outputIndex++;
2400 partialBlock.m_Lifetime = outputSlot.GetNumConnections();
2401 partialBlock.m_BackendId = backendId;
2403 if (partialBlock.m_Lifetime == 0)
2405 m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
2406 partialBlock.m_StartOfLife,
2407 partialBlock.m_MemSize,
2409 partialBlock.m_Index);
2413 memBlockTrackerMap[tensorHandle] = partialBlock;
2415 m_Tensorhandles.push_back(tensorHandle);
2420 memBlockTrackerMap.at(tensorHandle).m_Lifetime += outputSlot.GetNumConnections();
2426 const Layer& connectedInputLayer = inputSlot.GetConnectedOutputSlot()->GetOwningLayer();
2437 if (!m_SupportsExternallyManagedMemory[connectedInputLayer.
GetBackendId()])
2442 auto outputSlot = inputSlot.GetConnectedOutputSlot();
2444 ITensorHandle* tensorHandle = outputSlot->GetOutputHandler().GetData();
2445 tensorHandle = TraceSubTensorHandleAncestry(tensorHandle);
2447 PartialBlock& partialBlock = memBlockTrackerMap.at(tensorHandle);
2449 auto& lifetime = partialBlock.m_Lifetime;
2454 m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
2456 partialBlock.m_MemSize,
2458 partialBlock.m_Index);
2466 std::unique_ptr<MemoryManager> LoadedNetwork::CreateExternalMemoryManger(
2467 std::vector<std::pair<std::shared_ptr<TensorMemory>,
MemorySource>>& tensorMemoryVec)
2469 std::unique_ptr<MemoryManager> memoryManager = std::make_unique<MemoryManager>();
2472 for (
auto& backend : m_MemBinMap)
2474 std::vector<BufferStorage> bufferStorageVec;
2476 std::shared_ptr<ICustomAllocator> backendAllocator;
2477 if (allocatorMap.find(backend.first) != allocatorMap.end())
2479 backendAllocator = allocatorMap[backend.first];
2483 backendAllocator = m_Backends[backend.first]->GetDefaultAllocator();
2486 for (
auto& memBin : backend.second)
2492 for (
auto& memBlock : memBin.m_MemBlocks)
2494 auto tensorMemory = std::make_shared<TensorMemory>(
TensorMemory{memBlock.
m_Offset, memBlock.m_Index});
2496 tensorMemoryVec.emplace_back(tensorMemory, backendAllocator->GetMemorySourceType());
2500 bufferStorageVec.emplace_back(std::move(bufferStorage));
2503 memoryManager->StoreMemToAllocate(bufferStorageVec, backendAllocator, 4);
2506 return memoryManager;
2513 const auto& importedTensorHandlePin = m_PreImportedInputHandles.at(
id);
2514 if (!importedTensorHandlePin.m_TensorHandle)
2517 "PreImportedInput: {} has been deleted",
id));
2519 return importedTensorHandlePin.m_LayerBindingId;
2521 catch (
const std::out_of_range&)
2531 const auto& importedTensorHandlePin = m_PreImportedOutputHandles.at(
id);
2532 if (!importedTensorHandlePin.m_TensorHandle)
2535 "PreImportedOutput: {} has been deleted",
id));
2537 return importedTensorHandlePin.m_LayerBindingId;
2539 catch (
const std::out_of_range&)
Status Execute(const InputTensors &inputTensors, const OutputTensors &outputTensors, IWorkingMemHandle &workingMemHandle, std::vector< ImportedInputId > preImportedInputs={}, std::vector< ImportedOutputId > preImportedOutputs={})
Thread safe execution of the loaded network.
std::vector< std::shared_ptr< TensorMemory > > m_TensorMemoryVector
Vector of pointers to TensorMemory.
std::unique_ptr< IWorkingMemHandle > CreateWorkingMemHandle(NetworkId networkId)
Create a new unique WorkingMemHandle object.
bool HasCapability(const std::string &name, const BackendCapabilities &capabilities)
Convenience function to check if a capability exists in a BackendCapabilities struct.
const MemorySource m_InputSource
virtual bool Import(void *memory, MemorySource source)
Import externally allocated memory.
FactoryFunction GetFactory(const BackendId &id) const
std::unique_ptr< IWorkloadFactory > IWorkloadFactoryPtr
unsigned int GetNumInputSlots() const override
Returns the number of connectable input slots.
static ProfilerManager & GetInstance()
#define ARMNN_NO_DEPRECATE_WARN_BEGIN
virtual IMemoryManagerUniquePtr CreateMemoryManager() const
LayerBindingId GetBindingId() const
virtual unsigned int GetImportFlags() const
Get flags describing supported import sources.
const bool m_AsyncEnabled
const armnn::Tensor GetOutputTensor(const LayerBindingId layerId, const OutputTensors &outputTensors)
MemoryOptimizerStrategiesMapRef GetMemoryOptimizerStrategies()
unsigned int ImportedOutputId
std::pair< BackendId, ExecutionData > & GetExecutionDataAt(unsigned int id) override
Get the ExecutionData at an index.
size_t m_Offset
Number of bytes the value is away from m_Buffer.
virtual void Allocate()=0
Indicate to the memory manager that this resource is no longer active.
virtual const char * what() const noexcept override
TensorInfo GetInputTensorInfo(LayerBindingId layerId) const
#define ARMNN_LOG(severity)
size_t m_BufferSize
Total size of the buffer.
ITensorHandle * GetOutputHandle(LayerBindingId layerBindingId) const
BackendRegistry & BackendRegistryInstance()
std::vector< std::pair< LayerBindingId, class ConstTensor > > InputTensors
const ProfilingDetailsMethod m_OutputNetworkDetailsMethod
unsigned int MemorySourceFlags
MemoryType GetMemoryArea() const
size_t GetNumOutputs() const
void CopyToOutputTensor(const Tensor &outputTensor, ITensorHandle *outputTensorHandle)
TensorInfo GetOutputTensorInfo(LayerBindingId layerId) const
Copyright (c) 2021 ARM Limited and Contributors.
void IgnoreUnused(Ts &&...)
const std::vector< InputSlot > & GetInputSlots() const
std::vector< ImportedInputId > ImportInputs(const InputTensors &inputTensors, MemorySource forceImportMemorySource=MemorySource::Undefined)
std::function< void(LayerGuid guid, unsigned int slotIndex, ITensorHandle *tensorHandle)> DebugCallbackFunction
Define the type of callback for the Debug layer to call.
unsigned int GetNumOutputSlots() const override
Returns the number of connectable output slots.
int LayerBindingId
Type of identifiers for bindable layers (inputs, outputs).
#define ARMNN_SCOPED_PROFILING_EVENT(backendId, name)
virtual const BackendId & GetId() const =0
ConstIteratorOutputs begin() const
A tensor defined by a TensorInfo (shape and data type) and a mutable backing store.
virtual IWorkloadFactoryPtr CreateWorkloadFactory(const IMemoryManagerSharedPtr &memoryManager=nullptr) const =0
std::vector< ITensorHandle * > m_Inputs
unsigned int GetNumConnections() const override
const InputSlot & GetInputSlot(unsigned int index) const override
Get a const input slot handle by slot index.
std::vector< TensorInfo > m_InputTensorInfos
const std::vector< std::vector< ITensorHandle * >::iterator > & GetOutputConnection(LayerBindingId layerBindingId) const
void ValidateBindingIds()
#define ARMNN_NO_DEPRECATE_WARN_END
#define ARMNN_ASSERT_MSG(COND, MSG)
bool SupportsTensorAllocatorAPI() const
#define ARMNN_SCOPED_HEAP_PROFILING(TAG)
A tensor defined by a TensorInfo (shape and data type) and an immutable backing store.
virtual ITensorHandle * GetParent() const =0
Get the parent tensor if this is a subtensor.
std::vector< std::pair< LayerBindingId, class Tensor > > OutputTensors
const std::string & GetNameStr() const
LayerType GetType() const override
Returns the armnn::LayerType of this layer.
void SendNetworkStructure(arm::pipe::IProfilingService &profilingService)
const std::vector< std::vector< ITensorHandle * >::iterator > & GetInputConnections(LayerBindingId layerBindingId) const
#define ARMNN_ASSERT(COND)
void ClearImportedInputs(const std::vector< ImportedInputId > inputIds)
std::vector< TensorInfo > m_OutputTensorInfos
void SetLayersOutOfOrder()
ITensorHandle * GetData() const
Gets the allocated tensor memory.
std::vector< std::unique_ptr< IWorkload > > WorkloadQueue
const TensorInfo & GetInfo() const
const BackendId & GetBackendId() const
void Allocate() override
Allocate the backing memory required for execution.
void ValidateSourcesMatchOptimizedNetwork(std::vector< BackendOptions > optimizedOptions, const INetworkProperties &networkProperties)
This function performs a sanity check to ensure that the combination of input and output memory sourc...
arm::pipe::ProfilingGuid GetNetworkGuid()
const std::vector< OutputSlot > & GetOutputSlots() const
virtual bool CanBeImported(void *memory, MemorySource source)
Implementations must determine if this memory block can be imported.
OutputLayersAccessor GetOutputLayers() const
Returns a wrapper object with begin(), end() methods to iterate over the output layers in a range-bas...
unsigned int ImportedInputId
Struct for the users to pass backend specific options.
Status EnqueueWorkload(const InputTensors &inputTensors, const OutputTensors &outputTensors, std::vector< ImportedInputId > preImportedInputIds={}, std::vector< ImportedOutputId > preImportedOutputIds={})
Single thread execution of the loaded network.
void RegisterProfiler(IProfiler *profiler)
virtual const void * Map(bool blocking=true) const =0
Map the tensor data for access.
std::vector< LayerBindingId > & GetBindingIdVector()
std::unordered_map< BackendId, std::shared_ptr< ICustomAllocator > > GetAllocators()
virtual BackendCapabilities GetCapabilities() const
Returns a BackendCapability if the backend lists the capability. The BackendCapability must then be in...
virtual void Unmap() const =0
Unmap the tensor data.
bool AsBool() const
Value getters.
std::string GetName() const
bool IsAllocated() override
IsAllocated returns true if the backing memory is currently allocated.
std::vector< ITensorHandle * > m_Outputs
Base class for all ArmNN exceptions so that users can filter to just those.
const OutputHandler & GetOutputHandler(unsigned int i=0) const
std::vector< ImportedOutputId > ImportOutputs(const OutputTensors &outputTensors, MemorySource forceImportMemorySource=MemorySource::Undefined)
MemorySource
Define the Memory Source to reduce copies.
const std::string & Get() const
void RegisterDebugCallback(const DebugCallbackFunction &func)
ConstIteratorOutputs end() const
std::vector< ITensorHandle * > m_Outputs
std::enable_if_t< std::is_unsigned< Source >::value &&std::is_unsigned< Dest >::value, Dest > numeric_cast(Source source)
const MemorySource m_OutputSource
Contains information about TensorInfos of a layer.
const char * GetName() const override
Returns the name of the layer.
ITensorHandleFactory::FactoryId GetTensorHandleFactoryId() const
bool CheckFlag(MemorySourceFlags flags, MemorySource source)
void CopyTensorContentsGeneric(const ITensorHandle *srcTensor, ITensorHandle *dstTensor, CopyFunc copy)
Graph & TopologicalSort()
Sorts layers in topological order and return this.
InputLayersAccessor GetInputLayers() const
Returns a wrapper object with begin(), end() methods to iterate over the input layers in a range-base...
std::vector< ITensorHandle * > m_Inputs
size_t GetNumLayers() const
const armnn::ConstTensor GetInputTensor(const LayerBindingId layerId, const InputTensors &inputTensors)
const TensorInfo & GetTensorInfo() const override
static std::unique_ptr< LoadedNetwork > MakeLoadedNetwork(std::unique_ptr< IOptimizedNetwork > net, std::string &errorMessage, const INetworkProperties &networkProperties, arm::pipe::IProfilingService *profilingService)
ITensorHandle * GetInputHandle(LayerBindingId layerBindingId) const
size_t GetNumInputs() const
virtual std::unique_ptr< ITensorHandle > CreateTensorHandle(const TensorInfo &tensorInfo) const =0
static const FactoryId LegacyFactoryId
const bool m_ProfilingEnabled
void ClearImportedOutputs(const std::vector< ImportedOutputId > outputIds)
const TensorInfo & GetTensorInfo(const ITensorHandle *tensorHandle)
float32 helpers
LayerType
When adding a new layer, adapt also the LastLayer enum value in the enum class LayerType below...
LayerGuid GetGuid() const final
Returns the unique id of the layer.