29 #include <common/include/Processes.hpp>
31 #include <fmt/format.h>
/// Builds a human-readable error message of the form "<prefix> <what()>".
///
/// @tparam ExceptionType any type exposing a what() member returning a C string.
/// @param prefix  text placed before the exception description (not null).
/// @param error   the exception whose what() text is appended.
/// @return the concatenated message "prefix what()".
template <typename ExceptionType>
std::string ToErrorMessage(const char * prefix, const ExceptionType & error)
{
    std::stringstream ss;
    // A single space separates the caller-supplied prefix from the exception text.
    ss << prefix << " " << error.what();
    return ss.str();
}
// Records one layer in the profiling timeline: creates a named LAYER entity
// (child of the network entity) and a connection relationship from each
// connected source layer's output to this layer.
// NOTE(review): interior lines of this function are elided in this extract.
50 void AddLayerStructure(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
52 ProfilingGuid networkGuid)
// Use a placeholder name so the timeline entity label is never empty.
55 std::string layerName = layer.GetNameStr().empty() ?
"<Unnamed>" : layer.GetNameStr();
56 timelineUtils->CreateNamedTypedChildEntity(layer.GetGuid(),
59 LabelsAndEventClasses::LAYER_GUID);
// Link every connected input back to the producing layer's GUID.
60 for (
auto&& input : layer.GetInputSlots())
62 const IOutputSlot* source = input.GetConnectedOutputSlot();
64 timelineUtils->CreateConnectionRelationship(ProfilingRelationshipType::RetentionLink,
65 source->GetOwningLayerGuid(),
// Records one workload in the profiling timeline: creates a WORKLOAD entity,
// labels it with the owning layer's backend id, and (per the RetentionLink
// relationship below) ties it to its parent layer.
// NOTE(review): interior lines of this function are elided in this extract.
70 void AddWorkloadStructure(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
71 std::unique_ptr<IWorkload>& workload,
75 timelineUtils->CreateTypedEntity(workload->GetGuid(), LabelsAndEventClasses::WORKLOAD_GUID);
76 timelineUtils->MarkEntityWithLabel(workload->GetGuid(),
77 layer.GetBackendId().Get(),
78 LabelsAndEventClasses::BACKENDID_GUID);
81 timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
84 LabelsAndEventClasses::CHILD_GUID);
// Fragment (function start elided): validates that the memory sources requested
// in networkProperties are compatible with the Import/Export flags the network
// was optimized with; throws InvalidArgumentException on mismatch.
// Locate the "Global" options entry among the optimizer's backend options.
103 const vector<BackendOptions>::iterator& backendItr =
104 find_if(optimizedOptions.begin(), optimizedOptions.end(), [](
const BackendOptions& backend) {
105 if (backend.GetBackendId().Get() ==
"Global")
// Defaults: both import and export are considered disabled unless the
// optimized options say otherwise.
114 bool importEnabled =
false;
115 bool exportEnabled =
false;
116 if (backendItr != optimizedOptions.end())
119 for (
size_t i = 0; i < backendItr->GetOptionCount(); i++)
121 const BackendOptions::BackendOption& option = backendItr->GetOption(i);
122 if (option.GetName() ==
"ImportEnabled")
124 importEnabled = option.GetValue().AsBool();
126 if (option.GetName() ==
"ExportEnabled")
128 exportEnabled = option.GetValue().AsBool();
// Input side: the requested input memory source must agree with importEnabled.
138 auto message = fmt::format(
"The input memory source specified, '{0}',", networkProperties.m_InputSource);
141 message.append(
" requires that memory import be enabled. However, "
142 "it was disabled when this network was optimized.");
146 message.append(
" requires that memory import be disabled. However, "
147 "it was enabled when this network was optimized.");
149 throw InvalidArgumentException(message);
// Output side: the requested output memory source must agree with exportEnabled.
155 auto message = fmt::format(
"The output memory source specified, '{0}',", networkProperties.m_OutputSource);
158 message.append(
" requires that memory export be enabled. However, "
159 "it was disabled when this network was optimized.");
163 message.append(
" requires that memory export be disabled. However, "
164 "it was enabled when this network was optimized.");
166 throw InvalidArgumentException(message);
// Fragment (signature start elided): factory that constructs a LoadedNetwork
// from an optimized network. On failure the error text is written into
// errorMessage and an empty unique_ptr is returned instead of propagating.
171 std::string& errorMessage,
173 arm::pipe::IProfilingService* profilingService)
175 std::unique_ptr<LoadedNetwork> loadedNetwork;
// Shared failure path: capture the message and return a null network.
177 auto Fail = [&](
const std::exception&
error) -> std::unique_ptr<LoadedNetwork>
179 errorMessage = ToErrorMessage(
"An error occurred when preparing the network workloads: ",
error);
182 return std::unique_ptr<LoadedNetwork>();
// NOTE(review): raw new is wrapped immediately by reset(); presumably the
// LoadedNetwork ctor is private so make_unique cannot be used here — confirm.
187 loadedNetwork.reset(
new LoadedNetwork(std::move(net), networkProperties, profilingService));
197 catch (
const std::runtime_error&
error)
202 return loadedNetwork;
// Constructor: takes ownership of the optimized network and wires up
// per-backend factories, workloads and memory management (sections below).
205 LoadedNetwork::LoadedNetwork(std::unique_ptr<IOptimizedNetwork> net,
207 arm::pipe::IProfilingService* profilingService) :
208 m_OptimizedNetwork(
std::move(net)),
209 m_NetworkProperties(networkProperties),
210 m_TensorHandleFactoryRegistry(),
211 m_ProfilingService(profilingService)
215 const std::shared_ptr<IProfiler>& profiler = m_OptimizedNetwork->GetProfiler();
225 m_NetworkProperties);
// Track which memory-management scheme the backends require; decided while
// iterating the graph's backends below.
232 bool useExternalMemoryManager =
false;
233 bool useInternalMemoryManager =
false;
234 Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
// One imported-state flag per binding point; all start "not imported".
241 m_IsInputImported = std::vector<bool>(order.
GetNumInputs(),
false);
242 m_IsOutputImported = std::vector<bool>(order.
GetNumOutputs(),
false);
// Create each distinct backend exactly once and build its workload factory.
245 for (
auto&& layer : order)
247 auto const& backendId = layer->GetBackendId();
248 if (m_Backends.count(backendId) == 0)
251 auto it = m_Backends.emplace(std::make_pair(backendId, createBackend()));
253 IBackendInternal* backend = it.first->second.get();
// Async execution demands specific backend capabilities; reject early with a
// descriptive BackendCapabilityException otherwise.
258 if (!
HasCapability(BackendOptions::BackendOption{
"AsyncExecution",
true}, backend->GetCapabilities()))
260 std::string er = backend->GetId();
261 er +=
" does not support AsyncExecution";
262 throw BackendCapabilityException(er);
264 if (!
HasCapability(BackendOptions::BackendOption{
"ExternallyManagedMemory",
true},
265 backend->GetCapabilities()))
267 std::string er = backend->GetId();
268 er +=
" does not support ExternallyManagedMemory\n";
269 er +=
"AsyncEnabled networks require all backends to support ExternallyManagedMemory";
270 throw BackendCapabilityException(er);
272 m_SupportsExternallyManagedMemory[backend->GetId()] =
true;
273 useExternalMemoryManager =
true;
// Fallback branch: backend manages memory internally.
277 m_SupportsExternallyManagedMemory[backend->GetId()] =
false;
278 useInternalMemoryManager =
true;
// Factory creation differs depending on tensor-allocator API support.
282 if (backend->SupportsTensorAllocatorAPI())
284 workloadFactory = backend->CreateWorkloadFactory(
285 m_TensorHandleFactoryRegistry,
286 m_OptimizedNetwork->pOptimizedNetworkImpl->GetModelOptions(),
292 m_BackendMemoryMangers.emplace_back(backend->CreateMemoryManager());
293 workloadFactory = backend->CreateWorkloadFactory(
294 m_BackendMemoryMangers.back(), m_OptimizedNetwork->pOptimizedNetworkImpl->GetModelOptions());
296 m_WorkloadFactories[backendId ] = std::move(workloadFactory);
// Create tensor handles for every layer; how they are created depends on the
// layer type (switch below, cases elided in this extract) and on whether the
// backend supports externally managed memory.
302 for (
auto&& layer : order)
304 auto& workloadFactory = GetWorkloadFactory(*layer);
305 bool supportsExternalManager = m_SupportsExternallyManagedMemory[layer->GetBackendId()];
307 switch (layer->GetType())
314 layer->CreateTensorHandles(m_TensorHandleFactoryRegistry,
321 layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory,
true);
// Special case: a single output feeding directly into an Output layer.
328 if ((layer->GetNumOutputSlots() == 1) &&
329 (layer->GetOutputSlots()[0].GetNumConnections() == 1) &&
330 (layer->GetOutputSlots()[0].GetConnection(0)->GetOwningLayer().GetType() ==
LayerType::Output))
332 layer->CreateTensorHandles(m_TensorHandleFactoryRegistry,
338 layer->CreateTensorHandles(m_TensorHandleFactoryRegistry,
340 !supportsExternalManager);
// Profiling timeline bookkeeping: register the network entity, record the
// start-of-life event and label the entity with the current process id.
347 ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
348 std::unique_ptr<TimelineUtilityMethods> timelineUtils =
349 TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
352 timelineUtils->CreateTypedEntity(networkGuid, LabelsAndEventClasses::NETWORK_GUID);
354 timelineUtils->RecordEvent(networkGuid, LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
356 int processID = arm::pipe::GetCurrentProcessId();
357 std::stringstream ss;
359 timelineUtils->MarkEntityWithLabel(networkGuid, ss.str(), LabelsAndEventClasses::PROCESS_ID_GUID);
// Non-owning list of constant workloads collected during workload creation.
362 std::vector<IWorkload*> ConstWorkloads;
// Create one workload per layer (non-I/O layer types; switch cases elided)
// and record each in the profiling timeline.
367 for (
auto&& layer: order)
372 AddLayerStructure(timelineUtils, *layer, networkGuid);
375 const IWorkloadFactory& workloadFactory = GetWorkloadFactory(*layer);
377 switch (layer->GetType())
387 auto workload = layer->CreateWorkload(workloadFactory);
// A null workload is a hard error: report name, type and backend.
391 const char*
const layerName =
392 layer->GetNameStr().length() != 0 ? layer->GetName() :
"<Unnamed>";
393 throw InvalidArgumentException(
394 fmt::format(
"No workload created for layer (name: '{0}' type: '{1}') (compute '{2}')",
395 layerName,
static_cast<int>(layer->GetType()), layer->GetBackendId().Get()
402 AddWorkloadStructure(timelineUtils, workload, *layer);
// Constant layers are kept aside (handle + workload maps) when async or
// external memory management is in use; otherwise they join the main queue.
407 if((networkProperties.
m_AsyncEnabled || useExternalMemoryManager) &&
410 m_ConstantTensorHandles[layer->GetGuid()] =
411 layer->GetOutputSlot(0).GetOutputHandler().GetData();
412 m_ConstantWorkloads[layer->GetGuid()] = std::move(workload);
416 m_WorkloadQueue.push_back(std::move(workload));
421 ConstWorkloads.push_back(m_WorkloadQueue.back().get());
// Constant data is baked into workloads; release the layer's copy.
425 layer->ReleaseConstantData();
435 const int noOfInputs = armnn::numeric_cast<int>(order.GetNumInputs());
439 for (
const BindableLayer* layer: order.GetInputLayers())
441 const auto bindingId = layer->GetBindingId();
443 bool supportsReplacement =
true;
445 for (
const auto inputSlot: layer->GetOutputSlot(0).GetConnections())
447 auto workloadIndex = std::distance(order.begin(), order.GetPosInGraph(inputSlot->GetOwningLayer()));
448 workloadIndex -= noOfInputs;
450 m_InputWorkloadSlotPairs[bindingId].emplace_back(WorkloadIndices{
451 armnn::numeric_cast<unsigned int>(workloadIndex), inputSlot->GetSlotIndex()});
453 auto workload = m_WorkloadQueue[m_InputWorkloadSlotPairs[bindingId].back().m_WorkloadIndex].get();
454 supportsReplacement &= workload->SupportsTensorHandleReplacement();
462 ITensorHandleFactory *importFactory = m_TensorHandleFactoryRegistry.
GetFactory(importFactoryId);
464 if (supportsReplacement && importFactory)
466 m_PreImportedInputHandles.emplace_back(
467 bindingId, importFactory->CreateTensorHandle(layer->GetOutputSlot(0).GetTensorInfo(),
false));
471 m_PreImportedInputHandles.emplace_back(bindingId,
nullptr);
477 for (
const BindableLayer* layer: order.GetOutputLayers())
479 const auto bindingId = layer->GetBindingId();
481 const auto outputSlot = layer->GetInputSlot(0).GetConnectedOutputSlot();
482 auto& indices = m_OutputWorkloadSlotPairs[bindingId];
484 auto workloadIndex = std::distance(order.begin(), order.GetPosInGraph(outputSlot->GetOwningLayer()));
485 workloadIndex -= noOfInputs;
487 indices.m_OutputSlotIndices = WorkloadIndices{numeric_cast<unsigned int>(workloadIndex),
488 outputSlot->CalculateIndexOnOwner()};
490 bool supportsReplacement =
true;
491 auto outputWorkload = m_WorkloadQueue[indices.m_OutputSlotIndices.m_WorkloadIndex].get();
492 supportsReplacement &= outputWorkload->SupportsTensorHandleReplacement();
494 for (
auto &inputSlot: outputSlot->GetConnections())
498 auto inWorkloadIndex = std::distance(order.begin(),
499 order.GetPosInGraph(inputSlot->GetOwningLayer()));
500 inWorkloadIndex -= noOfInputs;
501 indices.m_InputSlotIndices.emplace_back(WorkloadIndices{numeric_cast<unsigned int>(inWorkloadIndex),
502 inputSlot->GetSlotIndex()});
503 auto inputWorkload = m_WorkloadQueue[indices.m_InputSlotIndices.back().m_WorkloadIndex].get();
504 supportsReplacement &= inputWorkload->SupportsTensorHandleReplacement();
512 ITensorHandleFactory *importFactory = m_TensorHandleFactoryRegistry.
GetFactory(importFactoryId);
514 if (supportsReplacement && importFactory)
516 m_PreImportedOutputHandles.emplace_back(
517 bindingId, importFactory->CreateTensorHandle(outputSlot->GetTensorInfo(),
false));
521 m_PreImportedOutputHandles.emplace_back(bindingId,
nullptr);
// Give every workload factory a chance to finalize now that all workloads
// exist, then flush the profiling timeline.
526 for (
auto&& workloadFactory : m_WorkloadFactories)
528 workloadFactory.second->AfterWorkloadsCreated();
534 timelineUtils->Commit();
// Memory set-up tail of the constructor: build memory profiles, run the
// per-backend allocation strategies, and allocate/configure buffers.
537 if (useExternalMemoryManager)
541 CreateMemoryProfileAsync();
545 CreateMemoryProfile();
// Optimize each backend's memory blocks with its own strategy when one is
// registered, otherwise fall back to the constant strategy.
549 for (
auto& backendMemoryProfile : m_MemBlockMap)
551 const BackendId& backendId = backendMemoryProfile.first;
552 if (backendStrategyMap.find(backendId) != backendStrategyMap.end())
554 m_MemBinMap[backendId] = backendStrategyMap[backendId]->Optimize(backendMemoryProfile.second);
558 m_MemBinMap[backendId] = m_ConstantStrategy->Optimize(backendMemoryProfile.second);
564 m_ExternalMemoryManager = CreateExternalMemoryManger(m_TensorMemory);
// Deterministic ordering of tensor memory by output-slot id.
567 std::sort(m_TensorMemory.begin(), m_TensorMemory.end(),
568 [](
const std::pair<std::shared_ptr<TensorMemory>,
MemorySource>& lhs,
569 const std::pair<std::shared_ptr<TensorMemory>,
MemorySource>& rhs)
571 return lhs.first->m_OutputSlotId < rhs.first->m_OutputSlotId;
580 if (useInternalMemoryManager)
583 m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().AllocateDynamicBuffers();
586 for (
auto &workload : m_WorkloadQueue)
588 workload->PostAllocationConfigure();
// Constants are materialised differently for sync vs async execution.
592 if (useExternalMemoryManager)
596 AllocateAndExecuteConstantWorkloads();
600 AllocateAndExecuteConstantWorkloadsAsync();
606 for (
auto workload: ConstWorkloads)
// Allocates the output handle of every constant workload and executes it once
// so the constant data is materialised up front.
613 void LoadedNetwork::AllocateAndExecuteConstantWorkloads()
616 for (
auto& pair : m_ConstantWorkloads)
// pair.first is the owning layer's GUID; look up the matching tensor handle.
618 auto tensorHandle = m_ConstantTensorHandles[pair.first];
619 tensorHandle->Allocate();
620 pair.second->Execute();
// Async variant: walks the graph, creates and allocates tensor handles for
// constant layers, then executes each constant workload via per-backend
// ExecutionData rather than the plain Execute() path.
624 void LoadedNetwork::AllocateAndExecuteConstantWorkloadsAsync()
627 Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
628 for (
auto&& layer : order)
632 const auto& outSlot = layer->GetOutputSlots()[0];
633 const auto factoryId = outSlot.GetTensorHandleFactoryId();
635 auto& workloadFactory = GetWorkloadFactory(*layer);
637 layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory);
638 ITensorHandle* tensorHandle = outSlot.GetOutputHandler().GetData();
640 m_ConstantTensorHandles[layer->GetGuid()] = tensorHandle;
641 tensorHandle->Allocate();
643 auto& backend = m_Backends.at(layer->GetBackendId());
// Minimal working-memory descriptor: only the constant's output handle.
645 WorkingMemDescriptor memDesc;
646 memDesc.m_Outputs.push_back(tensorHandle);
648 ExecutionData executionData = backend->CreateExecutionData(memDesc);
649 m_ConstantWorkloads[layer->GetGuid()]->ExecuteAsync(executionData);
// Fragment (signature elided): replays the network structure (layers and
// workloads) into the profiling timeline, then commits it.
657 Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
658 ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
660 std::unique_ptr<TimelineUtilityMethods> timelineUtils =
661 TimelineUtilityMethods::GetTimelineUtils(profilingService);
663 timelineUtils->CreateTypedEntity(networkGuid, LabelsAndEventClasses::NETWORK_GUID);
665 for (
auto&& layer : order)
668 AddLayerStructure(timelineUtils, *layer, networkGuid);
669 switch (layer->GetType())
// NOTE(review): matching of workloads to layers in this loop relies on elided
// case labels; behavior cannot be fully confirmed from this extract.
679 for (
auto& workload : m_WorkloadQueue)
682 AddWorkloadStructure(timelineUtils, workload, *layer);
689 timelineUtils->Commit();
// Accessor body: the network's profiling GUID comes from the optimized network.
694 return m_OptimizedNetwork->GetGuid();
// Fragment: linear search of the graph's input layers for the binding id,
// returning the TensorInfo of that layer's single output slot.
699 for (
auto&& inputLayer : m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetInputLayers())
701 ARMNN_ASSERT_MSG(inputLayer->GetNumOutputSlots() == 1,
"Input layer should have exactly 1 output slot");
702 if (inputLayer->GetBindingId() == layerId)
704 return inputLayer->GetOutputSlot(0).GetTensorInfo();
// Fragment: linear search of the graph's output layers for the binding id,
// returning the TensorInfo of the connected producer's output slot.
713 for (
auto&& outputLayer : m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetOutputLayers())
715 ARMNN_ASSERT_MSG(outputLayer->GetNumInputSlots() == 1,
"Output layer should have exactly 1 input slot");
716 ARMNN_ASSERT_MSG(outputLayer->GetInputSlot(0).GetConnection(),
"Input slot on Output layer must be connected");
717 if (outputLayer->GetBindingId() == layerId)
719 return outputLayer->GetInputSlot(0).GetConnection()->GetTensorInfo();
// Fragment: looks up the workload factory registered for the layer's backend;
// a missing factory is a RuntimeException (the backend was never created).
730 auto it = m_WorkloadFactories.find(layer.
GetBackendId());
731 if (it == m_WorkloadFactories.end())
733 throw RuntimeException(fmt::format(
"No workload factory for {0} to be used for layer: {1}",
739 workloadFactory = it->second.get();
743 return *workloadFactory;
// Fragment of a small value class pairing an owned tensor handle with its
// TensorInfo and binding id; accessors are const and non-owning.
752 TensorPin(std::unique_ptr<ITensorHandle> handle,
const TensorInfo& info,
LayerBindingId id)
753 : m_TensorHandle(
std::move(handle))
// Raw pointer view of the owned handle — caller must not take ownership.
759 ITensorHandle* GetTensorHandle()
const {
return m_TensorHandle.get(); }
760 const TensorInfo&
GetTensorInfo()
const {
return m_TensorInfo; }
764 std::unique_ptr<ITensorHandle> m_TensorHandle;
765 TensorInfo m_TensorInfo;
// Fragment: finds the TensorPin whose binding id matches `id` in `pins`;
// throws InvalidArgumentException (naming the binding point) if none matches.
770 const std::vector<TensorPin>& pins,
771 char const* bindingPointDesc)
773 auto it = std::find_if(pins.begin(), pins.end(),
774 [
id](
const TensorPin& pin)
776 return pin.GetBindingId() == id;
779 if (it != pins.end())
785 throw InvalidArgumentException(fmt::format(
"No tensor supplied for {0} {1}", bindingPointDesc,
id));
// Fragment of WorkloadData: wraps each user-supplied input/output tensor in a
// passthrough tensor handle (no copy) and stores it as a TensorPin keyed by
// binding id; the Get*TensorPin accessors delegate to GetTensorPin above.
795 m_InputTensorPins.reserve(inputTensors.size());
796 m_OutputTensorPins.reserve(outputTensors.size());
798 for (
auto inputTensorPair : inputTensors)
800 auto inputTensor = inputTensorPair.second;
// Const passthrough: wraps the caller's memory without owning or copying it.
802 std::unique_ptr<ITensorHandle> tensorHandle =
803 std::make_unique<ConstPassthroughTensorHandle>(inputTensor.GetInfo(),inputTensor.GetMemoryArea());
806 m_InputTensorPins.emplace_back(std::move(tensorHandle), inputTensor.GetInfo(), layerId);
809 for (
auto outputTensorPair : outputTensors)
811 auto outputTensor = outputTensorPair.second;
813 std::unique_ptr<ITensorHandle> tensorHandle =
814 std::make_unique<PassthroughTensorHandle>(outputTensor.GetInfo(), outputTensor.GetMemoryArea());
817 m_OutputTensorPins.emplace_back(std::move(tensorHandle), outputTensor.GetInfo(), layerId);
823 return GetTensorPin(
id, m_InputTensorPins,
"input");
828 return GetTensorPin(
id, m_OutputTensorPins,
"output");
833 std::vector<TensorPin> m_InputTensorPins;
834 std::vector<TensorPin> m_OutputTensorPins;
// Fragment of EnqueueWorkload (signature start elided): validates counts, then
// builds the input queue. For each input, either reuse a pre-imported handle
// (replacing workload tensor handles as needed) or enqueue a copy workload.
841 std::vector<ImportedInputId> preImportedInputIds,
842 std::vector<ImportedOutputId> preImportedOutputIds)
844 const Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
849 ARMNN_LOG(
warning) <<
"IRuntime::EnqueueWorkload()::Less than two nodes in graph";
854 WorkloadData workloadData(inputTensors, outputTensors);
// Supplied tensors plus pre-imported ids must cover every graph input.
858 if (graph.
GetNumInputs() != (inputTensors.size() + preImportedInputIds.size()))
866 m_InputQueue.clear();
869 unsigned int inputIndex = 0;
870 unsigned int importedInputIdIndex = 0;
// Sorting lets the loop below merge the imported-id list with input order.
871 std::sort(preImportedInputIds.begin(), preImportedInputIds.end());
874 if (importedInputIdIndex < preImportedInputIds.size() &&
875 inputIndex == preImportedInputIds[importedInputIdIndex])
// Lazily swap the workloads over to the pre-imported handle on first use.
878 if (!m_IsInputImported[inputIndex])
880 auto outputTensorHandle = m_PreImportedInputHandles[inputIndex].m_TensorHandle.get();
882 for (
const auto& workloadInfo: m_InputWorkloadSlotPairs[inputLayer->GetBindingId()])
884 auto workload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
885 workload->ReplaceInputTensorHandle(outputTensorHandle, workloadInfo.m_SlotIndex);
887 m_IsInputImported[inputIndex] =
true;
889 importedInputIdIndex++;
// Not imported this run: restore original handles if a previous run imported.
893 if (m_IsInputImported[inputIndex])
897 for (
const auto& workloadInfo: m_InputWorkloadSlotPairs[inputLayer->GetBindingId()])
899 auto workload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
900 workload->ReplaceInputTensorHandle(handler.
GetData(), workloadInfo.m_SlotIndex);
903 m_IsInputImported[inputIndex] =
false;
907 const TensorPin& pin = workloadData.GetInputTensorPin(inputLayer->GetBindingId());
908 EnqueueInput(*inputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
// Output half of EnqueueWorkload: mirror of the input handling — pre-imported
// outputs get handle replacement plus a sync workload; others are enqueued as
// copy workloads — followed by inference-level profiling and execution.
916 m_OutputQueue.clear();
924 unsigned int outputIndex = 0;
925 unsigned int importedOutputIdIndex = 0;
926 std::sort(preImportedOutputIds.begin(), preImportedOutputIds.end());
929 if (importedOutputIdIndex < preImportedOutputIds.size() &&
930 outputIndex == preImportedOutputIds[importedOutputIdIndex])
933 ITensorHandle* inputTensorHandle = m_PreImportedOutputHandles[outputIndex].m_TensorHandle.get();
// First use of this imported output: retarget producer and all consumers.
935 if (!m_IsOutputImported[outputIndex])
937 const auto bindingId = outputLayer->GetBindingId();
938 const auto& indices = m_OutputWorkloadSlotPairs[bindingId];
940 auto outputWorkload = m_WorkloadQueue[indices.m_OutputSlotIndices.m_WorkloadIndex].get();
942 outputWorkload->ReplaceOutputTensorHandle(inputTensorHandle,
943 indices.m_OutputSlotIndices.m_SlotIndex);
945 for (
const auto& workloadInfo: indices.m_InputSlotIndices)
947 auto inputWorkload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
948 inputWorkload->ReplaceInputTensorHandle(inputTensorHandle, workloadInfo.m_SlotIndex);
950 m_IsOutputImported[outputIndex] =
true;
953 ARMNN_ASSERT_MSG(inputTensorHandle !=
nullptr,
"Data should have been allocated.");
// A sync workload makes the imported memory coherent after execution.
955 syncDesc.
m_Inputs.push_back(inputTensorHandle);
957 info.m_InputTensorInfos.push_back(
958 outputLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo());
959 auto syncWorkload = std::make_unique<SyncMemGenericWorkload>(syncDesc,
info);
961 m_OutputQueue.push_back(move(syncWorkload));
962 importedOutputIdIndex++;
// Previously imported but not this run: restore the original output handler.
966 if (m_IsOutputImported[outputIndex])
968 const auto bindingId = outputLayer->GetBindingId();
969 const auto& indices = m_OutputWorkloadSlotPairs[bindingId];
971 auto outputWorkload = m_WorkloadQueue[indices.m_OutputSlotIndices.m_WorkloadIndex].get();
973 outputLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetOutputHandler();
975 outputWorkload->ReplaceOutputTensorHandle(
976 outputHandler.
GetData(), indices.m_OutputSlotIndices.m_SlotIndex);
978 for (
const auto& workloadInfo: indices.m_InputSlotIndices)
980 auto inputWorkload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
981 inputWorkload->ReplaceInputTensorHandle(outputHandler.
GetData(), workloadInfo.m_SlotIndex);
983 m_IsOutputImported[outputIndex] =
false;
986 const TensorPin& pin = workloadData.GetOutputTensorPin(outputLayer->GetBindingId());
988 EnqueueOutput(*outputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
// Inference-level timeline entity: link it to the network and record start.
994 std::unique_ptr<TimelineUtilityMethods> timelineUtils =
995 TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
996 ProfilingGuid inferenceGuid = m_ProfilingService->GetNextGuid();
1000 ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
1001 timelineUtils->CreateTypedEntity(inferenceGuid, LabelsAndEventClasses::INFERENCE_GUID);
1002 timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
1005 LabelsAndEventClasses::EXECUTION_OF_GUID);
1006 timelineUtils->RecordEvent(inferenceGuid, LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
1009 bool executionSucceeded =
true;
1012 if (m_ProfilingService->IsProfilingEnabled())
1014 m_ProfilingService->IncrementCounterValue(INFERENCES_RUN);
1018 executionSucceeded =
Execute(timelineUtils, inferenceGuid);
// End-of-life event and commit regardless of the execution result.
1024 timelineUtils->RecordEvent(inferenceGuid, LabelsAndEventClasses::ARMNN_PROFILING_EOL_EVENT_CLASS);
1025 timelineUtils->Commit();
// Fragment of EnqueueInput (signature elided): validates the handle, tries a
// zero-copy import of the caller's memory into the layer's output handle, and
// falls back to enqueuing a CopyMemGenericWorkload when import is impossible.
1038 if (tensorHandle ==
nullptr)
1040 throw InvalidArgumentException(
"EnqueueInput: tensorHandle must not be NULL");
1046 inputQueueDescriptor.m_Inputs.push_back(tensorHandle);
1047 info.m_InputTensorInfos.push_back(tensorInfo);
1051 const TensorInfo& outputTensorInfo = handler.
GetTensorInfo();
1052 ITensorHandle* outputTensorHandle = handler.GetData();
1054 "Data should have been allocated.");
1055 inputQueueDescriptor.m_Outputs.push_back(outputTensorHandle);
1056 info.m_OutputTensorInfos.push_back(outputTensorInfo);
1059 bool needMemCopy =
true;
// Import path: map the source memory and hand it to the backend directly.
1064 needMemCopy =
false;
1066 void* mem = tensorHandle->
Map(
false);
1067 if (outputTensorHandle->Import(mem, m_NetworkProperties.
m_InputSource))
1069 tensorHandle->
Unmap();
// Import failed: unmap before throwing so the handle is not left mapped.
1072 tensorHandle->
Unmap();
1073 throw MemoryImportException(
"EnqueueInput: Memory Import failed");
// Copy path: a generic mem-copy workload moves the data at execution time.
1079 std::unique_ptr<IWorkload> inputWorkload = std::make_unique<CopyMemGenericWorkload>(inputQueueDescriptor, info);
1083 std::unique_ptr<TimelineUtilityMethods> timelineUtils =
1084 TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
1088 AddWorkloadStructure(timelineUtils, inputWorkload, layer);
1089 timelineUtils->Commit();
1092 m_InputQueue.push_back(move(inputWorkload));
// Builds the output-side workload for one Output layer: validates arguments,
// attempts a zero-copy export (import of the user's buffer + sync workload),
// otherwise enqueues a CopyMemGenericWorkload.
// NOTE(review): interior lines are elided; exact branch structure is partial.
1096 void LoadedNetwork::EnqueueOutput(
const BindableLayer& layer, ITensorHandle* tensorHandle,
const TensorInfo& tensorInfo)
1100 throw InvalidArgumentException(
"EnqueueOutput: given layer not an OutputLayer");
1103 if (tensorHandle ==
nullptr)
1105 throw InvalidArgumentException(
"EnqueueOutput: tensorHandle must not be NULL");
1111 outputQueueDescriptor.m_Outputs.push_back(tensorHandle);
1112 info.m_OutputTensorInfos.push_back(tensorInfo);
1114 ARMNN_ASSERT_MSG(layer.GetNumInputSlots() == 1,
"Output Layer should have exactly one input.");
// Source of the output data: the handler of the connected producer slot.
1117 const OutputHandler& outputHandler = layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOutputHandler();
1119 const TensorInfo& inputTensorInfo = outputHandler.GetTensorInfo();
1120 ITensorHandle* inputTensorHandle = outputHandler.GetData();
1121 ARMNN_ASSERT_MSG(inputTensorHandle !=
nullptr,
"Data should have been allocated.");
1130 bool needMemCopy =
true;
1132 (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetNumConnections() == 1))
// Do not import when the producer is an Input layer (would alias user input).
1134 if(layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOwningLayer().GetType() !=
LayerType::Input)
1139 needMemCopy =
false;
1140 void *mem = tensorHandle->Map(
false);
1141 bool importOk = inputTensorHandle->Import(mem, m_NetworkProperties.
m_OutputSource);
1142 tensorHandle->Unmap();
// Import succeeded: only a memory-sync workload is needed at execution time.
1147 MemSyncQueueDescriptor syncDesc;
1148 syncDesc.m_Inputs.push_back(inputTensorHandle);
1149 info.m_InputTensorInfos.push_back(inputTensorInfo);
1150 auto syncWorkload = std::make_unique<SyncMemGenericWorkload>(syncDesc, info);
1152 m_OutputQueue.push_back(move(syncWorkload));
1156 throw MemoryExportException(
"EnqueueOutput: Memory Export failed");
// Fallback: copy the result into the caller's buffer with a generic workload.
1164 outputQueueDescriptor.m_Inputs.push_back(inputTensorHandle);
1165 info.m_InputTensorInfos.push_back(inputTensorInfo);
1167 std::unique_ptr<IWorkload> outputWorkload =
1168 std::make_unique<CopyMemGenericWorkload>(outputQueueDescriptor, info);
1171 std::unique_ptr<TimelineUtilityMethods> timelineUtils =
1172 TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
1176 AddWorkloadStructure(timelineUtils, outputWorkload, layer);
1177 timelineUtils->Commit();
1180 m_OutputQueue.push_back(move(outputWorkload));
// Allocates working memory once per load: external memory manager path imports
// pre-computed tensor memory into the handles; otherwise each backend memory
// manager is acquired. Caller must hold m_WorkingMemMutex (passed as `lock`
// when threads are enabled) — the parameter documents that requirement.
1184 void LoadedNetwork::AllocateWorkingMemory(
1185 #
if !defined(ARMNN_DISABLE_THREADS)
1186 std::lock_guard<std::mutex>& lock
1192 #if !defined(ARMNN_DISABLE_THREADS)
// Idempotent: bail out if a previous call already allocated.
1196 if (m_IsWorkingMemAllocated)
1201 if (m_ExternalMemoryManager)
1203 m_ExternalMemoryManager->Allocate();
1205 for (
unsigned int i = 0; i < m_TensorMemory.size(); ++i)
1207 m_Tensorhandles[i]->Import(m_TensorMemory[i].first->m_Data, m_TensorMemory[i].second);
1211 for (
auto&& memoryManager : m_BackendMemoryMangers)
1215 memoryManager->Acquire();
1219 m_IsWorkingMemAllocated =
true;
// Fragment (signature elided): releases working memory under the working-mem
// mutex; no-op if nothing was allocated. Mirrors AllocateWorkingMemory.
1224 #if !defined(ARMNN_DISABLE_THREADS)
1225 std::lock_guard<std::mutex> lockGuard(m_WorkingMemMutex);
1228 if (!m_IsWorkingMemAllocated)
1233 if (m_ExternalMemoryManager)
1235 m_ExternalMemoryManager->Deallocate();
1239 for (
auto&& memoryManager : m_BackendMemoryMangers)
1243 memoryManager->Release();
1247 m_IsWorkingMemAllocated =
false;
// Fragment of Execute (signature start elided): allocates working memory, then
// runs the input, main and output queues in order, logging failures and
// recording per-workload timeline events when profiling is active.
1251 ProfilingGuid inferenceGuid)
1253 bool success =
true;
// Shared failure handler: logs and (per elided lines) flips `success`.
1255 auto Fail = [&](
const std::exception&
error)
1257 ARMNN_LOG(error) <<
"An error occurred attempting to execute a workload: " <<
error.what();
1263 #if !defined(ARMNN_DISABLE_THREADS)
1264 std::lock_guard<std::mutex> lockGuard(m_WorkingMemMutex);
1265 AllocateWorkingMemory(lockGuard);
1267 AllocateWorkingMemory();
1270 ProfilingDynamicGuid workloadInferenceID(0);
// Executes one queue, bracketing each workload with timeline events.
1271 auto ExecuteQueue = [&timelineUtils, &workloadInferenceID, &inferenceGuid](
WorkloadQueue& queue)
1273 for (
auto& workload : queue)
1277 workloadInferenceID = timelineUtils->RecordWorkloadInferenceAndStartOfLifeEvent(workload->GetGuid(),
1280 workload->Execute();
1283 timelineUtils->RecordEndOfLifeEvent(workloadInferenceID);
1288 ExecuteQueue(m_InputQueue);
1289 ExecuteQueue(m_WorkloadQueue);
1290 ExecuteQueue(m_OutputQueue);
1292 catch (
const RuntimeException& error)
1296 catch (
const std::runtime_error& error)
// Async-path input staging: imports the caller's const tensor memory into the
// given handle when the backend supports it, otherwise (per the copyFunc
// fallback below) copies the contents; throws MemoryImportException on failure.
1304 void LoadedNetwork::EnqueueInput(
const ConstTensor& inputTensor, ITensorHandle* inputTensorHandle)
1311 std::unique_ptr<ITensorHandle> tensorHandle =
1312 std::make_unique<ConstPassthroughTensorHandle>(inputTensor.GetInfo(),
1313 inputTensor.GetMemoryArea());
1314 void* mem = tensorHandle->Map(
false);
1316 if (inputTensorHandle->Import(mem, m_NetworkProperties.
m_InputSource))
1318 tensorHandle->Unmap();
// Unmap before throwing so the passthrough handle is not left mapped.
1321 tensorHandle->Unmap();
1322 throw MemoryImportException(
"EnqueueInput: Memory Import failed");
1326 throw MemoryImportException(
"EnqueueInput: Memory Import failed, backend does not support Import");
// Copy fallback: plain memcpy of the tensor contents.
1332 std::unique_ptr<ITensorHandle> tensorHandle =
1333 std::make_unique<ConstPassthroughTensorHandle>(inputTensor.GetInfo(), inputTensor.GetMemoryArea());
1335 auto copyFunc = [](
void* dst,
const void* src,
size_t size)
1337 memcpy(dst, src, size);
// Imports the caller's output buffer into the backend's output handle so the
// result is written directly into user memory; throws MemoryExportException
// when the import fails or the producer is an Input layer.
1350 void LoadedNetwork::ImportOutputTensor(
const Tensor& outputTensor, ITensorHandle* outputTensorHandle)
1352 ARMNN_ASSERT_MSG(outputTensorHandle !=
nullptr,
"Data should have been allocated.");
1356 std::unique_ptr<ITensorHandle> tensorHandle =
1357 std::make_unique<PassthroughTensorHandle>(outputTensor.GetInfo(),
1358 outputTensor.GetMemoryArea());
1360 void* mem = tensorHandle->Map(
false);
1361 bool importOk = outputTensorHandle->Import(mem, m_NetworkProperties.
m_OutputSource);
1362 tensorHandle->Unmap();
1366 throw MemoryExportException(
"ImportOutputTensor: Memory Export failed");
1371 throw MemoryExportException(
"ImportOutputTensor: Memory Export failed, attempting to export Input Layer");
// Fragment: memcpy-based fallback that copies computed output data into the
// caller's buffer via a passthrough handle (full context elided).
1379 auto copyFunc = [](
void* dst,
const void* src,
size_t size)
1381 memcpy(dst, src, size);
1384 std::unique_ptr<ITensorHandle> tensorHandle =
1385 std::make_unique<PassthroughTensorHandle>(outputTensor.
GetInfo(),
// Fragment: linear search of the supplied input tensors (match condition
// elided) returning the matching tensor.
1394 for (
auto inputTensorPair : inputTensors)
1399 return inputTensorPair.second;
// Fragment: linear search of the supplied output tensors (match condition
// elided) returning the matching tensor.
1407 for (
auto outputTensorPair : outputTensors)
1412 return outputTensorPair.second;
// Fragment of ImportInputs: two paths. First (through ~line 1482): reuse the
// pre-created per-input import handles, matching supplied tensors by binding
// id and importing in place. Second (from line 1487): create fresh import
// handles per tensor, verifying backend PreImportIOTensors capability.
1426 throw MemoryImportException(
"ImportInputs: Memory Import failed, NetworkProperties.m_ImportEnabled");
1429 if (inputTensors.size() > m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetNumInputs())
1431 throw MemoryImportException(
"ImportInputs: The number of tensors provided exceeds the number of inputs.");
1434 std::vector<ImportedInputId> importedInputs;
1435 Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
1436 unsigned int inputIndex = 0;
1439 auto outputTensorHandle = m_PreImportedInputHandles[inputIndex].m_TensorHandle.get();
// A null pre-created handle means this input cannot be pre-imported.
1441 if (!outputTensorHandle)
1447 auto layerBindingId = inputLayer->GetBindingId();
1448 auto it = std::find_if(inputTensors.begin(), inputTensors.end(), [=](
const auto& inputTensor)
1450 return inputTensor.first == layerBindingId;
1453 if (it == inputTensors.end())
1459 const auto& inputTensor = *it;
1460 std::unique_ptr<ITensorHandle> passThroughTensorHandle =
1461 std::make_unique<ConstPassthroughTensorHandle>(inputTensor.second.GetInfo(),
1462 inputTensor.second.GetMemoryArea());
// Import only when the memory is verifiably importable for this source.
1466 if (outputTensorHandle->CanBeImported(passThroughTensorHandle->Map(), forceImportMemorySource)
1467 && (outputTensorHandle->Import(passThroughTensorHandle->Map(), forceImportMemorySource)))
1469 importedInputs.push_back(inputIndex);
1471 passThroughTensorHandle->Unmap();
1475 ARMNN_LOG(
error) <<
"An error occurred attempting to import input_"
1476 << inputIndex <<
" : " << exception.
what();
1477 passThroughTensorHandle->Unmap();
1482 return importedInputs;
// Second path: build new import handles keyed by binding id.
1487 std::vector<ImportedInputId> importedInputs;
1488 Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
1490 for (
auto inputTensor : inputTensors)
1492 auto layerBindingId = inputTensor.first;
1495 return layer->GetBindingId() == layerBindingId;
1501 "ImportInputs: Memory Import failed, unknown LayerBindingId: {}", layerBindingId));
1504 const Layer* layer = *it;
1513 std::string er = backend->GetId();
1514 er +=
" does not have PreImportIOTensors capability";
1526 ImportedTensorHandlePin importedTensorHandlePin{layerBindingId,
1529 ITensorHandle* tensorHandle = importedTensorHandlePin.m_TensorHandle.get();
1534 fmt::format(
"ImportInputs: Memory Import failed, backend: "
1535 "{} does not support importing from source {}"
1539 std::unique_ptr<ITensorHandle> passThroughTensorHandle =
1540 std::make_unique<ConstPassthroughTensorHandle>(inputTensor.second.GetInfo(),
1541 inputTensor.second.GetMemoryArea());
1543 if (tensorHandle->
Import(passThroughTensorHandle->Map(), forceImportMemorySource))
// Ids are monotonically increasing across calls.
1545 importedInputs.push_back(m_CurImportedInputId++);
1546 passThroughTensorHandle->Unmap();
1550 passThroughTensorHandle->Unmap();
1554 m_PreImportedInputHandles.push_back(std::move(importedTensorHandlePin));
1556 return importedInputs;
// Fragment of ImportOutputs: mirrors ImportInputs — first path reuses
// pre-created output import handles; second path (from line 1620) creates new
// handles per supplied tensor after capability checks.
1568 throw MemoryImportException(
"ImportOutputs: Memory Import failed, NetworkProperties.m_ImportEnabled");
1571 if (outputTensors.size() != m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetNumOutputs())
1575 std::vector<ImportedOutputId> importedOutputs;
1576 Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
1578 unsigned int outputIndex = 0;
1581 auto inputTensorHandle = m_PreImportedOutputHandles[outputIndex].m_TensorHandle.get();
1582 if (!inputTensorHandle)
1588 auto layerBindingId = outputLayer->GetBindingId();
1589 auto it = std::find_if(outputTensors.begin(), outputTensors.end(), [=] (
const auto& outputTensor)
1591 return outputTensor.first == layerBindingId;
1594 if (it == outputTensors.end())
1600 const auto outputTensor = *it;
// Outputs import the user's raw buffer directly (no passthrough handle).
1604 if (inputTensorHandle->CanBeImported(outputTensor.second.GetMemoryArea(), forceImportMemorySource)
1605 && inputTensorHandle->Import(outputTensor.second.GetMemoryArea(), forceImportMemorySource))
1607 importedOutputs.push_back(outputIndex);
1612 ARMNN_LOG(
error) <<
"An error occurred attempting to import output_"
1613 << outputIndex <<
" : " << exception.
what();
1617 return importedOutputs;
// Second path: create fresh import handles per binding id.
1620 std::vector<ImportedOutputId> importedOutputs;
1621 Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
1623 for (
const auto& outputTensor : outputTensors)
1625 auto layerBindingId = outputTensor.first;
1628 return layer->GetBindingId() == layerBindingId;
1633 throw MemoryImportException(fmt::format(
"ImportOutputs: Memory Import failed, unknown LayerBindingId: {}",
1637 const Layer* layer = *it;
1646 std::string er = backend->GetId();
1647 er +=
" does not have PreImportIOTensors capability";
1658 ImportedTensorHandlePin importedTensorHandlePin{layerBindingId,
1661 ITensorHandle* tensorHandle = importedTensorHandlePin.m_TensorHandle.get();
1666 "{} does not support importing from source {}"
1667 , factoryId, forceImportMemorySource));
1670 if (tensorHandle->
Import(outputTensor.second.GetMemoryArea(), forceImportMemorySource))
1672 importedOutputs.push_back(m_CurImportedOutputId++);
1679 m_PreImportedOutputHandles.push_back(std::move(importedTensorHandlePin));
1682 return importedOutputs;
1687 for (
auto id : inputIds)
1689 if (
id > m_PreImportedInputHandles.size())
1694 auto& importedTensorHandle = m_PreImportedInputHandles[id].m_TensorHandle;
1695 if (!importedTensorHandle)
1698 fmt::format(
"ClearImportedInputs::ImportedInput with id: {} has already been deleted",
id));
1701 importedTensorHandle->Unimport();
1702 importedTensorHandle = {};
1708 for (
auto id : outputIds)
1710 if (
id > m_PreImportedOutputHandles.size())
1715 auto& importedTensorHandle = m_PreImportedOutputHandles[id].m_TensorHandle;
1716 if (!importedTensorHandle)
1719 fmt::format(
"ClearImportedOutputs::ImportedOutput with id: {} has already been deleted",
id));
1722 importedTensorHandle->Unimport();
1723 importedTensorHandle = {};
1730 std::vector<ImportedInputId> preImportedInputs,
1731 std::vector<ImportedOutputId> preImportedOutputs)
1733 const Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
1735 if (inputTensors.size() + preImportedInputs.size() != graph.
GetNumInputs())
1737 if (preImportedInputs.empty())
1744 "Number of inputs + preImportedInputs provided does not match network.");
1748 if (outputTensors.size() + preImportedOutputs.size() != graph.
GetNumOutputs())
1750 if (preImportedOutputs.empty())
1753 "Number of outputs provided does not match network.");
1758 "Number of outputs + preImportedOutputs provided does not match network.");
1765 unsigned int index = 0;
1766 for (
auto pair : inputTensors)
1768 bindingIds[index++] = pair.first;
1772 bindingIds[index++] = ValidateImportedInputID(
id);
1774 for (
auto pair : outputTensors)
1776 bindingIds[index++] = pair.first;
1780 bindingIds[index++] = ValidateImportedOutputID(
id);
1785 auto resetMemHandle = [&]()
1789 const LayerBindingId layerBindingId = m_PreImportedInputHandles[id].m_LayerBindingId;
1791 auto inputHandle = workingMemHandle.
GetInputHandle(layerBindingId);
1793 for (
auto it : inputConnections)
1801 const LayerBindingId layerBindingId = m_PreImportedOutputHandles[id].m_LayerBindingId;
1806 for (
auto it : outputConnections)
1813 std::unique_ptr<TimelineUtilityMethods> timelineUtils =
1814 TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
1815 ProfilingGuid inferenceGuid = m_ProfilingService->GetNextGuid();
1819 ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
1820 timelineUtils->CreateTypedEntity(inferenceGuid,LabelsAndEventClasses::INFERENCE_GUID);
1821 timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
1824 LabelsAndEventClasses::EXECUTION_OF_GUID);
1825 timelineUtils->RecordEvent(inferenceGuid,LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
1828 bool executionSucceeded =
true;
1833 timelineUtils->RecordEvent(inferenceGuid,LabelsAndEventClasses::ARMNN_PROFILING_EOL_EVENT_CLASS);
1834 timelineUtils->Commit();
1844 for (
auto pair : inputTensors)
1846 EnqueueInput(pair.second, workingMemHandle.
GetInputHandle(pair.first));
1852 const ImportedTensorHandlePin& importedInputPin = m_PreImportedInputHandles[id];
1853 const LayerBindingId layerBindingId = m_PreImportedInputHandles[id].m_LayerBindingId;
1854 const auto& preimportedHandle = importedInputPin.m_TensorHandle;
1857 for (
auto it : inputConnections)
1859 *it = preimportedHandle.get();
1867 for (
auto pair: outputTensors)
1869 ImportOutputTensor(pair.second, workingMemHandle.
GetOutputHandle(pair.first));
1875 const ImportedTensorHandlePin& importedOutputPin = m_PreImportedOutputHandles[id];
1876 const LayerBindingId layerBindingId = m_PreImportedOutputHandles[id].m_LayerBindingId;
1877 const auto& preimportedHandle = importedOutputPin.m_TensorHandle;
1880 for (
auto it : outputConnections)
1882 *it = preimportedHandle.get();
1887 auto Fail = [&](
const std::exception&
error)
1889 ARMNN_LOG(
error) <<
"An error occurred attempting to execute a workload: " <<
error.what();
1890 executionSucceeded =
false;
1892 ProfilingDynamicGuid workloadInferenceID(0);
1896 for (
unsigned int i = 0; i < m_WorkloadQueue.size(); ++i)
1898 auto& workload = m_WorkloadQueue[i];
1901 workloadInferenceID = timelineUtils->RecordWorkloadInferenceAndStartOfLifeEvent(workload->GetGuid(),
1909 timelineUtils->RecordEndOfLifeEvent(workloadInferenceID);
1918 catch (
const std::runtime_error&
error)
1931 for (
auto pair: outputTensors)
1951 Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
1954 std::vector<std::unique_ptr<ITensorHandle>> managedTensorHandles;
1956 std::vector<std::unique_ptr<ITensorHandle>> unmanagedTensorHandles;
1958 std::vector<WorkingMemDescriptor> workingMemDescriptors;
1959 std::vector<std::pair<BackendId, ExecutionData>> executionDataVec;
1961 auto GetTensorHandle = [&](
Layer* layer,
const OutputSlot& outputSlot)
1964 const TensorInfo& tensorInfo = outputSlot.GetTensorInfo();
1970 return m_WorkloadFactories.at(
id)->CreateTensorHandle(tensorInfo,
false);
1985 bool m_IsInputLayerHandle =
false;
1986 bool m_IsOutputLayerHandle =
false;
1992 std::unordered_map<const OutputSlot*, HandleInfo> outputToHandleInfoMap;
1994 unsigned int layerIndex = 0;
1995 for (
auto&& layer : order)
2005 bool isMemoryManaged =
true;
2006 bool isInputLayer =
false;
2007 bool isOutputLayer =
false;
2008 bool isConnectedToOutputLayer =
false;
2014 isInputLayer =
true;
2019 isOutputLayer =
true;
2022 unsigned int slotIndex = 0;
2027 for (
unsigned int i = 0; i < slot.GetNumConnections(); ++i)
2031 if (!isConnectedToOutputLayer)
2033 isConnectedToOutputLayer =
true;
2041 fmt::format(
"Layer name: '{0}' guid: '{1}' has two or more OutputLayers connected to it. "
2042 "This will prevent importing on the connected OutputLayers.",
2044 isMemoryManaged =
true;
2050 if (isMemoryManaged)
2052 managedTensorHandles.emplace_back(GetTensorHandle(layer, slot));
2053 tensorHandle = managedTensorHandles.back().get();
2057 unmanagedTensorHandles.emplace_back(GetTensorHandle(layer, slot));
2058 tensorHandle = unmanagedTensorHandles.back().get();
2061 workingMemDescriptor.
m_Outputs.push_back(tensorHandle);
2063 HandleInfo& handleInfo = outputToHandleInfoMap[&slot];
2064 handleInfo.m_TensorHandle = tensorHandle;
2067 if (isConnectedToOutputLayer)
2069 handleInfo.m_IsOutputLayerHandle =
true;
2070 handleInfo.m_OutputMemDescriptorCoords.m_OutputSlotCoords = {layerIndex, slotIndex};
2075 handleInfo.m_IsInputLayerHandle =
true;
2077 handleInfo.m_InputMemDescriptorCoords.m_LayerBindingId = bindingId;
2088 auto outputSlot = slot.GetConnectedOutputSlot();
2089 auto key = outputSlot->GetOwningLayer().GetGuid();
2092 auto found = m_ConstantTensorHandles.find(key);
2093 if (found != m_ConstantTensorHandles.end())
2096 workingMemDescriptor.
m_Inputs.push_back(tensorHandle);
2104 HandleInfo& handleInfo = outputToHandleInfoMap[outputSlot];
2105 handleInfo.m_TensorHandle = tensorHandle;
2106 handleInfo.m_IsOutputLayerHandle =
true;
2107 handleInfo.m_OutputMemDescriptorCoords.m_LayerBindingIds.push_back(bindingId);
2108 handleInfo.m_OutputMemDescriptorCoords.m_InputSlotCoords.push_back({layerIndex, 0});
2113 HandleInfo& handleInfo = outputToHandleInfoMap.at(outputSlot);
2115 ITensorHandle* inputTensorHandle = handleInfo.m_TensorHandle;
2116 workingMemDescriptor.
m_Inputs.push_back(inputTensorHandle);
2122 handleInfo.m_OutputMemDescriptorCoords.m_LayerBindingIds.push_back(bindingId);
2123 handleInfo.m_OutputMemDescriptorCoords.m_InputSlotCoords.push_back({layerIndex, 0});
2127 else if (handleInfo.m_IsOutputLayerHandle)
2129 handleInfo.m_OutputMemDescriptorCoords.m_InputSlotCoords.push_back({layerIndex, slot.GetSlotIndex()});
2134 if (handleInfo.m_IsInputLayerHandle)
2136 std::pair<LayerGuid, unsigned int> connectionLocation{layerIndex, slot.GetSlotIndex()};
2137 handleInfo.m_InputMemDescriptorCoords.m_InputSlotCoords.emplace_back(connectionLocation);
2147 std::pair<BackendId, ExecutionData> dataPair;
2150 executionDataVec.push_back(dataPair);
2151 workingMemDescriptors.push_back(workingMemDescriptor);
2157 std::vector<std::pair<std::shared_ptr<TensorMemory>,
MemorySource>> tensorMemory;
2159 auto externalMemoryManager = CreateExternalMemoryManger(tensorMemory);
2162 std::sort(tensorMemory.begin(), tensorMemory.end(),
2163 [](
const std::pair<std::shared_ptr<TensorMemory>,
MemorySource>& lhs,
2164 const std::pair<std::shared_ptr<TensorMemory>,
MemorySource>& rhs)
2166 return lhs.first->m_OutputSlotId < rhs.first->m_OutputSlotId;
2169 std::vector<WorkingMemHandle::InputMemDescriptorCoords> inputConnectionsInfo;
2170 std::vector<WorkingMemHandle::OutputMemDescriptorCoords> outputConnectionsInfo;
2172 for (
const auto& handleInfo: outputToHandleInfoMap)
2174 if (handleInfo.second.m_IsOutputLayerHandle)
2176 outputConnectionsInfo.emplace_back(handleInfo.second.m_OutputMemDescriptorCoords);
2179 if (handleInfo.second.m_IsInputLayerHandle)
2181 inputConnectionsInfo.emplace_back(handleInfo.second.m_InputMemDescriptorCoords);
2185 return std::make_unique<WorkingMemHandle>(networkId,
2186 inputConnectionsInfo,
2187 outputConnectionsInfo,
2188 workingMemDescriptors,
2189 std::move(externalMemoryManager),
2190 std::move(tensorMemory),
2191 std::move(managedTensorHandles),
2192 std::move(unmanagedTensorHandles),
2199 for (
auto&& workloadPtr: m_WorkloadQueue)
2201 workloadPtr.get()->RegisterDebugCallback(func);
// NOTE(review): this span is a gap-ridden extraction -- interior lines of the
// original function (braces, guard bodies, parts of conditions) are missing,
// and the leading integers on each line are artifacts of the extraction.
// Comments below describe only what the visible statements show.
//
// Visible intent: build per-backend memory-block lifetime records
// (m_MemBlockMap) for the async execution path by walking the graph in
// topological order, tracking each OutputSlot's first use and remaining
// reference count.
2206 void LoadedNetwork::CreateMemoryProfileAsync()
// Fields of a local PartialBlock bookkeeping struct (struct header and some
// fields fall in the dropped lines).
2210 unsigned int m_StartOfLife;
2211 unsigned int m_Lifetime;
2214 unsigned int m_Index;
// Round a byte count up to the next multiple of sizeof(float).
2219 auto align = [](
size_t numToAlign)
2221 const size_t alignment =
sizeof(float);
2222 return ((numToAlign + alignment - 1) / alignment) * alignment;
// In-progress block per output slot, kept until its last consumer is seen.
2225 std::unordered_map<const OutputSlot*, PartialBlock> memBlockTrackerMap;
2230 unsigned int timestep = 0;
2231 unsigned int outputIndex = 0;
// Topological order so lifetimes are measured in execution order.
2232 Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
2234 for (
auto&& layer : order)
2236 const LayerType& layerType = layer->GetType();
// Partial condition -- the preceding clause(s) of this test are in the
// dropped lines; presumably it filters layers excluded from profiling
// (TODO confirm against the original source).
2244 && layer->GetInputSlot(0).GetConnectedOutputSlot()->GetNumConnections() == 1)
2255 BackendId backendId = layer->GetBackendId();
2256 for (
auto& outputSlot : layer->GetOutputSlots())
// Guard body is missing; presumably skips slots whose backend cannot
// use externally managed memory -- TODO confirm.
2258 if (!m_SupportsExternallyManagedMemory[backendId])
// Start a new block for this output: birth timestep, aligned size,
// running index, and a lifetime equal to the number of consumers.
2263 PartialBlock partialBlock;
2265 partialBlock.m_StartOfLife = timestep;
2267 size_t alignedSize = align(outputSlot.GetOutputHandler().GetTensorInfo().GetNumBytes());
2268 partialBlock.m_MemSize = alignedSize;
2269 partialBlock.m_Index = outputIndex++;
2270 partialBlock.m_Lifetime = outputSlot.GetNumConnections();
2271 partialBlock.m_BackendId = backendId;
// Unconsumed output: record it immediately with a one-timestep lifetime
// (start == end) instead of tracking it.
2273 if (partialBlock.m_Lifetime == 0)
2275 m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
2276 partialBlock.m_StartOfLife,
2277 partialBlock.m_MemSize,
2279 partialBlock.m_Index);
2283 memBlockTrackerMap[&outputSlot] = partialBlock;
// Second pass over this layer's inputs: decrement producers' remaining
// lifetimes and emit finished blocks (the decrement/emit condition falls
// in the dropped lines).
2287 for (
auto& inputSlot : layer->GetInputSlots())
2289 const Layer& connectedInputLayer = inputSlot.GetConnectedOutputSlot()->GetOwningLayer();
2290 const LayerType& owningLayerType = connectedInputLayer.GetType();
2301 auto outputSlot = inputSlot.GetConnectedOutputSlot();
2303 PartialBlock& partialBlock = memBlockTrackerMap.at(outputSlot);
2305 auto& lifetime = partialBlock.m_Lifetime;
// Emits the completed block; the end-of-life argument between start and
// size is in the dropped lines.
2310 m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
2312 partialBlock.m_MemSize,
2314 partialBlock.m_Index);
// NOTE(review): gap-ridden extraction -- interior lines are missing and the
// leading integers are extraction artifacts. Comments describe only the
// visible statements.
//
// Visible intent: same lifetime profiling as the async variant above, but
// keyed by ITensorHandle* (after collapsing sub-tensor handles onto their
// root ancestor) rather than by OutputSlot, and it also accumulates the
// handles into m_Tensorhandles.
2321 void LoadedNetwork::CreateMemoryProfile()
// Follow GetParent() links so sub-tensors share their root handle's block.
2325 auto TraceSubTensorHandleAncestry = [](ITensorHandle*
const subTensorHandle)
2327 ITensorHandle* ancestor = subTensorHandle;
2328 while (ancestor && ancestor->GetParent())
2330 ancestor = ancestor->GetParent();
// Fields of a local PartialBlock bookkeeping struct (header dropped).
2337 unsigned int m_StartOfLife;
2338 unsigned int m_Lifetime;
2341 unsigned int m_Index;
2343 BackendId m_BackendId;
// Round a byte count up to the next multiple of sizeof(float).
2346 auto align = [](
size_t numToAlign)
2348 const size_t alignment =
sizeof(float);
2349 return ((numToAlign + alignment - 1) / alignment) * alignment;
// In-progress block per root tensor handle.
2352 std::unordered_map<ITensorHandle*, PartialBlock> memBlockTrackerMap;
2357 unsigned int timestep = 0;
2358 unsigned int outputIndex = 0;
2359 Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
2361 for (
auto&& layer : order)
2363 const LayerType& layerType = layer->GetType();
// Partial condition -- leading clause(s) dropped; presumably filters
// layers excluded from profiling (TODO confirm).
2371 && layer->GetInputSlot(0).GetConnectedOutputSlot()->GetNumConnections() == 1)
2382 BackendId backendId = layer->GetBackendId();
2383 for (
auto& outputSlot : layer->GetOutputSlots())
// Guard body dropped; presumably skips backends without externally
// managed memory support -- TODO confirm.
2385 if (!m_SupportsExternallyManagedMemory[backendId])
2390 ITensorHandle* tensorHandle = outputSlot.GetOutputHandler().GetData();
2391 tensorHandle = TraceSubTensorHandleAncestry(tensorHandle);
// First time this root handle is seen: open a new block.
2393 if (memBlockTrackerMap.find(tensorHandle) == memBlockTrackerMap.end())
2395 PartialBlock partialBlock;
2397 partialBlock.m_StartOfLife = timestep;
2399 size_t alignedSize = align(outputSlot.GetOutputHandler().GetTensorInfo().GetNumBytes());
2400 partialBlock.m_MemSize = alignedSize;
2401 partialBlock.m_Index = outputIndex++;
2402 partialBlock.m_Lifetime = outputSlot.GetNumConnections();
2403 partialBlock.m_BackendId = backendId;
// Unconsumed output: record immediately with start == end lifetime.
2405 if (partialBlock.m_Lifetime == 0)
2407 m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
2408 partialBlock.m_StartOfLife,
2409 partialBlock.m_MemSize,
2411 partialBlock.m_Index);
2415 memBlockTrackerMap[tensorHandle] = partialBlock;
2417 m_Tensorhandles.push_back(tensorHandle);
// Handle already tracked (shared by several slots, e.g. sub-tensors):
// extend its remaining lifetime by this slot's consumer count.
2422 memBlockTrackerMap.at(tensorHandle).m_Lifetime += outputSlot.GetNumConnections();
// Input pass: decrement producers' lifetimes and emit finished blocks
// (the decrement/emit condition falls in the dropped lines).
2426 for (
auto& inputSlot : layer->GetInputSlots())
2428 const Layer& connectedInputLayer = inputSlot.GetConnectedOutputSlot()->GetOwningLayer();
2429 const LayerType& owningLayerType = connectedInputLayer.GetType();
// Guard body dropped; presumably skips producers on backends without
// externally managed memory -- TODO confirm.
2439 if (!m_SupportsExternallyManagedMemory[connectedInputLayer.GetBackendId()])
2444 auto outputSlot = inputSlot.GetConnectedOutputSlot();
2446 ITensorHandle* tensorHandle = outputSlot->GetOutputHandler().GetData();
2447 tensorHandle = TraceSubTensorHandleAncestry(tensorHandle);
2449 PartialBlock& partialBlock = memBlockTrackerMap.at(tensorHandle);
2451 auto& lifetime = partialBlock.m_Lifetime;
// Emits the completed block; the end-of-life argument between start and
// size is in the dropped lines.
2456 m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
2458 partialBlock.m_MemSize,
2460 partialBlock.m_Index);
// NOTE(review): extraction artifacts -- leading integers on each line and a
// few dropped lines (mostly braces). Comments describe the visible code.
//
// Builds a MemoryManager from the per-backend memory bins in m_MemBinMap.
// For each backend it selects an allocator (a registered custom allocator
// from allocatorMap if present, otherwise the backend's default), converts
// every memory bin into a BufferStorage of TensorMemory entries, and also
// appends each TensorMemory (paired with the allocator's memory source)
// to the caller-provided tensorMemoryVec out-parameter.
2468 std::unique_ptr<MemoryManager> LoadedNetwork::CreateExternalMemoryManger(
2469 std::vector<std::pair<std::shared_ptr<TensorMemory>,
MemorySource>>& tensorMemoryVec)
2471 std::unique_ptr<MemoryManager> memoryManager = std::make_unique<MemoryManager>();
2474 for (
auto& backend : m_MemBinMap)
2476 std::vector<BufferStorage> bufferStorageVec;
// Prefer a user-registered custom allocator for this backend id; fall
// back to the backend's default allocator otherwise.
2478 std::shared_ptr<ICustomAllocator> backendAllocator;
2479 if (allocatorMap.find(backend.first) != allocatorMap.end())
2481 backendAllocator = allocatorMap[backend.first];
2485 backendAllocator = m_Backends[backend.first]->GetDefaultAllocator();
// One BufferStorage per memory bin; each bin's blocks become shared
// TensorMemory records (offset + index) referenced from both the
// storage vector and the caller's tensorMemoryVec.
2488 for (
auto& memBin : backend.second)
2490 BufferStorage bufferStorage;
2491 bufferStorage.m_BufferSize = memBin.m_MemSize;
2492 bufferStorage.m_TensorMemoryVector.reserve(memBin.m_MemBlocks.size());
2494 for (
auto& memBlock : memBin.m_MemBlocks)
2496 auto tensorMemory = std::make_shared<TensorMemory>(TensorMemory{memBlock.m_Offset, memBlock.m_Index});
2498 tensorMemoryVec.emplace_back(tensorMemory, backendAllocator->GetMemorySourceType());
2499 bufferStorage.m_TensorMemoryVector.emplace_back(tensorMemory);
2502 bufferStorageVec.emplace_back(std::move(bufferStorage));
// NOTE(review): the literal 4 is presumably an alignment/word-size argument
// to StoreMemToAllocate -- confirm against the MemoryManager API.
2505 memoryManager->StoreMemToAllocate(bufferStorageVec, backendAllocator, 4);
2508 return memoryManager;
2515 const auto& importedTensorHandlePin = m_PreImportedInputHandles.at(
id);
2516 if (!importedTensorHandlePin.m_TensorHandle)
2518 throw InvalidArgumentException(fmt::format(
"LoadedNetwork::Execute:"
2519 "PreImportedInput: {} has been deleted",
id));
2521 return importedTensorHandlePin.m_LayerBindingId;
2523 catch (
const std::out_of_range&)
2525 throw InvalidArgumentException(fmt::format(
"LoadedNetwork::Execute: Unknown ImportedInputId: {}",
id));
2533 const auto& importedTensorHandlePin = m_PreImportedOutputHandles.at(
id);
2534 if (!importedTensorHandlePin.m_TensorHandle)
2536 throw InvalidArgumentException(fmt::format(
"LoadedNetwork::Execute: "
2537 "PreImportedOutput: {} has been deleted",
id));
2539 return importedTensorHandlePin.m_LayerBindingId;
2541 catch (
const std::out_of_range&)
2543 throw InvalidArgumentException(fmt::format(
"LoadedNetwork::Execute: Unknown ImportedOutputId: {}",
id));