ArmNN 22.08
LoadedNetwork.cpp
//
// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "LoadedNetwork.hpp"
#include "Layer.hpp"
#include "Graph.hpp"
#include "Profiling.hpp"
#include "HeapProfiling.hpp"
#include "WorkingMemHandle.hpp"
#include "ExecutionData.hpp"

#include <armnn/BackendHelper.hpp>
#include <armnn/BackendRegistry.hpp>
#include <armnn/Logging.hpp>

#include <armnn/backends/TensorHandle.hpp>
#include <armnn/backends/IBackendInternal.hpp>
#include <armnn/backends/IMemoryManager.hpp>
#include <armnn/backends/MemCopyWorkload.hpp>

#include <armnn/profiling/ArmNNProfiling.hpp>

#include <armnn/utility/Assert.hpp>

#include <backendsCommon/MemSyncWorkload.hpp>

#include <common/include/Processes.hpp>

#include <fmt/format.h>

namespace armnn
{

using namespace std;
using namespace arm::pipe;

namespace
{

template <typename ExceptionType>
std::string ToErrorMessage(const char* prefix, const ExceptionType& error)
{
    std::stringstream ss;
    ss << prefix << " " << error.what();
    return ss.str();
}

void AddLayerStructure(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
                       const Layer& layer,
                       ProfilingGuid networkGuid)
{
    // Add layer to the post-optimisation network structure
    std::string layerName = layer.GetNameStr().empty() ? "<Unnamed>" : layer.GetNameStr();
    timelineUtils->CreateNamedTypedChildEntity(layer.GetGuid(),
                                               networkGuid,
                                               layerName,
                                               LabelsAndEventClasses::LAYER_GUID);
    for (auto&& input : layer.GetInputSlots())
    {
        const IOutputSlot* source = input.GetConnectedOutputSlot();
        ARMNN_ASSERT(source != nullptr);
        timelineUtils->CreateConnectionRelationship(ProfilingRelationshipType::RetentionLink,
                                                    source->GetOwningLayerGuid(),
                                                    layer.GetGuid());
    }
}

void AddWorkloadStructure(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
                          std::unique_ptr<IWorkload>& workload,
                          const Layer& layer)
{
    // Add workload to the post-optimisation network structure
    timelineUtils->CreateTypedEntity(workload->GetGuid(), LabelsAndEventClasses::WORKLOAD_GUID);
    timelineUtils->MarkEntityWithLabel(workload->GetGuid(),
                                       layer.GetBackendId().Get(),
                                       LabelsAndEventClasses::BACKENDID_GUID);

    // Link the workload to the layer
    timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
                                      layer.GetGuid(),
                                      workload->GetGuid(),
                                      LabelsAndEventClasses::CHILD_GUID);
}

} // anonymous namespace

/**
 * This function performs a sanity check to ensure that the combination of input and output memory sources matches the
 * values for importEnabled and exportEnabled that were specified during optimization. During optimization the tensor
 * handle factories are chosen based on whether import and export are enabled. If the user then specifies something
 * incompatible here it can lead to problems.
 *
 * @param optimizedOptions The backend options captured on the optimized network during Optimize().
 * @param networkProperties The network properties supplied when loading the network.
 */
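// Illustrative sketch (not part of the original file): a configuration that fails this
// check. If the network was optimized with import disabled (the OptimizerOptions default)
// but is then loaded with an input source other than Undefined, loading throws.
// 'optimizerOptions' and 'props' are hypothetical local variables.
//
//     OptimizerOptions optimizerOptions;                // m_ImportEnabled defaults to false
//     // ... Optimize(network, backends, deviceSpec, optimizerOptions) ...
//     INetworkProperties props(/*asyncEnabled=*/false,
//                              MemorySource::Malloc,    // implies import: mismatch!
//                              MemorySource::Undefined);
//     // Loading with 'props' reaches this function, which throws InvalidArgumentException.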
void ValidateSourcesMatchOptimizedNetwork(std::vector<BackendOptions> optimizedOptions,
                                          const INetworkProperties& networkProperties)
{
    // Find the "Global" backend options. During the optimize phase the values of importEnabled and exportEnabled are
    // added as backend options.
    const vector<BackendOptions>::iterator& backendItr =
        find_if(optimizedOptions.begin(), optimizedOptions.end(), [](const BackendOptions& backend) {
            return backend.GetBackendId().Get() == "Global";
        });
    bool importEnabled = false;
    bool exportEnabled = false;
    if (backendItr != optimizedOptions.end())
    {
        // Find the importEnabled and exportEnabled values.
        for (size_t i = 0; i < backendItr->GetOptionCount(); i++)
        {
            const BackendOptions::BackendOption& option = backendItr->GetOption(i);
            if (option.GetName() == "ImportEnabled")
            {
                importEnabled = option.GetValue().AsBool();
            }
            if (option.GetName() == "ExportEnabled")
            {
                exportEnabled = option.GetValue().AsBool();
            }
        }
    }

    // Now that we have values for import and export compare them to the MemorySource variables.
    // Any value of MemorySource that's not "Undefined" implies that we need to do an import of some kind.
    if ((networkProperties.m_InputSource == MemorySource::Undefined && importEnabled) ||
        (networkProperties.m_InputSource != MemorySource::Undefined && !importEnabled))
    {
        auto message = fmt::format("The input memory source specified, '{0}',", networkProperties.m_InputSource);
        if (!importEnabled)
        {
            message.append(" requires that memory import be enabled. However, "
                           "it was disabled when this network was optimized.");
        }
        else
        {
            message.append(" requires that memory import be disabled. However, "
                           "it was enabled when this network was optimized.");
        }
        throw InvalidArgumentException(message);
    }

    if ((networkProperties.m_OutputSource == MemorySource::Undefined && exportEnabled) ||
        (networkProperties.m_OutputSource != MemorySource::Undefined && !exportEnabled))
    {
        auto message = fmt::format("The output memory source specified, '{0}',", networkProperties.m_OutputSource);
        if (!exportEnabled)
        {
            message.append(" requires that memory export be enabled. However, "
                           "it was disabled when this network was optimized.");
        }
        else
        {
            message.append(" requires that memory export be disabled. However, "
                           "it was enabled when this network was optimized.");
        }
        throw InvalidArgumentException(message);
    }
}

std::unique_ptr<LoadedNetwork> LoadedNetwork::MakeLoadedNetwork(std::unique_ptr<IOptimizedNetwork> net,
                                                                std::string& errorMessage,
                                                                const INetworkProperties& networkProperties,
                                                                arm::pipe::IProfilingService* profilingService)
{
    std::unique_ptr<LoadedNetwork> loadedNetwork;

    auto Fail = [&](const std::exception& error) -> std::unique_ptr<LoadedNetwork>
    {
        errorMessage = ToErrorMessage("An error occurred when preparing the network workloads: ", error);
        ARMNN_LOG(error) << errorMessage;

        return std::unique_ptr<LoadedNetwork>();
    };

    try
    {
        loadedNetwork.reset(new LoadedNetwork(std::move(net), networkProperties, profilingService));
    }
    catch (const armnn::RuntimeException& error)
    {
        return Fail(error);
    }
    catch (const armnn::Exception& error)
    {
        return Fail(error);
    }
    catch (const std::runtime_error& error)
    {
        return Fail(error);
    }

    return loadedNetwork;
}
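
// Usage sketch (not part of the original file): applications normally reach
// MakeLoadedNetwork() indirectly through IRuntime::LoadNetwork(). The runtime and
// optimized network below are hypothetical.
//
//     armnn::NetworkId networkId;
//     std::string errorMessage;
//     armnn::INetworkProperties props(/*asyncEnabled=*/false,
//                                     armnn::MemorySource::Undefined,
//                                     armnn::MemorySource::Undefined);
//     runtime->LoadNetwork(networkId, std::move(optimizedNet), errorMessage, props);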

LoadedNetwork::LoadedNetwork(std::unique_ptr<IOptimizedNetwork> net,
                             const INetworkProperties& networkProperties,
                             arm::pipe::IProfilingService* profilingService) :
                             m_OptimizedNetwork(std::move(net)),
                             m_NetworkProperties(networkProperties),
                             m_TensorHandleFactoryRegistry(),
                             m_ProfilingService(profilingService)
{
    // Get the profiler and register it for the current thread.
    const std::shared_ptr<IProfiler>& profiler = m_OptimizedNetwork->GetProfiler();
    ProfilerManager::GetInstance().RegisterProfiler(profiler.get());

    profiler->EnableProfiling(networkProperties.m_ProfilingEnabled);

    profiler->EnableNetworkDetailsToStdOut(networkProperties.m_OutputNetworkDetailsMethod);

    // We need to check that the memory sources match up with the values of import and export specified during the
    // optimize phase. If they don't this will throw an exception.
    ValidateSourcesMatchOptimizedNetwork(m_OptimizedNetwork.get()->pOptimizedNetworkImpl->GetModelOptions(),
                                         m_NetworkProperties);

    // First create tensor handlers, backends and workload factories.
    // Handlers are created before workloads because workload creation can modify
    // some of the handlers (for example the splitter and concat layers).

    bool useExternalMemoryManager = false;
    bool useInternalMemoryManager = false;
    Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
    // Ensure topological order
    order.SetLayersOutOfOrder();
    order.TopologicalSort();

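    // For synchronous execution, track per input/output whether the corresponding
    // tensor handle currently points at user-imported memory (see EnqueueWorkload()).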
    if (!networkProperties.m_AsyncEnabled)
    {
        m_IsInputImported = std::vector<bool>(order.GetNumInputs(), false);
        m_IsOutputImported = std::vector<bool>(order.GetNumOutputs(), false);
    }

    for (auto&& layer : order)
    {
        auto const& backendId = layer->GetBackendId();
        if (m_Backends.count(backendId) == 0)
        {
            auto createBackend = BackendRegistryInstance().GetFactory(backendId);
            auto it = m_Backends.emplace(std::make_pair(backendId, createBackend()));

            IBackendInternal* backend = it.first->second.get();

            // If we're doing async execution verify that the backend supports it and ExternallyManagedMemory.
            if (networkProperties.m_AsyncEnabled)
            {
                if (!HasCapability(BackendOptions::BackendOption{"AsyncExecution", true}, backend->GetCapabilities()))
                {
                    std::string er = backend->GetId();
                    er += " does not support AsyncExecution";
                    throw BackendCapabilityException(er);
                }
                if (!HasCapability(BackendOptions::BackendOption{"ExternallyManagedMemory", true},
                                   backend->GetCapabilities()))
                {
                    std::string er = backend->GetId();
                    er += " does not support ExternallyManagedMemory\n";
                    er += "AsyncEnabled networks require all backends to support ExternallyManagedMemory";
                    throw BackendCapabilityException(er);
                }
                m_SupportsExternallyManagedMemory[backend->GetId()] = true;
                useExternalMemoryManager = true;
            }
            else
            {
                m_SupportsExternallyManagedMemory[backend->GetId()] = false;
                useInternalMemoryManager = true;
            }

            IBackendInternal::IWorkloadFactoryPtr workloadFactory;
            if (backend->SupportsTensorAllocatorAPI())
            {
                workloadFactory = backend->CreateWorkloadFactory(
                    m_TensorHandleFactoryRegistry,
                    m_OptimizedNetwork->pOptimizedNetworkImpl->GetModelOptions(),
                    static_cast<MemorySourceFlags>(m_NetworkProperties.m_InputSource),
                    static_cast<MemorySourceFlags>(m_NetworkProperties.m_OutputSource));
            }
            else
            {
                m_BackendMemoryMangers.emplace_back(backend->CreateMemoryManager());
                workloadFactory = backend->CreateWorkloadFactory(
                    m_BackendMemoryMangers.back(), m_OptimizedNetwork->pOptimizedNetworkImpl->GetModelOptions());
            }
            m_WorkloadFactories[backendId] = std::move(workloadFactory);
        }
    }

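    // Tensor handles are memory-managed by their backend unless the memory is either
    // externally managed (async networks) or about to be imported/exported, in which
    // case the handles must be able to wrap user memory rather than own an allocation.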
    if (!networkProperties.m_AsyncEnabled)
    {
        for (auto&& layer : order)
        {
            auto& workloadFactory = GetWorkloadFactory(*layer);
            bool supportsExternalManager = m_SupportsExternallyManagedMemory[layer->GetBackendId()];

            switch (layer->GetType())
            {
                case LayerType::Input:
                case LayerType::MemImport:
                {
                    // If IsImportEnabled is true then we need to set IsMemoryManaged
                    // to false when creating TensorHandles
                    layer->CreateTensorHandles(m_TensorHandleFactoryRegistry,
                                               workloadFactory,
                                               !supportsExternalManager && !m_NetworkProperties.m_ImportEnabled);
                    break;
                }
                case LayerType::Constant:
                {
                    layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory, true);
                    break;
                }
                default:
                {
                    // Look for a layer with 1 OutputSlot which has 1 connection and that connection is an Output Layer.
                    // If Export is enabled disable memory management so we can export, otherwise we do a copy.
                    if ((layer->GetNumOutputSlots() == 1) &&
                        (layer->GetOutputSlots()[0].GetNumConnections() == 1) &&
                        (layer->GetOutputSlots()[0].GetConnection(0)->GetOwningLayer().GetType() == LayerType::Output))
                    {
                        layer->CreateTensorHandles(m_TensorHandleFactoryRegistry,
                                                   workloadFactory,
                                                   !supportsExternalManager && !m_NetworkProperties.m_ExportEnabled);
                    }
                    else
                    {
                        layer->CreateTensorHandles(m_TensorHandleFactoryRegistry,
                                                   workloadFactory,
                                                   !supportsExternalManager);
                    }
                }
            }
        }
    }

346 
347  ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
348  std::unique_ptr<TimelineUtilityMethods> timelineUtils =
349  TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
350  if (timelineUtils)
351  {
352  timelineUtils->CreateTypedEntity(networkGuid, LabelsAndEventClasses::NETWORK_GUID);
353  // Mark the network with a start of life event
354  timelineUtils->RecordEvent(networkGuid, LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
355  // and with the process ID
356  int processID = arm::pipe::GetCurrentProcessId();
357  std::stringstream ss;
358  ss << processID;
359  timelineUtils->MarkEntityWithLabel(networkGuid, ss.str(), LabelsAndEventClasses::PROCESS_ID_GUID);
360  }

    std::vector<IWorkload*> ConstWorkloads;

    // Then create workloads.
    {
        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "LoadNetwork_CreateWorkloads");
        for (auto&& layer : order)
        {
            if (timelineUtils)
            {
                // Add layer to the post-optimisation network structure
                AddLayerStructure(timelineUtils, *layer, networkGuid);
            }

            const IWorkloadFactory& workloadFactory = GetWorkloadFactory(*layer);

            switch (layer->GetType())
            {
                case LayerType::Input:
                case LayerType::Output:
                {
                    // Inputs and outputs are treated in a special way - see EnqueueInput() and EnqueueOutput().
                    break;
                }
                default:
                {
                    auto workload = layer->CreateWorkload(workloadFactory);

                    if (!workload)
                    {
                        const char* const layerName =
                            layer->GetNameStr().length() != 0 ? layer->GetName() : "<Unnamed>";
                        throw InvalidArgumentException(
                            fmt::format("No workload created for layer (name: '{0}' type: '{1}') (compute '{2}')",
                                        layerName, static_cast<int>(layer->GetType()), layer->GetBackendId().Get()
                        ));
                    }

                    if (timelineUtils)
                    {
                        // Add workload to the post-optimisation network structure
                        AddWorkloadStructure(timelineUtils, workload, *layer);
                    }

                    // For async networks ConstantWorkloads are managed exclusively by LoadedNetwork
                    // and are separated out from the other workloads
                    if ((networkProperties.m_AsyncEnabled || useExternalMemoryManager) &&
                        layer->GetType() == LayerType::Constant)
                    {
                        m_ConstantTensorHandles[layer->GetGuid()] =
                            layer->GetOutputSlot(0).GetOutputHandler().GetData();
                        m_ConstantWorkloads[layer->GetGuid()] = std::move(workload);
                    }
                    else
                    {
                        m_WorkloadQueue.push_back(std::move(workload));

                        if (layer->GetType() == LayerType::Constant)
                        {
                            // Place the Constant Workloads into a queue so that they can be executed first
                            ConstWorkloads.push_back(m_WorkloadQueue.back().get());
                        }
                    }
                    // Release the constant data in the layer.
                    layer->ReleaseConstantData();
                    break;
                }
            }
        }
    }

    // Gather information about workloads for inputs & outputs
    if (!networkProperties.m_AsyncEnabled && m_WorkloadQueue.size() != 0)
    {
        const int noOfInputs = armnn::numeric_cast<int>(order.GetNumInputs());

        // Get indices of all workloads connected to each input and
        // check if they support tensor handle replacement
        for (const BindableLayer* layer : order.GetInputLayers())
        {
            const auto bindingId = layer->GetBindingId();

            bool supportsReplacement = true;

            for (const auto inputSlot : layer->GetOutputSlot(0).GetConnections())
            {
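                // Input layers produce no workloads, so a layer's position in the
                // topologically sorted graph minus the number of inputs gives its
                // index into m_WorkloadQueue.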
                auto workloadIndex = std::distance(order.begin(), order.GetPosInGraph(inputSlot->GetOwningLayer()));
                workloadIndex -= noOfInputs;

                m_InputWorkloadSlotPairs[bindingId].emplace_back(WorkloadIndices{
                    armnn::numeric_cast<unsigned int>(workloadIndex), inputSlot->GetSlotIndex()});

                auto workload = m_WorkloadQueue[m_InputWorkloadSlotPairs[bindingId].back().m_WorkloadIndex].get();
                supportsReplacement &= workload->SupportsTensorHandleReplacement();
            }

            ITensorHandleFactory::FactoryId factoryId = layer->GetOutputSlot(0).GetTensorHandleFactoryId();
            // Get matching import factory Id
            ITensorHandleFactory::FactoryId importFactoryId =
                m_TensorHandleFactoryRegistry.GetMatchingImportFactoryId(factoryId);

            ITensorHandleFactory* importFactory = m_TensorHandleFactoryRegistry.GetFactory(importFactoryId);

            if (supportsReplacement && importFactory)
            {
                m_PreImportedInputHandles.emplace_back(
                    bindingId, importFactory->CreateTensorHandle(layer->GetOutputSlot(0).GetTensorInfo(), false));
            }
            else
            {
                m_PreImportedInputHandles.emplace_back(bindingId, nullptr);
            }
        }

        // Get indices of all workloads connected to each output and
        // check if they support tensor handle replacement
        for (const BindableLayer* layer : order.GetOutputLayers())
        {
            const auto bindingId = layer->GetBindingId();

            const auto outputSlot = layer->GetInputSlot(0).GetConnectedOutputSlot();
            auto& indices = m_OutputWorkloadSlotPairs[bindingId];

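            // Same graph-position-to-workload-index mapping as for the inputs above.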
            auto workloadIndex = std::distance(order.begin(), order.GetPosInGraph(outputSlot->GetOwningLayer()));
            workloadIndex -= noOfInputs;

            indices.m_OutputSlotIndices = WorkloadIndices{numeric_cast<unsigned int>(workloadIndex),
                                                          outputSlot->CalculateIndexOnOwner()};

            bool supportsReplacement = true;
            auto outputWorkload = m_WorkloadQueue[indices.m_OutputSlotIndices.m_WorkloadIndex].get();
            supportsReplacement &= outputWorkload->SupportsTensorHandleReplacement();

            for (auto& inputSlot : outputSlot->GetConnections())
            {
                if (inputSlot->GetOwningLayer().GetType() != LayerType::Output)
                {
                    auto inWorkloadIndex = std::distance(order.begin(),
                                                         order.GetPosInGraph(inputSlot->GetOwningLayer()));
                    inWorkloadIndex -= noOfInputs;
                    indices.m_InputSlotIndices.emplace_back(
                        WorkloadIndices{numeric_cast<unsigned int>(inWorkloadIndex), inputSlot->GetSlotIndex()});
                    auto inputWorkload = m_WorkloadQueue[indices.m_InputSlotIndices.back().m_WorkloadIndex].get();
                    supportsReplacement &= inputWorkload->SupportsTensorHandleReplacement();
                }
            }

            ITensorHandleFactory::FactoryId factoryId = outputSlot->GetTensorHandleFactoryId();
            // Get matching import factory Id
            ITensorHandleFactory::FactoryId importFactoryId =
                m_TensorHandleFactoryRegistry.GetMatchingImportFactoryId(factoryId);
            ITensorHandleFactory* importFactory = m_TensorHandleFactoryRegistry.GetFactory(importFactoryId);

            if (supportsReplacement && importFactory)
            {
                m_PreImportedOutputHandles.emplace_back(
                    bindingId, importFactory->CreateTensorHandle(outputSlot->GetTensorInfo(), false));
            }
            else
            {
                m_PreImportedOutputHandles.emplace_back(bindingId, nullptr);
            }
        }
    }

    for (auto&& workloadFactory : m_WorkloadFactories)
    {
        workloadFactory.second->AfterWorkloadsCreated();
    }

    if (timelineUtils)
    {
        // Commit to send the post-optimisation network structure
        timelineUtils->Commit();
    }

    if (useExternalMemoryManager)
    {
        if (networkProperties.m_AsyncEnabled)
        {
            CreateMemoryProfileAsync();
        }
        else
        {
            CreateMemoryProfile();
        }

        auto backendStrategyMap = BackendRegistryInstance().GetMemoryOptimizerStrategies();
        for (auto& backendMemoryProfile : m_MemBlockMap)
        {
            const BackendId& backendId = backendMemoryProfile.first;
            if (backendStrategyMap.find(backendId) != backendStrategyMap.end())
            {
                m_MemBinMap[backendId] = backendStrategyMap[backendId]->Optimize(backendMemoryProfile.second);
            }
            else
            {
                m_MemBinMap[backendId] = m_ConstantStrategy->Optimize(backendMemoryProfile.second);
            }
        }

        if (!networkProperties.m_AsyncEnabled)
        {
            m_ExternalMemoryManager = CreateExternalMemoryManger(m_TensorMemory);

            // Sort m_TensorMemory, so its order matches m_Tensorhandles
            std::sort(m_TensorMemory.begin(), m_TensorMemory.end(),
                      [](const std::pair<std::shared_ptr<TensorMemory>, MemorySource>& lhs,
                         const std::pair<std::shared_ptr<TensorMemory>, MemorySource>& rhs)
                      {
                          return lhs.first->m_OutputSlotId < rhs.first->m_OutputSlotId;
                      });
        }
    }

    // Now that the intermediate tensor memory has been set up,
    // do any post allocation configuration for each workload.
    if (!networkProperties.m_AsyncEnabled)
    {
        if (useInternalMemoryManager)
        {
            // Set up memory.
            m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().AllocateDynamicBuffers();
        }

        for (auto& workload : m_WorkloadQueue)
        {
            workload->PostAllocationConfigure();
        }
    }

    if (useExternalMemoryManager)
    {
        if (!networkProperties.m_AsyncEnabled)
        {
            AllocateAndExecuteConstantWorkloads();
        }
        else
        {
            AllocateAndExecuteConstantWorkloadsAsync();
        }
    }
    // If synchronous, execute all constant layer workloads
    if (!networkProperties.m_AsyncEnabled)
    {
        for (auto workload : ConstWorkloads)
        {
            workload->Execute();
        }
    }
}

void LoadedNetwork::AllocateAndExecuteConstantWorkloads()
{
    ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "LoadNetwork_AllocateAndExecuteConstants");
    for (auto& pair : m_ConstantWorkloads)
    {
        auto tensorHandle = m_ConstantTensorHandles[pair.first];
        tensorHandle->Allocate();
        pair.second->Execute();
    }
}

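// Async variant: constant layers are given fresh, backend-created tensor handles and
// their workloads are run once via ExecuteAsync(), using per-backend ExecutionData.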
void LoadedNetwork::AllocateAndExecuteConstantWorkloadsAsync()
{
    ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "LoadNetwork_AllocateAndExecuteConstants");
    Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
    for (auto&& layer : order)
    {
        if (layer->GetType() == LayerType::Constant)
        {
            const auto& outSlot = layer->GetOutputSlots()[0];
            const auto factoryId = outSlot.GetTensorHandleFactoryId();
            ARMNN_ASSERT(factoryId != ITensorHandleFactory::LegacyFactoryId);
            auto& workloadFactory = GetWorkloadFactory(*layer);

            layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory);
            ITensorHandle* tensorHandle = outSlot.GetOutputHandler().GetData();

            m_ConstantTensorHandles[layer->GetGuid()] = tensorHandle;
            tensorHandle->Allocate();

            auto& backend = m_Backends.at(layer->GetBackendId());

            WorkingMemDescriptor memDesc;
            memDesc.m_Outputs.push_back(tensorHandle);

            ExecutionData executionData = backend->CreateExecutionData(memDesc);
            m_ConstantWorkloads[layer->GetGuid()]->ExecuteAsync(executionData);
        }
    }
}

void LoadedNetwork::SendNetworkStructure(arm::pipe::IProfilingService& profilingService)
{
    ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "LoadNetwork_SendNetworkStructure");
    Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
    ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();

    std::unique_ptr<TimelineUtilityMethods> timelineUtils =
        TimelineUtilityMethods::GetTimelineUtils(profilingService);

    timelineUtils->CreateTypedEntity(networkGuid, LabelsAndEventClasses::NETWORK_GUID);

    for (auto&& layer : order)
    {
        // Add layer to the post-optimisation network structure
        AddLayerStructure(timelineUtils, *layer, networkGuid);
        switch (layer->GetType())
        {
            case LayerType::Input:
            case LayerType::Output:
            {
                // Inputs and outputs are treated in a special way - see EnqueueInput() and EnqueueOutput().
                break;
            }
            default:
            {
                for (auto& workload : m_WorkloadQueue)
                {
                    // Add workload to the post-optimisation network structure
                    AddWorkloadStructure(timelineUtils, workload, *layer);
                }
                break;
            }
        }
    }
    // Commit to send the post-optimisation network structure
    timelineUtils->Commit();
}

ProfilingGuid LoadedNetwork::GetNetworkGuid()
{
    return m_OptimizedNetwork->GetGuid();
}

TensorInfo LoadedNetwork::GetInputTensorInfo(LayerBindingId layerId) const
{
    for (auto&& inputLayer : m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetInputLayers())
    {
        ARMNN_ASSERT_MSG(inputLayer->GetNumOutputSlots() == 1, "Input layer should have exactly 1 output slot");
        if (inputLayer->GetBindingId() == layerId)
        {
            return inputLayer->GetOutputSlot(0).GetTensorInfo();
        }
    }

    throw InvalidArgumentException(fmt::format("No input layer is associated with id {}", layerId));
}

TensorInfo LoadedNetwork::GetOutputTensorInfo(LayerBindingId layerId) const
{
    for (auto&& outputLayer : m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetOutputLayers())
    {
        ARMNN_ASSERT_MSG(outputLayer->GetNumInputSlots() == 1, "Output layer should have exactly 1 input slot");
        ARMNN_ASSERT_MSG(outputLayer->GetInputSlot(0).GetConnection(), "Input slot on Output layer must be connected");
        if (outputLayer->GetBindingId() == layerId)
        {
            return outputLayer->GetInputSlot(0).GetConnection()->GetTensorInfo();
        }
    }

    throw InvalidArgumentException(fmt::format("No output layer is associated with id {}", layerId));
}

const IWorkloadFactory& LoadedNetwork::GetWorkloadFactory(const Layer& layer) const
{
    const IWorkloadFactory* workloadFactory = nullptr;

    auto it = m_WorkloadFactories.find(layer.GetBackendId());
    if (it == m_WorkloadFactories.end())
    {
        throw RuntimeException(fmt::format("No workload factory for {0} to be used for layer: {1}",
                                           layer.GetBackendId().Get(),
                                           layer.GetNameStr()),
                               CHECK_LOCATION());
    }

    workloadFactory = it->second.get();

    ARMNN_ASSERT_MSG(workloadFactory, "No workload factory");

    std::string reasonIfUnsupported;
    ARMNN_ASSERT_MSG(IWorkloadFactory::IsLayerSupported(layer,
                                                        {},
                                                        reasonIfUnsupported,
                                                        m_OptimizedNetwork->pOptimizedNetworkImpl->GetModelOptions()),
                     "Factory does not support layer");
    IgnoreUnused(reasonIfUnsupported);
    return *workloadFactory;
}

namespace {

// Non-copyable class owning accelerator-specific tensor data.
class TensorPin
{
public:
    TensorPin(std::unique_ptr<ITensorHandle> handle, const TensorInfo& info, LayerBindingId id)
        : m_TensorHandle(std::move(handle))
        , m_TensorInfo(info)
        , m_Id(id)
    {
    }

    ITensorHandle* GetTensorHandle() const { return m_TensorHandle.get(); }
    const TensorInfo& GetTensorInfo() const { return m_TensorInfo; }
    LayerBindingId GetBindingId() const { return m_Id; }

private:
    std::unique_ptr<ITensorHandle> m_TensorHandle;
    TensorInfo m_TensorInfo;
    LayerBindingId m_Id;
};

static const TensorPin& GetTensorPin(LayerBindingId id,
                                     const std::vector<TensorPin>& pins,
                                     char const* bindingPointDesc)
{
    auto it = std::find_if(pins.begin(), pins.end(),
                           [id](const TensorPin& pin)
                           {
                               return pin.GetBindingId() == id;
                           });

    if (it != pins.end())
    {
        return *it;
    }
    else
    {
        throw InvalidArgumentException(fmt::format("No tensor supplied for {0} {1}", bindingPointDesc, id));
    }
}

// Stores data that needs to be kept accessible for the entire execution of a workload.
class WorkloadData
{
public:
    WorkloadData(const InputTensors& inputTensors, const OutputTensors& outputTensors)
    {
        m_InputTensorPins.reserve(inputTensors.size());
        m_OutputTensorPins.reserve(outputTensors.size());

        for (auto inputTensorPair : inputTensors)
        {
            auto inputTensor = inputTensorPair.second;

            std::unique_ptr<ITensorHandle> tensorHandle =
                std::make_unique<ConstPassthroughTensorHandle>(inputTensor.GetInfo(), inputTensor.GetMemoryArea());
            LayerBindingId layerId = inputTensorPair.first;

            m_InputTensorPins.emplace_back(std::move(tensorHandle), inputTensor.GetInfo(), layerId);
        }

        for (auto outputTensorPair : outputTensors)
        {
            auto outputTensor = outputTensorPair.second;

            std::unique_ptr<ITensorHandle> tensorHandle =
                std::make_unique<PassthroughTensorHandle>(outputTensor.GetInfo(), outputTensor.GetMemoryArea());
            LayerBindingId layerId = outputTensorPair.first;

            m_OutputTensorPins.emplace_back(std::move(tensorHandle), outputTensor.GetInfo(), layerId);
        }
    }

    const TensorPin& GetInputTensorPin(LayerBindingId id) const
    {
        return GetTensorPin(id, m_InputTensorPins, "input");
    }

    const TensorPin& GetOutputTensorPin(LayerBindingId id) const
    {
        return GetTensorPin(id, m_OutputTensorPins, "output");
    }

private:
    std::vector<TensorPin> m_InputTensorPins;
    std::vector<TensorPin> m_OutputTensorPins;
};

} // anonymous namespace

Status LoadedNetwork::EnqueueWorkload(const InputTensors& inputTensors,
                                      const OutputTensors& outputTensors,
                                      std::vector<ImportedInputId> preImportedInputIds,
                                      std::vector<ImportedOutputId> preImportedOutputIds)
{
    const Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();

    // Walk graph to determine the order of execution.
    if (graph.GetNumLayers() < 2)
    {
        ARMNN_LOG(warning) << "IRuntime::EnqueueWorkload()::Less than two nodes in graph";
        return Status::Failure;
    }

    // Data that must be kept alive for the entire execution of the workload.
    WorkloadData workloadData(inputTensors, outputTensors);

    // Input tensors can be provided as parameters or pre-imported. Either way the number of
    // tensors should match the number of inputs.
    if (graph.GetNumInputs() != (inputTensors.size() + preImportedInputIds.size()))
    {
        throw InvalidArgumentException("Number of inputs provided does not match network.");
    }

    // For each input to the network, call EnqueueInput with the data passed by the user.
    {
        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "PrepareInputs");
        m_InputQueue.clear();
        m_InputQueue.reserve(graph.GetNumInputs());

        unsigned int inputIndex = 0;
        unsigned int importedInputIdIndex = 0;
        std::sort(preImportedInputIds.begin(), preImportedInputIds.end());
        for (const BindableLayer* inputLayer : graph.GetInputLayers())
        {
            if (importedInputIdIndex < preImportedInputIds.size() &&
                inputIndex == preImportedInputIds[importedInputIdIndex])
            {
                // Only replace tensor handles if they have not already been replaced
                if (!m_IsInputImported[inputIndex])
                {
                    auto outputTensorHandle = m_PreImportedInputHandles[inputIndex].m_TensorHandle.get();

                    for (const auto& workloadInfo : m_InputWorkloadSlotPairs[inputLayer->GetBindingId()])
                    {
                        auto workload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
                        workload->ReplaceInputTensorHandle(outputTensorHandle, workloadInfo.m_SlotIndex);
                    }
                    m_IsInputImported[inputIndex] = true;
                }
                importedInputIdIndex++;
            }
            else
            {
                if (m_IsInputImported[inputIndex])
                {
                    OutputHandler& handler = const_cast<OutputHandler&>(inputLayer->GetOutputHandler(0));

                    for (const auto& workloadInfo : m_InputWorkloadSlotPairs[inputLayer->GetBindingId()])
                    {
                        auto workload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
                        workload->ReplaceInputTensorHandle(handler.GetData(), workloadInfo.m_SlotIndex);
                    }

                    m_IsInputImported[inputIndex] = false;
                }

                // InputTensorHandle is not imported yet, proceed to enqueue the input
                const TensorPin& pin = workloadData.GetInputTensorPin(inputLayer->GetBindingId());
                EnqueueInput(*inputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
            }
            inputIndex++;
        }
    }
    // For each output of the network, call EnqueueOutput with the data passed by the user.
    {
        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "PrepareOutputs");
        m_OutputQueue.clear();
        m_OutputQueue.reserve(graph.GetNumOutputs());

        if (preImportedOutputIds.size() > graph.GetNumOutputs())
        {
            throw InvalidArgumentException("Invalid number of preImportedOutputIds");
        }

        unsigned int outputIndex = 0;
        unsigned int importedOutputIdIndex = 0;
        std::sort(preImportedOutputIds.begin(), preImportedOutputIds.end());
        for (const BindableLayer* outputLayer : graph.GetOutputLayers())
        {
            if (importedOutputIdIndex < preImportedOutputIds.size() &&
                outputIndex == preImportedOutputIds[importedOutputIdIndex])
            {
                ITensorHandle* inputTensorHandle = m_PreImportedOutputHandles[outputIndex].m_TensorHandle.get();

                // Only replace tensor handles if they have not already been replaced
                if (!m_IsOutputImported[outputIndex])
                {
                    const auto bindingId = outputLayer->GetBindingId();
                    const auto& indices = m_OutputWorkloadSlotPairs[bindingId];

                    auto outputWorkload = m_WorkloadQueue[indices.m_OutputSlotIndices.m_WorkloadIndex].get();

                    outputWorkload->ReplaceOutputTensorHandle(inputTensorHandle,
                                                              indices.m_OutputSlotIndices.m_SlotIndex);

                    for (const auto& workloadInfo : indices.m_InputSlotIndices)
                    {
                        auto inputWorkload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
                        inputWorkload->ReplaceInputTensorHandle(inputTensorHandle, workloadInfo.m_SlotIndex);
                    }
                    m_IsOutputImported[outputIndex] = true;
                }

                ARMNN_ASSERT_MSG(inputTensorHandle != nullptr, "Data should have been allocated.");
                MemSyncQueueDescriptor syncDesc;
                syncDesc.m_Inputs.push_back(inputTensorHandle);
                WorkloadInfo info;
                info.m_InputTensorInfos.push_back(
                    outputLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo());
                auto syncWorkload = std::make_unique<SyncMemGenericWorkload>(syncDesc, info);
                ARMNN_ASSERT_MSG(syncWorkload, "No sync workload created");
                m_OutputQueue.push_back(std::move(syncWorkload));
                importedOutputIdIndex++;
            }
            else
            {
                if (m_IsOutputImported[outputIndex])
                {
                    const auto bindingId = outputLayer->GetBindingId();
                    const auto& indices = m_OutputWorkloadSlotPairs[bindingId];

                    auto outputWorkload = m_WorkloadQueue[indices.m_OutputSlotIndices.m_WorkloadIndex].get();
                    const OutputHandler& outputHandler =
                        outputLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetOutputHandler();

                    outputWorkload->ReplaceOutputTensorHandle(
                        outputHandler.GetData(), indices.m_OutputSlotIndices.m_SlotIndex);

                    for (const auto& workloadInfo : indices.m_InputSlotIndices)
                    {
                        auto inputWorkload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
                        inputWorkload->ReplaceInputTensorHandle(outputHandler.GetData(), workloadInfo.m_SlotIndex);
                    }
                    m_IsOutputImported[outputIndex] = false;
                }

                // OutputTensorHandle is not imported yet, proceed to enqueue the output
                const TensorPin& pin = workloadData.GetOutputTensorPin(outputLayer->GetBindingId());
                EnqueueOutput(*outputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
            }
            outputIndex++;
        }
    }

    std::unique_ptr<TimelineUtilityMethods> timelineUtils =
        TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
    ProfilingGuid inferenceGuid = m_ProfilingService->GetNextGuid();
    if (timelineUtils)
    {
        // Add inference timeline trace if profiling is enabled.
        ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
        timelineUtils->CreateTypedEntity(inferenceGuid, LabelsAndEventClasses::INFERENCE_GUID);
        timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
                                          networkGuid,
                                          inferenceGuid,
                                          LabelsAndEventClasses::EXECUTION_OF_GUID);
        timelineUtils->RecordEvent(inferenceGuid, LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
    }

    bool executionSucceeded = true;

    {
        if (m_ProfilingService->IsProfilingEnabled())
        {
            m_ProfilingService->IncrementCounterValue(INFERENCES_RUN);
        }
        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Execute");
        ARMNN_SCOPED_HEAP_PROFILING("Executing");
        executionSucceeded = Execute(timelineUtils, inferenceGuid);
    }

    if (timelineUtils)
    {
        // Add end of life of the inference timeline if profiling is enabled.
        timelineUtils->RecordEvent(inferenceGuid, LabelsAndEventClasses::ARMNN_PROFILING_EOL_EVENT_CLASS);
        timelineUtils->Commit();
    }

    return executionSucceeded ? Status::Success : Status::Failure;
}
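
// Illustrative sketch (not part of the original file): building the InputTensors and
// OutputTensors containers that EnqueueWorkload() consumes. The binding ids (0) and the
// 'inputInfo'/'outputInfo'/data buffers are hypothetical; note the input TensorInfo must
// have IsConstant() == true for ConstTensor construction.
//
//     armnn::InputTensors inputs{ {0, armnn::ConstTensor(inputInfo, inputData.data())} };
//     armnn::OutputTensors outputs{ {0, armnn::Tensor(outputInfo, outputData.data())} };
//     loadedNetwork->EnqueueWorkload(inputs, outputs, {}, {});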

void LoadedNetwork::EnqueueInput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo)
{
    if (layer.GetType() != LayerType::Input)
    {
        throw InvalidArgumentException("EnqueueInput: given layer not an InputLayer");
    }

    if (tensorHandle == nullptr)
    {
        throw InvalidArgumentException("EnqueueInput: tensorHandle must not be NULL");
    }

    InputQueueDescriptor inputQueueDescriptor;
    WorkloadInfo info;

    inputQueueDescriptor.m_Inputs.push_back(tensorHandle);
    info.m_InputTensorInfos.push_back(tensorInfo);

    ARMNN_ASSERT_MSG(layer.GetNumOutputSlots() == 1, "Can only handle Input Layer with one output");
    const OutputHandler& handler = layer.GetOutputHandler();
    const TensorInfo& outputTensorInfo = handler.GetTensorInfo();
    ITensorHandle* outputTensorHandle = handler.GetData();
    ARMNN_ASSERT_MSG(outputTensorHandle != nullptr,
                     "Data should have been allocated.");
    inputQueueDescriptor.m_Outputs.push_back(outputTensorHandle);
    info.m_OutputTensorInfos.push_back(outputTensorInfo);

    MemorySourceFlags importFlags = outputTensorHandle->GetImportFlags();
    bool needMemCopy = true;
    if (m_NetworkProperties.m_ImportEnabled) // Try to import the input tensor
    {
        if (CheckFlag(importFlags, m_NetworkProperties.m_InputSource))
        {
            needMemCopy = false;
            // This assumes a CPU Tensor handle
            void* mem = tensorHandle->Map(false);
            if (outputTensorHandle->Import(mem, m_NetworkProperties.m_InputSource))
            {
                tensorHandle->Unmap();
                return; // No need for a workload since the import has been done.
            }
            tensorHandle->Unmap();
            throw MemoryImportException("EnqueueInput: Memory Import failed");
        }
    }
    if (needMemCopy)
    {
        // Create a mem copy workload for input since we did not import
        std::unique_ptr<IWorkload> inputWorkload = std::make_unique<CopyMemGenericWorkload>(inputQueueDescriptor, info);

        ARMNN_ASSERT_MSG(inputWorkload, "No input workload created");

        std::unique_ptr<TimelineUtilityMethods> timelineUtils =
            TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
        if (timelineUtils)
        {
            // Add Input Workload to the post-optimisation network structure
            AddWorkloadStructure(timelineUtils, inputWorkload, layer);
            timelineUtils->Commit();
        }

        m_InputQueue.push_back(std::move(inputWorkload));
    }
}

void LoadedNetwork::EnqueueOutput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo)
{
    if (layer.GetType() != LayerType::Output)
    {
        throw InvalidArgumentException("EnqueueOutput: given layer not an OutputLayer");
    }

    if (tensorHandle == nullptr)
    {
        throw InvalidArgumentException("EnqueueOutput: tensorHandle must not be NULL");
    }

    OutputQueueDescriptor outputQueueDescriptor;
    WorkloadInfo info;

    outputQueueDescriptor.m_Outputs.push_back(tensorHandle);
    info.m_OutputTensorInfos.push_back(tensorInfo);

    ARMNN_ASSERT_MSG(layer.GetNumInputSlots() == 1, "Output Layer should have exactly one input.");

    // Gets the output handler from the previous node.
    const OutputHandler& outputHandler = layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOutputHandler();

    const TensorInfo& inputTensorInfo = outputHandler.GetTensorInfo();
    ITensorHandle* inputTensorHandle = outputHandler.GetData();
    ARMNN_ASSERT_MSG(inputTensorHandle != nullptr, "Data should have been allocated.");

    // Try to import the output tensor.
    // Note: We can only import the output pointer if all of the following hold true:
    // a) The imported pointer is aligned sufficiently
    // b) The tensor has zero padding
    // c) There is only one connection to the OutputSlot and it is to an OutputLayer.
    // d) The output pointer is allocated via malloc. (Other types will be supported in a later release)
    // e) m_IsExportEnabled must be set to true
    bool needMemCopy = true;
    if (m_NetworkProperties.m_ExportEnabled &&
        (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetNumConnections() == 1))
    {
        if (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOwningLayer().GetType() != LayerType::Input)
        {
            MemorySourceFlags importFlags = inputTensorHandle->GetImportFlags();
            if (CheckFlag(importFlags, m_NetworkProperties.m_OutputSource))
            {
                needMemCopy = false;
                void* mem = tensorHandle->Map(false);
                bool importOk = inputTensorHandle->Import(mem, m_NetworkProperties.m_OutputSource);
                tensorHandle->Unmap();

                if (importOk)
                {
                    // Insert synchronization workload
                    MemSyncQueueDescriptor syncDesc;
                    syncDesc.m_Inputs.push_back(inputTensorHandle);
                    info.m_InputTensorInfos.push_back(inputTensorInfo);
                    auto syncWorkload = std::make_unique<SyncMemGenericWorkload>(syncDesc, info);
                    ARMNN_ASSERT_MSG(syncWorkload, "No sync workload created");
                    m_OutputQueue.push_back(std::move(syncWorkload));
                }
                else
                {
                    throw MemoryExportException("EnqueueOutput: Memory Export failed");
                }
            }
        }
    }
    if (needMemCopy)
    {
        // If we got here then we didn't export the memory, so add an output workload which performs a memcopy.
        outputQueueDescriptor.m_Inputs.push_back(inputTensorHandle);
        info.m_InputTensorInfos.push_back(inputTensorInfo);

        std::unique_ptr<IWorkload> outputWorkload =
            std::make_unique<CopyMemGenericWorkload>(outputQueueDescriptor, info);
        ARMNN_ASSERT_MSG(outputWorkload, "No output workload created");

        std::unique_ptr<TimelineUtilityMethods> timelineUtils =
            TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
        if (timelineUtils)
        {
            // Add Output Workload to the post-optimisation network structure
            AddWorkloadStructure(timelineUtils, outputWorkload, layer);
            timelineUtils->Commit();
        }

        m_OutputQueue.push_back(std::move(outputWorkload));
    }
}

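// Working memory (intermediate tensors and backend scratch memory) is acquired lazily on
// the first Execute() and held until FreeWorkingMemory() is called or the network unloads.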
void LoadedNetwork::AllocateWorkingMemory(
#if !defined(ARMNN_DISABLE_THREADS)
    std::lock_guard<std::mutex>& lock
#endif
    )
{
    ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Working Memory Allocation");

#if !defined(ARMNN_DISABLE_THREADS)
    // This otherwise-unused parameter ensures this function can only be called while holding a valid lock.
    IgnoreUnused(lock);
#endif
    if (m_IsWorkingMemAllocated)
    {
        return;
    }

    if (m_ExternalMemoryManager)
    {
        m_ExternalMemoryManager->Allocate();

        for (unsigned int i = 0; i < m_TensorMemory.size(); ++i)
        {
            m_Tensorhandles[i]->Import(m_TensorMemory[i].first->m_Data, m_TensorMemory[i].second);
        }
    }

    for (auto&& memoryManager : m_BackendMemoryMangers)
    {
        if (memoryManager)
        {
            memoryManager->Acquire();
        }
    }
    m_TensorHandleFactoryRegistry.AquireMemory();
    m_IsWorkingMemAllocated = true;
}

void LoadedNetwork::FreeWorkingMemory()
{
#if !defined(ARMNN_DISABLE_THREADS)
    std::lock_guard<std::mutex> lockGuard(m_WorkingMemMutex);
#endif

    if (!m_IsWorkingMemAllocated)
    {
        return;
    }

    if (m_ExternalMemoryManager)
    {
        m_ExternalMemoryManager->Deallocate();
    }

    // Informs the memory managers to release memory in their respective memory groups
    for (auto&& memoryManager : m_BackendMemoryMangers)
    {
        if (memoryManager)
        {
            memoryManager->Release();
        }
    }
    m_TensorHandleFactoryRegistry.ReleaseMemory();
    m_IsWorkingMemAllocated = false;
}

bool LoadedNetwork::Execute(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
                            ProfilingGuid inferenceGuid)
{
    bool success = true;

    auto Fail = [&](const std::exception& error)
    {
        ARMNN_LOG(error) << "An error occurred attempting to execute a workload: " << error.what();
        success = false;
    };

    try
    {
#if !defined(ARMNN_DISABLE_THREADS)
        std::lock_guard<std::mutex> lockGuard(m_WorkingMemMutex);
        AllocateWorkingMemory(lockGuard);
#else
        AllocateWorkingMemory();
#endif

        ProfilingDynamicGuid workloadInferenceID(0);
        auto ExecuteQueue = [&timelineUtils, &workloadInferenceID, &inferenceGuid](WorkloadQueue& queue)
        {
            for (auto& workload : queue)
            {
                if (timelineUtils)
                {
                    workloadInferenceID = timelineUtils->RecordWorkloadInferenceAndStartOfLifeEvent(workload->GetGuid(),
                                                                                                    inferenceGuid);
                }
                workload->Execute();
                if (timelineUtils)
                {
                    timelineUtils->RecordEndOfLifeEvent(workloadInferenceID);
                }
            }
        };

        ExecuteQueue(m_InputQueue);
        ExecuteQueue(m_WorkloadQueue);
        ExecuteQueue(m_OutputQueue);
    }
    catch (const RuntimeException& error)
    {
        Fail(error);
    }
    catch (const std::runtime_error& error)
    {
        Fail(error);
    }

    return success;
}

void LoadedNetwork::EnqueueInput(const ConstTensor& inputTensor, ITensorHandle* inputTensorHandle)
{
    if (m_NetworkProperties.m_ImportEnabled) // Try to import the input tensor
    {
        MemorySourceFlags importFlags = inputTensorHandle->GetImportFlags();
        if (CheckFlag(importFlags, m_NetworkProperties.m_InputSource))
        {
            std::unique_ptr<ITensorHandle> tensorHandle =
                std::make_unique<ConstPassthroughTensorHandle>(inputTensor.GetInfo(),
                                                               inputTensor.GetMemoryArea());
            void* mem = tensorHandle->Map(false);

            if (inputTensorHandle->Import(mem, m_NetworkProperties.m_InputSource))
            {
                tensorHandle->Unmap();
                return;
            }
            tensorHandle->Unmap();
            throw MemoryImportException("EnqueueInput: Memory Import failed");
        }
        else
        {
            throw MemoryImportException("EnqueueInput: Memory Import failed, backend does not support Import");
        }
    }
    else
    {
        std::unique_ptr<ITensorHandle> tensorHandle =
            std::make_unique<ConstPassthroughTensorHandle>(inputTensor.GetInfo(), inputTensor.GetMemoryArea());

        auto copyFunc = [](void* dst, const void* src, size_t size)
        {
            memcpy(dst, src, size);
        };

        CopyTensorContentsGeneric(tensorHandle.get(), inputTensorHandle, copyFunc);
    }
}

// Note: We can only import the output pointer if all of the following hold true:
// a) The imported pointer is aligned sufficiently
// b) The tensor has zero padding
// c) There is only one connection to the OutputSlot and it is to an OutputLayer.
// d) The output pointer is allocated via malloc. (Other types will be supported in a later release)
// e) m_IsExportEnabled must be set to true
void LoadedNetwork::ImportOutputTensor(const Tensor& outputTensor, ITensorHandle* outputTensorHandle)
{
    ARMNN_ASSERT_MSG(outputTensorHandle != nullptr, "Data should have been allocated.");
    MemorySourceFlags importFlags = outputTensorHandle->GetImportFlags();
    if (CheckFlag(importFlags, m_NetworkProperties.m_OutputSource))
    {
        std::unique_ptr<ITensorHandle> tensorHandle =
            std::make_unique<PassthroughTensorHandle>(outputTensor.GetInfo(),
                                                      outputTensor.GetMemoryArea());

        void* mem = tensorHandle->Map(false);
        bool importOk = outputTensorHandle->Import(mem, m_NetworkProperties.m_OutputSource);
        tensorHandle->Unmap();

        if (!importOk)
        {
            throw MemoryExportException("ImportOutputTensor: Memory Export failed");
        }
    }
    else
    {
        throw MemoryExportException("ImportOutputTensor: Memory Export failed, attempting to export Input Layer");
    }
}

void CopyToOutputTensor(const Tensor& outputTensor, ITensorHandle* outputTensorHandle)
{
    auto copyFunc = [](void* dst, const void* src, size_t size)
    {
        memcpy(dst, src, size);
    };

    std::unique_ptr<ITensorHandle> tensorHandle =
        std::make_unique<PassthroughTensorHandle>(outputTensor.GetInfo(),
                                                  outputTensor.GetMemoryArea());

    CopyTensorContentsGeneric(outputTensorHandle, tensorHandle.get(), copyFunc);
}

const armnn::ConstTensor GetInputTensor(const LayerBindingId layerId, const InputTensors& inputTensors)
{
    for (auto inputTensorPair : inputTensors)
    {
        LayerBindingId id = inputTensorPair.first;
        if (id == layerId)
        {
            return inputTensorPair.second;
        }
    }
    throw InvalidArgumentException("Input does not exist.");
}

const armnn::Tensor GetOutputTensor(const LayerBindingId layerId, const OutputTensors& outputTensors)
{
    for (auto outputTensorPair : outputTensors)
    {
        LayerBindingId id = outputTensorPair.first;
        if (id == layerId)
        {
            return outputTensorPair.second;
        }
    }
    throw InvalidArgumentException("Output does not exist.");
}

std::vector<ImportedInputId> LoadedNetwork::ImportInputs(const InputTensors& inputTensors,
                                                         MemorySource forceImportMemorySource)
{
    if (!m_NetworkProperties.m_AsyncEnabled)
    {
        // Cannot import if import is not enabled and forceImportMemorySource is undefined
        if (forceImportMemorySource == MemorySource::Undefined)
        {
            throw MemoryImportException("ImportInputs: Memory Import failed, "
                                        "NetworkProperties.m_ImportEnabled is false and no "
                                        "forceImportMemorySource was specified");
        }
        // The number of pre-imported tensors should not exceed the number of inputs.
        if (inputTensors.size() > m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetNumInputs())
        {
            throw MemoryImportException("ImportInputs: The number of tensors provided exceeds the number of inputs.");
        }

        std::vector<ImportedInputId> importedInputs;
        Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
        unsigned int inputIndex = 0;
        for (const BindableLayer* inputLayer : graph.GetInputLayers())
        {
            auto outputTensorHandle = m_PreImportedInputHandles[inputIndex].m_TensorHandle.get();

            if (!outputTensorHandle)
            {
                inputIndex++;
                continue;
            }

            auto layerBindingId = inputLayer->GetBindingId();
            auto it = std::find_if(inputTensors.begin(), inputTensors.end(), [=](const auto& inputTensor)
            {
                return inputTensor.first == layerBindingId;
            });

            if (it == inputTensors.end())
            {
                inputIndex++;
                continue;
            }

            const auto& inputTensor = *it;
            std::unique_ptr<ITensorHandle> passThroughTensorHandle =
                std::make_unique<ConstPassthroughTensorHandle>(inputTensor.second.GetInfo(),
                                                               inputTensor.second.GetMemoryArea());

            try
            {
                if (outputTensorHandle->CanBeImported(passThroughTensorHandle->Map(), forceImportMemorySource)
                    && (outputTensorHandle->Import(passThroughTensorHandle->Map(), forceImportMemorySource)))
                {
                    importedInputs.push_back(inputIndex);
                }
                passThroughTensorHandle->Unmap();
            }
            catch (const MemoryImportException& exception)
            {
                ARMNN_LOG(error) << "An error occurred attempting to import input_"
                                 << inputIndex << " : " << exception.what();
                passThroughTensorHandle->Unmap();
            }
            inputIndex++;
        }

        return importedInputs;
    }
    else
    {
        // The network was loaded for async execution with import enabled, so import the
        // given tensors directly through the backend's tensor handle factory.
        std::vector<ImportedInputId> importedInputs;
        Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();

        for (auto inputTensor : inputTensors)
        {
            auto layerBindingId = inputTensor.first;
            auto it = std::find_if(graph.GetInputLayers().begin(), graph.GetInputLayers().end(), [=](auto* layer)
            {
                return layer->GetBindingId() == layerBindingId;
            });

            if (it == graph.GetInputLayers().end())
            {
                throw MemoryImportException(fmt::format(
                    "ImportInputs: Memory Import failed, unknown LayerBindingId: {}", layerBindingId));
            }

            const Layer* layer = *it;
            if (layer->GetType() != LayerType::Input)
            {
                throw InvalidArgumentException("ImportInputs: given layer not an InputLayer");
            }

            auto& backend = m_Backends.at(layer->GetBackendId());
            if (!HasCapability(BackendOptions::BackendOption{"PreImportIOTensors", true}, backend->GetCapabilities()))
            {
                std::string er = backend->GetId();
                er += " does not have PreImportIOTensors capability";
                throw BackendCapabilityException(er);
            }

            const OutputSlot& outputSlot = layer->GetOutputSlots()[0];

            ITensorHandleFactory::FactoryId factoryId = outputSlot.GetTensorHandleFactoryId();
            const TensorInfo& tensorInfo = outputSlot.GetTensorInfo();

            ITensorHandleFactory* handleFactory = m_TensorHandleFactoryRegistry.GetFactory(factoryId);
            ARMNN_ASSERT(handleFactory);

            ImportedTensorHandlePin importedTensorHandlePin{layerBindingId,
                                                            handleFactory->CreateTensorHandle(tensorInfo, false)};

            ITensorHandle* tensorHandle = importedTensorHandlePin.m_TensorHandle.get();

            if (!CheckFlag(tensorHandle->GetImportFlags(), forceImportMemorySource))
            {
                throw MemoryImportException(
                    fmt::format("ImportInputs: Memory Import failed, backend: "
                                "{} does not support importing from source {}",
                                factoryId, forceImportMemorySource));
            }

            std::unique_ptr<ITensorHandle> passThroughTensorHandle =
                std::make_unique<ConstPassthroughTensorHandle>(inputTensor.second.GetInfo(),
                                                               inputTensor.second.GetMemoryArea());

            if (tensorHandle->Import(passThroughTensorHandle->Map(), forceImportMemorySource))
            {
                importedInputs.push_back(m_CurImportedInputId++);
                passThroughTensorHandle->Unmap();
            }
            else
            {
                passThroughTensorHandle->Unmap();
                throw MemoryImportException("ImportInputs: Memory Import failed");
            }

            m_PreImportedInputHandles.push_back(std::move(importedTensorHandlePin));
        }
        return importedInputs;
    }
}
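
// Illustrative sketch (not part of the original file): pre-importing an input buffer and
// then running inference without a per-inference copy. The runtime, network id, binding
// id (0) and 'inputInfo'/'buffer' names are hypothetical; the usual entry point is
// IRuntime::ImportInputs() followed by IRuntime::EnqueueWorkload().
//
//     armnn::InputTensors inputs{ {0, armnn::ConstTensor(inputInfo, buffer)} };
//     std::vector<armnn::ImportedInputId> importedIds =
//         runtime->ImportInputs(networkId, inputs, armnn::MemorySource::Malloc);
//     // Pass the imported ids instead of the input tensors on subsequent runs:
//     runtime->EnqueueWorkload(networkId, {}, outputs, importedIds);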

std::vector<ImportedOutputId> LoadedNetwork::ImportOutputs(const OutputTensors& outputTensors,
                                                           MemorySource forceImportMemorySource)
{
    if (!m_NetworkProperties.m_AsyncEnabled)
    {
        // Cannot import if import is not enabled and forceImportMemorySource is undefined
        if (forceImportMemorySource == MemorySource::Undefined)
        {
            throw MemoryImportException("ImportOutputs: Memory Import failed, "
                                        "NetworkProperties.m_ImportEnabled is false and no "
                                        "forceImportMemorySource was specified");
        }
        // If forceImportMemorySource is defined, try to import if the memory is aligned
        if (outputTensors.size() != m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetNumOutputs())
        {
            throw MemoryImportException("ImportOutputs: Force Import failed, incorrect number of tensors");
        }
        std::vector<ImportedOutputId> importedOutputs;
        Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();

        unsigned int outputIndex = 0;
        for (const BindableLayer* const outputLayer : graph.GetOutputLayers())
        {
            auto inputTensorHandle = m_PreImportedOutputHandles[outputIndex].m_TensorHandle.get();
            if (!inputTensorHandle)
            {
                outputIndex++;
                continue;
            }

            auto layerBindingId = outputLayer->GetBindingId();
            auto it = std::find_if(outputTensors.begin(), outputTensors.end(), [=](const auto& outputTensor)
            {
                return outputTensor.first == layerBindingId;
            });

            if (it == outputTensors.end())
            {
                outputIndex++;
                continue;
            }

            const auto outputTensor = *it;
            try
            {
                // Check if the output memory can be imported
                if (inputTensorHandle->CanBeImported(outputTensor.second.GetMemoryArea(), forceImportMemorySource)
                    && inputTensorHandle->Import(outputTensor.second.GetMemoryArea(), forceImportMemorySource))
                {
                    importedOutputs.push_back(outputIndex);
                }
            }
            catch (const MemoryImportException& exception)
            {
                ARMNN_LOG(error) << "An error occurred attempting to import output_"
                                 << outputIndex << " : " << exception.what();
            }
            outputIndex++;
        }
        return importedOutputs;
    }
1624 
1625  std::vector<ImportedOutputId> importedOutputs;
1626  Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
1627 
1628  for (const auto& outputTensor : outputTensors)
1629  {
1630  auto layerBindingId = outputTensor.first;
1631  auto it = std::find_if(graph.GetOutputLayers().begin(), graph.GetOutputLayers().end(), [=](auto* layer)
1632  {
1633  return layer->GetBindingId() == layerBindingId;
1634  });
1635 
1636  if (it == graph.GetOutputLayers().end())
1637  {
1638  throw MemoryImportException(fmt::format("ImportOutputs: Memory Import failed, unknown LayerBindingId: {}",
1639  layerBindingId));
1640  }
1641 
1642  const Layer* layer = *it;
1643  if (layer->GetType() != LayerType::Output)
1644  {
1645  throw InvalidArgumentException("ImportOutputs: given layer is not an OutputLayer");
1646  }
1647 
1648  auto& backend = m_Backends.at(layer->GetBackendId());
1649  if (!HasCapability(BackendOptions::BackendOption{"PreImportIOTensors", true}, backend->GetCapabilities()))
1650  {
1651  std::string er = backend->GetId();
1652  er += " does not have PreImportIOTensors capability";
1653  throw BackendCapabilityException(er);
1654  }
1655 
1656  const InputSlot& inputSlot = layer->GetInputSlots()[0];
1657  ITensorHandleFactory::FactoryId factoryId = inputSlot.GetConnectedOutputSlot()->GetTensorHandleFactoryId();
1658  const TensorInfo& tensorInfo = inputSlot.GetConnectedOutputSlot()->GetTensorInfo();
1659 
1660  ITensorHandleFactory* handleFactory = m_TensorHandleFactoryRegistry.GetFactory(factoryId);
1661  ARMNN_ASSERT(handleFactory);
1662 
1663  ImportedTensorHandlePin importedTensorHandlePin{layerBindingId,
1664  handleFactory->CreateTensorHandle(tensorInfo, false)};
1665 
1666  ITensorHandle* tensorHandle = importedTensorHandlePin.m_TensorHandle.get();
1667 
1668  if (!CheckFlag(tensorHandle->GetImportFlags(), forceImportMemorySource))
1669  {
1670  throw MemoryImportException(fmt::format("ImportOutputs: Memory Import failed, backend: "
1671  "{} does not support importing from source {}"
1672  , factoryId, forceImportMemorySource));
1673  }
1674 
1675  if (tensorHandle->Import(outputTensor.second.GetMemoryArea(), forceImportMemorySource))
1676  {
1677  importedOutputs.push_back(m_CurImportedOutputId++);
1678  }
1679  else
1680  {
1681  throw MemoryImportException("ImportOutputs: Memory Import failed");
1682  }
1683 
1684  m_PreImportedOutputHandles.push_back(std::move(importedTensorHandlePin));
1685  }
1686 
1687  return importedOutputs;
1688 }
1689 
1690 void LoadedNetwork::ClearImportedInputs(const std::vector<ImportedInputId> inputIds)
1691 {
1692  for (auto id : inputIds)
1693  {
1694  if (id >= m_PreImportedInputHandles.size())
1695  {
1696  throw InvalidArgumentException(fmt::format("ClearImportedInputs::Unknown ImportedInputId: {}", id));
1697  }
1698 
1699  auto& importedTensorHandle = m_PreImportedInputHandles[id].m_TensorHandle;
1700  if (!importedTensorHandle)
1701  {
1702  throw InvalidArgumentException(
1703  fmt::format("ClearImportedInputs::ImportedInput with id: {} has already been deleted", id));
1704  }
1705  // Call Unimport then destroy the tensorHandle
1706  importedTensorHandle->Unimport();
1707  importedTensorHandle = {};
1708  }
1709 }
1710 
1711 void LoadedNetwork::ClearImportedOutputs(const std::vector<ImportedOutputId> outputIds)
1712 {
1713  for (auto id : outputIds)
1714  {
1715  if (id >= m_PreImportedOutputHandles.size())
1716  {
1717  throw InvalidArgumentException(fmt::format("ClearImportedOutputs::Unknown ImportedOutputId: {}", id));
1718  }
1719 
1720  auto& importedTensorHandle = m_PreImportedOutputHandles[id].m_TensorHandle;
1721  if (!importedTensorHandle)
1722  {
1723  throw InvalidArgumentException(
1724  fmt::format("ClearImportedOutputs::ImportedOutput with id: {} has already been deleted", id));
1725  }
1726  // Call Unimport then destroy the tensorHandle
1727  importedTensorHandle->Unimport();
1728  importedTensorHandle = {};
1729  }
1730 }
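// Illustrative sketch (editorial addition, not part of the original source):
// imported ids stay pinned until explicitly cleared; clearing calls Unimport()
// and releases the handle, after which reusing the id throws. Assumed caller code:
static void SketchReleaseImports(IRuntime& runtime,
                                 NetworkId networkId,
                                 const std::vector<ImportedInputId>& importedInputs,
                                 const std::vector<ImportedOutputId>& importedOutputs)
{
    runtime.ClearImportedInputs(networkId, importedInputs);
    runtime.ClearImportedOutputs(networkId, importedOutputs);
}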
1731 
1732 Status LoadedNetwork::Execute(const InputTensors& inputTensors,
1733  const OutputTensors& outputTensors,
1734  IWorkingMemHandle& iWorkingMemHandle,
1735  std::vector<ImportedInputId> preImportedInputs,
1736  std::vector<ImportedOutputId> preImportedOutputs)
1737 {
1738  const Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
1739 
1740  if (inputTensors.size() + preImportedInputs.size() != graph.GetNumInputs())
1741  {
1742  if (preImportedInputs.empty())
1743  {
1744  throw InvalidArgumentException("LoadedNetwork::Execute: Number of inputs provided does not match network.");
1745  }
1746  else
1747  {
1748  throw InvalidArgumentException("LoadedNetwork::Execute: "
1749  "Number of inputs + preImportedInputs provided does not match network.");
1750  }
1751  }
1752 
1753  if (outputTensors.size() + preImportedOutputs.size() != graph.GetNumOutputs())
1754  {
1755  if (preImportedOutputs.empty())
1756  {
1757  throw InvalidArgumentException("LoadedNetwork::Execute: "
1758  "Number of outputs provided does not match network.");
1759  }
1760  else
1761  {
1762  throw InvalidArgumentException("LoadedNetwork::Execute: "
1763  "Number of outputs + preImportedOutputs provided does not match network.");
1764  }
1765  }
1766 
1767  WorkingMemHandle& workingMemHandle = dynamic_cast<WorkingMemHandle&>(iWorkingMemHandle);
1768  // Collect all the given LayerBindingIds and check them for duplicates and unknowns.
1769  std::vector<LayerBindingId>& bindingIds = workingMemHandle.GetBindingIdVector();
1770  unsigned int index = 0;
1771  for (auto pair : inputTensors)
1772  {
1773  bindingIds[index++] = pair.first;
1774  }
1775  for (ImportedInputId id : preImportedInputs)
1776  {
1777  bindingIds[index++] = ValidateImportedInputID(id);
1778  }
1779  for (auto pair : outputTensors)
1780  {
1781  bindingIds[index++] = pair.first;
1782  }
1783  for (ImportedOutputId id : preImportedOutputs)
1784  {
1785  bindingIds[index++] = ValidateImportedOutputID(id);
1786  }
1787 
1788  workingMemHandle.ValidateBindingIds();
1789 
1790  auto resetMemHandle = [&]()
1791  {
1792  for (ImportedInputId id: preImportedInputs)
1793  {
1794  const LayerBindingId layerBindingId = m_PreImportedInputHandles[id].m_LayerBindingId;
1795 
1796  auto inputHandle = workingMemHandle.GetInputHandle(layerBindingId);
1797  auto inputConnections = workingMemHandle.GetInputConnections(layerBindingId);
1798  for (auto it : inputConnections)
1799  {
1800  *it = inputHandle;
1801  }
1802  }
1803 
1804  for (ImportedOutputId id: preImportedOutputs)
1805  {
1806  const LayerBindingId layerBindingId = m_PreImportedOutputHandles[id].m_LayerBindingId;
1807 
1808  auto outputHandle = workingMemHandle.GetOutputHandle(layerBindingId);
1809  auto outputConnections = workingMemHandle.GetOutputConnection(layerBindingId);
1810 
1811  for (auto it : outputConnections)
1812  {
1813  *it = outputHandle;
1814  }
1815  }
1816  };
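 // Editorial comment: resetMemHandle rewires every connection that is pointed at
 // a pre-imported handle further down back to the WorkingMemHandle-owned handle,
 // so the handle stays reusable by later calls even if a workload throws.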
1817 
1818  std::unique_ptr<TimelineUtilityMethods> timelineUtils =
1819  TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
1820  ProfilingGuid inferenceGuid = m_ProfilingService->GetNextGuid();
1821  if (timelineUtils)
1822  {
1823  // Add inference timeline trace if profiling is enabled.
1824  ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
1825  timelineUtils->CreateTypedEntity(inferenceGuid, LabelsAndEventClasses::INFERENCE_GUID);
1826  timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
1827  networkGuid,
1828  inferenceGuid,
1829  LabelsAndEventClasses::EXECUTION_OF_GUID);
1830  timelineUtils->RecordEvent(inferenceGuid, LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
1831  }
1832 
1833  bool executionSucceeded = true;
1834 
1835  if (timelineUtils)
1836  {
1837  // Add end of life of the inference timeline if profiling is enabled.
1838  timelineUtils->RecordEvent(inferenceGuid, LabelsAndEventClasses::ARMNN_PROFILING_EOL_EVENT_CLASS);
1839  timelineUtils->Commit();
1840  }
1841 
1842  if (!workingMemHandle.IsAllocated())
1843  {
1844  workingMemHandle.Allocate();
1845  }
1846 
1847  {
1848  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "PrepareInputs");
1849  for (auto pair : inputTensors)
1850  {
1851  EnqueueInput(pair.second, workingMemHandle.GetInputHandle(pair.first));
1852  }
1853 
1854  // Swap in the pre-imported inputs if any
1855  for (ImportedInputId id : preImportedInputs)
1856  {
1857  const ImportedTensorHandlePin& importedInputPin = m_PreImportedInputHandles[id];
1858  const LayerBindingId layerBindingId = m_PreImportedInputHandles[id].m_LayerBindingId;
1859  const auto& preimportedHandle = importedInputPin.m_TensorHandle;
1860 
1861  auto inputConnections = workingMemHandle.GetInputConnections(layerBindingId);
1862  for (auto it : inputConnections)
1863  {
1864  *it = preimportedHandle.get();
1865  }
1866  }
1867  }
1868  {
1869  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "PrepareOutputs");
1870  if (m_NetworkProperties.m_ExportEnabled)
1871  {
1872  for (auto pair: outputTensors)
1873  {
1874  ImportOutputTensor(pair.second, workingMemHandle.GetOutputHandle(pair.first));
1875  }
1876  }
1877 
1878  for (ImportedOutputId id : preImportedOutputs)
1879  {
1880  const ImportedTensorHandlePin& importedOutputPin = m_PreImportedOutputHandles[id];
1881  const LayerBindingId layerBindingId = m_PreImportedOutputHandles[id].m_LayerBindingId;
1882  const auto& preimportedHandle = importedOutputPin.m_TensorHandle;
1883 
1884  auto outputConnections = workingMemHandle.GetOutputConnection(layerBindingId);
1885  for (auto it : outputConnections)
1886  {
1887  *it = preimportedHandle.get();
1888  }
1889  }
1890  }
1891 
1892  auto Fail = [&](const std::exception& error)
1893  {
1894  ARMNN_LOG(error) << "An error occurred attempting to execute a workload: " << error.what();
1895  executionSucceeded = false;
1896  };
1897  ProfilingDynamicGuid workloadInferenceID(0);
1898 
1899  try
1900  {
1901  for (unsigned int i = 0; i < m_WorkloadQueue.size(); ++i)
1902  {
1903  auto& workload = m_WorkloadQueue[i];
1904  if (timelineUtils)
1905  {
1906  workloadInferenceID = timelineUtils->RecordWorkloadInferenceAndStartOfLifeEvent(workload->GetGuid(),
1907  inferenceGuid);
1908  }
1909 
1910  workload->ExecuteAsync(workingMemHandle.GetExecutionDataAt(i).second);
1911 
1912  if (timelineUtils)
1913  {
1914  timelineUtils->RecordEndOfLifeEvent(workloadInferenceID);
1915  }
1916  }
1917  }
1918  catch (const RuntimeException& error)
1919  {
1920  resetMemHandle();
1921  Fail(error);
1922  }
1923  catch (const std::runtime_error& error)
1924  {
1925  resetMemHandle();
1926  Fail(error);
1927  }
1928  catch (...)
1929  {
1930  resetMemHandle();
1931  throw;
1932  }
1933 
1934  if (!m_NetworkProperties.m_ExportEnabled)
1935  {
1936  for (auto pair: outputTensors)
1937  {
1938  CopyToOutputTensor(pair.second, workingMemHandle.GetOutputHandle(pair.first));
1939  }
1940  }
1941  else
1942  {
1943  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "SyncMemGeneric_Execute");
1944  workingMemHandle.MemSyncOutputs();
1945  }
1946 
1947  resetMemHandle();
1948 
1949  return executionSucceeded ? Status::Success : Status::Failure;
1950 }
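// Illustrative usage sketch (editorial addition, not part of the original source):
// the minimal thread-safe invocation of Execute, assuming the network was loaded
// with async enabled and the tensors match the network's bindings.
static Status SketchExecuteOnce(IRuntime& runtime,
                                NetworkId networkId,
                                const InputTensors& inputTensors,
                                const OutputTensors& outputTensors)
{
    std::unique_ptr<IWorkingMemHandle> memHandle = runtime.CreateWorkingMemHandle(networkId);
    return runtime.Execute(*memHandle, inputTensors, outputTensors);
}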
1951 
1952 /// Create a new unique WorkingMemHandle object. Create multiple handles if you wish to have
1953 /// overlapped execution by calling this function from different threads.
1954 std::unique_ptr<IWorkingMemHandle> LoadedNetwork::CreateWorkingMemHandle(NetworkId networkId)
1955 {
1956  Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
1957 
1958  // Tensors that will need to be allocated internally within armnn
1959  std::vector<std::unique_ptr<ITensorHandle>> managedTensorHandles;
1960  // Tensors that will be allocated externally by the user
1961  std::vector<std::unique_ptr<ITensorHandle>> unmanagedTensorHandles;
1962 
1963  std::vector<WorkingMemDescriptor> workingMemDescriptors;
1964  std::vector<std::pair<BackendId, ExecutionData>> executionDataVec;
1965 
1966  auto GetTensorHandle = [&](Layer* layer, const OutputSlot& outputSlot)
1967  {
1968  ITensorHandleFactory::FactoryId factoryId = outputSlot.GetTensorHandleFactoryId();
1969  const TensorInfo& tensorInfo = outputSlot.GetTensorInfo();
1970 
1971  if (factoryId == ITensorHandleFactory::LegacyFactoryId)
1972  {
1973  BackendId id = layer->GetBackendId();
1974  ARMNN_NO_DEPRECATE_WARN_BEGIN
1975  return m_WorkloadFactories.at(id)->CreateTensorHandle(tensorInfo, false);
1976  ARMNN_NO_DEPRECATE_WARN_END
1977  }
1978  else
1979  {
1980  ITensorHandleFactory* handleFactory = m_TensorHandleFactoryRegistry.GetFactory(factoryId);
1981  ARMNN_ASSERT(handleFactory);
1982  return handleFactory->CreateTensorHandle(tensorInfo, false);
1983  }
1984  };
1985 
1986  struct HandleInfo
1987  {
1988  ITensorHandle* m_TensorHandle;
1989 
1990  bool m_IsInputLayerHandle = false;
1991  bool m_IsOutputLayerHandle = false;
1992 
1993  WorkingMemHandle::InputMemDescriptorCoords m_InputMemDescriptorCoords;
1994  WorkingMemHandle::OutputMemDescriptorCoords m_OutputMemDescriptorCoords;
1995  };
1996 
1997  std::unordered_map<const OutputSlot*, HandleInfo> outputToHandleInfoMap;
1998 
1999  unsigned int layerIndex = 0;
2000  for (auto&& layer : order)
2001  {
2002  // Constant layer execution and management is handled during loaded network construction
2003  if (layer->GetType() == LayerType::Constant)
2004  {
2005  continue;
2006  }
2007 
2008  WorkingMemDescriptor workingMemDescriptor;
2009 
2010  bool isMemoryManaged = true;
2011  bool isInputLayer = false;
2012  bool isOutputLayer = false;
2013  bool isConnectedToOutputLayer = false;
2014 
2015  if (layer->GetType() == LayerType::Input || layer->GetType() == LayerType::MemImport)
2016  {
2017  // Input layers/workloads will not be executed so the descriptor is not added to workingMemDescriptors
2018  // However we will still need to manage the tensorHandle
2019  isInputLayer = true;
2020  isMemoryManaged = !m_NetworkProperties.m_ImportEnabled;
2021  }
2022  else if (layer->GetType() == LayerType::Output)
2023  {
2024  isOutputLayer = true;
2025  }
2026 
2027  unsigned int slotIndex = 0;
2028  // Create a tensor handle for each output slot of a layer
2029  // Once we create it, we start managing its lifetime
2030  for (auto& slot : layer->GetOutputSlots())
2031  {
2032  for (unsigned int i = 0; i < slot.GetNumConnections(); ++i)
2033  {
2034  if ((slot.GetConnection(i)->GetOwningLayer().GetType() == LayerType::Output))
2035  {
2036  if (!isConnectedToOutputLayer)
2037  {
2038  isConnectedToOutputLayer = true;
2039  // If export is enabled, disable memory management so we can export; otherwise we do a copy
2040  isMemoryManaged = !m_NetworkProperties.m_ExportEnabled;
2041  }
2042  else
2043  {
2044  // Importing in this case would likely cause unexpected behaviour, so we disallow it.
2045  ARMNN_LOG(warning) <<
2046  fmt::format("Layer name: '{0}' guid: '{1}' has two or more OutputLayers connected to it. "
2047  "This will prevent importing on the connected OutputLayers.",
2048  layer->GetName(), layer->GetGuid());
2049  isMemoryManaged = true;
2050  }
2051  }
2052  }
2053 
2054  ITensorHandle* tensorHandle;
2055  if (isMemoryManaged)
2056  {
2057  managedTensorHandles.emplace_back(GetTensorHandle(layer, slot));
2058  tensorHandle = managedTensorHandles.back().get();
2059  }
2060  else
2061  {
2062  unmanagedTensorHandles.emplace_back(GetTensorHandle(layer, slot));
2063  tensorHandle = unmanagedTensorHandles.back().get();
2064  }
2065 
2066  workingMemDescriptor.m_Outputs.push_back(tensorHandle);
2067 
2068  HandleInfo& handleInfo = outputToHandleInfoMap[&slot];
2069  handleInfo.m_TensorHandle = tensorHandle;
2070 
2071  // Store the coordinates of the current layer's OutputSlot that is connected to the OutputLayer
2072  if (isConnectedToOutputLayer)
2073  {
2074  handleInfo.m_IsOutputLayerHandle = true;
2075  handleInfo.m_OutputMemDescriptorCoords.m_OutputSlotCoords = {layerIndex, slotIndex};
2076  }
2077  // Store the LayerBindingId of the InputLayer
2078  if (isInputLayer)
2079  {
2080  handleInfo.m_IsInputLayerHandle = true;
2081  LayerBindingId bindingId = static_cast<BindableLayer*>(layer)->GetBindingId();
2082  handleInfo.m_InputMemDescriptorCoords.m_LayerBindingId = bindingId;
2083  }
2084  slotIndex++;
2085  }
2086  // Loop through the input slots in the same layer and decrement the reference counter associated
2087  // with each tensor handle we encounter.
2088  // Once it reaches zero, the lifetime of the tensor handle has ended, and we mark its memory as available
2089  // so that the next tensor handle with a non-overlapping lifetime can share its memory.
2090  for (auto& slot : layer->GetInputSlots())
2091  {
2092  ARMNN_ASSERT(slot.GetConnection());
2093  auto outputSlot = slot.GetConnectedOutputSlot();
2094  auto key = outputSlot->GetOwningLayer().GetGuid();
2095 
2096  // Constant layer execution and management is handled during loaded network construction
2097  auto found = m_ConstantTensorHandles.find(key);
2098  if (found != m_ConstantTensorHandles.end())
2099  {
2100  ITensorHandle* tensorHandle = found->second;
2101  workingMemDescriptor.m_Inputs.push_back(tensorHandle);
2102 
2103  // Odd case where a constant layer is connected to an output layer
2104  // We will need to create a HandleInfo to track it
2105  if (isOutputLayer)
2106  {
2107  LayerBindingId bindingId = static_cast<BindableLayer*>(layer)->GetBindingId();
2108 
2109  HandleInfo& handleInfo = outputToHandleInfoMap[outputSlot];
2110  handleInfo.m_TensorHandle = tensorHandle;
2111  handleInfo.m_IsOutputLayerHandle = true;
2112  handleInfo.m_OutputMemDescriptorCoords.m_LayerBindingIds.push_back(bindingId);
2113  handleInfo.m_OutputMemDescriptorCoords.m_InputSlotCoords.push_back({layerIndex, 0});
2114  }
2115  continue;
2116  }
2117 
2118  HandleInfo& handleInfo = outputToHandleInfoMap.at(outputSlot);
2119 
2120  ITensorHandle* inputTensorHandle = handleInfo.m_TensorHandle;
2121  workingMemDescriptor.m_Inputs.push_back(inputTensorHandle);
2122 
2123  // Store the LayerBindingId of the OutputLayer
2124  if (isOutputLayer)
2125  {
2126  LayerBindingId bindingId = static_cast<BindableLayer*>(layer)->GetBindingId();
2127  handleInfo.m_OutputMemDescriptorCoords.m_LayerBindingIds.push_back(bindingId);
2128  handleInfo.m_OutputMemDescriptorCoords.m_InputSlotCoords.push_back({layerIndex, 0});
2129  }
2130  // In this case the layer is not an Output Layer but shares its input tensorhandle with an OutputLayer
2131  // It will need to be updated as well, if we swap out the tensorhandle
2132  else if (handleInfo.m_IsOutputLayerHandle)
2133  {
2134  handleInfo.m_OutputMemDescriptorCoords.m_InputSlotCoords.push_back({layerIndex, slot.GetSlotIndex()});
2135  }
2136 
2137  // Store the coordinates of the InputSlots connected to the InputLayer
2138  // There can be more than one InputSlot connected to an InputLayer, so we use a vector
2139  if (handleInfo.m_IsInputLayerHandle)
2140  {
2141  std::pair<LayerGuid, unsigned int> connectionLocation{layerIndex, slot.GetSlotIndex()};
2142  handleInfo.m_InputMemDescriptorCoords.m_InputSlotCoords.emplace_back(connectionLocation);
2143  }
2144  }
2145 
2146  // Input layers/workloads will not be executed, so the descriptor is not added to workingMemDescriptors
2147  // However we will still need to manage the tensorHandle
2148  if (!isInputLayer)
2149  {
2150  // Simply auto-initialise ExecutionData here, so it's added only for the layers that require execution.
2151  // The memory and data will be allocated/assigned for the void* in WorkingMemHandle::Allocate.
2152  std::pair<BackendId, ExecutionData> dataPair;
2153  dataPair.first = layer->GetBackendId();
2154 
2155  executionDataVec.push_back(dataPair);
2156  workingMemDescriptors.push_back(workingMemDescriptor);
2157 
2158  layerIndex++;
2159  }
2160  }
2161 
2162  std::vector<std::pair<std::shared_ptr<TensorMemory>, MemorySource>> tensorMemory;
2163 
2164  auto externalMemoryManager = CreateExternalMemoryManger(tensorMemory);
2165 
2166  // Sort m_TensorMemory, so its order matches the outputSlot order
2167  std::sort(tensorMemory.begin(), tensorMemory.end(),
2168  [](const std::pair<std::shared_ptr<TensorMemory>, MemorySource>& lhs,
2169  const std::pair<std::shared_ptr<TensorMemory>, MemorySource>& rhs)
2170  {
2171  return lhs.first->m_OutputSlotId < rhs.first->m_OutputSlotId;
2172  });
2173 
2174  std::vector<WorkingMemHandle::InputMemDescriptorCoords> inputConnectionsInfo;
2175  std::vector<WorkingMemHandle::OutputMemDescriptorCoords> outputConnectionsInfo;
2176 
2177  for (const auto& handleInfo: outputToHandleInfoMap)
2178  {
2179  if (handleInfo.second.m_IsOutputLayerHandle)
2180  {
2181  outputConnectionsInfo.emplace_back(handleInfo.second.m_OutputMemDescriptorCoords);
2182  }
2183 
2184  if (handleInfo.second.m_IsInputLayerHandle)
2185  {
2186  inputConnectionsInfo.emplace_back(handleInfo.second.m_InputMemDescriptorCoords);
2187  }
2188  }
2189 
2190  return std::make_unique<WorkingMemHandle>(networkId,
2191  inputConnectionsInfo,
2192  outputConnectionsInfo,
2193  workingMemDescriptors,
2194  std::move(externalMemoryManager),
2195  std::move(tensorMemory),
2196  std::move(managedTensorHandles),
2197  std::move(unmanagedTensorHandles),
2198  executionDataVec,
2199  &m_Backends);
2200 }
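// Illustrative sketch (editorial addition, not part of the original source):
// overlapped execution as described above, one WorkingMemHandle per thread; a
// single handle must never be used by two Execute calls concurrently.
// Assumes <thread> is available.
static void SketchOverlappedExecution(IRuntime& runtime, NetworkId networkId,
                                      const InputTensors& in0, const OutputTensors& out0,
                                      const InputTensors& in1, const OutputTensors& out1)
{
    auto handle0 = runtime.CreateWorkingMemHandle(networkId);
    auto handle1 = runtime.CreateWorkingMemHandle(networkId);

    std::thread t0([&] { runtime.Execute(*handle0, in0, out0); });
    std::thread t1([&] { runtime.Execute(*handle1, in1, out1); });
    t0.join();
    t1.join();
}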
2201 
2202 void LoadedNetwork::RegisterDebugCallback(const DebugCallbackFunction& func)
2203 {
2204  for (auto&& workloadPtr: m_WorkloadQueue)
2205  {
2206  workloadPtr.get()->RegisterDebugCallback(func);
2207  }
2208 }
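// Illustrative sketch (editorial addition, not part of the original source):
// a DebugCallbackFunction that logs each intermediate tensor surfaced by Debug
// layers; it only fires when the network was optimized with debug enabled.
static void SketchAttachDebugLogging(IRuntime& runtime, NetworkId networkId)
{
    runtime.RegisterDebugCallback(networkId,
        [](LayerGuid guid, unsigned int slotIndex, ITensorHandle* tensorHandle)
        {
            ARMNN_LOG(info) << "Debug output: layer guid " << guid
                            << " slot " << slotIndex
                            << " dims " << tensorHandle->GetShape().GetNumDimensions();
        });
}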
2209 
2210 
2211 void LoadedNetwork::CreateMemoryProfileAsync()
2212 {
2213  struct PartialBlock
2214  {
2215  unsigned int m_StartOfLife;
2216  unsigned int m_Lifetime;
2217 
2218  size_t m_MemSize;
2219  unsigned int m_Index;
2220 
2221  BackendId m_BackendId;
2222  };
2223 
2224  auto align = [](size_t numToAlign)
2225  {
2226  const size_t alignment = sizeof(float);
2227  return ((numToAlign + alignment - 1) / alignment) * alignment;
2228  };
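 // e.g. with alignment == sizeof(float) == 4: align(10) == 12 and align(12) == 12,
 // i.e. sizes are rounded up to the next multiple of four bytes.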
2229 
2230  std::unordered_map<const OutputSlot*, PartialBlock> memBlockTrackerMap;
2231 
2232  const bool inputImportingEnabled = m_NetworkProperties.m_InputSource != MemorySource::Undefined;
2233  const bool outputImportingEnabled = m_NetworkProperties.m_OutputSource != MemorySource::Undefined;
2234 
2235  unsigned int timestep = 0;
2236  unsigned int outputIndex = 0;
2237  Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
2238 
2239  for (auto&& layer : order)
2240  {
2241  const LayerType& layerType = layer->GetType();
2242  // Don't manage memory if importing.
2243  if (layerType == LayerType::Input && inputImportingEnabled)
2244  {
2245  continue;
2246  }
2247  // Don't manage memory if importing.
2248  if (layerType == LayerType::Output && outputImportingEnabled
2249  && layer->GetInputSlot(0).GetConnectedOutputSlot()->GetNumConnections() == 1)
2250  {
2251  continue;
2252  }
2253  // Because Constant Layer memory cannot be shared, the memory must persist for the lifetime of execution,
2254  // so its management is done separately.
2255  if (layerType == LayerType::Constant)
2256  {
2257  continue;
2258  }
2259 
2260  BackendId backendId = layer->GetBackendId();
2261  for (auto& outputSlot : layer->GetOutputSlots())
2262  {
2263  if (!m_SupportsExternallyManagedMemory[backendId])
2264  {
2265  continue;
2266  }
2267 
2268  PartialBlock partialBlock;
2269 
2270  partialBlock.m_StartOfLife = timestep;
2271 
2272  size_t alignedSize = align(outputSlot.GetOutputHandler().GetTensorInfo().GetNumBytes());
2273  partialBlock.m_MemSize = alignedSize;
2274  partialBlock.m_Index = outputIndex++;
2275  partialBlock.m_Lifetime = outputSlot.GetNumConnections();
2276  partialBlock.m_BackendId = backendId;
2277 
2278  if (partialBlock.m_Lifetime == 0)
2279  {
2280  m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
2281  partialBlock.m_StartOfLife,
2282  partialBlock.m_MemSize,
2283  0,
2284  partialBlock.m_Index);
2285  }
2286  else
2287  {
2288  memBlockTrackerMap[&outputSlot] = partialBlock;
2289  }
2290  }
2291 
2292  for (auto& inputSlot : layer->GetInputSlots())
2293  {
2294  const Layer& connectedInputLayer = inputSlot.GetConnectedOutputSlot()->GetOwningLayer();
2295  const LayerType& owningLayerType = connectedInputLayer.GetType();
2296 
2297  if (owningLayerType == LayerType::Constant)
2298  {
2299  continue;
2300  }
2301  if (inputImportingEnabled && owningLayerType == LayerType::Input)
2302  {
2303  continue;
2304  }
2305 
2306  auto outputSlot = inputSlot.GetConnectedOutputSlot();
2307 
2308  PartialBlock& partialBlock = memBlockTrackerMap.at(outputSlot);
2309 
2310  auto& lifetime = partialBlock.m_Lifetime;
2311  --lifetime;
2312 
2313  if (lifetime == 0)
2314  {
2315  m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
2316  timestep,
2317  partialBlock.m_MemSize,
2318  0,
2319  partialBlock.m_Index);
2320  }
2321  }
2322  ++timestep;
2323  }
2324 }
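// Editorial comment: CreateMemoryProfileAsync tracks lifetimes per OutputSlot (the
// async path creates one tensor handle per slot), whereas CreateMemoryProfile below
// tracks them per root ITensorHandle, tracing sub-tensor handles to their ancestor,
// because handles there may be shared. Both record (startOfLife, endOfLife, size,
// offset, index) blocks into m_MemBlockMap for the memory optimizer to bin.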
2325 
2326 void LoadedNetwork::CreateMemoryProfile()
2327 {
2328  // Finds the first TensorHandle ancestor of a SubTensorHandle. If the ITensorHandle provided
2329  // is a TensorHandle, the function just returns it
2330  auto TraceSubTensorHandleAncestry = [](ITensorHandle* const subTensorHandle)
2331  {
2332  ITensorHandle* ancestor = subTensorHandle;
2333  while (ancestor && ancestor->GetParent())
2334  {
2335  ancestor = ancestor->GetParent();
2336  }
2337  return ancestor;
2338  };
2339 
2340  struct PartialBlock
2341  {
2342  unsigned int m_StartOfLife;
2343  unsigned int m_Lifetime;
2344 
2345  size_t m_MemSize;
2346  unsigned int m_Index;
2347 
2348  BackendId m_BackendId;
2349  };
2350 
2351  auto align = [](size_t numToAlign)
2352  {
2353  const size_t alignment = sizeof(float);
2354  return ((numToAlign + alignment - 1) / alignment) * alignment;
2355  };
2356 
2357  std::unordered_map<ITensorHandle*, PartialBlock> memBlockTrackerMap;
2358 
2359  const bool inputImportingEnabled = m_NetworkProperties.m_InputSource != MemorySource::Undefined;
2360  const bool outputImportingEnabled = m_NetworkProperties.m_OutputSource != MemorySource::Undefined;
2361 
2362  unsigned int timestep = 0;
2363  unsigned int outputIndex = 0;
2364  Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
2365 
2366  for (auto&& layer : order)
2367  {
2368  const LayerType& layerType = layer->GetType();
2369  // Don't manage memory if importing.
2370  if (layerType == LayerType::Input && inputImportingEnabled)
2371  {
2372  continue;
2373  }
2374  // Don't manage memory if importing.
2375  if (layerType == LayerType::Output && outputImportingEnabled
2376  && layer->GetInputSlot(0).GetConnectedOutputSlot()->GetNumConnections() == 1)
2377  {
2378  continue;
2379  }
2380  // Because Constant Layer memory cannot be shared, the memory must persist for the lifetime of execution,
2381  // so its management is done separately.
2382  if (layerType == LayerType::Constant)
2383  {
2384  continue;
2385  }
2386 
2387  BackendId backendId = layer->GetBackendId();
2388  for (auto& outputSlot : layer->GetOutputSlots())
2389  {
2390  if (!m_SupportsExternallyManagedMemory[backendId])
2391  {
2392  continue;
2393  }
2394 
2395  ITensorHandle* tensorHandle = outputSlot.GetOutputHandler().GetData();
2396  tensorHandle = TraceSubTensorHandleAncestry(tensorHandle);
2397 
2398  if (memBlockTrackerMap.find(tensorHandle) == memBlockTrackerMap.end())
2399  {
2400  PartialBlock partialBlock;
2401 
2402  partialBlock.m_StartOfLife = timestep;
2403 
2404  size_t alignedSize = align(outputSlot.GetOutputHandler().GetTensorInfo().GetNumBytes());
2405  partialBlock.m_MemSize = alignedSize;
2406  partialBlock.m_Index = outputIndex++;
2407  partialBlock.m_Lifetime = outputSlot.GetNumConnections();
2408  partialBlock.m_BackendId = backendId;
2409 
2410  if (partialBlock.m_Lifetime == 0)
2411  {
2412  m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
2413  partialBlock.m_StartOfLife,
2414  partialBlock.m_MemSize,
2415  0,
2416  partialBlock.m_Index);
2417  }
2418  else
2419  {
2420  memBlockTrackerMap[tensorHandle] = partialBlock;
2421  }
2422  m_Tensorhandles.push_back(tensorHandle);
2423 
2424  }
2425  else
2426  {
2427  memBlockTrackerMap.at(tensorHandle).m_Lifetime += outputSlot.GetNumConnections();
2428  }
2429  }
2430 
2431  for (auto& inputSlot : layer->GetInputSlots())
2432  {
2433  const Layer& connectedInputLayer = inputSlot.GetConnectedOutputSlot()->GetOwningLayer();
2434  const LayerType& owningLayerType = connectedInputLayer.GetType();
2435 
2436  if (owningLayerType == LayerType::Constant)
2437  {
2438  continue;
2439  }
2440  if (inputImportingEnabled && owningLayerType == LayerType::Input)
2441  {
2442  continue;
2443  }
2444  if (!m_SupportsExternallyManagedMemory[connectedInputLayer.GetBackendId()])
2445  {
2446  continue;
2447  }
2448 
2449  auto outputSlot = inputSlot.GetConnectedOutputSlot();
2450 
2451  ITensorHandle* tensorHandle = outputSlot->GetOutputHandler().GetData();
2452  tensorHandle = TraceSubTensorHandleAncestry(tensorHandle);
2453 
2454  PartialBlock& partialBlock = memBlockTrackerMap.at(tensorHandle);
2455 
2456  auto& lifetime = partialBlock.m_Lifetime;
2457  --lifetime;
2458 
2459  if (lifetime == 0)
2460  {
2461  m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
2462  timestep,
2463  partialBlock.m_MemSize,
2464  0,
2465  partialBlock.m_Index);
2466  }
2467  }
2468  ++timestep;
2469  }
2470 
2471 }
2472 
2473 std::unique_ptr<MemoryManager> LoadedNetwork::CreateExternalMemoryManger(
2474  std::vector<std::pair<std::shared_ptr<TensorMemory>, MemorySource>>& tensorMemoryVec)
2475 {
2476  std::unique_ptr<MemoryManager> memoryManager = std::make_unique<MemoryManager>();
2477  auto allocatorMap = BackendRegistryInstance().GetAllocators();
2478 
2479  for (auto& backend : m_MemBinMap)
2480  {
2481  std::vector<BufferStorage> bufferStorageVec;
2482 
2483  std::shared_ptr<ICustomAllocator> backendAllocator;
2484  if (allocatorMap.find(backend.first) != allocatorMap.end())
2485  {
2486  backendAllocator = allocatorMap[backend.first];
2487  }
2488  else
2489  {
2490  backendAllocator = m_Backends[backend.first]->GetDefaultAllocator();
2491  }
2492 
2493  for (auto& memBin : backend.second)
2494  {
2495  BufferStorage bufferStorage;
2496  bufferStorage.m_BufferSize = memBin.m_MemSize;
2497  bufferStorage.m_TensorMemoryVector.reserve(memBin.m_MemBlocks.size());
2498 
2499  for (auto& memBlock : memBin.m_MemBlocks)
2500  {
2501  auto tensorMemory = std::make_shared<TensorMemory>(TensorMemory{memBlock.m_Offset, memBlock.m_Index});
2502 
2503  tensorMemoryVec.emplace_back(tensorMemory, backendAllocator->GetMemorySourceType());
2504  bufferStorage.m_TensorMemoryVector.emplace_back(tensorMemory);
2505  }
2506 
2507  bufferStorageVec.emplace_back(std::move(bufferStorage));
2508  }
2509 
2510  memoryManager->StoreMemToAllocate(bufferStorageVec, backendAllocator, 4);
2511  }
2512 
2513  return memoryManager;
2514 }
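// Editorial comment: each MemBin above becomes one BufferStorage, a single
// allocation of m_BufferSize bytes whose TensorMemory entries hold the offset and
// outputSlot index that each tensor handle later binds to; the trailing 4 passed
// to StoreMemToAllocate appears to be the byte alignment, matching the
// sizeof(float) alignment used when the memory profile was built.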
2515 
2516 LayerBindingId LoadedNetwork::ValidateImportedInputID(ImportedInputId id)
2517 {
2518  try
2519  {
2520  const auto& importedTensorHandlePin = m_PreImportedInputHandles.at(id);
2521  if (!importedTensorHandlePin.m_TensorHandle)
2522  {
2523  throw InvalidArgumentException(fmt::format("LoadedNetwork::Execute: "
2524  "PreImportedInput: {} has been deleted", id));
2525  }
2526  return importedTensorHandlePin.m_LayerBindingId;
2527  }
2528  catch (const std::out_of_range&)
2529  {
2530  throw InvalidArgumentException(fmt::format("LoadedNetwork::Execute: Unknown ImportedInputId: {}", id));
2531  }
2532 }
2533 
2534 LayerBindingId LoadedNetwork::ValidateImportedOutputID(ImportedOutputId id)
2535 {
2536  try
2537  {
2538  const auto& importedTensorHandlePin = m_PreImportedOutputHandles.at(id);
2539  if (!importedTensorHandlePin.m_TensorHandle)
2540  {
2541  throw InvalidArgumentException(fmt::format("LoadedNetwork::Execute: "
2542  "PreImportedOutput: {} has been deleted", id));
2543  }
2544  return importedTensorHandlePin.m_LayerBindingId;
2545  }
2546  catch (const std::out_of_range&)
2547  {
2548  throw InvalidArgumentException(fmt::format("LoadedNetwork::Execute: Unknown ImportedOutputId: {}", id));
2549  }
2550 }
2551 
2552 }