ArmNN 24.02
LoadedNetwork.cpp
1 //
2 // Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 
6 #include "LoadedNetwork.hpp"
7 #include "Layer.hpp"
8 #include "Graph.hpp"
9 #include "Profiling.hpp"
10 #include "HeapProfiling.hpp"
11 #include "WorkingMemHandle.hpp"
12 #include "ExecutionData.hpp"
13 
14 #include <armnn/BackendHelper.hpp>
16 #include <armnn/Logging.hpp>
17 
22 
24 
25 #include <armnn/utility/Assert.hpp>
26 
28 
29 #include <common/include/Processes.hpp>
30 
31 #include <fmt/format.h>
32 
33 namespace armnn
34 {
35 
36 using namespace std;
37 using namespace arm::pipe;
38 
39 namespace
40 {
41 
42 template <typename ExceptionType>
43 std::string ToErrorMessage(const char * prefix, const ExceptionType & error)
44 {
45  std::stringstream ss;
46  ss << prefix << " " << error.what();
47  return ss.str();
48 }
49 
50 void AddLayerStructure(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
51  const Layer& layer,
52  ProfilingGuid networkGuid)
53 {
54  // Add layer to the post-optimisation network structure
55  std::string layerName = layer.GetNameStr().empty() ? "<Unnamed>" : layer.GetNameStr();
56  timelineUtils->CreateNamedTypedChildEntity(layer.GetGuid(),
57  networkGuid,
58  layerName,
59  LabelsAndEventClasses::LAYER_GUID);
60  for (auto&& input : layer.GetInputSlots())
61  {
62  const IOutputSlot* source = input.GetConnectedOutputSlot();
63  ARMNN_ASSERT(source != nullptr);
64  timelineUtils->CreateConnectionRelationship(ProfilingRelationshipType::RetentionLink,
65  source->GetOwningLayerGuid(),
66  layer.GetGuid());
67  }
68 }
69 
70 void AddWorkloadStructure(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
71  std::unique_ptr<IWorkload>& workload,
72  const Layer& layer)
73 {
74  // Add workload to the post-optimisation network structure
75  timelineUtils->CreateTypedEntity(workload->GetGuid(), LabelsAndEventClasses::WORKLOAD_GUID);
76  timelineUtils->MarkEntityWithLabel(workload->GetGuid(),
77  layer.GetBackendId().Get(),
78  LabelsAndEventClasses::BACKENDID_GUID);
79 
80  // Link the workload to the layer
81  timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
82  layer.GetGuid(),
83  workload->GetGuid(),
84  LabelsAndEventClasses::CHILD_GUID);
85 }
86 
87 } // anonymous
88 
89 /**
90  * Performs a sanity check to ensure that the combination of input and output memory sources matches the values of
91  * importEnabled and exportEnabled that were specified during optimization. During optimization the tensor handle
92  * factories are chosen based on whether import and export are enabled. If the user then specifies an incompatible
93  * memory source here, imports or exports can fail at inference time.
94  *
95  * @param optimizedOptions The model options recorded on the optimized network (searched for the "Global"
96  *                         ImportEnabled/ExportEnabled values).
97  * @param networkProperties The INetworkProperties supplied when loading the network, including the input and
98  *                          output memory sources.
99  */
98 void ValidateSourcesMatchOptimizedNetwork(std::vector<BackendOptions> optimizedOptions,
99  const INetworkProperties& networkProperties)
100 {
101  // Find the "Global" backend options. During the optimize phase the values of importEnabled and exportEnabled are
102  // added as backend options.
103  const vector<BackendOptions>::iterator& backendItr =
104  find_if(optimizedOptions.begin(), optimizedOptions.end(), [](const BackendOptions& backend) {
105  if (backend.GetBackendId().Get() == "Global")
106  {
107  return true;
108  }
109  else
110  {
111  return false;
112  }
113  });
114  bool importEnabled = false;
115  bool exportEnabled = false;
116  if (backendItr != optimizedOptions.end())
117  {
118  // Find the importEnabled and exportEnabled values.
119  for (size_t i = 0; i < backendItr->GetOptionCount(); i++)
120  {
121  const BackendOptions::BackendOption& option = backendItr->GetOption(i);
122  if (option.GetName() == "ImportEnabled")
123  {
124  importEnabled = option.GetValue().AsBool();
125  }
126  if (option.GetName() == "ExportEnabled")
127  {
128  exportEnabled = option.GetValue().AsBool();
129  }
130  }
131  }
132 
133  // Now that we have values for import and export compare them to the MemorySource variables.
134  // Any value of MemorySource that's not "Undefined" implies that we need to do an import of some kind.
135  if ((networkProperties.m_InputSource == MemorySource::Undefined && importEnabled) ||
136  (networkProperties.m_InputSource != MemorySource::Undefined && !importEnabled))
137  {
138  auto message = fmt::format("The input memory source specified, '{0}',", networkProperties.m_InputSource);
139  if (!importEnabled)
140  {
141  message.append(" requires that memory import be enabled. However, "
142  "it was disabled when this network was optimized.");
143  }
144  else
145  {
146  message.append(" requires that memory import be disabled. However, "
147  "it was enabled when this network was optimized.");
148  }
149  throw InvalidArgumentException(message);
150  }
151 
152  if ((networkProperties.m_OutputSource == MemorySource::Undefined && exportEnabled) ||
153  (networkProperties.m_OutputSource != MemorySource::Undefined && !exportEnabled))
154  {
155  auto message = fmt::format("The output memory source specified, '{0}',", networkProperties.m_OutputSource);
156  if (!exportEnabled)
157  {
158  message.append(" requires that memory export be enabled. However, "
159  "it was disabled when this network was optimized.");
160  }
161  else
162  {
163  message.append(" requires that memory export be disabled. However, "
164  "it was enabled when this network was optimized.");
165  }
166  throw InvalidArgumentException(message);
167  }
168 } // ValidateSourcesMatchOptimizedNetwork
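For context, the check above ties the memory sources passed in INetworkProperties at load time to the ImportEnabled/ExportEnabled flags recorded at optimization time. The following minimal sketch (not part of this file) shows one way to keep the two in step via the public ArmNN API; the function name LoadWithImportEnabled and the CpuRef backend choice are assumptions for illustration only.

    // Illustrative sketch: keep optimize-time import/export flags consistent with the
    // load-time memory sources so ValidateSourcesMatchOptimizedNetwork does not throw.
    #include <armnn/ArmNN.hpp>

    void LoadWithImportEnabled(armnn::IRuntime& runtime, armnn::INetwork& network)
    {
        // Enable import/export when optimizing...
        armnn::OptimizerOptionsOpaque optimizerOptions;
        optimizerOptions.SetImportEnabled(true);
        optimizerOptions.SetExportEnabled(true);

        armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(network,
                                                             {armnn::Compute::CpuRef},
                                                             runtime.GetDeviceSpec(),
                                                             optimizerOptions);

        // ...and pass matching (non-Undefined) memory sources when loading.
        // MemorySource::Undefined here would fail the validation above.
        armnn::INetworkProperties networkProperties(false,                        // asyncEnabled
                                                    armnn::MemorySource::Malloc,  // input source
                                                    armnn::MemorySource::Malloc); // output source

        armnn::NetworkId networkId;
        std::string errorMessage;
        runtime.LoadNetwork(networkId, std::move(optNet), errorMessage, networkProperties);
    }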
169 
170 std::unique_ptr<LoadedNetwork> LoadedNetwork::MakeLoadedNetwork(std::unique_ptr<IOptimizedNetwork> net,
171  std::string& errorMessage,
172  const INetworkProperties& networkProperties,
173  arm::pipe::IProfilingService* profilingService)
174 {
175  std::unique_ptr<LoadedNetwork> loadedNetwork;
176 
177  auto Fail = [&](const std::exception& error) -> std::unique_ptr<LoadedNetwork>
178  {
179  errorMessage = ToErrorMessage("An error occurred when preparing the network workloads: ", error);
180  ARMNN_LOG(error) << errorMessage;
181 
182  return std::unique_ptr<LoadedNetwork>();
183  };
184 
185  try
186  {
187  loadedNetwork.reset(new LoadedNetwork(std::move(net), networkProperties, profilingService));
188  }
189  catch (const armnn::RuntimeException& error)
190  {
191  return Fail(error);
192  }
193  catch (const armnn::Exception& error)
194  {
195  return Fail(error);
196  }
197  catch (const std::runtime_error& error)
198  {
199  return Fail(error);
200  }
201 
202  return loadedNetwork;
203 }
204 
205 LoadedNetwork::LoadedNetwork(std::unique_ptr<IOptimizedNetwork> net,
206  const INetworkProperties& networkProperties,
207  arm::pipe::IProfilingService* profilingService) :
208  m_OptimizedNetwork(std::move(net)),
209  m_NetworkProperties(networkProperties),
210  m_TensorHandleFactoryRegistry(),
211  m_ProfilingService(profilingService)
212 {
214  // Get the profiler and register it for the current thread.
215  const std::shared_ptr<IProfiler>& profiler = m_OptimizedNetwork->GetProfiler();
217 
218  profiler->EnableProfiling(networkProperties.m_ProfilingEnabled);
219 
220  profiler->EnableNetworkDetailsToStdOut(networkProperties.m_OutputNetworkDetailsMethod);
221 
222  // We need to check that the memory sources match up with the values of import and export specified during the
223  // optimize phase. If they don't this will throw an exception.
224  ValidateSourcesMatchOptimizedNetwork(m_OptimizedNetwork.get()->pOptimizedNetworkImpl->GetModelOptions(),
225  m_NetworkProperties);
226 
227  // First create tensor handlers, backends and workload factories.
228  // Handlers are created before workloads because workload creation
229  // can modify some of the handlers (for example the splitter and
230  // concat layers).
231 
232  bool useExternalMemoryManager = false;
233  bool useInternalMemoryManager = false;
234  Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
235  // Ensure Topological order
236  order.SetLayersOutOfOrder();
237  order.TopologicalSort();
238 
239  if (!networkProperties.m_AsyncEnabled)
240  {
241  m_IsInputImported = std::vector<bool>(order.GetNumInputs(), false);
242  m_IsOutputImported = std::vector<bool>(order.GetNumOutputs(), false);
243  }
244 
245  for (auto&& layer : order)
246  {
247  auto const& backendId = layer->GetBackendId();
248  if (m_Backends.count(backendId) == 0)
249  {
250  auto createBackend = BackendRegistryInstance().GetFactory(backendId);
251  auto it = m_Backends.emplace(std::make_pair(backendId, createBackend()));
252 
253  IBackendInternal* backend = it.first->second.get();
254 
255  // If we're doing async execution verify that the backend supports it and ExternallyManagedMemory.
256  if (networkProperties.m_AsyncEnabled)
257  {
258  if (!HasMatchingCapability(BackendOptions::BackendOption{"AsyncExecution", true},
259  backend->GetCapabilities()))
260  {
261  std::string er = backend->GetId();
262  er += " does not support AsyncExecution";
263  throw BackendCapabilityException(er);
264  }
265  if (!HasMatchingCapability(BackendOptions::BackendOption{"ExternallyManagedMemory", true},
266  backend->GetCapabilities()))
267  {
268  std::string er = backend->GetId();
269  er += " does not support ExternallyManagedMemory\n";
270  er += "AsyncEnabled networks require all backends to support ExternallyManagedMemory";
271  throw BackendCapabilityException(er);
272  }
273  m_SupportsExternallyManagedMemory[backend->GetId()] = true;
274  useExternalMemoryManager = true;
275  }
276  else
277  {
278  m_SupportsExternallyManagedMemory[backend->GetId()] = false;
279  useInternalMemoryManager = true;
280  }
281 
282  IBackendInternal::IWorkloadFactoryPtr workloadFactory;
283  if (backend->SupportsTensorAllocatorAPI())
284  {
285  workloadFactory = backend->CreateWorkloadFactory(
286  m_TensorHandleFactoryRegistry,
287  m_OptimizedNetwork->pOptimizedNetworkImpl->GetModelOptions(),
288  static_cast<MemorySourceFlags>(m_NetworkProperties.m_InputSource),
289  static_cast<MemorySourceFlags>(m_NetworkProperties.m_OutputSource));
290  }
291  else
292  {
293  m_BackendMemoryMangers.emplace_back(backend->CreateMemoryManager());
294  workloadFactory = backend->CreateWorkloadFactory(
295  m_BackendMemoryMangers.back(), m_OptimizedNetwork->pOptimizedNetworkImpl->GetModelOptions());
296  }
297  m_WorkloadFactories[backendId] = std::move(workloadFactory);
298  }
299  }
300 
301  if (!networkProperties.m_AsyncEnabled)
302  {
303  for (auto&& layer : order)
304  {
305  auto& workloadFactory = GetWorkloadFactory(*layer);
306  bool supportsExternalManager = m_SupportsExternallyManagedMemory[layer->GetBackendId()];
307 
308  switch (layer->GetType())
309  {
310  case LayerType::Input:
311  case LayerType::MemImport:
312  {
313  // If import is enabled (m_InputSource is not Undefined) then we need to set
314  // IsMemoryManaged to false when creating TensorHandles
315  layer->CreateTensorHandles(m_TensorHandleFactoryRegistry,
316  workloadFactory,
317  !supportsExternalManager &&
318  (m_NetworkProperties.m_InputSource == MemorySource::Undefined));
319  break;
320  }
321  case LayerType::Constant:
322  {
323  layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory, true);
324  break;
325  }
326  default:
327  {
328  // Look for a layer with 1 OutputSlot which has 1 connection, and that connection is to an Output layer.
329  // If export is enabled, disable memory management so we can export; otherwise we do a copy.
330  if ((layer->GetNumOutputSlots() == 1) &&
331  (layer->GetOutputSlots()[0].GetNumConnections() == 1) &&
332  (layer->GetOutputSlots()[0].GetConnection(0)->GetOwningLayer().GetType() == LayerType::Output))
333  {
334  layer->CreateTensorHandles(m_TensorHandleFactoryRegistry,
335  workloadFactory,
336  !supportsExternalManager &&
337  (m_NetworkProperties.m_OutputSource == MemorySource::Undefined));
338  }
339  else
340  {
341  layer->CreateTensorHandles(m_TensorHandleFactoryRegistry,
342  workloadFactory,
343  !supportsExternalManager);
344  }
345  }
346  }
347  }
348  }
349 
350  ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
351  std::unique_ptr<TimelineUtilityMethods> timelineUtils =
352  TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
353  if (timelineUtils)
354  {
355  timelineUtils->CreateTypedEntity(networkGuid, LabelsAndEventClasses::NETWORK_GUID);
356  // Mark the network with a start of life event
357  timelineUtils->RecordEvent(networkGuid, LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
358  // and with the process ID
359  int processID = arm::pipe::GetCurrentProcessId();
360  std::stringstream ss;
361  ss << processID;
362  timelineUtils->MarkEntityWithLabel(networkGuid, ss.str(), LabelsAndEventClasses::PROCESS_ID_GUID);
363  }
364 
365  std::vector<IWorkload*> ConstWorkloads;
366 
367  //Then create workloads.
368  {
369  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "LoadNetwork_CreateWorkloads");
370  for (auto&& layer: order)
371  {
372  if (timelineUtils)
373  {
374  // Add layer to the post-optimisation network structure
375  AddLayerStructure(timelineUtils, *layer, networkGuid);
376  }
377 
378  const IWorkloadFactory& workloadFactory = GetWorkloadFactory(*layer);
379 
380  switch (layer->GetType())
381  {
382  case LayerType::Input:
383  case LayerType::Output:
384  {
385  // Inputs and outputs are treated in a special way - see EnqueueInput() and EnqueueOutput().
386  break;
387  }
388  default:
389  {
390  auto workload = layer->CreateWorkload(workloadFactory);
391 
392  if (!workload)
393  {
394  const char* const layerName =
395  layer->GetNameStr().length() != 0 ? layer->GetName() : "<Unnamed>";
396  throw InvalidArgumentException(
397  fmt::format("No workload created for layer (name: '{0}' type: '{1}') (compute '{2}')",
398  layerName, static_cast<int>(layer->GetType()), layer->GetBackendId().Get()
399  ));
400  }
401 
402  if (timelineUtils)
403  {
404  // Add workload to the post-optimisation network structure
405  AddWorkloadStructure(timelineUtils, workload, *layer);
406  }
407 
408  // For async networks ConstantWorkloads are managed exclusively by LoadedNetwork
409  // and are separated out from the other workloads
410  if((networkProperties.m_AsyncEnabled || useExternalMemoryManager) &&
411  layer->GetType() == LayerType::Constant)
412  {
413  m_ConstantTensorHandles[layer->GetGuid()] =
414  layer->GetOutputSlot(0).GetOutputHandler().GetData();
415  m_ConstantWorkloads[layer->GetGuid()] = std::move(workload);
416  }
417  else
418  {
419  m_WorkloadQueue.push_back(std::move(workload));
420 
421  if (layer->GetType() == LayerType::Constant)
422  {
423  // Place the Constant Workloads into a queue so that they can be executed first
424  ConstWorkloads.push_back(m_WorkloadQueue.back().get());
425  }
426  }
427  // release the constant data in the layer.
428  layer->ReleaseConstantData();
429  break;
430  }
431  }
432  }
433  }
434 
435  // Gather information about workloads for inputs & outputs
436  if (!networkProperties.m_AsyncEnabled && m_WorkloadQueue.size() != 0)
437  {
438  const int noOfInputs = armnn::numeric_cast<int>(order.GetNumInputs());
439 
440  // Get indices of all workloads connected to each input and
441  // check if they support tensor handle replacement
442  for (const BindableLayer* layer: order.GetInputLayers())
443  {
444  const auto bindingId = layer->GetBindingId();
445 
446  bool supportsReplacement = true;
447 
448  for (const auto inputSlot: layer->GetOutputSlot(0).GetConnections())
449  {
450  auto workloadIndex = std::distance(order.begin(), order.GetPosInGraph(inputSlot->GetOwningLayer()));
451  workloadIndex -= noOfInputs;
452 
453  m_InputWorkloadSlotPairs[bindingId].emplace_back(WorkloadIndices{
454  armnn::numeric_cast<unsigned int>(workloadIndex), inputSlot->GetSlotIndex()});
455 
456  // Avoid if input is connected directly to an output
457  if (inputSlot->GetOwningLayer().GetType() != LayerType::Output)
458  {
459  auto workload = m_WorkloadQueue[m_InputWorkloadSlotPairs[bindingId].back().m_WorkloadIndex].get();
460  supportsReplacement &= workload->SupportsTensorHandleReplacement();
461  }
462  }
463 
464  ITensorHandleFactory::FactoryId factoryId = layer->GetOutputSlot(0).GetTensorHandleFactoryId();
465  // Get matching import factory Id
466  ITensorHandleFactory::FactoryId importFactoryId =
467  m_TensorHandleFactoryRegistry.GetMatchingImportFactoryId(factoryId);
468 
469  ITensorHandleFactory *importFactory = m_TensorHandleFactoryRegistry.GetFactory(importFactoryId);
470 
471  if (supportsReplacement && importFactory)
472  {
473  m_PreImportedInputHandles.emplace_back(
474  bindingId, importFactory->CreateTensorHandle(layer->GetOutputSlot(0).GetTensorInfo(), false));
475  }
476  else
477  {
478  m_PreImportedInputHandles.emplace_back(bindingId, nullptr);
479  }
480  }
481 
482  // Get indices of all workloads connected to each output and
483  // check if they support tensor handle replacement
484  for (const BindableLayer* layer: order.GetOutputLayers())
485  {
486  const auto bindingId = layer->GetBindingId();
487 
488  const auto outputSlot = layer->GetInputSlot(0).GetConnectedOutputSlot();
489  auto& indices = m_OutputWorkloadSlotPairs[bindingId];
490 
491  // Avoid if output is connected directly to an input
492  if (outputSlot->GetOwningLayer().GetType() != LayerType::Input)
493  {
494  auto workloadIndex = std::distance(order.begin(), order.GetPosInGraph(outputSlot->GetOwningLayer()));
495  workloadIndex -= noOfInputs;
496 
497  indices.m_OutputSlotIndices = WorkloadIndices{numeric_cast<unsigned int>(workloadIndex),
498  outputSlot->CalculateIndexOnOwner()};
499 
500  bool supportsReplacement = true;
501  auto outputWorkload = m_WorkloadQueue[indices.m_OutputSlotIndices.m_WorkloadIndex].get();
502  supportsReplacement &= outputWorkload->SupportsTensorHandleReplacement();
503 
504  for (auto &inputSlot: outputSlot->GetConnections())
505  {
506  if (inputSlot->GetOwningLayer().GetType() != LayerType::Output)
507  {
508  auto inWorkloadIndex = std::distance(order.begin(),
509  order.GetPosInGraph(inputSlot->GetOwningLayer()));
510  inWorkloadIndex -= noOfInputs;
511  indices.m_InputSlotIndices.emplace_back(
512  WorkloadIndices{numeric_cast<unsigned int>(inWorkloadIndex),
513  inputSlot->GetSlotIndex()});
514  auto inputWorkload = m_WorkloadQueue[indices.m_InputSlotIndices.back().m_WorkloadIndex].get();
515  supportsReplacement &= inputWorkload->SupportsTensorHandleReplacement();
516  }
517  }
518 
519  ITensorHandleFactory::FactoryId factoryId = outputSlot->GetTensorHandleFactoryId();
520  // Get matching import factory Id
521  ITensorHandleFactory::FactoryId importFactoryId =
522  m_TensorHandleFactoryRegistry.GetMatchingImportFactoryId(factoryId);
523  ITensorHandleFactory *importFactory = m_TensorHandleFactoryRegistry.GetFactory(importFactoryId);
524 
525  if (supportsReplacement && importFactory)
526  {
527  m_PreImportedOutputHandles.emplace_back(
528  bindingId, importFactory->CreateTensorHandle(outputSlot->GetTensorInfo(), false));
529  }
530  else
531  {
532  m_PreImportedOutputHandles.emplace_back(bindingId, nullptr);
533  }
534  }
535  }
536  }
537 
538  for (auto&& workloadFactory : m_WorkloadFactories)
539  {
540  workloadFactory.second->AfterWorkloadsCreated();
541  }
542 
543  if (timelineUtils)
544  {
545  // Commit to send the post-optimisation network structure
546  timelineUtils->Commit();
547  }
548 
549  if (useExternalMemoryManager)
550  {
551  if (networkProperties.m_AsyncEnabled)
552  {
553  CreateMemoryProfileAsync();
554  }
555  else
556  {
557  CreateMemoryProfile();
558  }
559 
560  auto backendStrategyMap = BackendRegistryInstance().GetMemoryOptimizerStrategies();
561  for (auto& backendMemoryProfile : m_MemBlockMap)
562  {
563  const BackendId& backendId = backendMemoryProfile.first;
564  if (backendStrategyMap.find(backendId) != backendStrategyMap.end())
565  {
566  m_MemBinMap[backendId] = backendStrategyMap[backendId]->Optimize(backendMemoryProfile.second);
567  }
568  else
569  {
570  m_MemBinMap[backendId] = m_ConstantStrategy->Optimize(backendMemoryProfile.second);
571  }
572  }
573 
574  if (!networkProperties.m_AsyncEnabled)
575  {
576  m_ExternalMemoryManager = CreateExternalMemoryManger(m_TensorMemory);
577 
578  // Sort m_TensorMemory, so its order matches m_Tensorhandles
579  std::sort(m_TensorMemory.begin(), m_TensorMemory.end(),
580  [](const std::pair<std::shared_ptr<TensorMemory>, MemorySource>& lhs,
581  const std::pair<std::shared_ptr<TensorMemory>, MemorySource>& rhs)
582  {
583  return lhs.first->m_OutputSlotId < rhs.first->m_OutputSlotId;
584  });
585  }
586  }
587 
588  // Now that the intermediate tensor memory has been set up,
589  // do any post-allocation configuration for each workload.
590  if (!networkProperties.m_AsyncEnabled)
591  {
592  if (useInternalMemoryManager)
593  {
594  // Set up memory.
595  m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().AllocateDynamicBuffers();
596  }
597 
598  for (auto &workload : m_WorkloadQueue)
599  {
600  workload->PostAllocationConfigure();
601  }
602  }
603 
604  if (useExternalMemoryManager)
605  {
606  if (!networkProperties.m_AsyncEnabled)
607  {
608  AllocateAndExecuteConstantWorkloads();
609  }
610  else
611  {
612  AllocateAndExecuteConstantWorkloadsAsync();
613  }
614  }
615  // If synchronous, execute all constant layer workloads
616  if (!networkProperties.m_AsyncEnabled)
617  {
618  for (auto workload: ConstWorkloads)
619  {
620  workload->Execute();
621  }
622  }
623 }
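The constructor above only accepts m_AsyncEnabled when every backend in the network reports the AsyncExecution and ExternallyManagedMemory capabilities. A minimal, illustrative sketch of the corresponding caller-side flow through the public IRuntime API is shown below; the helper name RunAsync and the assumption of already-prepared input/output tensors are for the example only.

    // Illustrative sketch: loading a network with async execution enabled and running it
    // through a working memory handle.
    #include <armnn/ArmNN.hpp>
    #include <memory>

    void RunAsync(armnn::IRuntime& runtime, armnn::IOptimizedNetworkPtr optNet,
                  const armnn::InputTensors& inputTensors, const armnn::OutputTensors& outputTensors)
    {
        // asyncEnabled = true: every backend used by the network must support
        // AsyncExecution and ExternallyManagedMemory, otherwise loading throws
        // BackendCapabilityException (see the constructor above).
        armnn::INetworkProperties networkProperties(true,
                                                    armnn::MemorySource::Undefined,
                                                    armnn::MemorySource::Undefined);

        armnn::NetworkId networkId;
        std::string errorMessage;
        runtime.LoadNetwork(networkId, std::move(optNet), errorMessage, networkProperties);

        // Each concurrent execution uses its own working memory handle.
        std::unique_ptr<armnn::experimental::IWorkingMemHandle> workingMemHandle =
            runtime.CreateWorkingMemHandle(networkId);

        runtime.Execute(*workingMemHandle, inputTensors, outputTensors);
    }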
624 
625 void LoadedNetwork::AllocateAndExecuteConstantWorkloads()
626 {
627  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "LoadNetwork_AllocateAndExecuteConstants");
628  for (auto& pair : m_ConstantWorkloads)
629  {
630  auto tensorHandle = m_ConstantTensorHandles[pair.first];
631  tensorHandle->Allocate();
632  pair.second->Execute();
633  }
634 }
635 
636 void LoadedNetwork::AllocateAndExecuteConstantWorkloadsAsync()
637 {
638  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "LoadNetwork_AllocateAndExecuteConstants");
639  Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
640  for (auto&& layer : order)
641  {
642  if (layer->GetType() == LayerType::Constant)
643  {
644  const auto& outSlot = layer->GetOutputSlots()[0];
645  const auto factoryId = outSlot.GetTensorHandleFactoryId();
647  auto& workloadFactory = GetWorkloadFactory(*layer);
648 
649  layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory);
650  ITensorHandle* tensorHandle = outSlot.GetOutputHandler().GetData();
651 
652  m_ConstantTensorHandles[layer->GetGuid()] = tensorHandle;
653  tensorHandle->Allocate();
654 
655  auto& backend = m_Backends.at(layer->GetBackendId());
656 
657  WorkingMemDescriptor memDesc;
658  memDesc.m_Outputs.push_back(tensorHandle);
659 
660  ExecutionData executionData = backend->CreateExecutionData(memDesc);
661  m_ConstantWorkloads[layer->GetGuid()]->ExecuteAsync(executionData);
662  }
663  }
664 }
665 
666 void LoadedNetwork::SendNetworkStructure(arm::pipe::IProfilingService& profilingService)
667 {
668  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "LoadNetwork_SendNetworkStructure");
669  Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
670  ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
671 
672  std::unique_ptr<TimelineUtilityMethods> timelineUtils =
673  TimelineUtilityMethods::GetTimelineUtils(profilingService);
674 
675  timelineUtils->CreateTypedEntity(networkGuid, LabelsAndEventClasses::NETWORK_GUID);
676 
677  for (auto&& layer : order)
678  {
679  // Add layer to the post-optimisation network structure
680  AddLayerStructure(timelineUtils, *layer, networkGuid);
681  switch (layer->GetType())
682  {
683  case LayerType::Input:
684  case LayerType::Output:
685  {
686  // Inputs and outputs are treated in a special way - see EnqueueInput() and EnqueueOutput().
687  break;
688  }
689  default:
690  {
691  for (auto& workload : m_WorkloadQueue)
692  {
693  // Add workload to the post-optimisation network structure
694  AddWorkloadStructure(timelineUtils, workload, *layer);
695  }
696  break;
697  }
698  }
699  }
700  // Commit to send the post-optimisation network structure
701  timelineUtils->Commit();
702 }
703 
704 ProfilingGuid LoadedNetwork::GetNetworkGuid()
705 {
706  return m_OptimizedNetwork->GetGuid();
707 }
708 
709 TensorInfo LoadedNetwork::GetInputTensorInfo(LayerBindingId layerId) const
710 {
711  for (auto&& inputLayer : m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetInputLayers())
712  {
713  ARMNN_ASSERT_MSG(inputLayer->GetNumOutputSlots() == 1, "Input layer should have exactly 1 output slot");
714  if (inputLayer->GetBindingId() == layerId)
715  {
716  return inputLayer->GetOutputSlot(0).GetTensorInfo();
717  }
718  }
719 
720  throw InvalidArgumentException(fmt::format("No input layer is associated with id {}", layerId));
721 }
722 
723 TensorInfo LoadedNetwork::GetOutputTensorInfo(LayerBindingId layerId) const
724 {
725  for (auto&& outputLayer : m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetOutputLayers())
726  {
727  ARMNN_ASSERT_MSG(outputLayer->GetNumInputSlots() == 1, "Output layer should have exactly 1 input slot");
728  ARMNN_ASSERT_MSG(outputLayer->GetInputSlot(0).GetConnection(), "Input slot on Output layer must be connected");
729  if (outputLayer->GetBindingId() == layerId)
730  {
731  return outputLayer->GetInputSlot(0).GetTensorInfo();
732  }
733  }
734 
735  throw InvalidArgumentException(fmt::format("No output layer is associated with id {}", layerId));
736 }
737 
738 const IWorkloadFactory& LoadedNetwork::GetWorkloadFactory(const Layer& layer) const
739 {
740  const IWorkloadFactory* workloadFactory = nullptr;
741 
742  auto it = m_WorkloadFactories.find(layer.GetBackendId());
743  if (it == m_WorkloadFactories.end())
744  {
745  throw RuntimeException(fmt::format("No workload factory for {0} to be used for layer: {1}",
746  layer.GetBackendId().Get(),
747  layer.GetNameStr()),
748  CHECK_LOCATION());
749  }
750 
751  workloadFactory = it->second.get();
752 
753  ARMNN_ASSERT_MSG(workloadFactory, "No workload factory");
754 
755  return *workloadFactory;
756 }
757 
758 namespace {
759 
760 // Non-copyable class owning accelerator-specific tensor data.
761 class TensorPin
762 {
763 public:
764  TensorPin(std::unique_ptr<ITensorHandle> handle, const TensorInfo& info, LayerBindingId id)
765  : m_TensorHandle(std::move(handle))
766  , m_TensorInfo(info)
767  , m_Id(id)
768  {
769  }
770 
771  ITensorHandle* GetTensorHandle() const { return m_TensorHandle.get(); }
772  const TensorInfo& GetTensorInfo() const { return m_TensorInfo; }
773  LayerBindingId GetBindingId() const { return m_Id; }
774 
775 private:
776  std::unique_ptr<ITensorHandle> m_TensorHandle;
777  TensorInfo m_TensorInfo;
778  LayerBindingId m_Id;
779 };
780 
781 static const TensorPin& GetTensorPin(LayerBindingId id,
782  const std::vector<TensorPin>& pins,
783  char const* bindingPointDesc)
784 {
785  auto it = std::find_if(pins.begin(), pins.end(),
786  [id](const TensorPin& pin)
787  {
788  return pin.GetBindingId() == id;
789  });
790 
791  if (it != pins.end())
792  {
793  return *it;
794  }
795  else
796  {
797  throw InvalidArgumentException(fmt::format("No tensor supplied for {0} {1}", bindingPointDesc, id));
798  }
799 }
800 
801 // Stores data that needs to be kept accessible for the entire execution of a workload.
802 class WorkloadData
803 {
804 public:
805  WorkloadData(const InputTensors& inputTensors, const OutputTensors& outputTensors)
806  {
807  m_InputTensorPins.reserve(inputTensors.size());
808  m_OutputTensorPins.reserve(outputTensors.size());
809 
810  for (auto inputTensorPair : inputTensors)
811  {
812  auto inputTensor = inputTensorPair.second;
813 
814  std::unique_ptr<ITensorHandle> tensorHandle =
815  std::make_unique<ConstPassthroughTensorHandle>(inputTensor.GetInfo(),inputTensor.GetMemoryArea());
816  LayerBindingId layerId = inputTensorPair.first;
817 
818  m_InputTensorPins.emplace_back(std::move(tensorHandle), inputTensor.GetInfo(), layerId);
819  }
820 
821  for (auto outputTensorPair : outputTensors)
822  {
823  auto outputTensor = outputTensorPair.second;
824 
825  std::unique_ptr<ITensorHandle> tensorHandle =
826  std::make_unique<PassthroughTensorHandle>(outputTensor.GetInfo(), outputTensor.GetMemoryArea());
827  LayerBindingId layerId = outputTensorPair.first;
828 
829  m_OutputTensorPins.emplace_back(std::move(tensorHandle), outputTensor.GetInfo(), layerId);
830  }
831  }
832 
833  const TensorPin& GetInputTensorPin(LayerBindingId id) const
834  {
835  return GetTensorPin(id, m_InputTensorPins, "input");
836  }
837 
838  const TensorPin& GetOutputTensorPin(LayerBindingId id) const
839  {
840  return GetTensorPin(id, m_OutputTensorPins, "output");
841  }
842 
843 private:
844 
845  std::vector<TensorPin> m_InputTensorPins;
846  std::vector<TensorPin> m_OutputTensorPins;
847 };
848 
849 }
850 
851 Status LoadedNetwork::EnqueueWorkload(const InputTensors& inputTensors,
852  const OutputTensors& outputTensors,
853  std::vector<ImportedInputId> preImportedInputIds,
854  std::vector<ImportedOutputId> preImportedOutputIds)
855 {
856  const Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
857 
858  // Walk graph to determine the order of execution.
859  if (graph.GetNumLayers() < 2)
860  {
861  ARMNN_LOG(warning) << "IRuntime::EnqueueWorkload()::Less than two nodes in graph";
862  return Status::Failure;
863  }
864 
865  // Data that must be kept alive for the entire execution of the workload.
866  WorkloadData workloadData(inputTensors, outputTensors);
867 
868  // Input tensors can be provided as parameters or pre-imported. Either way the number of
869  // tensors should match the number of inputs.
870  if (graph.GetNumInputs() != (inputTensors.size() + preImportedInputIds.size()))
871  {
872  throw InvalidArgumentException("Number of inputs provided does not match network.");
873  }
874 
875  // For each input to the network, call EnqueueInput with the data passed by the user.
876  {
878  m_InputQueue.clear();
879  m_InputQueue.reserve(graph.GetNumInputs());
880 
881  unsigned int inputIndex = 0;
882  unsigned int importedInputIdIndex = 0;
883  std::sort(preImportedInputIds.begin(), preImportedInputIds.end());
884  for (const BindableLayer* inputLayer : graph.GetInputLayers())
885  {
886  if (importedInputIdIndex < preImportedInputIds.size() &&
887  inputIndex == preImportedInputIds[importedInputIdIndex])
888  {
889  // Only replace tensorhandles if they have not already been replaced
890  if (!m_IsInputImported[inputIndex])
891  {
892  auto outputTensorHandle = m_PreImportedInputHandles[inputIndex].m_TensorHandle.get();
893 
894  for (const auto& workloadInfo: m_InputWorkloadSlotPairs[inputLayer->GetBindingId()])
895  {
896  auto workload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
897  workload->ReplaceInputTensorHandle(outputTensorHandle, workloadInfo.m_SlotIndex);
898  }
899  m_IsInputImported[inputIndex] = true;
900  }
901  importedInputIdIndex++;
902  }
903  else
904  {
905  if (m_IsInputImported[inputIndex])
906  {
907  OutputHandler& handler = const_cast<OutputHandler&>(inputLayer->GetOutputHandler(0));
908 
909  for (const auto& workloadInfo: m_InputWorkloadSlotPairs[inputLayer->GetBindingId()])
910  {
911  auto workload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
912  workload->ReplaceInputTensorHandle(handler.GetData(), workloadInfo.m_SlotIndex);
913  }
914 
915  m_IsInputImported[inputIndex] = false;
916  }
917 
918  // The input tensor handle has not been imported, so enqueue the input normally
919  const TensorPin& pin = workloadData.GetInputTensorPin(inputLayer->GetBindingId());
920  EnqueueInput(*inputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
921  }
922  inputIndex++;
923  }
924  }
925  // For each output to the network, call EnqueueOutput with the data passed by the user.
926  {
928  m_OutputQueue.clear();
929  m_OutputQueue.reserve(graph.GetNumOutputs());
930 
931  if (preImportedOutputIds.size() > graph.GetNumOutputs())
932  {
933  throw InvalidArgumentException("Invalid number of preImportedOutputIds");
934  }
935 
936  unsigned int outputIndex = 0;
937  unsigned int importedOutputIdIndex = 0;
938  std::sort(preImportedOutputIds.begin(), preImportedOutputIds.end());
939  for (const BindableLayer* outputLayer : graph.GetOutputLayers())
940  {
941  if (importedOutputIdIndex < preImportedOutputIds.size() &&
942  outputIndex == preImportedOutputIds[importedOutputIdIndex])
943  {
944  // Only replace tensorhandles if they have not already been replaced
945  ITensorHandle* inputTensorHandle = m_PreImportedOutputHandles[outputIndex].m_TensorHandle.get();
946 
947  if (!m_IsOutputImported[outputIndex])
948  {
949  const auto bindingId = outputLayer->GetBindingId();
950  const auto& indices = m_OutputWorkloadSlotPairs[bindingId];
951 
952  auto outputWorkload = m_WorkloadQueue[indices.m_OutputSlotIndices.m_WorkloadIndex].get();
953 
954  outputWorkload->ReplaceOutputTensorHandle(inputTensorHandle,
955  indices.m_OutputSlotIndices.m_SlotIndex);
956 
957  for (const auto& workloadInfo: indices.m_InputSlotIndices)
958  {
959  auto inputWorkload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
960  inputWorkload->ReplaceInputTensorHandle(inputTensorHandle, workloadInfo.m_SlotIndex);
961  }
962  m_IsOutputImported[outputIndex] = true;
963  }
964 
965  ARMNN_ASSERT_MSG(inputTensorHandle != nullptr, "Data should have been allocated.");
966  MemSyncQueueDescriptor syncDesc;
967  syncDesc.m_Inputs.push_back(inputTensorHandle);
968  WorkloadInfo info;
969  info.m_InputTensorInfos.push_back(
970  outputLayer->GetInputSlot(0).GetTensorInfo());
971  auto syncWorkload = std::make_unique<SyncMemGenericWorkload>(syncDesc, info);
972  ARMNN_ASSERT_MSG(syncWorkload, "No sync workload created");
973  m_OutputQueue.push_back(std::move(syncWorkload));
974  importedOutputIdIndex++;
975  }
976  else
977  {
978  if (m_IsOutputImported[outputIndex])
979  {
980  const auto bindingId = outputLayer->GetBindingId();
981  const auto& indices = m_OutputWorkloadSlotPairs[bindingId];
982 
983  auto outputWorkload = m_WorkloadQueue[indices.m_OutputSlotIndices.m_WorkloadIndex].get();
984  const OutputHandler& outputHandler =
985  outputLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetOutputHandler();
986 
987  outputWorkload->ReplaceOutputTensorHandle(
988  outputHandler.GetData(), indices.m_OutputSlotIndices.m_SlotIndex);
989 
990  for (const auto& workloadInfo: indices.m_InputSlotIndices)
991  {
992  auto inputWorkload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
993  inputWorkload->ReplaceInputTensorHandle(outputHandler.GetData(), workloadInfo.m_SlotIndex);
994  }
995  m_IsOutputImported[outputIndex] = false;
996  }
997 
998  const TensorPin& pin = workloadData.GetOutputTensorPin(outputLayer->GetBindingId());
999  // The output tensor handle has not been imported, so enqueue the output normally
1000  EnqueueOutput(*outputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
1001  }
1002  outputIndex++;
1003  }
1004  }
1005 
1006  std::unique_ptr<TimelineUtilityMethods> timelineUtils =
1007  TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
1008  ProfilingGuid inferenceGuid = m_ProfilingService->GetNextGuid();
1009  if (timelineUtils)
1010  {
1011  // Add inference timeline trace if profiling is enabled.
1012  ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
1013  timelineUtils->CreateTypedEntity(inferenceGuid, LabelsAndEventClasses::INFERENCE_GUID);
1014  timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
1015  networkGuid,
1016  inferenceGuid,
1017  LabelsAndEventClasses::EXECUTION_OF_GUID);
1018  timelineUtils->RecordEvent(inferenceGuid, LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
1019  }
1020 
1021  bool executionSucceeded = true;
1022 
1023  {
1024  if (m_ProfilingService->IsProfilingEnabled())
1025  {
1026  m_ProfilingService->IncrementCounterValue(INFERENCES_RUN);
1027  }
1029  ARMNN_SCOPED_HEAP_PROFILING("Executing");
1030  executionSucceeded = Execute(timelineUtils, inferenceGuid);
1031  }
1032 
1033  if (timelineUtils)
1034  {
1035  // Add end of life of the inference timeline if profiling is enabled.
1036  timelineUtils->RecordEvent(inferenceGuid, LabelsAndEventClasses::ARMNN_PROFILING_EOL_EVENT_CLASS);
1037  timelineUtils->Commit();
1038  }
1039 
1040  return executionSucceeded ? Status::Success : Status::Failure;
1041 }
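For reference, this path is normally reached through IRuntime::EnqueueWorkload rather than by calling LoadedNetwork directly. The sketch below is illustrative only; the helper name RunInference, binding id 0 for both input and output, and float32 data are assumptions made for the example.

    // Illustrative sketch: driving a loaded network through IRuntime::EnqueueWorkload.
    #include <armnn/ArmNN.hpp>
    #include <vector>

    armnn::Status RunInference(armnn::IRuntime& runtime, armnn::NetworkId networkId,
                               std::vector<float>& inputData, std::vector<float>& outputData)
    {
        // Binding id 0 is assumed for both the input and the output here.
        armnn::TensorInfo inputInfo  = runtime.GetInputTensorInfo(networkId, 0);
        armnn::TensorInfo outputInfo = runtime.GetOutputTensorInfo(networkId, 0);
        inputInfo.SetConstant(true); // ConstTensor requires a constant TensorInfo in recent ArmNN versions

        armnn::InputTensors inputTensors
        {
            { 0, armnn::ConstTensor(inputInfo, inputData.data()) }
        };
        armnn::OutputTensors outputTensors
        {
            { 0, armnn::Tensor(outputInfo, outputData.data()) }
        };

        // The number of tensors must match the network's inputs/outputs,
        // otherwise LoadedNetwork::EnqueueWorkload throws InvalidArgumentException.
        return runtime.EnqueueWorkload(networkId, inputTensors, outputTensors);
    }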
1042 
1043 void LoadedNetwork::EnqueueInput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo)
1044 {
1045  if (layer.GetType() != LayerType::Input)
1046  {
1047  throw InvalidArgumentException("EnqueueInput: given layer not an InputLayer");
1048  }
1049 
1050  if (tensorHandle == nullptr)
1051  {
1052  throw InvalidArgumentException("EnqueueInput: tensorHandle must not be NULL");
1053  }
1054 
1055  InputQueueDescriptor inputQueueDescriptor;
1056  WorkloadInfo info;
1057 
1058  inputQueueDescriptor.m_Inputs.push_back(tensorHandle);
1059  info.m_InputTensorInfos.push_back(tensorInfo);
1060 
1061  ARMNN_ASSERT_MSG(layer.GetNumOutputSlots() == 1, "Can only handle Input Layer with one output");
1062  const OutputHandler& handler = layer.GetOutputHandler();
1063  const TensorInfo& outputTensorInfo = handler.GetTensorInfo();
1064  ITensorHandle* outputTensorHandle = handler.GetData();
1065  ARMNN_ASSERT_MSG(outputTensorHandle != nullptr,
1066  "Data should have been allocated.");
1067  inputQueueDescriptor.m_Outputs.push_back(outputTensorHandle);
1068  info.m_OutputTensorInfos.push_back(outputTensorInfo);
1069 
1070  MemorySourceFlags importFlags = outputTensorHandle->GetImportFlags();
1071  bool needMemCopy = true;
1072  if ((m_NetworkProperties.m_InputSource != MemorySource::Undefined)) // Try import the input tensor
1073  {
1074  if(CheckFlag(importFlags, m_NetworkProperties.m_InputSource))
1075  {
1076  needMemCopy = false;
1077  // This assumes a CPU Tensor handle
1078  void* mem = tensorHandle->Map(false);
1079  if (outputTensorHandle->Import(mem, m_NetworkProperties.m_InputSource))
1080  {
1081  tensorHandle->Unmap();
1082  return; // No need for a workload since the import has been done.
1083  }
1084  tensorHandle->Unmap();
1085  throw MemoryImportException("EnqueueInput: Memory Import failed");
1086  }
1087  }
1088  if (needMemCopy)
1089  {
1090  // Create a mem copy workload for input since we did not import
1091  std::unique_ptr<IWorkload> inputWorkload = std::make_unique<CopyMemGenericWorkload>(inputQueueDescriptor, info);
1092 
1093  ARMNN_ASSERT_MSG(inputWorkload, "No input workload created");
1094 
1095  std::unique_ptr<TimelineUtilityMethods> timelineUtils =
1096  TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
1097  if (timelineUtils)
1098  {
1099  // Add Input Workload to the post-optimisation network structure
1100  AddWorkloadStructure(timelineUtils, inputWorkload, layer);
1101  timelineUtils->Commit();
1102  }
1103 
1104  m_InputQueue.push_back(std::move(inputWorkload));
1105  }
1106 }
1107 
1108 void LoadedNetwork::EnqueueOutput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo)
1109 {
1110  if (layer.GetType() != LayerType::Output)
1111  {
1112  throw InvalidArgumentException("EnqueueOutput: given layer not an OutputLayer");
1113  }
1114 
1115  if (tensorHandle == nullptr)
1116  {
1117  throw InvalidArgumentException("EnqueueOutput: tensorHandle must not be NULL");
1118  }
1119 
1120  OutputQueueDescriptor outputQueueDescriptor;
1121  WorkloadInfo info;
1122 
1123  outputQueueDescriptor.m_Outputs.push_back(tensorHandle);
1124  info.m_OutputTensorInfos.push_back(tensorInfo);
1125 
1126  ARMNN_ASSERT_MSG(layer.GetNumInputSlots() == 1, "Output Layer should have exactly one input.");
1127 
1128  // Gets the output handler from the previous node.
1129  const OutputHandler& outputHandler = layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOutputHandler();
1130 
1131  const TensorInfo& inputTensorInfo = outputHandler.GetTensorInfo();
1132  ITensorHandle* inputTensorHandle = outputHandler.GetData();
1133  ARMNN_ASSERT_MSG(inputTensorHandle != nullptr, "Data should have been allocated.");
1134 
1135  // Try import the output tensor.
1136  // Note: We can only import the output pointer if all of the following hold true:
1137  // a) The imported pointer is aligned sufficiently
1138  // b) The tensor has zero padding
1139  // c) There is only one connection to the OutputSlot and it is to an OutputLayer.
1140  // d) The output pointer is allocated via malloc. (Other types will be supported in a later release)
1141  // e) m_NetworkProperties.m_OutputSource != MemorySource::Undefined
1142  bool needMemCopy = true;
1143  if (m_NetworkProperties.m_OutputSource != MemorySource::Undefined &&
1144  (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetNumConnections() == 1))
1145  {
1146  if(layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOwningLayer().GetType() != LayerType::Input)
1147  {
1148  MemorySourceFlags importFlags = inputTensorHandle->GetImportFlags();
1149  if (CheckFlag(importFlags, m_NetworkProperties.m_OutputSource))
1150  {
1151  needMemCopy = false;
1152  void *mem = tensorHandle->Map(false);
1153  bool importOk = inputTensorHandle->Import(mem, m_NetworkProperties.m_OutputSource);
1154  tensorHandle->Unmap();
1155 
1156  if (importOk)
1157  {
1158  // Insert synchronization workload
1159  MemSyncQueueDescriptor syncDesc;
1160  syncDesc.m_Inputs.push_back(inputTensorHandle);
1161  info.m_InputTensorInfos.push_back(inputTensorInfo);
1162  auto syncWorkload = std::make_unique<SyncMemGenericWorkload>(syncDesc, info);
1163  ARMNN_ASSERT_MSG(syncWorkload, "No sync workload created");
1164  m_OutputQueue.push_back(std::move(syncWorkload));
1165  }
1166  else
1167  {
1168  throw MemoryExportException("EnqueueOutput: Memory Export failed");
1169  }
1170  }
1171  }
1172  }
1173  if (needMemCopy)
1174  {
1175  // If we got here then we didn't export the memory, so add an output workload which performs a memcopy.
1176  outputQueueDescriptor.m_Inputs.push_back(inputTensorHandle);
1177  info.m_InputTensorInfos.push_back(inputTensorInfo);
1178 
1179  std::unique_ptr<IWorkload> outputWorkload =
1180  std::make_unique<CopyMemGenericWorkload>(outputQueueDescriptor, info);
1181  ARMNN_ASSERT_MSG(outputWorkload, "No output workload created");
1182 
1183  std::unique_ptr<TimelineUtilityMethods> timelineUtils =
1184  TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
1185  if (timelineUtils)
1186  {
1187  // Add Output Workload to the post-optimisation network structure
1188  AddWorkloadStructure(timelineUtils, outputWorkload, layer);
1189  timelineUtils->Commit();
1190  }
1191 
1192  m_OutputQueue.push_back(std::move(outputWorkload));
1193  }
1194 }
1195 
1196 void LoadedNetwork::AllocateWorkingMemory(
1197 #if !defined(ARMNN_DISABLE_THREADS)
1198  std::lock_guard<std::mutex>& lock
1199 #endif
1200  )
1201 {
1202  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Working Memory Allocation");
1203 
1204 #if !defined(ARMNN_DISABLE_THREADS)
1205  // this unused parameter makes sure we can only call this function with a valid lock
1206  IgnoreUnused(lock);
1207 #endif
1208  if (m_IsWorkingMemAllocated)
1209  {
1210  return;
1211  }
1212 
1213  if (m_ExternalMemoryManager)
1214  {
1215  m_ExternalMemoryManager->Allocate();
1216 
1217  for (unsigned int i = 0; i < m_TensorMemory.size(); ++i)
1218  {
1219  m_Tensorhandles[i]->Import(m_TensorMemory[i].first->m_Data, m_TensorMemory[i].second);
1220  }
1221  }
1222 
1223  for (auto&& memoryManager : m_BackendMemoryMangers)
1224  {
1225  if (memoryManager)
1226  {
1227  memoryManager->Acquire();
1228  }
1229  }
1230  m_TensorHandleFactoryRegistry.AquireMemory();
1231  m_IsWorkingMemAllocated = true;
1232 }
1233 
1234 void LoadedNetwork::FreeWorkingMemory()
1235 {
1236 #if !defined(ARMNN_DISABLE_THREADS)
1237  std::lock_guard<std::mutex> lockGuard(m_WorkingMemMutex);
1238 #endif
1239 
1240  if (!m_IsWorkingMemAllocated)
1241  {
1242  return;
1243  }
1244 
1245  if (m_ExternalMemoryManager)
1246  {
1247  m_ExternalMemoryManager->Deallocate();
1248  }
1249 
1250  // Inform the memory managers to release memory in their respective memory groups
1251  for (auto&& memoryManager : m_BackendMemoryMangers)
1252  {
1253  if (memoryManager)
1254  {
1255  memoryManager->Release();
1256  }
1257  }
1258  m_TensorHandleFactoryRegistry.ReleaseMemory();
1259  m_IsWorkingMemAllocated = false;
1260 }
1261 
1262 bool LoadedNetwork::Execute(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
1263  ProfilingGuid inferenceGuid)
1264 {
1265  bool success = true;
1266 
1267  auto Fail = [&](const std::exception& error)
1268  {
1269  ARMNN_LOG(error) << "An error occurred attempting to execute a workload: " << error.what();
1270  success = false;
1271  };
1272 
1273  try
1274  {
1275 #if !defined(ARMNN_DISABLE_THREADS)
1276  std::lock_guard<std::mutex> lockGuard(m_WorkingMemMutex);
1277  AllocateWorkingMemory(lockGuard);
1278 #else
1279  AllocateWorkingMemory();
1280 #endif
1281 
1282  ProfilingDynamicGuid workloadInferenceID(0);
1283  auto ExecuteQueue = [&timelineUtils, &workloadInferenceID, &inferenceGuid](WorkloadQueue& queue)
1284  {
1285  for (auto& workload : queue)
1286  {
1287  if(timelineUtils)
1288  {
1289  workloadInferenceID = timelineUtils->RecordWorkloadInferenceAndStartOfLifeEvent(workload->GetGuid(),
1290  inferenceGuid);
1291  }
1292  workload->Execute();
1293  if(timelineUtils)
1294  {
1295  timelineUtils->RecordEndOfLifeEvent(workloadInferenceID);
1296  }
1297  }
1298  };
1299 
1300  ExecuteQueue(m_InputQueue);
1301  ExecuteQueue(m_WorkloadQueue);
1302  ExecuteQueue(m_OutputQueue);
1303  }
1304  catch (const RuntimeException& error)
1305  {
1306  Fail(error);
1307  }
1308  catch (const std::runtime_error& error)
1309  {
1310  Fail(error);
1311  }
1312 
1313  return success;
1314 }
1315 
1316 void LoadedNetwork::EnqueueInput(const ConstTensor& inputTensor, ITensorHandle* inputTensorHandle)
1317 {
1318  if (m_NetworkProperties.m_InputSource != MemorySource::Undefined) // Try import the input tensor
1319  {
1320  MemorySourceFlags importFlags = inputTensorHandle->GetImportFlags();
1321  if (CheckFlag(importFlags, m_NetworkProperties.m_InputSource) )
1322  {
1323  std::unique_ptr<ITensorHandle> tensorHandle =
1324  std::make_unique<ConstPassthroughTensorHandle>(inputTensor.GetInfo(),
1325  inputTensor.GetMemoryArea());
1326  void* mem = tensorHandle->Map(false);
1327 
1328  if (inputTensorHandle->Import(mem, m_NetworkProperties.m_InputSource))
1329  {
1330  tensorHandle->Unmap();
1331  return;
1332  }
1333  tensorHandle->Unmap();
1334  throw MemoryImportException("EnqueueInput: Memory Import failed");
1335  }
1336  else
1337  {
1338  throw MemoryImportException("EnqueueInput: Memory Import failed, backend does not support Import");
1339  }
1340  }
1341  else
1342  {
1344  std::unique_ptr<ITensorHandle> tensorHandle =
1345  std::make_unique<ConstPassthroughTensorHandle>(inputTensor.GetInfo(), inputTensor.GetMemoryArea());
1346 
1347  auto copyFunc = [](void* dst, const void* src, size_t size)
1348  {
1349  memcpy(dst, src, size);
1350  };
1351 
1352  CopyTensorContentsGeneric(tensorHandle.get(), inputTensorHandle, copyFunc);
1353  }
1354 }
1355 
1356 // Note: We can only import the output pointer if all of the following hold true:
1357 // a) The imported pointer is aligned sufficiently
1358 // b) The tensor has zero padding
1359 // c) There is only one connection to the OutputSlot and it is to an OutputLayer.
1360 // d) The output pointer is allocated via malloc. (Other types will be supported in a later release)
1361 // e) m_IsExportEnabled must be set to true
1362 void LoadedNetwork::ImportOutputTensor(const Tensor& outputTensor, ITensorHandle* outputTensorHandle)
1363 {
1364  ARMNN_ASSERT_MSG(outputTensorHandle != nullptr, "Data should have been allocated.");
1365  MemorySourceFlags importFlags = outputTensorHandle->GetImportFlags();
1366  if (CheckFlag(importFlags, m_NetworkProperties.m_OutputSource))
1367  {
1368  std::unique_ptr<ITensorHandle> tensorHandle =
1369  std::make_unique<PassthroughTensorHandle>(outputTensor.GetInfo(),
1370  outputTensor.GetMemoryArea());
1371 
1372  void* mem = tensorHandle->Map(false);
1373  bool importOk = outputTensorHandle->Import(mem, m_NetworkProperties.m_OutputSource);
1374  tensorHandle->Unmap();
1375 
1376  if (!importOk)
1377  {
1378  throw MemoryExportException("ImportOutputTensor: Memory Export failed");
1379  }
1380  }
1381  else
1382  {
1383  throw MemoryExportException("ImportOutputTensor: Memory Export failed, attempting to export Input Layer");
1384  }
1385 
1386 }
1387 
1388 void CopyToOutputTensor(const Tensor& outputTensor, ITensorHandle* outputTensorHandle)
1389 {
1391  auto copyFunc = [](void* dst, const void* src, size_t size)
1392  {
1393  memcpy(dst, src, size);
1394  };
1395 
1396  std::unique_ptr<ITensorHandle> tensorHandle =
1397  std::make_unique<PassthroughTensorHandle>(outputTensor.GetInfo(),
1398  outputTensor.GetMemoryArea());
1399 
1400  CopyTensorContentsGeneric(outputTensorHandle, tensorHandle.get(), copyFunc);
1401 }
1402 
1403 
1404 const armnn::ConstTensor GetInputTensor(const LayerBindingId layerId, const InputTensors& inputTensors)
1405 {
1406  for (auto inputTensorPair : inputTensors)
1407  {
1408  LayerBindingId id = inputTensorPair.first;
1409  if (id == layerId)
1410  {
1411  return inputTensorPair.second;
1412  }
1413  }
1414  throw InvalidArgumentException("Input does not exist.");
1415 }
1416 
1417 const armnn::Tensor GetOutputTensor(const LayerBindingId layerId, const OutputTensors& outputTensors)
1418 {
1419  for (auto outputTensorPair : outputTensors)
1420  {
1421  LayerBindingId id = outputTensorPair.first;
1422  if (id == layerId)
1423  {
1424  return outputTensorPair.second;
1425  }
1426  }
1427  throw InvalidArgumentException("Output does not exist.");
1428 }
1429 
1430 std::vector<ImportedInputId> LoadedNetwork::ImportInputs(const InputTensors& inputTensors,
1431  MemorySource forceImportMemorySource)
1432 {
1433  if (!m_NetworkProperties.m_AsyncEnabled)
1434  {
1435  // Cannot import if import is not enabled and forceImportMemorySource is undefined
1436  if (forceImportMemorySource == MemorySource::Undefined)
1437  {
1438  throw MemoryImportException("ImportInputs: Memory Import failed, NetworkProperties.m_ImportEnabled");
1439  }
1440  // The number of pre imported tensors should not exceed the number of inputs.
1441  if (inputTensors.size() > m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetNumInputs())
1442  {
1443  throw MemoryImportException("ImportInputs: The number of tensors provided exceeds the number of inputs.");
1444  }
1445 
1446  std::vector<ImportedInputId> importedInputs;
1447  Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
1448  unsigned int inputIndex = 0;
1449  for (const BindableLayer* inputLayer : graph.GetInputLayers())
1450  {
1451  auto outputTensorHandle = m_PreImportedInputHandles[inputIndex].m_TensorHandle.get();
1452 
1453  if (!outputTensorHandle)
1454  {
1455  inputIndex++;
1456  continue;
1457  }
1458 
1459  auto layerBindingId = inputLayer->GetBindingId();
1460  auto it = std::find_if(inputTensors.begin(), inputTensors.end(), [=](const auto& inputTensor)
1461  {
1462  return inputTensor.first == layerBindingId;
1463  });
1464 
1465  if (it == inputTensors.end())
1466  {
1467  inputIndex++;
1468  continue;
1469  }
1470 
1471  const auto& inputTensor = *it;
1472  std::unique_ptr<ITensorHandle> passThroughTensorHandle =
1473  std::make_unique<ConstPassthroughTensorHandle>(inputTensor.second.GetInfo(),
1474  inputTensor.second.GetMemoryArea());
1475 
1476  try
1477  {
1478  if (outputTensorHandle->CanBeImported(passThroughTensorHandle->Map(), forceImportMemorySource)
1479  && (outputTensorHandle->Import(passThroughTensorHandle->Map(), forceImportMemorySource)))
1480  {
1481  importedInputs.push_back(inputIndex);
1482  }
1483  passThroughTensorHandle->Unmap();
1484  }
1485  catch(const MemoryImportException& exception)
1486  {
1487  ARMNN_LOG(error) << "An error occurred attempting to import input_"
1488  << inputIndex << " : " << exception.what();
1489  passThroughTensorHandle->Unmap();
1490  }
1491  inputIndex++;
1492  }
1493 
1494  return importedInputs;
1495  }
1496  else
1497  {
1498  // Import when the network was loaded with import enabled in its INetworkProperties
1499  std::vector<ImportedInputId> importedInputs;
1500  Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
1501 
1502  for (auto inputTensor : inputTensors)
1503  {
1504  auto layerBindingId = inputTensor.first;
1505  auto it = std::find_if(graph.GetInputLayers().begin(), graph.GetInputLayers().end(), [=](auto* layer)
1506  {
1507  return layer->GetBindingId() == layerBindingId;
1508  });
1509 
1510  if (it == graph.GetInputLayers().end())
1511  {
1512  throw MemoryImportException(fmt::format(
1513  "ImportInputs: Memory Import failed, unknown LayerBindingId: {}", layerBindingId));
1514  }
1515 
1516  const Layer* layer = *it;
1517  if (layer->GetType() != LayerType::Input)
1518  {
1519  throw InvalidArgumentException("ImportInputs: given layer not an InputLayer");
1520  }
1521 
1522  auto& backend = m_Backends.at(layer->GetBackendId());
1523  if (!HasMatchingCapability(BackendOptions::BackendOption{"PreImportIOTensors", true},
1524  backend->GetCapabilities()))
1525  {
1526  std::string er = backend->GetId();
1527  er += " does not have PreImportIOTensors capability";
1528  throw BackendCapabilityException(er);
1529  }
1530 
1531  const OutputSlot& outputSlot = layer->GetOutputSlots()[0];
1532 
1533  ITensorHandleFactory::FactoryId factoryId = outputSlot.GetTensorHandleFactoryId();
1534  const TensorInfo& tensorInfo = outputSlot.GetTensorInfo();
1535 
1536  ITensorHandleFactory* handleFactory = m_TensorHandleFactoryRegistry.GetFactory(factoryId);
1537  ARMNN_ASSERT(handleFactory);
1538 
1539  ImportedTensorHandlePin importedTensorHandlePin{layerBindingId,
1540  handleFactory->CreateTensorHandle(tensorInfo, false)};
1541 
1542  ITensorHandle* tensorHandle = importedTensorHandlePin.m_TensorHandle.get();
1543 
1544  if (!CheckFlag(tensorHandle->GetImportFlags(), forceImportMemorySource))
1545  {
1546  throw MemoryImportException(
1547  fmt::format("ImportInputs: Memory Import failed, backend: "
1548  "{} does not support importing from source {}"
1549  , factoryId, m_NetworkProperties.m_InputSource));
1550  }
1551 
1552  std::unique_ptr<ITensorHandle> passThroughTensorHandle =
1553  std::make_unique<ConstPassthroughTensorHandle>(inputTensor.second.GetInfo(),
1554  inputTensor.second.GetMemoryArea());
1555 
1556  if (tensorHandle->Import(passThroughTensorHandle->Map(), forceImportMemorySource))
1557  {
1558  importedInputs.push_back(m_CurImportedInputId++);
1559  passThroughTensorHandle->Unmap();
1560  }
1561  else
1562  {
1563  passThroughTensorHandle->Unmap();
1564  throw MemoryImportException("ImportInputs: Memory Import failed");
1565  }
1566 
1567  m_PreImportedInputHandles.push_back(std::move(importedTensorHandlePin));
1568  }
1569  return importedInputs;
1570  }
1571 }
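For reference, the pre-import path above is normally exercised through IRuntime::ImportInputs. A hedged sketch follows; it assumes a single float32 input bound to id 0, a buffer that satisfies the backend's alignment and padding requirements, and the helper name RunWithPreImportedInput, none of which come from this file.

    // Illustrative sketch: pre-importing an input buffer and reusing it across inferences.
    #include <armnn/ArmNN.hpp>
    #include <vector>

    void RunWithPreImportedInput(armnn::IRuntime& runtime, armnn::NetworkId networkId,
                                 std::vector<float>& inputData, std::vector<float>& outputData)
    {
        armnn::TensorInfo inputInfo = runtime.GetInputTensorInfo(networkId, 0);
        inputInfo.SetConstant(true);

        armnn::InputTensors inputTensors
        {
            { 0, armnn::ConstTensor(inputInfo, inputData.data()) }
        };

        // Import the input buffer once; the returned ids identify the pre-imported tensors.
        std::vector<armnn::ImportedInputId> importedIds =
            runtime.ImportInputs(networkId, inputTensors, armnn::MemorySource::Malloc);

        armnn::TensorInfo outputInfo = runtime.GetOutputTensorInfo(networkId, 0);
        armnn::OutputTensors outputTensors
        {
            { 0, armnn::Tensor(outputInfo, outputData.data()) }
        };

        // Pre-imported inputs are passed by id instead of as InputTensors.
        runtime.EnqueueWorkload(networkId, {}, outputTensors, importedIds);

        // Release the imported buffers when they are no longer needed.
        runtime.ClearImportedInputs(networkId, importedIds);
    }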
1572 
1573 std::vector<ImportedOutputId> LoadedNetwork::ImportOutputs(const OutputTensors& outputTensors,
1574  MemorySource forceImportMemorySource)
1575 {
1576  if (!m_NetworkProperties.m_AsyncEnabled)
1577  {
1578  // Cannot import if import is not enabled and forceImportMemorySource is undefined
1579  if (forceImportMemorySource == MemorySource::Undefined)
1580  {
1581  throw MemoryImportException("ImportOutputs: Memory Import failed, NetworkProperties.m_ImportEnabled");
1582  }
1583  // If forceImportMemorySource is defined, try to import if the memory is suitably aligned
1584  if (outputTensors.size() != m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetNumOutputs())
1585  {
1586  throw MemoryImportException("ImportOutputs: Force Import failed, incorrect number of tensors");
1587  }
1588  std::vector<ImportedOutputId> importedOutputs;
1589  Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
1590 
1591  unsigned int outputIndex = 0;
1592  for (const BindableLayer* const outputLayer : graph.GetOutputLayers())
1593  {
1594  auto inputTensorHandle = m_PreImportedOutputHandles[outputIndex].m_TensorHandle.get();
1595  if (!inputTensorHandle)
1596  {
1597  outputIndex++;
1598  continue;
1599  }
1600 
1601  auto layerBindingId = outputLayer->GetBindingId();
1602  auto it = std::find_if(outputTensors.begin(), outputTensors.end(), [=] (const auto& outputTensor)
1603  {
1604  return outputTensor.first == layerBindingId;
1605  });
1606 
1607  if (it == outputTensors.end())
1608  {
1609  outputIndex++;
1610  continue;
1611  }
1612 
1613  const auto outputTensor = *it;
1614  try
1615  {
1616  // Check if the output memory can be imported
1617  if (inputTensorHandle->CanBeImported(outputTensor.second.GetMemoryArea(), forceImportMemorySource)
1618  && inputTensorHandle->Import(outputTensor.second.GetMemoryArea(), forceImportMemorySource))
1619  {
1620  importedOutputs.push_back(outputIndex);
1621  }
1622  }
1623  catch(const MemoryImportException& exception)
1624  {
1625  ARMNN_LOG(error) << "An error occurred attempting to import output_"
1626  << outputIndex << " : " << exception.what();
1627  }
1628  outputIndex++;
1629  }
1630  return importedOutputs;
1631  }
1632 
1633  std::vector<ImportedOutputId> importedOutputs;
1634  Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
1635 
1636  for (const auto& outputTensor : outputTensors)
1637  {
1638  auto layerBindingId = outputTensor.first;
1639  auto it = std::find_if(graph.GetOutputLayers().begin(), graph.GetOutputLayers().end(), [=](auto* layer)
1640  {
1641  return layer->GetBindingId() == layerBindingId;
1642  });
1643 
1644  if (it == graph.GetOutputLayers().end())
1645  {
1646  throw MemoryImportException(fmt::format("ImportOutputs: Memory Import failed, unknown LayerBindingId: {}",
1647  layerBindingId));
1648  }
1649 
1650  const Layer* layer = *it;
1651  if (layer->GetType() != LayerType::Output)
1652  {
1653  throw InvalidArgumentException("ImportOutputs: given layer is not an OutputLayer");
1654  }
1655 
1656  auto& backend = m_Backends.at(layer->GetBackendId());
1657  if (!HasMatchingCapability(BackendOptions::BackendOption{"PreImportIOTensors", true},
1658  backend->GetCapabilities()))
1659  {
1660  std::string er = backend->GetId();
1661  er += " does not have PreImportIOTensors capability";
1662  throw BackendCapabilityException(er);
1663  }
1664 
1665  const InputSlot& inputSlot = layer->GetInputSlots()[0];
1666  ITensorHandleFactory::FactoryId factoryId = inputSlot.GetConnectedOutputSlot()->GetTensorHandleFactoryId();
1667  const TensorInfo& tensorInfo = inputSlot.GetTensorInfo();
1668 
1669  ITensorHandleFactory* handleFactory = m_TensorHandleFactoryRegistry.GetFactory(factoryId);
1670  ARMNN_ASSERT(handleFactory);
1671 
1672  ImportedTensorHandlePin importedTensorHandlePin{layerBindingId,
1673  handleFactory->CreateTensorHandle(tensorInfo, false)};
1674 
1675  ITensorHandle* tensorHandle = importedTensorHandlePin.m_TensorHandle.get();
1676 
1677  if (!CheckFlag(tensorHandle->GetImportFlags(), forceImportMemorySource))
1678  {
1679  throw MemoryImportException(fmt::format("ImportOutputs: Memory Import failed, backend: "
1680  "{} does not support importing from source {}"
1681  , factoryId, forceImportMemorySource));
1682  }
1683 
1684  if (tensorHandle->Import(outputTensor.second.GetMemoryArea(), forceImportMemorySource))
1685  {
1686  importedOutputs.push_back(m_CurImportedOutputId++);
1687  }
1688  else
1689  {
1690  throw MemoryImportException("ImportOutputs: Memory Import failed");
1691  }
1692 
1693  m_PreImportedOutputHandles.push_back(std::move(importedTensorHandlePin));
1694  }
1695 
1696  return importedOutputs;
1697 }
1698 
1699 void LoadedNetwork::ClearImportedInputs(const std::vector<ImportedInputId> inputIds)
1700 {
1701  for (auto id : inputIds)
1702  {
1703  if (id >= m_PreImportedInputHandles.size())
1704  {
1705  throw InvalidArgumentException(fmt::format("ClearImportedInputs::Unknown ImportedInputId: {}", id));
1706  }
1707 
1708  auto& importedTensorHandle = m_PreImportedInputHandles[id].m_TensorHandle;
1709  if (!importedTensorHandle)
1710  {
1711  throw InvalidArgumentException(
1712  fmt::format("ClearImportedInputs::ImportedInput with id: {} has already been deleted", id));
1713  }
1714  // Call Unimport then destroy the tensorHandle
1715  importedTensorHandle->Unimport();
1716  importedTensorHandle = {};
1717  }
1718 }
1719 
1720 void LoadedNetwork::ClearImportedOutputs(const std::vector<ImportedOutputId> outputIds)
1721 {
1722  for (auto id : outputIds)
1723  {
1724  if (id >= m_PreImportedOutputHandles.size())
1725  {
1726  throw InvalidArgumentException(fmt::format("ClearImportedOutputs::Unknown ImportedOutputId: {}", id));
1727  }
1728 
1729  auto& importedTensorHandle = m_PreImportedOutputHandles[id].m_TensorHandle;
1730  if (!importedTensorHandle)
1731  {
1732  throw InvalidArgumentException(
1733  fmt::format("ClearImportedOutputs::ImportedOutput with id: {} has already been deleted", id));
1734  }
1735  // Call Unimport then destroy the tensorHandle
1736  importedTensorHandle->Unimport();
1737  importedTensorHandle = {};
1738  }
1739 }
1740 
1741 Status LoadedNetwork::Execute(const InputTensors& inputTensors,
1742  const OutputTensors& outputTensors,
1743  IWorkingMemHandle& iWorkingMemHandle,
1744  std::vector<ImportedInputId> preImportedInputs,
1745  std::vector<ImportedOutputId> preImportedOutputs)
1746 {
1747  const Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
1748 
1749  if (inputTensors.size() + preImportedInputs.size() != graph.GetNumInputs())
1750  {
1751  if (preImportedInputs.empty())
1752  {
1753  throw InvalidArgumentException("LoadedNetwork::Execute: Number of inputs provided does not match network.");
1754  }
1755  else
1756  {
1757  throw InvalidArgumentException("LoadedNetwork::Execute: "
1758  "Number of inputs + preImportedInputs provided does not match network.");
1759  }
1760  }
1761 
1762  if (outputTensors.size() + preImportedOutputs.size() != graph.GetNumOutputs())
1763  {
1764  if (preImportedOutputs.empty())
1765  {
1766  throw InvalidArgumentException("LoadedNetwork::Execute: "
1767  "Number of outputs provided does not match network.");
1768  }
1769  else
1770  {
1771  throw InvalidArgumentException("LoadedNetwork::Execute: "
1772  "Number of outputs + preImportedOutputs provided does not match network.");
1773  }
1774  }
1775 
1776  WorkingMemHandle& workingMemHandle = dynamic_cast<WorkingMemHandle&>(iWorkingMemHandle);
1777  // Collect all the given LayerBindingIds and check them for duplicates and unknowns.
1778  std::vector<LayerBindingId>& bindingIds = workingMemHandle.GetBindingIdVector();
1779  unsigned int index = 0;
1780  for (auto pair : inputTensors)
1781  {
1782  bindingIds[index++] = pair.first;
1783  }
1784  for (ImportedInputId id : preImportedInputs)
1785  {
1786  bindingIds[index++] = ValidateImportedInputID(id);
1787  }
1788  for (auto pair : outputTensors)
1789  {
1790  bindingIds[index++] = pair.first;
1791  }
1792  for (ImportedOutputId id : preImportedOutputs)
1793  {
1794  bindingIds[index++] = ValidateImportedOutputID(id);
1795  }
1796 
1797  workingMemHandle.ValidateBindingIds();
1798 
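 // resetMemHandle undoes the pre-imported handle swaps performed further down, pointing every affected
 // input/output connection back at the handles owned by the WorkingMemHandle itself.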
1799  auto resetMemHandle = [&]()
1800  {
1801  for (ImportedInputId id: preImportedInputs)
1802  {
1803  const LayerBindingId layerBindingId = m_PreImportedInputHandles[id].m_LayerBindingId;
1804 
1805  auto inputHandle = workingMemHandle.GetInputHandle(layerBindingId);
1806  auto inputConnections = workingMemHandle.GetInputConnections(layerBindingId);
1807  for (auto it : inputConnections)
1808  {
1809  *it = inputHandle;
1810  }
1811  }
1812 
1813  for (ImportedOutputId id: preImportedOutputs)
1814  {
1815  const LayerBindingId layerBindingId = m_PreImportedOutputHandles[id].m_LayerBindingId;
1816 
1817  auto outputHandle = workingMemHandle.GetOutputHandle(layerBindingId);
1818  auto outputConnections = workingMemHandle.GetOutputConnection(layerBindingId);
1819 
1820  for (auto it : outputConnections)
1821  {
1822  *it = outputHandle;
1823  }
1824  }
1825  };
1826 
1827  std::unique_ptr<TimelineUtilityMethods> timelineUtils =
1828  TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
1829  ProfilingGuid inferenceGuid = m_ProfilingService->GetNextGuid();
1830  if (timelineUtils)
1831  {
1832  // Add inference timeline trace if profiling is enabled.
1833  ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
1834  timelineUtils->CreateTypedEntity(inferenceGuid,LabelsAndEventClasses::INFERENCE_GUID);
1835  timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
1836  networkGuid,
1837  inferenceGuid,
1838  LabelsAndEventClasses::EXECUTION_OF_GUID);
1839  timelineUtils->RecordEvent(inferenceGuid,LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
1840  }
1841 
1842  bool executionSucceeded = true;
1843 
1844  if (timelineUtils)
1845  {
1846  // Add end of life of the inference timeline if profiling is enabled.
1847  timelineUtils->RecordEvent(inferenceGuid,LabelsAndEventClasses::ARMNN_PROFILING_EOL_EVENT_CLASS);
1848  timelineUtils->Commit();
1849  }
1850 
1851  if (!workingMemHandle.IsAllocated())
1852  {
1853  workingMemHandle.Allocate();
1854  }
1855 
1856  {
1858  for (auto pair : inputTensors)
1859  {
1860  EnqueueInput(pair.second, workingMemHandle.GetInputHandle(pair.first));
1861  }
1862 
1863  // Swap in the pre-imported inputs if any
1864  for (ImportedInputId id : preImportedInputs)
1865  {
1866  const ImportedTensorHandlePin& importedInputPin = m_PreImportedInputHandles[id];
1867  const LayerBindingId layerBindingId = m_PreImportedInputHandles[id].m_LayerBindingId;
1868  const auto& preimportedHandle = importedInputPin.m_TensorHandle;
1869 
1870  auto inputConnections = workingMemHandle.GetInputConnections(layerBindingId);
1871  for (auto it : inputConnections)
1872  {
1873  *it = preimportedHandle.get();
1874  }
1875  }
1876  }
1877  {
1879  if (m_NetworkProperties.m_OutputSource != MemorySource::Undefined)
1880  {
1881  for (auto pair: outputTensors)
1882  {
1883  ImportOutputTensor(pair.second, workingMemHandle.GetOutputHandle(pair.first));
1884  }
1885  }
1886 
1887  for (ImportedOutputId id : preImportedOutputs)
1888  {
1889  const ImportedTensorHandlePin& importedOutputPin = m_PreImportedOutputHandles[id];
1890  const LayerBindingId layerBindingId = m_PreImportedOutputHandles[id].m_LayerBindingId;
1891  const auto& preimportedHandle = importedOutputPin.m_TensorHandle;
1892 
1893  auto outputConnections = workingMemHandle.GetOutputConnection(layerBindingId);
1894  for (auto it : outputConnections)
1895  {
1896  *it = preimportedHandle.get();
1897  }
1898  }
1899  }
1900 
1901  auto Fail = [&](const std::exception& error)
1902  {
1903  ARMNN_LOG(error) << "An error occurred attempting to execute a workload: " << error.what();
1904  executionSucceeded = false;
1905  };
1906  ProfilingDynamicGuid workloadInferenceID(0);
1907 
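 // Run every workload in queue order; when profiling is enabled each execution is bracketed by a
 // workload-inference start-of-life/end-of-life timeline event.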
1908  try
1909  {
1910  for (unsigned int i = 0; i < m_WorkloadQueue.size(); ++i)
1911  {
1912  auto& workload = m_WorkloadQueue[i];
1913  if (timelineUtils)
1914  {
1915  workloadInferenceID = timelineUtils->RecordWorkloadInferenceAndStartOfLifeEvent(workload->GetGuid(),
1916  inferenceGuid);
1917  }
1918 
1919  workload->ExecuteAsync(workingMemHandle.GetExecutionDataAt(i).second);
1920 
1921  if (timelineUtils)
1922  {
1923  timelineUtils->RecordEndOfLifeEvent(workloadInferenceID);
1924  }
1925  }
1926  }
1927  catch (const RuntimeException& error)
1928  {
1929  resetMemHandle();
1930  Fail(error);
1931  }
1932  catch (const std::runtime_error& error)
1933  {
1934  resetMemHandle();
1935  Fail(error);
1936  }
1937  catch (...)
1938  {
1939  resetMemHandle();
1940  throw;
1941  }
1942 
1943  if (m_NetworkProperties.m_OutputSource == MemorySource::Undefined)
1944  {
1945  for (auto pair: outputTensors)
1946  {
1947  CopyToOutputTensor(pair.second, workingMemHandle.GetOutputHandle(pair.first));
1948  }
1949  }
1950  else
1951  {
1952  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "SyncMemGeneric_Execute");
1953  workingMemHandle.MemSyncOutputs();
1954  }
1955 
1956  resetMemHandle();
1957 
1958  return executionSucceeded ? Status::Success : Status::Failure;
1959 }
1960 
1961 /// Create a new unique WorkingMemHandle object. Create multiple handles if you wish to overlap
1962 /// executions by calling this function from different threads.
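/// A rough usage sketch (illustrative names, calling code assumed):
/// @code
/// auto handle0 = loadedNetwork->CreateWorkingMemHandle(networkId); // used by thread 0
/// auto handle1 = loadedNetwork->CreateWorkingMemHandle(networkId); // used by thread 1
/// loadedNetwork->Execute(inputs0, outputs0, *handle0);             // may run concurrently with
/// loadedNetwork->Execute(inputs1, outputs1, *handle1);             // this call on another thread
/// @endcode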
1963 std::unique_ptr<IWorkingMemHandle> LoadedNetwork::CreateWorkingMemHandle(NetworkId networkId)
1964 {
1965  Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
1966 
1967  // Tensors that will need to be allocated internally within armnn
1968  std::vector<std::unique_ptr<ITensorHandle>> managedTensorHandles;
1969  // Tensors that will be allocated externally by the user
1970  std::vector<std::unique_ptr<ITensorHandle>> unmanagedTensorHandles;
1971 
1972  std::vector<WorkingMemDescriptor> workingMemDescriptors;
1973  std::vector<std::pair<BackendId, ExecutionData>> executionDataVec;
1974 
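 // Creates a tensor handle for an output slot: fall back to the backend's workload factory when the slot
 // still reports the legacy factory id, otherwise use the registered ITensorHandleFactory.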
1975  auto GetTensorHandle = [&](Layer* layer, const OutputSlot& outputSlot)
1976  {
1977  ITensorHandleFactory::FactoryId factoryId = outputSlot.GetTensorHandleFactoryId();
1978  const TensorInfo& tensorInfo = outputSlot.GetTensorInfo();
1979 
1980  if (factoryId == ITensorHandleFactory::LegacyFactoryId)
1981  {
1982  BackendId id = layer->GetBackendId();
1983  ARMNN_NO_DEPRECATE_WARN_BEGIN
1984  return m_WorkloadFactories.at(id)->CreateTensorHandle(tensorInfo, false);
1985  ARMNN_NO_DEPRECATE_WARN_END
1986  }
1987  else
1988  {
1989  ITensorHandleFactory* handleFactory = m_TensorHandleFactoryRegistry.GetFactory(factoryId);
1990  ARMNN_ASSERT(handleFactory);
1991  return handleFactory->CreateTensorHandle(tensorInfo, false);
1992  }
1993  };
1994 
1995  struct HandleInfo
1996  {
1997  ITensorHandle* m_TensorHandle;
1998 
1999  bool m_IsInputLayerHandle = false;
2000  bool m_IsOutputLayerHandle = false;
2001 
2002  WorkingMemHandle::InputMemDescriptorCoords m_InputMemDescriptorCoords;
2003  WorkingMemHandle::OutputMemDescriptorCoords m_OutputMemDescriptorCoords;
2004  };
2005 
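 // Maps each OutputSlot to the tensor handle created for it, plus the coordinates needed later to swap
 // imported handles in and out of the working memory descriptors.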
2006  std::unordered_map<const OutputSlot*, HandleInfo> outputToHandleInfoMap;
2007 
2008  unsigned int layerIndex = 0;
2009  for (auto&& layer : order)
2010  {
2011  // Constant layers execution and management is handled during loaded network construction
2012  if (layer->GetType() == LayerType::Constant)
2013  {
2014  continue;
2015  }
2016 
2017  WorkingMemDescriptor workingMemDescriptor;
2018 
2019  bool isMemoryManaged = true;
2020  bool isInputLayer = false;
2021  bool isOutputLayer = false;
2022  bool isConnectedToOutputLayer = false;
2023 
2024  if (layer->GetType() == LayerType::Input || layer->GetType() == LayerType::MemImport)
2025  {
2026  // Input layers/workloads will not be executed so the descriptor is not added to workingMemDescriptors
2027  // However we will still need to manage the tensorHandle
2028  isInputLayer = true;
2029  isMemoryManaged = m_NetworkProperties.m_InputSource == MemorySource::Undefined;
2030  }
2031  else if (layer->GetType() == LayerType::Output)
2032  {
2033  isOutputLayer = true;
2034  }
2035 
2036  unsigned int slotIndex = 0;
2037  // Create a tensor handle for each output slot of a layer
2038  // Once we create it, we start managing its lifetime
2039  for (auto& slot : layer->GetOutputSlots())
2040  {
2041  for (unsigned int i = 0; i < slot.GetNumConnections(); ++i)
2042  {
2043  if ((slot.GetConnection(i)->GetOwningLayer().GetType() == LayerType::Output))
2044  {
2045  if (!isConnectedToOutputLayer)
2046  {
2047  isConnectedToOutputLayer = true;
2048  // If Export is enabled disable memory management, so we can export, otherwise we do a copy
2049  isMemoryManaged = m_NetworkProperties.m_OutputSource == MemorySource::Undefined;
2050  }
2051  else
2052  {
2053  // Importing in this case would likely cause unexpected behaviour, so we disallow it.
2054  ARMNN_LOG(warning) <<
2055  fmt::format("Layer name: '{0}' guid: '{1}' has two or more OutputLayers connected to it. "
2056  "This will prevent importing on the connected OutputLayers.",
2057  layer->GetName(), layer->GetGuid());
2058  isMemoryManaged = true;
2059  }
2060  }
2061  }
2062 
2063  ITensorHandle* tensorHandle;
2064  if (isMemoryManaged)
2065  {
2066  managedTensorHandles.emplace_back(GetTensorHandle(layer, slot));
2067  tensorHandle = managedTensorHandles.back().get();
2068  }
2069  else
2070  {
2071  unmanagedTensorHandles.emplace_back(GetTensorHandle(layer, slot));
2072  tensorHandle = unmanagedTensorHandles.back().get();
2073  }
2074 
2075  workingMemDescriptor.m_Outputs.push_back(tensorHandle);
2076 
2077  HandleInfo& handleInfo = outputToHandleInfoMap[&slot];
2078  handleInfo.m_TensorHandle = tensorHandle;
2079 
2080  // Store the coordinates of the current layer's OutputSlot that is connected to the OutputLayer
2081  if (isConnectedToOutputLayer)
2082  {
2083  handleInfo.m_IsOutputLayerHandle = true;
2084  handleInfo.m_OutputMemDescriptorCoords.m_OutputSlotCoords = {layerIndex, slotIndex};
2085  }
2086  // Store the LayerBindingId of the InputLayer
2087  if (isInputLayer)
2088  {
2089  handleInfo.m_IsInputLayerHandle = true;
2090  LayerBindingId bindingId = static_cast<BindableLayer*>(layer)->GetBindingId();
2091  handleInfo.m_InputMemDescriptorCoords.m_LayerBindingId = bindingId;
2092  }
2093  slotIndex++;
2094  }
2095  // Loop through the input slots in the same layer and decrement the reference counter associated
2096  // to each tensor handle we encounter.
2097  // Once it reaches zero, the lifetime of the tensor handle has ended, and we mark its memory as available
2098  // so that the next tensor handle with a non overlapping lifetime can share its memory.
2099  for (auto& slot : layer->GetInputSlots())
2100  {
2101  ARMNN_ASSERT(slot.GetConnection());
2102  auto outputSlot = slot.GetConnectedOutputSlot();
2103  auto key = outputSlot->GetOwningLayer().GetGuid();
2104 
2105  // Constant layers execution and management is handled during loaded network construction
2106  auto found = m_ConstantTensorHandles.find(key);
2107  if (found != m_ConstantTensorHandles.end())
2108  {
2109  ITensorHandle* tensorHandle = found->second;
2110  if (slot.IsTensorInfoOverridden())
2111  {
2112  ITensorHandle* decorated = tensorHandle->DecorateTensorHandle(slot.GetTensorInfo()).get();
2113  if (decorated)
2114  {
2115  tensorHandle = decorated;
2116  }
2117  }
2118  workingMemDescriptor.m_Inputs.push_back(tensorHandle);
2119 
2120  // Odd case where a constant layer is connected to an output layer
2121  // We will need to create a HandleInfo to track it
2122  if (isOutputLayer)
2123  {
2124  LayerBindingId bindingId = static_cast<BindableLayer*>(layer)->GetBindingId();
2125 
2126  HandleInfo& handleInfo = outputToHandleInfoMap[outputSlot];
2127  handleInfo.m_TensorHandle = tensorHandle;
2128  handleInfo.m_IsOutputLayerHandle = true;
2129  handleInfo.m_OutputMemDescriptorCoords.m_LayerBindingIds.push_back(bindingId);
2130  handleInfo.m_OutputMemDescriptorCoords.m_InputSlotCoords.push_back({layerIndex, 0});
2131  }
2132  continue;
2133  }
2134 
2135  HandleInfo& handleInfo = outputToHandleInfoMap.at(outputSlot);
2136 
2137  ITensorHandle* inputTensorHandle = handleInfo.m_TensorHandle;
2138  if (slot.IsTensorInfoOverridden())
2139  {
2140  ITensorHandle* decorated = inputTensorHandle->DecorateTensorHandle(slot.GetTensorInfo()).get();
2141  if (decorated)
2142  {
2143  inputTensorHandle = decorated;
2144  }
2145  }
2146  workingMemDescriptor.m_Inputs.push_back(inputTensorHandle);
2147 
2148  // Store the LayerBindingId of the OutputLayer
2149  if (isOutputLayer)
2150  {
2151  LayerBindingId bindingId = static_cast<BindableLayer*>(layer)->GetBindingId();
2152  handleInfo.m_OutputMemDescriptorCoords.m_LayerBindingIds.push_back(bindingId);
2153  handleInfo.m_OutputMemDescriptorCoords.m_InputSlotCoords.push_back({layerIndex, 0});
2154  }
2155  // In this case the layer is not an Output Layer but shares its input tensorhandle with an OutputLayer
2156  // It will need to be updated as well, if we swap out the tensorhandle
2157  else if (handleInfo.m_IsOutputLayerHandle)
2158  {
2159  handleInfo.m_OutputMemDescriptorCoords.m_InputSlotCoords.push_back({layerIndex, slot.GetSlotIndex()});
2160  }
2161 
2162  // Store the coordinates of the InputSlots connected to the InputLayer
2163  // There can be more than one InputSlot connected to an InputLayer, so we use a vector
2164  if (handleInfo.m_IsInputLayerHandle)
2165  {
2166  std::pair<LayerGuid, unsigned int> connectionLocation{layerIndex, slot.GetSlotIndex()};
2167  handleInfo.m_InputMemDescriptorCoords.m_InputSlotCoords.emplace_back(connectionLocation);
2168  }
2169  }
2170 
2171  // Input/Output layers/workloads will not be executed, so the descriptor is not added to workingMemDescriptors
2172  // However we will still need to manage the tensorHandle
2173  if (!isInputLayer)
2174  {
2175  // Simply auto initialise ExecutionData here, so it's added only for the layers that require execution.
2176  // The memory and data will be allocated/assigned for the void* in WorkingMemHandle::Allocate.
2177  std::pair<BackendId, ExecutionData> dataPair;
2178  dataPair.first = layer->GetBackendId();
2179 
2180  executionDataVec.push_back(dataPair);
2181  workingMemDescriptors.push_back(workingMemDescriptor);
2182 
2183  layerIndex++;
2184  }
2185  }
2186 
2187  std::vector<std::pair<std::shared_ptr<TensorMemory>, MemorySource>> tensorMemory;
2188 
2189  auto externalMemoryManager = CreateExternalMemoryManger(tensorMemory);
2190 
2191  // Sort m_TensorMemory so that its order matches the outputSlot order
2192  std::sort(tensorMemory.begin(), tensorMemory.end(),
2193  [](const std::pair<std::shared_ptr<TensorMemory>, MemorySource>& lhs,
2194  const std::pair<std::shared_ptr<TensorMemory>, MemorySource>& rhs)
2195  {
2196  return lhs.first->m_OutputSlotId < rhs.first->m_OutputSlotId;
2197  });
2198 
2199  std::vector<WorkingMemHandle::InputMemDescriptorCoords> inputConnectionsInfo;
2200  std::vector<WorkingMemHandle::OutputMemDescriptorCoords> outputConnectionsInfo;
2201 
2202  for (const auto& handleInfo: outputToHandleInfoMap)
2203  {
2204  if (handleInfo.second.m_IsOutputLayerHandle)
2205  {
2206  outputConnectionsInfo.emplace_back(handleInfo.second.m_OutputMemDescriptorCoords);
2207  }
2208 
2209  if (handleInfo.second.m_IsInputLayerHandle)
2210  {
2211  inputConnectionsInfo.emplace_back(handleInfo.second.m_InputMemDescriptorCoords);
2212  }
2213  }
2214 
2215  return std::make_unique<WorkingMemHandle>(networkId,
2216  inputConnectionsInfo,
2217  outputConnectionsInfo,
2218  workingMemDescriptors,
2219  std::move(externalMemoryManager),
2220  std::move(tensorMemory),
2221  std::move(managedTensorHandles),
2222  std::move(unmanagedTensorHandles),
2223  executionDataVec,
2224  &m_Backends);
2225 }
2226 
2227 void LoadedNetwork::RegisterDebugCallback(const DebugCallbackFunction& func)
2228 {
2229  for (auto&& workloadPtr: m_WorkloadQueue)
2230  {
2231  workloadPtr->RegisterDebugCallback(func);
2232  }
2233 }
2234 
2235 
2236 void LoadedNetwork::CreateMemoryProfileAsync()
2237 {
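 // A PartialBlock describes a tensor whose memory lifetime is still open: when it was produced, how many
 // consumers are still outstanding, its aligned size, its output index and the backend that owns it.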
2238  struct PartialBlock
2239  {
2240  unsigned int m_StartOfLife;
2241  unsigned int m_Lifetime;
2242 
2243  size_t m_MemSize;
2244  unsigned int m_Index;
2245 
2246  BackendId m_BackendId;
2247  };
2248 
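 // Round each block size up to a multiple of sizeof(float); e.g. align(10) == 12 with a 4-byte float.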
2249  auto align = [](size_t numToAlign)
2250  {
2251  const size_t alignment = sizeof(float);
2252  return ((numToAlign + alignment - 1) / alignment) * alignment;
2253  };
2254 
2255  std::unordered_map<const OutputSlot*, PartialBlock> memBlockTrackerMap;
2256 
2257  const bool inputImportingEnabled = m_NetworkProperties.m_InputSource != MemorySource::Undefined;
2258  const bool outputImportingEnabled = m_NetworkProperties.m_OutputSource != MemorySource::Undefined;
2259 
2260  unsigned int timestep = 0;
2261  unsigned int outputIndex = 0;
2262  Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
2263 
2264  for (auto&& layer : order)
2265  {
2266  const LayerType& layerType = layer->GetType();
2267  // Don't manage memory if importing.
2268  if (layerType == LayerType::Input && inputImportingEnabled)
2269  {
2270  continue;
2271  }
2272  // Don't manage memory if importing.
2273  if (layerType == LayerType::Output && outputImportingEnabled
2274  && layer->GetInputSlot(0).GetConnectedOutputSlot()->GetNumConnections() == 1)
2275  {
2276  continue;
2277  }
2278  // Because Constant Layer memory cannot be shared, it must persist for the lifetime of execution;
2279  // its management is handled separately.
2280  if (layerType == LayerType::Constant)
2281  {
2282  continue;
2283  }
2284 
2285  BackendId backendId = layer->GetBackendId();
2286  for (auto& outputSlot : layer->GetOutputSlots())
2287  {
2288  if (!m_SupportsExternallyManagedMemory[backendId])
2289  {
2290  continue;
2291  }
2292 
2293  PartialBlock partialBlock;
2294 
2295  partialBlock.m_StartOfLife = timestep;
2296 
2297  size_t alignedSize = align(outputSlot.GetOutputHandler().GetTensorInfo().GetNumBytes());
2298  partialBlock.m_MemSize = alignedSize;
2299  partialBlock.m_Index = outputIndex++;
2300  partialBlock.m_Lifetime = outputSlot.GetNumConnections();
2301  partialBlock.m_BackendId = backendId;
2302 
2303  if (partialBlock.m_Lifetime == 0)
2304  {
2305  m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
2306  partialBlock.m_StartOfLife,
2307  partialBlock.m_MemSize,
2308  0,
2309  partialBlock.m_Index);
2310  }
2311  else
2312  {
2313  memBlockTrackerMap[&outputSlot] = partialBlock;
2314  }
2315  }
2316 
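 // Each consuming InputSlot decrements the producing block's remaining lifetime; once it reaches zero the
 // block's full lifetime [start-of-life, current timestep] is recorded in m_MemBlockMap.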
2317  for (auto& inputSlot : layer->GetInputSlots())
2318  {
2319  const Layer& connectedInputLayer = inputSlot.GetConnectedOutputSlot()->GetOwningLayer();
2320  const LayerType& owningLayerType = connectedInputLayer.GetType();
2321 
2322  if (owningLayerType == LayerType::Constant)
2323  {
2324  continue;
2325  }
2326  if (inputImportingEnabled && owningLayerType == LayerType::Input)
2327  {
2328  continue;
2329  }
2330 
2331  auto outputSlot = inputSlot.GetConnectedOutputSlot();
2332 
2333  PartialBlock& partialBlock = memBlockTrackerMap.at(outputSlot);
2334 
2335  auto& lifetime = partialBlock.m_Lifetime;
2336  --lifetime;
2337 
2338  if (lifetime == 0)
2339  {
2340  m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
2341  timestep,
2342  partialBlock.m_MemSize,
2343  0,
2344  partialBlock.m_Index);
2345  }
2346  }
2347  ++timestep;
2348  }
2349 }
2350 
2351 void LoadedNetwork::CreateMemoryProfile()
2352 {
2353  // Finds the first TensorHandle ancestor of a SubTensorHandle. If the ITensorHandle provided
2354  // is a TensorHandle, the function just returns it
2355  auto TraceSubTensorHandleAncestry = [](ITensorHandle* const subTensorHandle)
2356  {
2357  ITensorHandle* ancestor = subTensorHandle;
2358  while (ancestor && ancestor->GetParent())
2359  {
2360  ancestor = ancestor->GetParent();
2361  }
2362  return ancestor;
2363  };
2364 
2365  struct PartialBlock
2366  {
2367  unsigned int m_StartOfLife;
2368  unsigned int m_Lifetime;
2369 
2370  size_t m_MemSize;
2371  unsigned int m_Index;
2372 
2373  BackendId m_BackendId;
2374  };
2375 
2376  auto align = [](size_t numToAlign)
2377  {
2378  const size_t alignment = sizeof(float);
2379  return ((numToAlign + alignment - 1) / alignment) * alignment;
2380  };
2381 
2382  std::unordered_map<ITensorHandle*, PartialBlock> memBlockTrackerMap;
2383 
2384  const bool inputImportingEnabled = m_NetworkProperties.m_InputSource != MemorySource::Undefined;
2385  const bool outputImportingEnabled = m_NetworkProperties.m_OutputSource != MemorySource::Undefined;
2386 
2387  unsigned int timestep = 0;
2388  unsigned int outputIndex = 0;
2389  Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
2390 
2391  for (auto&& layer : order)
2392  {
2393  const LayerType& layerType = layer->GetType();
2394  // Don't manage memory if importing.
2395  if (layerType == LayerType::Input && inputImportingEnabled)
2396  {
2397  continue;
2398  }
2399  // Don't manage memory if importing.
2400  if (layerType == LayerType::Output && outputImportingEnabled
2401  && layer->GetInputSlot(0).GetConnectedOutputSlot()->GetNumConnections() == 1)
2402  {
2403  continue;
2404  }
2405  // Because Constant Layer memory cannot be shared, it must persist for the lifetime of execution;
2406  // its management is handled separately.
2407  if (layerType == LayerType::Constant)
2408  {
2409  continue;
2410  }
2411 
2412  BackendId backendId = layer->GetBackendId();
2413  for (auto& outputSlot : layer->GetOutputSlots())
2414  {
2415  if (!m_SupportsExternallyManagedMemory[backendId])
2416  {
2417  continue;
2418  }
2419 
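 // Blocks are keyed on the root TensorHandle: sub-tensors share their parent's storage, so lifetimes are
 // traced back to the ancestor handle and accumulated when several slots reuse the same handle.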
2420  ITensorHandle* tensorHandle = outputSlot.GetOutputHandler().GetData();
2421  tensorHandle = TraceSubTensorHandleAncestry(tensorHandle);
2422 
2423  if (memBlockTrackerMap.find(tensorHandle) == memBlockTrackerMap.end())
2424  {
2425  PartialBlock partialBlock;
2426 
2427  partialBlock.m_StartOfLife = timestep;
2428 
2429  size_t alignedSize = align(outputSlot.GetOutputHandler().GetTensorInfo().GetNumBytes());
2430  partialBlock.m_MemSize = alignedSize;
2431  partialBlock.m_Index = outputIndex++;
2432  partialBlock.m_Lifetime = outputSlot.GetNumConnections();
2433  partialBlock.m_BackendId = backendId;
2434 
2435  if (partialBlock.m_Lifetime == 0)
2436  {
2437  m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
2438  partialBlock.m_StartOfLife,
2439  partialBlock.m_MemSize,
2440  0,
2441  partialBlock.m_Index);
2442  }
2443  else
2444  {
2445  memBlockTrackerMap[tensorHandle] = partialBlock;
2446  }
2447  m_Tensorhandles.push_back(tensorHandle);
2448 
2449  }
2450  else
2451  {
2452  memBlockTrackerMap.at(tensorHandle).m_Lifetime += outputSlot.GetNumConnections();
2453  }
2454  }
2455 
2456  for (auto& inputSlot : layer->GetInputSlots())
2457  {
2458  const Layer& connectedInputLayer = inputSlot.GetConnectedOutputSlot()->GetOwningLayer();
2459  const LayerType& owningLayerType = connectedInputLayer.GetType();
2460 
2461  if (owningLayerType == LayerType::Constant)
2462  {
2463  continue;
2464  }
2465  if (inputImportingEnabled && owningLayerType == LayerType::Input)
2466  {
2467  continue;
2468  }
2469  if (!m_SupportsExternallyManagedMemory[connectedInputLayer.GetBackendId()])
2470  {
2471  continue;
2472  }
2473 
2474  auto outputSlot = inputSlot.GetConnectedOutputSlot();
2475 
2476  ITensorHandle* tensorHandle = outputSlot->GetOutputHandler().GetData();
2477  tensorHandle = TraceSubTensorHandleAncestry(tensorHandle);
2478 
2479  PartialBlock& partialBlock = memBlockTrackerMap.at(tensorHandle);
2480 
2481  auto& lifetime = partialBlock.m_Lifetime;
2482  --lifetime;
2483 
2484  if (lifetime == 0)
2485  {
2486  m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
2487  timestep,
2488  partialBlock.m_MemSize,
2489  0,
2490  partialBlock.m_Index);
2491  }
2492  }
2493  ++timestep;
2494  }
2495 
2496 }
2497 
2498 std::unique_ptr<MemoryManager> LoadedNetwork::CreateExternalMemoryManger(
2499  std::vector<std::pair<std::shared_ptr<TensorMemory>, MemorySource>>& tensorMemoryVec)
2500 {
2501  std::unique_ptr<MemoryManager> memoryManager = std::make_unique<MemoryManager>();
2502  auto allocatorMap = BackendRegistryInstance().GetAllocators();
2503 
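 // For each backend's memory bins, build BufferStorage entries whose TensorMemory records are also handed
 // back through tensorMemoryVec; prefer the backend's registered custom allocator, otherwise its default.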
2504  for (auto& backend : m_MemBinMap)
2505  {
2506  std::vector<BufferStorage> bufferStorageVec;
2507 
2508  std::shared_ptr<ICustomAllocator> backendAllocator;
2509  if (allocatorMap.find(backend.first) != allocatorMap.end())
2510  {
2511  backendAllocator = allocatorMap[backend.first];
2512  }
2513  else
2514  {
2515  backendAllocator = m_Backends[backend.first]->GetDefaultAllocator();
2516  }
2517 
2518  for (auto& memBin : backend.second)
2519  {
2520  BufferStorage bufferStorage;
2521  bufferStorage.m_BufferSize = memBin.m_MemSize;
2522  bufferStorage.m_TensorMemoryVector.reserve(memBin.m_MemBlocks.size());
2523 
2524  for (auto& memBlock : memBin.m_MemBlocks)
2525  {
2526  auto tensorMemory = std::make_shared<TensorMemory>(TensorMemory{memBlock.m_Offset, memBlock.m_Index});
2527 
2528  tensorMemoryVec.emplace_back(tensorMemory, backendAllocator->GetMemorySourceType());
2529  bufferStorage.m_TensorMemoryVector.emplace_back(tensorMemory);
2530  }
2531 
2532  bufferStorageVec.emplace_back(std::move(bufferStorage));
2533  }
2534 
2535  memoryManager->StoreMemToAllocate(bufferStorageVec, backendAllocator, 4);
2536  }
2537 
2538  return memoryManager;
2539 }
2540 
2541 LayerBindingId LoadedNetwork::ValidateImportedInputID(ImportedInputId id)
2542 {
2543  try
2544  {
2545  const auto& importedTensorHandlePin = m_PreImportedInputHandles.at(id);
2546  if (!importedTensorHandlePin.m_TensorHandle)
2547  {
2548  throw InvalidArgumentException(fmt::format("LoadedNetwork::Execute: "
2549  "PreImportedInput: {} has been deleted", id));
2550  }
2551  return importedTensorHandlePin.m_LayerBindingId;
2552  }
2553  catch (const std::out_of_range&)
2554  {
2555  throw InvalidArgumentException(fmt::format("LoadedNetwork::Execute: Unknown ImportedInputId: {}", id));
2556  }
2557 }
2558 
2559 LayerBindingId LoadedNetwork::ValidateImportedOutputID(ImportedOutputId id)
2560 {
2561  try
2562  {
2563  const auto& importedTensorHandlePin = m_PreImportedOutputHandles.at(id);
2564  if (!importedTensorHandlePin.m_TensorHandle)
2565  {
2566  throw InvalidArgumentException(fmt::format("LoadedNetwork::Execute: "
2567  "PreImportedOutput: {} has been deleted", id));
2568  }
2569  return importedTensorHandlePin.m_LayerBindingId;
2570  }
2571  catch (const std::out_of_range&)
2572  {
2573  throw InvalidArgumentException(fmt::format("LoadedNetwork::Execute: Unknown ImportedOutputId: {}", id));
2574  }
2575 }
2576 
2577 }
armnn::BindableLayer
Definition: Layer.hpp:470
ARMNN_ASSERT
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14
BackendHelper.hpp
armnn::ImportedInputId
unsigned int ImportedInputId
Definition: Types.hpp:310
armnn::Graph::SetLayersOutOfOrder
void SetLayersOutOfOrder()
Definition: Graph.cpp:697
armnn::Compute::Undefined
@ Undefined
armnn::Tensor
A tensor defined by a TensorInfo (shape and data type) and a mutable backing store.
Definition: Tensor.hpp:321
armnn::INetworkProperties::m_AsyncEnabled
const bool m_AsyncEnabled
Definition: IRuntime.hpp:59
armnn::BaseTensor::GetMemoryArea
MemoryType GetMemoryArea() const
Definition: Tensor.hpp:307
armnn::Graph::OutputLayersAccessor::begin
ConstIteratorOutputs begin() const
Definition: Graph.hpp:84
arm::pipe
Definition: BackendRegistry.hpp:17
armnn::LoadedNetwork::GetOutputTensorInfo
TensorInfo GetOutputTensorInfo(LayerBindingId layerId) const
Definition: LoadedNetwork.cpp:723
armnn::INetworkProperties::m_InputSource
const MemorySource m_InputSource
Definition: IRuntime.hpp:65
armnn::ProfilerManager::RegisterProfiler
void RegisterProfiler(IProfiler *profiler)
Definition: Profiling.cpp:600
armnn::experimental::WorkingMemHandle::GetExecutionDataAt
std::pair< BackendId, ExecutionData > & GetExecutionDataAt(unsigned int id) override
Get the ExecutionData at an index.
Definition: WorkingMemHandle.hpp:92
armnn::OutputSlot::GetTensorInfo
const TensorInfo & GetTensorInfo() const override
Definition: Layer.cpp:92
armnn::InputTensors
std::vector< std::pair< LayerBindingId, class ConstTensor > > InputTensors
Definition: Tensor.hpp:394
armnn::LoadedNetwork::EnqueueWorkload
Status EnqueueWorkload(const InputTensors &inputTensors, const OutputTensors &outputTensors, std::vector< ImportedInputId > preImportedInputIds={}, std::vector< ImportedOutputId > preImportedOutputIds={})
Single thread execution of the loaded network.
Definition: LoadedNetwork.cpp:851
armnn::LoadedNetwork::RegisterDebugCallback
void RegisterDebugCallback(const DebugCallbackFunction &func)
Definition: LoadedNetwork.cpp:2227
ExecutionData.hpp
armnn::TensorHandleFactoryRegistry::ReleaseMemory
void ReleaseMemory()
Release memory required for inference.
Definition: TensorHandleFactoryRegistry.cpp:86
armnn::ValidateSourcesMatchOptimizedNetwork
void ValidateSourcesMatchOptimizedNetwork(std::vector< BackendOptions > optimizedOptions, const INetworkProperties &networkProperties)
This function performs a sanity check to ensure that the combination of input and output memory sourc...
Definition: LoadedNetwork.cpp:98
LoadedNetwork.hpp
armnn::OutputSlot
Definition: Layer.hpp:100
armnn::TensorHandleFactoryRegistry::GetFactory
ITensorHandleFactory * GetFactory(ITensorHandleFactory::FactoryId id) const
Find a TensorHandleFactory by Id. Returns nullptr if not found.
Definition: TensorHandleFactoryRegistry.cpp:39
armnn::TensorInfo
Definition: Tensor.hpp:152
MemSyncWorkload.hpp
Graph.hpp
CHECK_LOCATION
#define CHECK_LOCATION()
Definition: Exceptions.hpp:203
armnn::MemorySourceFlags
unsigned int MemorySourceFlags
Definition: MemorySources.hpp:15
Profiling.hpp
armnn::INetworkProperties::m_ProfilingEnabled
const bool m_ProfilingEnabled
Definition: IRuntime.hpp:61
armnn::LoadedNetwork
Definition: LoadedNetwork.hpp:42
armnn::ITensorHandle
Definition: ITensorHandle.hpp:16
armnn::BackendOptions::BackendOption
Definition: BackendOptions.hpp:215
armnn::experimental::WorkingMemHandle::GetOutputConnection
const std::vector< std::vector< ITensorHandle * >::iterator > & GetOutputConnection(LayerBindingId layerBindingId) const
Definition: WorkingMemHandle.hpp:112
ARMNN_NO_DEPRECATE_WARN_BEGIN
#define ARMNN_NO_DEPRECATE_WARN_BEGIN
Definition: Deprecated.hpp:33
armnn::experimental::WorkingMemHandle::IsAllocated
bool IsAllocated() override
IsAllocated returns true if the backing memory is currently allocated.
Definition: WorkingMemHandle.hpp:77
armnn::ITensorHandle::GetImportFlags
virtual unsigned int GetImportFlags() const
Get flags describing supported import sources.
Definition: ITensorHandle.hpp:70
armnn::LoadedNetwork::SendNetworkStructure
void SendNetworkStructure(arm::pipe::IProfilingService &profilingService)
Definition: LoadedNetwork.cpp:666
BackendRegistry.hpp
armnn::experimental::WorkingMemHandle::OutputMemDescriptorCoords
Definition: WorkingMemHandle.hpp:40
armnn::OutputTensors
std::vector< std::pair< LayerBindingId, class Tensor > > OutputTensors
Definition: Tensor.hpp:395
armnn::Layer::GetInputSlots
const std::vector< InputSlot > & GetInputSlots() const
Definition: Layer.hpp:258
armnn::Graph::InputLayersAccessor::end
ConstIteratorInputs end() const
Definition: Graph.hpp:70
ARMNN_ASSERT_MSG
#define ARMNN_ASSERT_MSG(COND, MSG)
Definition: Assert.hpp:15
armnn::experimental::IWorkingMemHandle
Definition: IWorkingMemHandle.hpp:20
armnn::BoostLogSeverityMapping::error
@ error
armnn::LoadedNetwork::WorkloadQueue
std::vector< std::unique_ptr< IWorkload > > WorkloadQueue
Definition: LoadedNetwork.hpp:45
armnn::BackendRegistry::GetMemoryOptimizerStrategies
MemoryOptimizerStrategiesMapRef GetMemoryOptimizerStrategies()
Definition: BackendRegistry.cpp:150
armnn::LoadedNetwork::ImportInputs
std::vector< ImportedInputId > ImportInputs(const InputTensors &inputTensors, MemorySource forceImportMemorySource=MemorySource::Undefined)
Definition: LoadedNetwork.cpp:1430
armnn::OutputHandler::GetData
ITensorHandle * GetData() const
Gets the allocated tensor memory.
Definition: OutputHandler.hpp:46
armnn::Layer::GetName
const char * GetName() const override
Returns the name of the layer.
Definition: Layer.hpp:332
armnn::ITensorHandleFactory::LegacyFactoryId
static const FactoryId LegacyFactoryId
Definition: ITensorHandleFactory.hpp:50
armnn::Exception::what
virtual const char * what() const noexcept override
Definition: Exceptions.cpp:32
armnn::Layer
Definition: Layer.hpp:230
ARMNN_LOG
#define ARMNN_LOG(severity)
Definition: Logging.hpp:212
armnn::InputSlot::GetTensorInfo
const TensorInfo & GetTensorInfo() const override
Gets the TensorInfo for this InputSlot.
Definition: Layer.cpp:592
Assert.hpp
armnn::ITensorHandle::DecorateTensorHandle
virtual std::shared_ptr< ITensorHandle > DecorateTensorHandle(const TensorInfo &tensorInfo)
Returns a decorated version of this TensorHandle allowing us to override the TensorInfo for it.
Definition: ITensorHandle.hpp:98
armnn::ITensorHandle::Import
virtual bool Import(void *memory, MemorySource source)
Import externally allocated memory.
Definition: ITensorHandle.hpp:76
armnn::experimental::WorkingMemHandle::GetBindingIdVector
std::vector< LayerBindingId > & GetBindingIdVector()
Definition: WorkingMemHandle.hpp:119
armnn::INetworkProperties::m_OutputNetworkDetailsMethod
const ProfilingDetailsMethod m_OutputNetworkDetailsMethod
Definition: IRuntime.hpp:63
armnn::NetworkId
int NetworkId
Definition: IRuntime.hpp:35
armnn::OutputQueueDescriptor
MemCopyQueueDescriptor OutputQueueDescriptor
Definition: WorkloadData.hpp:92
Logging.hpp
ARMNN_SCOPED_PROFILING_EVENT
#define ARMNN_SCOPED_PROFILING_EVENT(backendId, name)
Definition: Profiling.hpp:220
armnn::MemorySource::Undefined
@ Undefined
armnn::WorkloadInfo
Contains information about TensorInfos of a layer.
Definition: WorkloadInfo.hpp:16
armnn::experimental::WorkingMemHandle::GetInputHandle
ITensorHandle * GetInputHandle(LayerBindingId layerBindingId) const
Definition: WorkingMemHandle.hpp:97
IBackendInternal.hpp
armnn::LoadedNetwork::ImportOutputs
std::vector< ImportedOutputId > ImportOutputs(const OutputTensors &outputTensors, MemorySource forceImportMemorySource=MemorySource::Undefined)
Definition: LoadedNetwork.cpp:1573
armnn::BackendRegistryInstance
BackendRegistry & BackendRegistryInstance()
Definition: BackendRegistry.cpp:15
armnn::experimental::WorkingMemHandle::GetInputConnections
const std::vector< std::vector< ITensorHandle * >::iterator > & GetInputConnections(LayerBindingId layerBindingId) const
Definition: WorkingMemHandle.hpp:107
armnn::Graph::GetOutputLayers
OutputLayersAccessor GetOutputLayers() const
Returns a wrapper object with begin(), end() methods to iterate over the output layers in a range-bas...
Definition: Graph.hpp:196
armnn::IWorkloadFactory
Definition: WorkloadFactory.hpp:22
armnn::InvalidArgumentException
Definition: Exceptions.hpp:80
armnn::LayerBindingId
int LayerBindingId
Type of identifiers for bindable layers (inputs, outputs).
Definition: Types.hpp:309
armnn::BackendId::Get
const std::string & Get() const
Definition: BackendId.hpp:138
armnn::Layer::GetGuid
LayerGuid GetGuid() const final
Returns the unique id of the layer.
Definition: Layer.hpp:343
armnn::Layer::GetOutputHandler
const OutputHandler & GetOutputHandler(unsigned int i=0) const
Definition: Layer.hpp:245
armnn::CopyToOutputTensor
void CopyToOutputTensor(const Tensor &outputTensor, ITensorHandle *outputTensorHandle)
Definition: LoadedNetwork.cpp:1388
armnn::Layer::GetNumOutputSlots
unsigned int GetNumOutputSlots() const override
Returns the number of connectable output slots.
Definition: Layer.hpp:335
armnn::GetTensorInfo
const TensorInfo & GetTensorInfo(const ITensorHandle *tensorHandle)
float32 helpers
Definition: RefWorkloadUtils.hpp:33
armnn::Graph::GetNumLayers
size_t GetNumLayers() const
Definition: Graph.hpp:198
armnn::ITensorHandleFactory
Definition: ITensorHandleFactory.hpp:46
armnn::TensorHandleFactoryRegistry::GetMatchingImportFactoryId
ITensorHandleFactory::FactoryId GetMatchingImportFactoryId(ITensorHandleFactory::FactoryId copyFactoryId)
Get a matching TensorHandleFactory Id for Memory Import given TensorHandleFactory Id for Memory Copy.
Definition: TensorHandleFactoryRegistry.cpp:72
ARMNN_SCOPED_HEAP_PROFILING
#define ARMNN_SCOPED_HEAP_PROFILING(TAG)
Definition: HeapProfiling.hpp:45
armnn::GetOutputTensor
const armnn::Tensor GetOutputTensor(const LayerBindingId layerId, const OutputTensors &outputTensors)
Definition: LoadedNetwork.cpp:1417
armnn::CheckFlag
bool CheckFlag(MemorySourceFlags flags, MemorySource source)
Definition: MemorySources.hpp:41
armnn::Status::Success
@ Success
armnn::INetworkProperties
Definition: IRuntime.hpp:43
armnn::Layer::GetOutputSlots
const std::vector< OutputSlot > & GetOutputSlots() const
Definition: Layer.hpp:259
armnn::Graph::GetNumInputs
size_t GetNumInputs() const
Definition: Graph.hpp:187
armnn::experimental::WorkingMemHandle::MemSyncOutputs
void MemSyncOutputs()
Definition: WorkingMemHandle.cpp:136
armnn::Exception
Base class for all ArmNN exceptions so that users can filter to just those.
Definition: Exceptions.hpp:46
armnn::ITensorHandle::Unmap
virtual void Unmap() const =0
Unmap the tensor data.
armnn::experimental::WorkingMemHandle
Definition: WorkingMemHandle.hpp:29
armnn::RuntimeException
Definition: Exceptions.hpp:120
armnn::experimental::WorkingMemHandle::InputMemDescriptorCoords
Definition: WorkingMemHandle.hpp:33
armnn::LoadedNetwork::GetNetworkGuid
arm::pipe::ProfilingGuid GetNetworkGuid()
Definition: LoadedNetwork.cpp:704
armnn::BaseTensor::GetInfo
const TensorInfo & GetInfo() const
Definition: Tensor.hpp:297
armnn::OutputHandler
Definition: OutputHandler.hpp:28
armnn::BoostLogSeverityMapping::info
@ info
armnn::LoadedNetwork::GetInputTensorInfo
TensorInfo GetInputTensorInfo(LayerBindingId layerId) const
Definition: LoadedNetwork.cpp:709
armnn::LayerType::MemImport
@ MemImport
armnn::CopyTensorContentsGeneric
void CopyTensorContentsGeneric(const ITensorHandle *srcTensor, ITensorHandle *dstTensor, CopyFunc copy)
Definition: WorkloadUtils.hpp:46
armnn::Layer::GetNameStr
const std::string & GetNameStr() const
Definition: Layer.hpp:240
armnn::InputQueueDescriptor
MemCopyQueueDescriptor InputQueueDescriptor
Definition: WorkloadData.hpp:91
armnn::TensorHandleFactoryRegistry::AquireMemory
void AquireMemory()
Acquire memory required for inference.
Definition: TensorHandleFactoryRegistry.cpp:78
armnn::InputSlot
Definition: Layer.hpp:42
ArmNNProfiling.hpp
armnn::BackendRegistry::GetFactory
FactoryFunction GetFactory(const BackendId &id) const
Definition: BackendRegistry.cpp:57
HeapProfiling.hpp
armnn::ImportedOutputId
unsigned int ImportedOutputId
Definition: Types.hpp:311
armnn::BackendOptions
Struct for the users to pass backend specific options.
Definition: BackendOptions.hpp:22
WorkingMemHandle.hpp
armnn::Layer::GetType
LayerType GetType() const override
Returns the armnn::LayerType of this layer.
Definition: Layer.hpp:286
armnn::Graph::GetInputLayers
InputLayersAccessor GetInputLayers() const
Returns a wrapper object with begin(), end() methods to iterate over the input layers in a range-base...
Definition: Graph.hpp:192
armnn::Graph::InputLayersAccessor::begin
ConstIteratorInputs begin() const
Definition: Graph.hpp:65
TensorHandle.hpp
armnn::Status
Status
Definition: Types.hpp:42
armnn::ITensorHandleFactory::CreateTensorHandle
virtual std::unique_ptr< ITensorHandle > CreateTensorHandle(const TensorInfo &tensorInfo) const =0
armnn::LoadedNetwork::MakeLoadedNetwork
static std::unique_ptr< LoadedNetwork > MakeLoadedNetwork(std::unique_ptr< IOptimizedNetwork > net, std::string &errorMessage, const INetworkProperties &networkProperties, arm::pipe::IProfilingService *profilingService)
Definition: LoadedNetwork.cpp:170
armnn::LoadedNetwork::CreateWorkingMemHandle
std::unique_ptr< IWorkingMemHandle > CreateWorkingMemHandle(NetworkId networkId)
Create a new unique WorkingMemHandle object.
Definition: LoadedNetwork.cpp:1963
armnn::ProfilerManager::GetInstance
static ProfilerManager & GetInstance()
Definition: Profiling.cpp:593
armnn::experimental::WorkingMemHandle::GetOutputHandle
ITensorHandle * GetOutputHandle(LayerBindingId layerBindingId) const
Definition: WorkingMemHandle.hpp:102
ARMNN_NO_DEPRECATE_WARN_END
#define ARMNN_NO_DEPRECATE_WARN_END
Definition: Deprecated.hpp:34
armnn::MemoryImportException
Definition: Exceptions.hpp:125
std
Definition: BackendId.hpp:149
MemCopyWorkload.hpp
armnn::BackendCapabilityException
Definition: Exceptions.hpp:152
armnn::Graph::TopologicalSort
Graph & TopologicalSort()
Sorts layers in topological order and return this.
Definition: Graph.hpp:184
armnn::LoadedNetwork::Execute
Status Execute(const InputTensors &inputTensors, const OutputTensors &outputTensors, IWorkingMemHandle &workingMemHandle, std::vector< ImportedInputId > preImportedInputs={}, std::vector< ImportedOutputId > preImportedOutputs={})
Thread safe execution of the loaded network.
Definition: LoadedNetwork.cpp:1741
armnn::IgnoreUnused
void IgnoreUnused(Ts &&...)
Definition: IgnoreUnused.hpp:14
armnn::DebugCallbackFunction
std::function< void(LayerGuid guid, unsigned int slotIndex, ITensorHandle *tensorHandle)> DebugCallbackFunction
Define the type of callback for the Debug layer to call.
Definition: Types.hpp:398
armnn::MemorySource
MemorySource
Define the Memory Source to reduce copies.
Definition: Types.hpp:244
armnn::BackendRegistry::GetAllocators
std::unordered_map< BackendId, std::shared_ptr< ICustomAllocator > > GetAllocators()
Definition: BackendRegistry.cpp:128
armnn::Layer::GetBackendId
const BackendId & GetBackendId() const
Definition: Layer.hpp:290
armnn::BackendId
Definition: BackendId.hpp:75
armnn::LoadedNetwork::ClearImportedOutputs
void ClearImportedOutputs(const std::vector< ImportedOutputId > outputIds)
Definition: LoadedNetwork.cpp:1720
armnn::experimental::WorkingMemHandle::ValidateBindingIds
void ValidateBindingIds()
Definition: WorkingMemHandle.cpp:145
armnn::OutputSlot::GetTensorHandleFactoryId
ITensorHandleFactory::FactoryId GetTensorHandleFactoryId() const
Definition: Layer.cpp:205
armnn::Graph::OutputLayersAccessor::end
ConstIteratorOutputs end() const
Definition: Graph.hpp:90
armnn::InputSlot::GetConnectedOutputSlot
const OutputSlot * GetConnectedOutputSlot() const
Definition: Layer.hpp:56
armnn::experimental::WorkingMemDescriptor::m_Inputs
std::vector< ITensorHandle * > m_Inputs
Definition: WorkingMemDescriptor.hpp:20
armnn
Copyright (c) 2021 ARM Limited and Contributors.
Definition: 01_00_quick_start.dox:6
armnn::experimental::WorkingMemDescriptor
Definition: WorkingMemDescriptor.hpp:18
Layer.hpp
armnn::LoadedNetwork::FreeWorkingMemory
void FreeWorkingMemory()
Definition: LoadedNetwork.cpp:1234
armnn::MemSyncQueueDescriptor
Definition: WorkloadData.hpp:99
armnn::ITensorHandleFactory::FactoryId
std::string FactoryId
Definition: ITensorHandleFactory.hpp:49
armnn::BoostLogSeverityMapping::warning
@ warning
armnn::ConstTensor
A tensor defined by a TensorInfo (shape and data type) and an immutable backing store.
Definition: Tensor.hpp:329
armnn::LayerType::Input
@ Input
armnn::experimental::WorkingMemHandle::Allocate
void Allocate() override
Allocate the backing memory required for execution.
Definition: WorkingMemHandle.cpp:100
armnn::OutputHandler::GetTensorInfo
const TensorInfo & GetTensorInfo() const
Gets the matching TensorInfo for the output.
Definition: OutputHandler.hpp:42
armnn::LoadedNetwork::ClearImportedInputs
void ClearImportedInputs(const std::vector< ImportedInputId > inputIds)
Definition: LoadedNetwork.cpp:1699
armnn::IBackendInternal::IWorkloadFactoryPtr
std::unique_ptr< IWorkloadFactory > IWorkloadFactoryPtr
Definition: IBackendInternal.hpp:89
armnn::Graph::GetNumOutputs
size_t GetNumOutputs() const
Definition: Graph.hpp:188
armnn::LayerType
LayerType
When adding a new layer, adapt also the LastLayer enum value in the enum class LayerType below.
Definition: Types.hpp:491
armnn::Graph
Definition: Graph.hpp:30
IMemoryManager.hpp
armnn::INetworkProperties::m_OutputSource
const MemorySource m_OutputSource
Definition: IRuntime.hpp:66
armnn::Status::Failure
@ Failure
armnn::experimental::WorkingMemDescriptor::m_Outputs
std::vector< ITensorHandle * > m_Outputs
Definition: WorkingMemDescriptor.hpp:21
armnn::QueueDescriptor::m_Inputs
std::vector< ITensorHandle * > m_Inputs
Definition: WorkloadData.hpp:26
armnn::LayerType::Output
@ Output
armnn::LayerType::Constant
@ Constant
armnn::ITensorHandle::Map
virtual const void * Map(bool blocking=true) const =0
Map the tensor data for access.
armnn::GetInputTensor
const armnn::ConstTensor GetInputTensor(const LayerBindingId layerId, const InputTensors &inputTensors)
Definition: LoadedNetwork.cpp:1404
armnn::HasMatchingCapability
bool HasMatchingCapability(const BackendOptions::BackendOption &capability, const BackendCapabilities &capabilities)
Convenience function to check if a given capability matches a capability in a BackendCapabilities str...
Definition: BackendHelper.cpp:85