LoadedNetwork.cpp (ArmNN 23.02)
//
// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "LoadedNetwork.hpp"
#include "Layer.hpp"
#include "Graph.hpp"
#include "Profiling.hpp"
#include "HeapProfiling.hpp"
#include "WorkingMemHandle.hpp"
#include "ExecutionData.hpp"

#include <armnn/BackendHelper.hpp>
#include <armnn/BackendRegistry.hpp>
#include <armnn/Logging.hpp>
#include <armnn/utility/Assert.hpp>
#include <armnn/utility/IgnoreUnused.hpp>
#include <armnn/utility/NumericCast.hpp>

#include <backendsCommon/MemCopyWorkload.hpp>
#include <backendsCommon/MemSyncWorkload.hpp>

#include <common/include/Processes.hpp>

#include <fmt/format.h>
namespace armnn
{

using namespace std;
using namespace arm::pipe;

namespace
{

template <typename ExceptionType>
std::string ToErrorMessage(const char* prefix, const ExceptionType& error)
{
    std::stringstream ss;
    ss << prefix << " " << error.what();
    return ss.str();
}

void AddLayerStructure(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
                       const Layer& layer,
                       ProfilingGuid networkGuid)
{
    // Add layer to the post-optimisation network structure
    std::string layerName = layer.GetNameStr().empty() ? "<Unnamed>" : layer.GetNameStr();
    timelineUtils->CreateNamedTypedChildEntity(layer.GetGuid(),
                                               networkGuid,
                                               layerName,
                                               LabelsAndEventClasses::LAYER_GUID);
    for (auto&& input : layer.GetInputSlots())
    {
        const IOutputSlot* source = input.GetConnectedOutputSlot();
        ARMNN_ASSERT(source != nullptr);
        timelineUtils->CreateConnectionRelationship(ProfilingRelationshipType::RetentionLink,
                                                    source->GetOwningLayerGuid(),
                                                    layer.GetGuid());
    }
}

void AddWorkloadStructure(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
                          std::unique_ptr<IWorkload>& workload,
                          const Layer& layer)
{
    // Add workload to the post-optimisation network structure
    timelineUtils->CreateTypedEntity(workload->GetGuid(), LabelsAndEventClasses::WORKLOAD_GUID);
    timelineUtils->MarkEntityWithLabel(workload->GetGuid(),
                                       layer.GetBackendId().Get(),
                                       LabelsAndEventClasses::BACKENDID_GUID);

    // Link the workload to the layer
    timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
                                      layer.GetGuid(),
                                      workload->GetGuid(),
                                      LabelsAndEventClasses::CHILD_GUID);
}

} // anonymous namespace

/**
 * This function performs a sanity check to ensure that the combination of input and output memory sources matches
 * the values of importEnabled and exportEnabled that were specified during optimization. During optimization the
 * tensor handle factories are chosen based on whether import and export are enabled. If the user then specifies
 * something incompatible here it can lead to problems.
 *
 * @param optimizedOptions the model options ("Global" BackendOptions) recorded when the network was optimized
 * @param networkProperties the properties, including input and output memory sources, supplied at load time
 */
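// For example (illustrative): a network optimized with importEnabled == false but then loaded with an
// INetworkProperties whose m_InputSource is MemorySource::Malloc will fail this check and throw
// InvalidArgumentException here, rather than failing later inside execution.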
void ValidateSourcesMatchOptimizedNetwork(std::vector<BackendOptions> optimizedOptions,
                                          const INetworkProperties& networkProperties)
{
    // Find the "Global" backend options. During the optimize phase the values of importEnabled and exportEnabled
    // are added as backend options.
    const vector<BackendOptions>::iterator& backendItr =
        find_if(optimizedOptions.begin(), optimizedOptions.end(), [](const BackendOptions& backend) {
            return backend.GetBackendId().Get() == "Global";
        });
    bool importEnabled = false;
    bool exportEnabled = false;
    if (backendItr != optimizedOptions.end())
    {
        // Find the importEnabled and exportEnabled values.
        for (size_t i = 0; i < backendItr->GetOptionCount(); i++)
        {
            const BackendOptions::BackendOption& option = backendItr->GetOption(i);
            if (option.GetName() == "ImportEnabled")
            {
                importEnabled = option.GetValue().AsBool();
            }
            if (option.GetName() == "ExportEnabled")
            {
                exportEnabled = option.GetValue().AsBool();
            }
        }
    }

    // Now that we have values for import and export, compare them to the MemorySource variables.
    // Any value of MemorySource that's not "Undefined" implies that we need to do an import of some kind.
    if ((networkProperties.m_InputSource == MemorySource::Undefined && importEnabled) ||
        (networkProperties.m_InputSource != MemorySource::Undefined && !importEnabled))
    {
        auto message = fmt::format("The input memory source specified, '{0}',", networkProperties.m_InputSource);
        if (!importEnabled)
        {
            message.append(" requires that memory import be enabled. However, "
                           "it was disabled when this network was optimized.");
        }
        else
        {
            message.append(" requires that memory import be disabled. However, "
                           "it was enabled when this network was optimized.");
        }
        throw InvalidArgumentException(message);
    }

    if ((networkProperties.m_OutputSource == MemorySource::Undefined && exportEnabled) ||
        (networkProperties.m_OutputSource != MemorySource::Undefined && !exportEnabled))
    {
        auto message = fmt::format("The output memory source specified, '{0}',", networkProperties.m_OutputSource);
        if (!exportEnabled)
        {
            message.append(" requires that memory export be enabled. However, "
                           "it was disabled when this network was optimized.");
        }
        else
        {
            message.append(" requires that memory export be disabled. However, "
                           "it was enabled when this network was optimized.");
        }
        throw InvalidArgumentException(message);
    }
}

std::unique_ptr<LoadedNetwork> LoadedNetwork::MakeLoadedNetwork(std::unique_ptr<IOptimizedNetwork> net,
                                                                std::string& errorMessage,
                                                                const INetworkProperties& networkProperties,
                                                                arm::pipe::IProfilingService* profilingService)
{
    std::unique_ptr<LoadedNetwork> loadedNetwork;

    auto Fail = [&](const std::exception& error) -> std::unique_ptr<LoadedNetwork>
    {
        errorMessage = ToErrorMessage("An error occurred when preparing the network workloads: ", error);
        ARMNN_LOG(error) << errorMessage;

        return std::unique_ptr<LoadedNetwork>();
    };

    try
    {
        loadedNetwork.reset(new LoadedNetwork(std::move(net), networkProperties, profilingService));
    }
    catch (const armnn::RuntimeException& error)
    {
        return Fail(error);
    }
    catch (const armnn::Exception& error)
    {
        return Fail(error);
    }
    catch (const std::runtime_error& error)
    {
        return Fail(error);
    }

    return loadedNetwork;
}
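
// A minimal usage sketch (illustrative; 'optNet' is a std::unique_ptr<IOptimizedNetwork> from armnn::Optimize,
// 'profilingService' a valid arm::pipe::IProfilingService*, and the INetworkProperties arguments shown are
// assumptions, not values mandated by this file):
//
//     std::string errorMessage;
//     INetworkProperties properties(/*asyncEnabled=*/false, MemorySource::Undefined, MemorySource::Undefined);
//     std::unique_ptr<LoadedNetwork> network =
//         LoadedNetwork::MakeLoadedNetwork(std::move(optNet), errorMessage, properties, profilingService);
//     if (!network)
//     {
//         ARMNN_LOG(error) << errorMessage; // creation failed; errorMessage holds the reason
//     }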

LoadedNetwork::LoadedNetwork(std::unique_ptr<IOptimizedNetwork> net,
                             const INetworkProperties& networkProperties,
                             arm::pipe::IProfilingService* profilingService) :
    m_OptimizedNetwork(std::move(net)),
    m_NetworkProperties(networkProperties),
    m_TensorHandleFactoryRegistry(),
    m_ProfilingService(profilingService)
{
    // Get the profiler and register it for the current thread.
    const std::shared_ptr<IProfiler>& profiler = m_OptimizedNetwork->GetProfiler();
    ProfilerManager::GetInstance().RegisterProfiler(profiler.get());

    profiler->EnableProfiling(networkProperties.m_ProfilingEnabled);

    profiler->EnableNetworkDetailsToStdOut(networkProperties.m_OutputNetworkDetailsMethod);

    // We need to check that the memory sources match up with the values of import and export specified during the
    // optimize phase. If they don't, this will throw an exception.
    ValidateSourcesMatchOptimizedNetwork(m_OptimizedNetwork.get()->pOptimizedNetworkImpl->GetModelOptions(),
                                         m_NetworkProperties);

    // First create tensor handlers, backends and workload factories.
    // Handlers are created before workloads because workload creation can modify some of the handlers
    // (for example the splitter and concat layers).

    bool useExternalMemoryManager = false;
    bool useInternalMemoryManager = false;
    Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
    // Ensure topological order
    order.SetLayersOutOfOrder();
    order.TopologicalSort();

    if (!networkProperties.m_AsyncEnabled)
    {
        m_IsInputImported = std::vector<bool>(order.GetNumInputs(), false);
        m_IsOutputImported = std::vector<bool>(order.GetNumOutputs(), false);
    }

    for (auto&& layer : order)
    {
        auto const& backendId = layer->GetBackendId();
        if (m_Backends.count(backendId) == 0)
        {
            auto createBackend = BackendRegistryInstance().GetFactory(backendId);
            auto it = m_Backends.emplace(std::make_pair(backendId, createBackend()));

            IBackendInternal* backend = it.first->second.get();

            // If we're doing async execution, verify that the backend supports it and ExternallyManagedMemory.
            if (networkProperties.m_AsyncEnabled)
            {
                if (!HasCapability(BackendOptions::BackendOption{"AsyncExecution", true}, backend->GetCapabilities()))
                {
                    std::string er = backend->GetId();
                    er += " does not support AsyncExecution";
                    throw BackendCapabilityException(er);
                }
                if (!HasCapability(BackendOptions::BackendOption{"ExternallyManagedMemory", true},
                                   backend->GetCapabilities()))
                {
                    std::string er = backend->GetId();
                    er += " does not support ExternallyManagedMemory\n";
                    er += "AsyncEnabled networks require all backends to support ExternallyManagedMemory";
                    throw BackendCapabilityException(er);
                }
                m_SupportsExternallyManagedMemory[backend->GetId()] = true;
                useExternalMemoryManager = true;
            }
            else
            {
                m_SupportsExternallyManagedMemory[backend->GetId()] = false;
                useInternalMemoryManager = true;
            }

            IBackendInternal::IWorkloadFactoryPtr workloadFactory;
            if (backend->SupportsTensorAllocatorAPI())
            {
                workloadFactory = backend->CreateWorkloadFactory(
                    m_TensorHandleFactoryRegistry,
                    m_OptimizedNetwork->pOptimizedNetworkImpl->GetModelOptions(),
                    static_cast<MemorySourceFlags>(m_NetworkProperties.m_InputSource),
                    static_cast<MemorySourceFlags>(m_NetworkProperties.m_OutputSource));
            }
            else
            {
                m_BackendMemoryMangers.emplace_back(backend->CreateMemoryManager());
                workloadFactory = backend->CreateWorkloadFactory(
                    m_BackendMemoryMangers.back(), m_OptimizedNetwork->pOptimizedNetworkImpl->GetModelOptions());
            }
            m_WorkloadFactories[backendId] = std::move(workloadFactory);
        }
    }

    if (!networkProperties.m_AsyncEnabled)
    {
        for (auto&& layer : order)
        {
            auto& workloadFactory = GetWorkloadFactory(*layer);
            bool supportsExternalManager = m_SupportsExternallyManagedMemory[layer->GetBackendId()];

            switch (layer->GetType())
            {
                case LayerType::Input:
                {
                    // If IsImportEnabled is true then we need to set IsMemoryManaged
                    // to false when creating TensorHandles
                    layer->CreateTensorHandles(m_TensorHandleFactoryRegistry,
                                               workloadFactory,
                                               !supportsExternalManager && !m_NetworkProperties.m_ImportEnabled);
                    break;
                }
                case LayerType::Constant:
                {
                    layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory, true);
                    break;
                }
                default:
                {
                    // Look for a layer with 1 OutputSlot which has 1 connection and that connection is an Output
                    // Layer. If Export is enabled, disable memory management so we can export; otherwise we do a copy.
                    if ((layer->GetNumOutputSlots() == 1) &&
                        (layer->GetOutputSlots()[0].GetNumConnections() == 1) &&
                        (layer->GetOutputSlots()[0].GetConnection(0)->GetOwningLayer().GetType() == LayerType::Output))
                    {
                        layer->CreateTensorHandles(m_TensorHandleFactoryRegistry,
                                                   workloadFactory,
                                                   !supportsExternalManager && !m_NetworkProperties.m_ExportEnabled);
                    }
                    else
                    {
                        layer->CreateTensorHandles(m_TensorHandleFactoryRegistry,
                                                   workloadFactory,
                                                   !supportsExternalManager);
                    }
                }
            }
        }
    }

    ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
    std::unique_ptr<TimelineUtilityMethods> timelineUtils =
        TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
    if (timelineUtils)
    {
        timelineUtils->CreateTypedEntity(networkGuid, LabelsAndEventClasses::NETWORK_GUID);
        // Mark the network with a start of life event
        timelineUtils->RecordEvent(networkGuid, LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
        // and with the process ID
        int processID = arm::pipe::GetCurrentProcessId();
        std::stringstream ss;
        ss << processID;
        timelineUtils->MarkEntityWithLabel(networkGuid, ss.str(), LabelsAndEventClasses::PROCESS_ID_GUID);
    }

    std::vector<IWorkload*> ConstWorkloads;

    // Then create workloads.
    {
        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "LoadNetwork_CreateWorkloads");
        for (auto&& layer : order)
        {
            if (timelineUtils)
            {
                // Add layer to the post-optimisation network structure
                AddLayerStructure(timelineUtils, *layer, networkGuid);
            }

            const IWorkloadFactory& workloadFactory = GetWorkloadFactory(*layer);

            switch (layer->GetType())
            {
                case LayerType::Input:
                case LayerType::Output:
                {
                    // Inputs and outputs are treated in a special way - see EnqueueInput() and EnqueueOutput().
                    break;
                }
                default:
                {
                    auto workload = layer->CreateWorkload(workloadFactory);

                    if (!workload)
                    {
                        const char* const layerName =
                            layer->GetNameStr().length() != 0 ? layer->GetName() : "<Unnamed>";
                        throw InvalidArgumentException(
                            fmt::format("No workload created for layer (name: '{0}' type: '{1}') (compute '{2}')",
                                        layerName, static_cast<int>(layer->GetType()), layer->GetBackendId().Get()
                            ));
                    }

                    if (timelineUtils)
                    {
                        // Add workload to the post-optimisation network structure
                        AddWorkloadStructure(timelineUtils, workload, *layer);
                    }

                    // For async networks ConstantWorkloads are managed exclusively by LoadedNetwork
                    // and are separated out from the other workloads
                    if ((networkProperties.m_AsyncEnabled || useExternalMemoryManager) &&
                        layer->GetType() == LayerType::Constant)
                    {
                        m_ConstantTensorHandles[layer->GetGuid()] =
                            layer->GetOutputSlot(0).GetOutputHandler().GetData();
                        m_ConstantWorkloads[layer->GetGuid()] = std::move(workload);
                    }
                    else
                    {
                        m_WorkloadQueue.push_back(std::move(workload));

                        if (layer->GetType() == LayerType::Constant)
                        {
                            // Place the Constant Workloads into a queue so that they can be executed first
                            ConstWorkloads.push_back(m_WorkloadQueue.back().get());
                        }
                    }
                    // Release the constant data in the layer.
                    layer->ReleaseConstantData();
                    break;
                }
            }
        }
    }

    // Gather information about workloads for inputs & outputs
    if (!networkProperties.m_AsyncEnabled && m_WorkloadQueue.size() != 0)
    {
        const int noOfInputs = armnn::numeric_cast<int>(order.GetNumInputs());

        // Get indices of all workloads connected to each input and
        // check if they support tensor handle replacement
        for (const BindableLayer* layer: order.GetInputLayers())
        {
            const auto bindingId = layer->GetBindingId();

            bool supportsReplacement = true;

            for (const auto inputSlot: layer->GetOutputSlot(0).GetConnections())
            {
                auto workloadIndex = std::distance(order.begin(), order.GetPosInGraph(inputSlot->GetOwningLayer()));
                workloadIndex -= noOfInputs;

                m_InputWorkloadSlotPairs[bindingId].emplace_back(WorkloadIndices{
                    armnn::numeric_cast<unsigned int>(workloadIndex), inputSlot->GetSlotIndex()});

                auto workload = m_WorkloadQueue[m_InputWorkloadSlotPairs[bindingId].back().m_WorkloadIndex].get();
                supportsReplacement &= workload->SupportsTensorHandleReplacement();
            }

            ITensorHandleFactory::FactoryId factoryId = layer->GetOutputSlot(0).GetTensorHandleFactoryId();
            // Get matching import factory Id
            ITensorHandleFactory::FactoryId importFactoryId =
                m_TensorHandleFactoryRegistry.GetMatchingImportFactoryId(factoryId);

            ITensorHandleFactory* importFactory = m_TensorHandleFactoryRegistry.GetFactory(importFactoryId);

            if (supportsReplacement && importFactory)
            {
                m_PreImportedInputHandles.emplace_back(
                    bindingId, importFactory->CreateTensorHandle(layer->GetOutputSlot(0).GetTensorInfo(), false));
            }
            else
            {
                m_PreImportedInputHandles.emplace_back(bindingId, nullptr);
            }
        }

        // Get indices of all workloads connected to each output and
        // check if they support tensor handle replacement
        for (const BindableLayer* layer: order.GetOutputLayers())
        {
            const auto bindingId = layer->GetBindingId();

            const auto outputSlot = layer->GetInputSlot(0).GetConnectedOutputSlot();
            auto& indices = m_OutputWorkloadSlotPairs[bindingId];

            auto workloadIndex = std::distance(order.begin(), order.GetPosInGraph(outputSlot->GetOwningLayer()));
            workloadIndex -= noOfInputs;

            indices.m_OutputSlotIndices = WorkloadIndices{numeric_cast<unsigned int>(workloadIndex),
                                                          outputSlot->CalculateIndexOnOwner()};

            bool supportsReplacement = true;
            auto outputWorkload = m_WorkloadQueue[indices.m_OutputSlotIndices.m_WorkloadIndex].get();
            supportsReplacement &= outputWorkload->SupportsTensorHandleReplacement();

            for (auto& inputSlot: outputSlot->GetConnections())
            {
                if (inputSlot->GetOwningLayer().GetType() != LayerType::Output)
                {
                    auto inWorkloadIndex = std::distance(order.begin(),
                                                         order.GetPosInGraph(inputSlot->GetOwningLayer()));
                    inWorkloadIndex -= noOfInputs;
                    indices.m_InputSlotIndices.emplace_back(
                        WorkloadIndices{numeric_cast<unsigned int>(inWorkloadIndex), inputSlot->GetSlotIndex()});
                    auto inputWorkload = m_WorkloadQueue[indices.m_InputSlotIndices.back().m_WorkloadIndex].get();
                    supportsReplacement &= inputWorkload->SupportsTensorHandleReplacement();
                }
            }

            ITensorHandleFactory::FactoryId factoryId = outputSlot->GetTensorHandleFactoryId();
            // Get matching import factory Id
            ITensorHandleFactory::FactoryId importFactoryId =
                m_TensorHandleFactoryRegistry.GetMatchingImportFactoryId(factoryId);
            ITensorHandleFactory* importFactory = m_TensorHandleFactoryRegistry.GetFactory(importFactoryId);

            if (supportsReplacement && importFactory)
            {
                m_PreImportedOutputHandles.emplace_back(
                    bindingId, importFactory->CreateTensorHandle(outputSlot->GetTensorInfo(), false));
            }
            else
            {
                m_PreImportedOutputHandles.emplace_back(bindingId, nullptr);
            }
        }
    }

    for (auto&& workloadFactory : m_WorkloadFactories)
    {
        workloadFactory.second->AfterWorkloadsCreated();
    }

    if (timelineUtils)
    {
        // Commit to send the post-optimisation network structure
        timelineUtils->Commit();
    }

    if (useExternalMemoryManager)
    {
        if (networkProperties.m_AsyncEnabled)
        {
            CreateMemoryProfileAsync();
        }
        else
        {
            CreateMemoryProfile();
        }

        auto backendStrategyMap = BackendRegistryInstance().GetMemoryOptimizerStrategies();
        for (auto& backendMemoryProfile : m_MemBlockMap)
        {
            const BackendId& backendId = backendMemoryProfile.first;
            if (backendStrategyMap.find(backendId) != backendStrategyMap.end())
            {
                m_MemBinMap[backendId] = backendStrategyMap[backendId]->Optimize(backendMemoryProfile.second);
            }
            else
            {
                m_MemBinMap[backendId] = m_ConstantStrategy->Optimize(backendMemoryProfile.second);
            }
        }

        if (!networkProperties.m_AsyncEnabled)
        {
            m_ExternalMemoryManager = CreateExternalMemoryManger(m_TensorMemory);

            // Sort m_TensorMemory, so its order matches m_Tensorhandles
            std::sort(m_TensorMemory.begin(), m_TensorMemory.end(),
                      [](const std::pair<std::shared_ptr<TensorMemory>, MemorySource>& lhs,
                         const std::pair<std::shared_ptr<TensorMemory>, MemorySource>& rhs)
                      {
                          return lhs.first->m_OutputSlotId < rhs.first->m_OutputSlotId;
                      });
        }
    }

    // Now that the intermediate tensor memory has been set up,
    // do any post allocation configuration for each workload.
    if (!networkProperties.m_AsyncEnabled)
    {
        if (useInternalMemoryManager)
        {
            // Set up memory.
            m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().AllocateDynamicBuffers();
        }

        for (auto& workload : m_WorkloadQueue)
        {
            workload->PostAllocationConfigure();
        }
    }

    if (useExternalMemoryManager)
    {
        if (!networkProperties.m_AsyncEnabled)
        {
            AllocateAndExecuteConstantWorkloads();
        }
        else
        {
            AllocateAndExecuteConstantWorkloadsAsync();
        }
    }
    // If synchronous, execute all constant layer workloads
    if (!networkProperties.m_AsyncEnabled)
    {
        for (auto workload: ConstWorkloads)
        {
            workload->Execute();
        }
    }
}

void LoadedNetwork::AllocateAndExecuteConstantWorkloads()
{
    ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "LoadNetwork_AllocateAndExecuteConstants");
    for (auto& pair : m_ConstantWorkloads)
    {
        auto tensorHandle = m_ConstantTensorHandles[pair.first];
        tensorHandle->Allocate();
        pair.second->Execute();
    }
}

void LoadedNetwork::AllocateAndExecuteConstantWorkloadsAsync()
{
    ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "LoadNetwork_AllocateAndExecuteConstants");
    Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
    for (auto&& layer : order)
    {
        if (layer->GetType() == LayerType::Constant)
        {
            const auto& outSlot = layer->GetOutputSlots()[0];
            const auto factoryId = outSlot.GetTensorHandleFactoryId();
            ARMNN_ASSERT(factoryId != ITensorHandleFactory::LegacyFactoryId);
            auto& workloadFactory = GetWorkloadFactory(*layer);

            layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory);
            ITensorHandle* tensorHandle = outSlot.GetOutputHandler().GetData();

            m_ConstantTensorHandles[layer->GetGuid()] = tensorHandle;
            tensorHandle->Allocate();

            auto& backend = m_Backends.at(layer->GetBackendId());

            WorkingMemDescriptor memDesc;
            memDesc.m_Outputs.push_back(tensorHandle);

            ExecutionData executionData = backend->CreateExecutionData(memDesc);
            m_ConstantWorkloads[layer->GetGuid()]->ExecuteAsync(executionData);
        }
    }
}

void LoadedNetwork::SendNetworkStructure(arm::pipe::IProfilingService& profilingService)
{
    ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "LoadNetwork_SendNetworkStructure");
    Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
    ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();

    std::unique_ptr<TimelineUtilityMethods> timelineUtils =
        TimelineUtilityMethods::GetTimelineUtils(profilingService);

    timelineUtils->CreateTypedEntity(networkGuid, LabelsAndEventClasses::NETWORK_GUID);

    for (auto&& layer : order)
    {
        // Add layer to the post-optimisation network structure
        AddLayerStructure(timelineUtils, *layer, networkGuid);
        switch (layer->GetType())
        {
            case LayerType::Input:
            case LayerType::Output:
            {
                // Inputs and outputs are treated in a special way - see EnqueueInput() and EnqueueOutput().
                break;
            }
            default:
            {
                for (auto& workload : m_WorkloadQueue)
                {
                    // Add workload to the post-optimisation network structure
                    AddWorkloadStructure(timelineUtils, workload, *layer);
                }
                break;
            }
        }
    }
    // Commit to send the post-optimisation network structure
    timelineUtils->Commit();
}

ProfilingGuid LoadedNetwork::GetNetworkGuid()
{
    return m_OptimizedNetwork->GetGuid();
}

TensorInfo LoadedNetwork::GetInputTensorInfo(LayerBindingId layerId) const
{
    for (auto&& inputLayer : m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetInputLayers())
    {
        ARMNN_ASSERT_MSG(inputLayer->GetNumOutputSlots() == 1, "Input layer should have exactly 1 output slot");
        if (inputLayer->GetBindingId() == layerId)
        {
            return inputLayer->GetOutputSlot(0).GetTensorInfo();
        }
    }

    throw InvalidArgumentException(fmt::format("No input layer is associated with id {}", layerId));
}

TensorInfo LoadedNetwork::GetOutputTensorInfo(LayerBindingId layerId) const
{
    for (auto&& outputLayer : m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetOutputLayers())
    {
        ARMNN_ASSERT_MSG(outputLayer->GetNumInputSlots() == 1, "Output layer should have exactly 1 input slot");
        ARMNN_ASSERT_MSG(outputLayer->GetInputSlot(0).GetConnection(), "Input slot on Output layer must be connected");
        if (outputLayer->GetBindingId() == layerId)
        {
            return outputLayer->GetInputSlot(0).GetConnection()->GetTensorInfo();
        }
    }

    throw InvalidArgumentException(fmt::format("No output layer is associated with id {}", layerId));
}

const IWorkloadFactory& LoadedNetwork::GetWorkloadFactory(const Layer& layer) const
{
    const IWorkloadFactory* workloadFactory = nullptr;

    auto it = m_WorkloadFactories.find(layer.GetBackendId());
    if (it == m_WorkloadFactories.end())
    {
        throw RuntimeException(fmt::format("No workload factory for {0} to be used for layer: {1}",
                                           layer.GetBackendId().Get(),
                                           layer.GetNameStr()),
                               CHECK_LOCATION());
    }

    workloadFactory = it->second.get();

    ARMNN_ASSERT_MSG(workloadFactory, "No workload factory");

    return *workloadFactory;
}

namespace {

// Non-copyable class owning accelerator-specific tensor data.
class TensorPin
{
public:
    TensorPin(std::unique_ptr<ITensorHandle> handle, const TensorInfo& info, LayerBindingId id)
        : m_TensorHandle(std::move(handle))
        , m_TensorInfo(info)
        , m_Id(id)
    {
    }

    ITensorHandle* GetTensorHandle() const { return m_TensorHandle.get(); }
    const TensorInfo& GetTensorInfo() const { return m_TensorInfo; }
    LayerBindingId GetBindingId() const { return m_Id; }

private:
    std::unique_ptr<ITensorHandle> m_TensorHandle;
    TensorInfo m_TensorInfo;
    LayerBindingId m_Id;
};

static const TensorPin& GetTensorPin(LayerBindingId id,
                                     const std::vector<TensorPin>& pins,
                                     char const* bindingPointDesc)
{
    auto it = std::find_if(pins.begin(), pins.end(),
                           [id](const TensorPin& pin)
                           {
                               return pin.GetBindingId() == id;
                           });

    if (it != pins.end())
    {
        return *it;
    }
    else
    {
        throw InvalidArgumentException(fmt::format("No tensor supplied for {0} {1}", bindingPointDesc, id));
    }
}

// Stores data that needs to be kept accessible for the entire execution of a workload.
class WorkloadData
{
public:
    WorkloadData(const InputTensors& inputTensors, const OutputTensors& outputTensors)
    {
        m_InputTensorPins.reserve(inputTensors.size());
        m_OutputTensorPins.reserve(outputTensors.size());

        for (auto inputTensorPair : inputTensors)
        {
            auto inputTensor = inputTensorPair.second;

            std::unique_ptr<ITensorHandle> tensorHandle =
                std::make_unique<ConstPassthroughTensorHandle>(inputTensor.GetInfo(), inputTensor.GetMemoryArea());
            LayerBindingId layerId = inputTensorPair.first;

            m_InputTensorPins.emplace_back(std::move(tensorHandle), inputTensor.GetInfo(), layerId);
        }

        for (auto outputTensorPair : outputTensors)
        {
            auto outputTensor = outputTensorPair.second;

            std::unique_ptr<ITensorHandle> tensorHandle =
                std::make_unique<PassthroughTensorHandle>(outputTensor.GetInfo(), outputTensor.GetMemoryArea());
            LayerBindingId layerId = outputTensorPair.first;

            m_OutputTensorPins.emplace_back(std::move(tensorHandle), outputTensor.GetInfo(), layerId);
        }
    }

    const TensorPin& GetInputTensorPin(LayerBindingId id) const
    {
        return GetTensorPin(id, m_InputTensorPins, "input");
    }

    const TensorPin& GetOutputTensorPin(LayerBindingId id) const
    {
        return GetTensorPin(id, m_OutputTensorPins, "output");
    }

private:

    std::vector<TensorPin> m_InputTensorPins;
    std::vector<TensorPin> m_OutputTensorPins;
};

} // anonymous namespace

Status LoadedNetwork::EnqueueWorkload(const InputTensors& inputTensors,
                                      const OutputTensors& outputTensors,
                                      std::vector<ImportedInputId> preImportedInputIds,
                                      std::vector<ImportedOutputId> preImportedOutputIds)
{
    const Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();

    // Walk graph to determine the order of execution.
    if (graph.GetNumLayers() < 2)
    {
        ARMNN_LOG(warning) << "IRuntime::EnqueueWorkload()::Less than two nodes in graph";
        return Status::Failure;
    }

    // Data that must be kept alive for the entire execution of the workload.
    WorkloadData workloadData(inputTensors, outputTensors);

    // Input tensors can be provided as parameters or pre-imported. Either way the number of
    // tensors should match the number of inputs.
    if (graph.GetNumInputs() != (inputTensors.size() + preImportedInputIds.size()))
    {
        throw InvalidArgumentException("Number of inputs provided does not match network.");
    }

    // For each input to the network, call EnqueueInput with the data passed by the user.
    {
        m_InputQueue.clear();
        m_InputQueue.reserve(graph.GetNumInputs());

        unsigned int inputIndex = 0;
        unsigned int importedInputIdIndex = 0;
        std::sort(preImportedInputIds.begin(), preImportedInputIds.end());
        for (const BindableLayer* inputLayer : graph.GetInputLayers())
        {
            if (importedInputIdIndex < preImportedInputIds.size() &&
                inputIndex == preImportedInputIds[importedInputIdIndex])
            {
                // Only replace tensorhandles if they have not already been replaced
                if (!m_IsInputImported[inputIndex])
                {
                    auto outputTensorHandle = m_PreImportedInputHandles[inputIndex].m_TensorHandle.get();

                    for (const auto& workloadInfo: m_InputWorkloadSlotPairs[inputLayer->GetBindingId()])
                    {
                        auto workload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
                        workload->ReplaceInputTensorHandle(outputTensorHandle, workloadInfo.m_SlotIndex);
                    }
                    m_IsInputImported[inputIndex] = true;
                }
                importedInputIdIndex++;
            }
            else
            {
                if (m_IsInputImported[inputIndex])
                {
                    OutputHandler& handler = const_cast<OutputHandler&>(inputLayer->GetOutputHandler(0));

                    for (const auto& workloadInfo: m_InputWorkloadSlotPairs[inputLayer->GetBindingId()])
                    {
                        auto workload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
                        workload->ReplaceInputTensorHandle(handler.GetData(), workloadInfo.m_SlotIndex);
                    }

                    m_IsInputImported[inputIndex] = false;
                }

                // InputTensorHandle is not imported yet, process to enqueue input
                const TensorPin& pin = workloadData.GetInputTensorPin(inputLayer->GetBindingId());
                EnqueueInput(*inputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
            }
            inputIndex++;
        }
    }
    // For each output of the network, call EnqueueOutput with the data passed by the user.
    {
        m_OutputQueue.clear();
        m_OutputQueue.reserve(graph.GetNumOutputs());

        if (preImportedOutputIds.size() > graph.GetNumOutputs())
        {
            throw InvalidArgumentException("Invalid number of preImportedOutputIds");
        }

        unsigned int outputIndex = 0;
        unsigned int importedOutputIdIndex = 0;
        std::sort(preImportedOutputIds.begin(), preImportedOutputIds.end());
        for (const BindableLayer* outputLayer : graph.GetOutputLayers())
        {
            if (importedOutputIdIndex < preImportedOutputIds.size() &&
                outputIndex == preImportedOutputIds[importedOutputIdIndex])
            {
                // Only replace tensorhandles if they have not already been replaced
                ITensorHandle* inputTensorHandle = m_PreImportedOutputHandles[outputIndex].m_TensorHandle.get();

                if (!m_IsOutputImported[outputIndex])
                {
                    const auto bindingId = outputLayer->GetBindingId();
                    const auto& indices = m_OutputWorkloadSlotPairs[bindingId];

                    auto outputWorkload = m_WorkloadQueue[indices.m_OutputSlotIndices.m_WorkloadIndex].get();

                    outputWorkload->ReplaceOutputTensorHandle(inputTensorHandle,
                                                              indices.m_OutputSlotIndices.m_SlotIndex);

                    for (const auto& workloadInfo: indices.m_InputSlotIndices)
                    {
                        auto inputWorkload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
                        inputWorkload->ReplaceInputTensorHandle(inputTensorHandle, workloadInfo.m_SlotIndex);
                    }
                    m_IsOutputImported[outputIndex] = true;
                }

                ARMNN_ASSERT_MSG(inputTensorHandle != nullptr, "Data should have been allocated.");
                MemSyncQueueDescriptor syncDesc;
                syncDesc.m_Inputs.push_back(inputTensorHandle);
                WorkloadInfo info;
                info.m_InputTensorInfos.push_back(
                    outputLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo());
                auto syncWorkload = std::make_unique<SyncMemGenericWorkload>(syncDesc, info);
                ARMNN_ASSERT_MSG(syncWorkload, "No sync workload created");
                m_OutputQueue.push_back(std::move(syncWorkload));
                importedOutputIdIndex++;
            }
            else
            {
                if (m_IsOutputImported[outputIndex])
                {
                    const auto bindingId = outputLayer->GetBindingId();
                    const auto& indices = m_OutputWorkloadSlotPairs[bindingId];

                    auto outputWorkload = m_WorkloadQueue[indices.m_OutputSlotIndices.m_WorkloadIndex].get();
                    const OutputHandler& outputHandler =
                        outputLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetOutputHandler();

                    outputWorkload->ReplaceOutputTensorHandle(
                        outputHandler.GetData(), indices.m_OutputSlotIndices.m_SlotIndex);

                    for (const auto& workloadInfo: indices.m_InputSlotIndices)
                    {
                        auto inputWorkload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
                        inputWorkload->ReplaceInputTensorHandle(outputHandler.GetData(), workloadInfo.m_SlotIndex);
                    }
                    m_IsOutputImported[outputIndex] = false;
                }

                const TensorPin& pin = workloadData.GetOutputTensorPin(outputLayer->GetBindingId());
                // OutputTensorHandle is not imported yet, process to enqueue Output
                EnqueueOutput(*outputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
            }
            outputIndex++;
        }
    }

    std::unique_ptr<TimelineUtilityMethods> timelineUtils =
        TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
    ProfilingGuid inferenceGuid = m_ProfilingService->GetNextGuid();
    if (timelineUtils)
    {
        // Add inference timeline trace if profiling is enabled.
        ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
        timelineUtils->CreateTypedEntity(inferenceGuid, LabelsAndEventClasses::INFERENCE_GUID);
        timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
                                          networkGuid,
                                          inferenceGuid,
                                          LabelsAndEventClasses::EXECUTION_OF_GUID);
        timelineUtils->RecordEvent(inferenceGuid, LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
    }

    bool executionSucceeded = true;

    {
        if (m_ProfilingService->IsProfilingEnabled())
        {
            m_ProfilingService->IncrementCounterValue(INFERENCES_RUN);
        }
        ARMNN_SCOPED_HEAP_PROFILING("Executing");
        executionSucceeded = Execute(timelineUtils, inferenceGuid);
    }

    if (timelineUtils)
    {
        // Add end of life of the inference timeline if profiling is enabled.
        timelineUtils->RecordEvent(inferenceGuid, LabelsAndEventClasses::ARMNN_PROFILING_EOL_EVENT_CLASS);
        timelineUtils->Commit();
    }

    return executionSucceeded ? Status::Success : Status::Failure;
}
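
// Illustrative synchronous call pattern (identifiers and buffer setup are assumptions, not part of this file):
//
//     TensorInfo inputInfo = network->GetInputTensorInfo(0);
//     inputInfo.SetConstant(true); // ConstTensor expects a constant TensorInfo in recent ArmNN releases
//     InputTensors inputs{ { 0, ConstTensor(inputInfo, inputData.data()) } };
//     OutputTensors outputs{ { 0, Tensor(network->GetOutputTensorInfo(0), outputData.data()) } };
//     Status status = network->EnqueueWorkload(inputs, outputs, {}, {});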

void LoadedNetwork::EnqueueInput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo)
{
    if (layer.GetType() != LayerType::Input)
    {
        throw InvalidArgumentException("EnqueueInput: given layer not an InputLayer");
    }

    if (tensorHandle == nullptr)
    {
        throw InvalidArgumentException("EnqueueInput: tensorHandle must not be NULL");
    }

    InputQueueDescriptor inputQueueDescriptor;
    WorkloadInfo info;

    inputQueueDescriptor.m_Inputs.push_back(tensorHandle);
    info.m_InputTensorInfos.push_back(tensorInfo);

    ARMNN_ASSERT_MSG(layer.GetNumOutputSlots() == 1, "Can only handle Input Layer with one output");
    const OutputHandler& handler = layer.GetOutputHandler();
    const TensorInfo& outputTensorInfo = handler.GetTensorInfo();
    ITensorHandle* outputTensorHandle = handler.GetData();
    ARMNN_ASSERT_MSG(outputTensorHandle != nullptr,
                     "Data should have been allocated.");
    inputQueueDescriptor.m_Outputs.push_back(outputTensorHandle);
    info.m_OutputTensorInfos.push_back(outputTensorInfo);

    MemorySourceFlags importFlags = outputTensorHandle->GetImportFlags();
    bool needMemCopy = true;
    if (m_NetworkProperties.m_ImportEnabled) // Try import the input tensor
    {
        if (CheckFlag(importFlags, m_NetworkProperties.m_InputSource))
        {
            needMemCopy = false;
            // This assumes a CPU Tensor handle
            void* mem = tensorHandle->Map(false);
            if (outputTensorHandle->Import(mem, m_NetworkProperties.m_InputSource))
            {
                tensorHandle->Unmap();
                return; // No need for a workload since the import has been done.
            }
            tensorHandle->Unmap();
            throw MemoryImportException("EnqueueInput: Memory Import failed");
        }
    }
    if (needMemCopy)
    {
        // Create a mem copy workload for input since we did not import
        std::unique_ptr<IWorkload> inputWorkload = std::make_unique<CopyMemGenericWorkload>(inputQueueDescriptor, info);

        ARMNN_ASSERT_MSG(inputWorkload, "No input workload created");

        std::unique_ptr<TimelineUtilityMethods> timelineUtils =
            TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
        if (timelineUtils)
        {
            // Add Input Workload to the post-optimisation network structure
            AddWorkloadStructure(timelineUtils, inputWorkload, layer);
            timelineUtils->Commit();
        }

        m_InputQueue.push_back(std::move(inputWorkload));
    }
}

void LoadedNetwork::EnqueueOutput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo)
{
    if (layer.GetType() != LayerType::Output)
    {
        throw InvalidArgumentException("EnqueueOutput: given layer not an OutputLayer");
    }

    if (tensorHandle == nullptr)
    {
        throw InvalidArgumentException("EnqueueOutput: tensorHandle must not be NULL");
    }

    OutputQueueDescriptor outputQueueDescriptor;
    WorkloadInfo info;

    outputQueueDescriptor.m_Outputs.push_back(tensorHandle);
    info.m_OutputTensorInfos.push_back(tensorInfo);

    ARMNN_ASSERT_MSG(layer.GetNumInputSlots() == 1, "Output Layer should have exactly one input.");

    // Gets the output handler from the previous node.
    const OutputHandler& outputHandler = layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOutputHandler();

    const TensorInfo& inputTensorInfo = outputHandler.GetTensorInfo();
    ITensorHandle* inputTensorHandle = outputHandler.GetData();
    ARMNN_ASSERT_MSG(inputTensorHandle != nullptr, "Data should have been allocated.");

    // Try import the output tensor.
    // Note: We can only import the output pointer if all of the following hold true:
    // a) The imported pointer is aligned sufficiently
    // b) The tensor has zero padding
    // c) There is only one connection to the OutputSlot and it is to an OutputLayer.
    // d) The output pointer is allocated via malloc. (Other types will be supported in a later release)
    // e) m_IsExportEnabled must be set to true
    bool needMemCopy = true;
    if (m_NetworkProperties.m_ExportEnabled &&
        (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetNumConnections() == 1))
    {
        if (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOwningLayer().GetType() != LayerType::Input)
        {
            MemorySourceFlags importFlags = inputTensorHandle->GetImportFlags();
            if (CheckFlag(importFlags, m_NetworkProperties.m_OutputSource))
            {
                needMemCopy = false;
                void* mem = tensorHandle->Map(false);
                bool importOk = inputTensorHandle->Import(mem, m_NetworkProperties.m_OutputSource);
                tensorHandle->Unmap();

                if (importOk)
                {
                    // Insert synchronization workload
                    MemSyncQueueDescriptor syncDesc;
                    syncDesc.m_Inputs.push_back(inputTensorHandle);
                    info.m_InputTensorInfos.push_back(inputTensorInfo);
                    auto syncWorkload = std::make_unique<SyncMemGenericWorkload>(syncDesc, info);
                    ARMNN_ASSERT_MSG(syncWorkload, "No sync workload created");
                    m_OutputQueue.push_back(std::move(syncWorkload));
                }
                else
                {
                    throw MemoryExportException("EnqueueOutput: Memory Export failed");
                }
            }
        }
    }
    if (needMemCopy)
    {
        // If we got here then we didn't export the memory, so add an output workload which performs a memcopy.
        outputQueueDescriptor.m_Inputs.push_back(inputTensorHandle);
        info.m_InputTensorInfos.push_back(inputTensorInfo);

        std::unique_ptr<IWorkload> outputWorkload =
            std::make_unique<CopyMemGenericWorkload>(outputQueueDescriptor, info);
        ARMNN_ASSERT_MSG(outputWorkload, "No output workload created");

        std::unique_ptr<TimelineUtilityMethods> timelineUtils =
            TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
        if (timelineUtils)
        {
            // Add Output Workload to the post-optimisation network structure
            AddWorkloadStructure(timelineUtils, outputWorkload, layer);
            timelineUtils->Commit();
        }

        m_OutputQueue.push_back(std::move(outputWorkload));
    }
}

void LoadedNetwork::AllocateWorkingMemory(
#if !defined(ARMNN_DISABLE_THREADS)
    std::lock_guard<std::mutex>& lock
#endif
    )
{
    ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Working Memory Allocation");

#if !defined(ARMNN_DISABLE_THREADS)
    // This unused parameter makes sure we can only call this function with a valid lock held.
    IgnoreUnused(lock);
#endif
    if (m_IsWorkingMemAllocated)
    {
        return;
    }

    if (m_ExternalMemoryManager)
    {
        m_ExternalMemoryManager->Allocate();

        for (unsigned int i = 0; i < m_TensorMemory.size(); ++i)
        {
            m_Tensorhandles[i]->Import(m_TensorMemory[i].first->m_Data, m_TensorMemory[i].second);
        }
    }

    for (auto&& memoryManager : m_BackendMemoryMangers)
    {
        if (memoryManager)
        {
            memoryManager->Acquire();
        }
    }
    m_TensorHandleFactoryRegistry.AquireMemory();
    m_IsWorkingMemAllocated = true;
}

void LoadedNetwork::FreeWorkingMemory()
{
#if !defined(ARMNN_DISABLE_THREADS)
    std::lock_guard<std::mutex> lockGuard(m_WorkingMemMutex);
#endif

    if (!m_IsWorkingMemAllocated)
    {
        return;
    }

    if (m_ExternalMemoryManager)
    {
        m_ExternalMemoryManager->Deallocate();
    }

    // Informs the memory managers to release memory in their respective memory groups.
    for (auto&& memoryManager : m_BackendMemoryMangers)
    {
        if (memoryManager)
        {
            memoryManager->Release();
        }
    }
    m_TensorHandleFactoryRegistry.ReleaseMemory();
    m_IsWorkingMemAllocated = false;
}

bool LoadedNetwork::Execute(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
                            ProfilingGuid inferenceGuid)
{
    bool success = true;

    auto Fail = [&](const std::exception& error)
    {
        ARMNN_LOG(error) << "An error occurred attempting to execute a workload: " << error.what();
        success = false;
    };

    try
    {
#if !defined(ARMNN_DISABLE_THREADS)
        std::lock_guard<std::mutex> lockGuard(m_WorkingMemMutex);
        AllocateWorkingMemory(lockGuard);
#else
        AllocateWorkingMemory();
#endif

        ProfilingDynamicGuid workloadInferenceID(0);
        auto ExecuteQueue = [&timelineUtils, &workloadInferenceID, &inferenceGuid](WorkloadQueue& queue)
        {
            for (auto& workload : queue)
            {
                if (timelineUtils)
                {
                    workloadInferenceID = timelineUtils->RecordWorkloadInferenceAndStartOfLifeEvent(workload->GetGuid(),
                                                                                                    inferenceGuid);
                }
                workload->Execute();
                if (timelineUtils)
                {
                    timelineUtils->RecordEndOfLifeEvent(workloadInferenceID);
                }
            }
        };

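        // Run the three queues in order: input copy/import workloads first, then the network's workloads in
        // topological order, and finally the output copy/sync workloads.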
        ExecuteQueue(m_InputQueue);
        ExecuteQueue(m_WorkloadQueue);
        ExecuteQueue(m_OutputQueue);
    }
    catch (const RuntimeException& error)
    {
        Fail(error);
    }
    catch (const std::runtime_error& error)
    {
        Fail(error);
    }

    return success;
}

void LoadedNetwork::EnqueueInput(const ConstTensor& inputTensor, ITensorHandle* inputTensorHandle)
{
    if (m_NetworkProperties.m_ImportEnabled) // Try import the input tensor
    {
        MemorySourceFlags importFlags = inputTensorHandle->GetImportFlags();
        if (CheckFlag(importFlags, m_NetworkProperties.m_InputSource))
        {
            std::unique_ptr<ITensorHandle> tensorHandle =
                std::make_unique<ConstPassthroughTensorHandle>(inputTensor.GetInfo(),
                                                               inputTensor.GetMemoryArea());
            void* mem = tensorHandle->Map(false);

            if (inputTensorHandle->Import(mem, m_NetworkProperties.m_InputSource))
            {
                tensorHandle->Unmap();
                return;
            }
            tensorHandle->Unmap();
            throw MemoryImportException("EnqueueInput: Memory Import failed");
        }
        else
        {
            throw MemoryImportException("EnqueueInput: Memory Import failed, backend does not support Import");
        }
    }
    else
    {
        std::unique_ptr<ITensorHandle> tensorHandle =
            std::make_unique<ConstPassthroughTensorHandle>(inputTensor.GetInfo(), inputTensor.GetMemoryArea());

        auto copyFunc = [](void* dst, const void* src, size_t size)
        {
            memcpy(dst, src, size);
        };

        CopyTensorContentsGeneric(tensorHandle.get(), inputTensorHandle, copyFunc);
    }
}

// Note: We can only import the output pointer if all of the following hold true:
// a) The imported pointer is aligned sufficiently
// b) The tensor has zero padding
// c) There is only one connection to the OutputSlot and it is to an OutputLayer.
// d) The output pointer is allocated via malloc. (Other types will be supported in a later release)
// e) m_IsExportEnabled must be set to true
void LoadedNetwork::ImportOutputTensor(const Tensor& outputTensor, ITensorHandle* outputTensorHandle)
{
    ARMNN_ASSERT_MSG(outputTensorHandle != nullptr, "Data should have been allocated.");
    MemorySourceFlags importFlags = outputTensorHandle->GetImportFlags();
    if (CheckFlag(importFlags, m_NetworkProperties.m_OutputSource))
    {
        std::unique_ptr<ITensorHandle> tensorHandle =
            std::make_unique<PassthroughTensorHandle>(outputTensor.GetInfo(),
                                                      outputTensor.GetMemoryArea());

        void* mem = tensorHandle->Map(false);
        bool importOk = outputTensorHandle->Import(mem, m_NetworkProperties.m_OutputSource);
        tensorHandle->Unmap();

        if (!importOk)
        {
            throw MemoryExportException("ImportOutputTensor: Memory Export failed");
        }
    }
    else
    {
        throw MemoryExportException("ImportOutputTensor: Memory Export failed, attempting to export Input Layer");
    }
}

void CopyToOutputTensor(const Tensor& outputTensor, ITensorHandle* outputTensorHandle)
{
    auto copyFunc = [](void* dst, const void* src, size_t size)
    {
        memcpy(dst, src, size);
    };

    std::unique_ptr<ITensorHandle> tensorHandle =
        std::make_unique<PassthroughTensorHandle>(outputTensor.GetInfo(),
                                                  outputTensor.GetMemoryArea());

    CopyTensorContentsGeneric(outputTensorHandle, tensorHandle.get(), copyFunc);
}

const armnn::ConstTensor GetInputTensor(const LayerBindingId layerId, const InputTensors& inputTensors)
{
    for (auto inputTensorPair : inputTensors)
    {
        LayerBindingId id = inputTensorPair.first;
        if (id == layerId)
        {
            return inputTensorPair.second;
        }
    }
    throw InvalidArgumentException("Input does not exist.");
}

const armnn::Tensor GetOutputTensor(const LayerBindingId layerId, const OutputTensors& outputTensors)
{
    for (auto outputTensorPair : outputTensors)
    {
        LayerBindingId id = outputTensorPair.first;
        if (id == layerId)
        {
            return outputTensorPair.second;
        }
    }
    throw InvalidArgumentException("Output does not exist.");
}

std::vector<ImportedInputId> LoadedNetwork::ImportInputs(const InputTensors& inputTensors,
                                                         MemorySource forceImportMemorySource)
{
    if (!m_NetworkProperties.m_AsyncEnabled)
    {
        // Cannot import if import is not enabled and forceImportMemorySource is undefined
        if (forceImportMemorySource == MemorySource::Undefined)
        {
            throw MemoryImportException("ImportInputs: Memory Import failed, NetworkProperties.m_ImportEnabled");
        }
        // The number of pre-imported tensors should not exceed the number of inputs.
        if (inputTensors.size() > m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetNumInputs())
        {
            throw MemoryImportException("ImportInputs: The number of tensors provided exceeds the number of inputs.");
        }

        std::vector<ImportedInputId> importedInputs;
        Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
        unsigned int inputIndex = 0;
        for (const BindableLayer* inputLayer : graph.GetInputLayers())
        {
            auto outputTensorHandle = m_PreImportedInputHandles[inputIndex].m_TensorHandle.get();

            if (!outputTensorHandle)
            {
                inputIndex++;
                continue;
            }

            auto layerBindingId = inputLayer->GetBindingId();
            auto it = std::find_if(inputTensors.begin(), inputTensors.end(), [=](const auto& inputTensor)
            {
                return inputTensor.first == layerBindingId;
            });

            if (it == inputTensors.end())
            {
                inputIndex++;
                continue;
            }

            const auto& inputTensor = *it;
            std::unique_ptr<ITensorHandle> passThroughTensorHandle =
                std::make_unique<ConstPassthroughTensorHandle>(inputTensor.second.GetInfo(),
                                                               inputTensor.second.GetMemoryArea());

            try
            {
                if (outputTensorHandle->CanBeImported(passThroughTensorHandle->Map(), forceImportMemorySource)
                    && (outputTensorHandle->Import(passThroughTensorHandle->Map(), forceImportMemorySource)))
                {
                    importedInputs.push_back(inputIndex);
                }
                passThroughTensorHandle->Unmap();
            }
            catch (const MemoryImportException& exception)
            {
                ARMNN_LOG(error) << "An error occurred attempting to import input_"
                                 << inputIndex << " : " << exception.what();
                passThroughTensorHandle->Unmap();
            }
            inputIndex++;
        }

        return importedInputs;
    }
    else
    {
        // Import when the import of network properties is enabled
        std::vector<ImportedInputId> importedInputs;
        Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();

        for (auto inputTensor : inputTensors)
        {
            auto layerBindingId = inputTensor.first;
            auto it = std::find_if(graph.GetInputLayers().begin(), graph.GetInputLayers().end(), [=](auto* layer)
            {
                return layer->GetBindingId() == layerBindingId;
            });

            if (it == graph.GetInputLayers().end())
            {
                throw MemoryImportException(fmt::format(
                    "ImportInputs: Memory Import failed, unknown LayerBindingId: {}", layerBindingId));
            }

            const Layer* layer = *it;
            if (layer->GetType() != LayerType::Input)
            {
                throw InvalidArgumentException("ImportInputs: given layer not an InputLayer");
            }

            auto& backend = m_Backends.at(layer->GetBackendId());
            if (!HasCapability(BackendOptions::BackendOption{"PreImportIOTensors", true}, backend->GetCapabilities()))
            {
                std::string er = backend->GetId();
                er += " does not have PreImportIOTensors capability";
                throw BackendCapabilityException(er);
            }

            const OutputSlot& outputSlot = layer->GetOutputSlots()[0];

            ITensorHandleFactory::FactoryId factoryId = outputSlot.GetTensorHandleFactoryId();
            const TensorInfo& tensorInfo = outputSlot.GetTensorInfo();

            ITensorHandleFactory* handleFactory = m_TensorHandleFactoryRegistry.GetFactory(factoryId);
            ARMNN_ASSERT(handleFactory);

            ImportedTensorHandlePin importedTensorHandlePin{layerBindingId,
                                                            handleFactory->CreateTensorHandle(tensorInfo, false)};

            ITensorHandle* tensorHandle = importedTensorHandlePin.m_TensorHandle.get();

            if (!CheckFlag(tensorHandle->GetImportFlags(), forceImportMemorySource))
            {
                throw MemoryImportException(
                    fmt::format("ImportInputs: Memory Import failed, backend: "
                                "{} does not support importing from source {}",
                                factoryId, m_NetworkProperties.m_InputSource));
            }

            std::unique_ptr<ITensorHandle> passThroughTensorHandle =
                std::make_unique<ConstPassthroughTensorHandle>(inputTensor.second.GetInfo(),
                                                               inputTensor.second.GetMemoryArea());

            if (tensorHandle->Import(passThroughTensorHandle->Map(), forceImportMemorySource))
            {
                importedInputs.push_back(m_CurImportedInputId++);
                passThroughTensorHandle->Unmap();
            }
            else
            {
                passThroughTensorHandle->Unmap();
                throw MemoryImportException("ImportInputs: Memory Import failed");
            }

            m_PreImportedInputHandles.push_back(std::move(importedTensorHandlePin));
        }
        return importedInputs;
    }
}
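
// Illustrative pre-import flow (identifiers are assumptions): import the user's input buffers once, then pass the
// returned ids to EnqueueWorkload so the per-inference input copy is skipped:
//
//     std::vector<ImportedInputId> importedIds = network->ImportInputs(inputs, MemorySource::Malloc);
//     Status status = network->EnqueueWorkload({}, outputs, importedIds, {});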
1559 
1560 std::vector<ImportedOutputId> LoadedNetwork::ImportOutputs(const OutputTensors& outputTensors,
1561  MemorySource forceImportMemorySource)
1562 {
1563  if (!m_NetworkProperties.m_AsyncEnabled)
1564  {
1565  // Cannot import if import is not enabled and forceImportMemorySource is undefined
1566  if (forceImportMemorySource == MemorySource::Undefined)
1567  {
1568  throw MemoryImportException("ImportOutputs: Memory Import failed, NetworkProperties.m_ImportEnabled");
1569  }
1570  // If forceImportMemorySource is defined, try import if memory is aligned
1571  if (outputTensors.size() != m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetNumOutputs())
1572  {
1573  throw MemoryImportException("ImportOutputs: Force Import failed, incorrect number of tensors");
1574  }
1575  std::vector<ImportedOutputId> importedOutputs;
1576  Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
1577 
1578  unsigned int outputIndex = 0;
1579  for (const BindableLayer* const outputLayer : graph.GetOutputLayers())
1580  {
1581  auto inputTensorHandle = m_PreImportedOutputHandles[outputIndex].m_TensorHandle.get();
1582  if (!inputTensorHandle)
1583  {
1584  outputIndex++;
1585  continue;
1586  }
1587 
1588  auto layerBindingId = outputLayer->GetBindingId();
1589  auto it = std::find_if(outputTensors.begin(), outputTensors.end(), [=] (const auto& outputTensor)
1590  {
1591  return outputTensor.first == layerBindingId;
1592  });
1593 
1594  if (it == outputTensors.end())
1595  {
1596  outputIndex++;
1597  continue;
1598  }
1599 
1600  const auto outputTensor = *it;
1601  try
1602  {
1603  // Check if the output memory can be imported
1604  if (inputTensorHandle->CanBeImported(outputTensor.second.GetMemoryArea(), forceImportMemorySource)
1605  && inputTensorHandle->Import(outputTensor.second.GetMemoryArea(), forceImportMemorySource))
1606  {
1607  importedOutputs.push_back(outputIndex);
1608  }
1609  }
1610  catch(const MemoryImportException& exception)
1611  {
1612  ARMNN_LOG(error) << "An error occurred attempting to import output_"
1613  << outputIndex << " : " << exception.what();
1614  }
1615  outputIndex++;
1616  }
1617  return importedOutputs;
1618  }
1619 
1620  std::vector<ImportedOutputId> importedOutputs;
1621  Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
1622 
1623  for (const auto& outputTensor : outputTensors)
1624  {
1625  auto layerBindingId = outputTensor.first;
1626  auto it = std::find_if(graph.GetOutputLayers().begin(), graph.GetOutputLayers().end(), [=](auto* layer)
1627  {
1628  return layer->GetBindingId() == layerBindingId;
1629  });
1630 
1631  if (it == graph.GetOutputLayers().end())
1632  {
1633  throw MemoryImportException(fmt::format("ImportOutputs: Memory Import failed, unknown LayerBindingId: {}",
1634  layerBindingId));
1635  }
1636 
1637  const Layer* layer = *it;
1638  if (layer->GetType() != LayerType::Output)
1639  {
1640  throw InvalidArgumentException("ImportOutputs: given layer not an OutputLayer");
1641  }
1642 
1643  auto& backend = m_Backends.at(layer->GetBackendId());
1644  if (!HasCapability(BackendOptions::BackendOption{"PreImportIOTensors", true}, backend->GetCapabilities()))
1645  {
1646  std::string er = backend->GetId();
1647  er += " does not have PreImportIOTensors capability";
1648  throw BackendCapabilityException(er);
1649  }
1650 
1651  const InputSlot& inputSlot = layer->GetInputSlots()[0];
1652  ITensorHandleFactory::FactoryId factoryId = inputSlot.GetConnectedOutputSlot()->GetTensorHandleFactoryId();
1653  const TensorInfo& tensorInfo = inputSlot.GetConnectedOutputSlot()->GetTensorInfo();
1654 
1655  ITensorHandleFactory* handleFactory = m_TensorHandleFactoryRegistry.GetFactory(factoryId);
1656  ARMNN_ASSERT(handleFactory);
1657 
1658  ImportedTensorHandlePin importedTensorHandlePin{layerBindingId,
1659  handleFactory->CreateTensorHandle(tensorInfo, false)};
1660 
1661  ITensorHandle* tensorHandle = importedTensorHandlePin.m_TensorHandle.get();
1662 
1663  if (!CheckFlag(tensorHandle->GetImportFlags(), forceImportMemorySource))
1664  {
1665  throw MemoryImportException(fmt::format("ImportOutputs: Memory Import failed, backend: "
1666  "{} does not support importing from source {}"
1667  , factoryId, forceImportMemorySource));
1668  }
1669 
1670  if (tensorHandle->Import(outputTensor.second.GetMemoryArea(), forceImportMemorySource))
1671  {
1672  importedOutputs.push_back(m_CurImportedOutputId++);
1673  }
1674  else
1675  {
1676  throw MemoryImportException("ImportOutputs: Memory Import failed");
1677  }
1678 
1679  m_PreImportedOutputHandles.push_back(std::move(importedTensorHandlePin));
1680  }
1681 
1682  return importedOutputs;
1683 }
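
The output-side counterpart, sketched under the same assumptions (runtime,
networkId, a caller-owned outputBuffer, binding id 0):

    // Sketch: pre-import an output buffer so results are written directly
    // into caller-owned memory instead of being copied out afterwards.
    armnn::TensorInfo outputInfo = runtime->GetOutputTensorInfo(networkId, 0);
    armnn::OutputTensors outputTensors{
        {0, armnn::Tensor(outputInfo, outputBuffer)}};

    std::vector<armnn::ImportedOutputId> importedOutputIds =
        runtime->ImportOutputs(networkId, outputTensors, armnn::MemorySource::Malloc);
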
1684 
1685 void LoadedNetwork::ClearImportedInputs(const std::vector<ImportedInputId> inputIds)
1686 {
1687  for (auto id : inputIds)
1688  {
1689  if (id >= m_PreImportedInputHandles.size())
1690  {
1691  throw InvalidArgumentException(fmt::format("ClearImportedInputs::Unknown ImportedInputId: {}", id));
1692  }
1693 
1694  auto& importedTensorHandle = m_PreImportedInputHandles[id].m_TensorHandle;
1695  if (!importedTensorHandle)
1696  {
1697  throw InvalidArgumentException(
1698  fmt::format("ClearImportedInputs::ImportedInput with id: {} has already been deleted", id));
1699  }
1700  // Call Unimport then destroy the tensorHandle
1701  importedTensorHandle->Unimport();
1702  importedTensorHandle = {};
1703  }
1704 }
1705 
1706 void LoadedNetwork::ClearImportedOutputs(const std::vector<ImportedOutputId> outputIds)
1707 {
1708  for (auto id : outputIds)
1709  {
1710  if (id >= m_PreImportedOutputHandles.size())
1711  {
1712  throw InvalidArgumentException(fmt::format("ClearImportedOutputs::Unknown ImportedOutputId: {}", id));
1713  }
1714 
1715  auto& importedTensorHandle = m_PreImportedOutputHandles[id].m_TensorHandle;
1716  if (!importedTensorHandle)
1717  {
1718  throw InvalidArgumentException(
1719  fmt::format("ClearImportedOutputs::ImportedOutput with id: {} has already been deleted", id));
1720  }
1721  // Call Unimport then destroy the tensorHandle
1722  importedTensorHandle->Unimport();
1723  importedTensorHandle = {};
1724  }
1725 }
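
Pre-imported handles keep referencing the caller's memory until cleared, so a
typical caller releases them once the buffers are no longer needed; a sketch
reusing the ids from the examples above:

    // Each id has Unimport() called on its handle and the handle is then
    // destroyed; clearing the same id twice throws InvalidArgumentException.
    runtime->ClearImportedInputs(networkId, importedIds);
    runtime->ClearImportedOutputs(networkId, importedOutputIds);
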
1726 
1727 Status LoadedNetwork::Execute(const InputTensors& inputTensors,
1728  const OutputTensors& outputTensors,
1729  IWorkingMemHandle& iWorkingMemHandle,
1730  std::vector<ImportedInputId> preImportedInputs,
1731  std::vector<ImportedOutputId> preImportedOutputs)
1732 {
1733  const Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
1734 
1735  if (inputTensors.size() + preImportedInputs.size() != graph.GetNumInputs())
1736  {
1737  if (preImportedInputs.empty())
1738  {
1739  throw InvalidArgumentException("LoadedNetwork::Execute: Number of inputs provided does not match network.");
1740  }
1741  else
1742  {
1743  throw InvalidArgumentException("LoadedNetwork::Execute: "
1744  "Number of inputs + preImportedInputs provided does not match network.");
1745  }
1746  }
1747 
1748  if (outputTensors.size() + preImportedOutputs.size() != graph.GetNumOutputs())
1749  {
1750  if (preImportedOutputs.empty())
1751  {
1752  throw InvalidArgumentException("LoadedNetwork::Execute: "
1753  "Number of outputs provided does not match network.");
1754  }
1755  else
1756  {
1757  throw InvalidArgumentException("LoadedNetwork::Execute: "
1758  "Number of outputs + preImportedOutputs provided does not match network.");
1759  }
1760  }
1761 
1762  WorkingMemHandle& workingMemHandle = dynamic_cast<WorkingMemHandle&>(iWorkingMemHandle);
1763  // Collect all the given LayerBindingIds and check them for duplicates and unknowns.
1764  std::vector<LayerBindingId>& bindingIds = workingMemHandle.GetBindingIdVector();
1765  unsigned int index = 0;
1766  for (auto pair : inputTensors)
1767  {
1768  bindingIds[index++] = pair.first;
1769  }
1770  for (ImportedInputId id : preImportedInputs)
1771  {
1772  bindingIds[index++] = ValidateImportedInputID(id);
1773  }
1774  for (auto pair : outputTensors)
1775  {
1776  bindingIds[index++] = pair.first;
1777  }
1778  for (ImportedOutputId id : preImportedOutputs)
1779  {
1780  bindingIds[index++] = ValidateImportedOutputID(id);
1781  }
1782 
1783  workingMemHandle.ValidateBindingIds();
1784 
1785  auto resetMemHandle = [&]()
1786  {
1787  for (ImportedInputId id: preImportedInputs)
1788  {
1789  const LayerBindingId layerBindingId = m_PreImportedInputHandles[id].m_LayerBindingId;
1790 
1791  auto inputHandle = workingMemHandle.GetInputHandle(layerBindingId);
1792  auto inputConnections = workingMemHandle.GetInputConnections(layerBindingId);
1793  for (auto it : inputConnections)
1794  {
1795  *it = inputHandle;
1796  }
1797  }
1798 
1799  for (ImportedOutputId id: preImportedOutputs)
1800  {
1801  const LayerBindingId layerBindingId = m_PreImportedOutputHandles[id].m_LayerBindingId;
1802 
1803  auto outputHandle = workingMemHandle.GetOutputHandle(layerBindingId);
1804  auto outputConnections = workingMemHandle.GetOutputConnection(layerBindingId);
1805 
1806  for (auto it : outputConnections)
1807  {
1808  *it = outputHandle;
1809  }
1810  }
1811  };
1812 
1813  std::unique_ptr<TimelineUtilityMethods> timelineUtils =
1814  TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
1815  ProfilingGuid inferenceGuid = m_ProfilingService->GetNextGuid();
1816  if (timelineUtils)
1817  {
1818  // Add inference timeline trace if profiling is enabled.
1819  ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
1820  timelineUtils->CreateTypedEntity(inferenceGuid, LabelsAndEventClasses::INFERENCE_GUID);
1821  timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
1822  networkGuid,
1823  inferenceGuid,
1824  LabelsAndEventClasses::EXECUTION_OF_GUID);
1825  timelineUtils->RecordEvent(inferenceGuid, LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
1826  }
1827 
1828  bool executionSucceeded = true;
1829 
1830  if (timelineUtils)
1831  {
1832  // Add end of life of the inference timeline if profiling is enabled.
1833  timelineUtils->RecordEvent(inferenceGuid, LabelsAndEventClasses::ARMNN_PROFILING_EOL_EVENT_CLASS);
1834  timelineUtils->Commit();
1835  }
1836 
1837  if (!workingMemHandle.IsAllocated())
1838  {
1839  workingMemHandle.Allocate();
1840  }
1841 
1842  {
1843  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "PrepareInputs");
1844  for (auto pair : inputTensors)
1845  {
1846  EnqueueInput(pair.second, workingMemHandle.GetInputHandle(pair.first));
1847  }
1848 
1849  // Swap in the pre-imported inputs if any
1850  for (ImportedInputId id : preImportedInputs)
1851  {
1852  const ImportedTensorHandlePin& importedInputPin = m_PreImportedInputHandles[id];
1853  const LayerBindingId layerBindingId = m_PreImportedInputHandles[id].m_LayerBindingId;
1854  const auto& preimportedHandle = importedInputPin.m_TensorHandle;
1855 
1856  auto inputConnections = workingMemHandle.GetInputConnections(layerBindingId);
1857  for (auto it : inputConnections)
1858  {
1859  *it = preimportedHandle.get();
1860  }
1861  }
1862  }
1863  {
1864  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "PrepareOutputs");
1865  if (m_NetworkProperties.m_ExportEnabled)
1866  {
1867  for (auto pair: outputTensors)
1868  {
1869  ImportOutputTensor(pair.second, workingMemHandle.GetOutputHandle(pair.first));
1870  }
1871  }
1872 
1873  for (ImportedOutputId id : preImportedOutputs)
1874  {
1875  const ImportedTensorHandlePin& importedOutputPin = m_PreImportedOutputHandles[id];
1876  const LayerBindingId layerBindingId = m_PreImportedOutputHandles[id].m_LayerBindingId;
1877  const auto& preimportedHandle = importedOutputPin.m_TensorHandle;
1878 
1879  auto outputConnections = workingMemHandle.GetOutputConnection(layerBindingId);
1880  for (auto it : outputConnections)
1881  {
1882  *it = preimportedHandle.get();
1883  }
1884  }
1885  }
1886 
1887  auto Fail = [&](const std::exception& error)
1888  {
1889  ARMNN_LOG(error) << "An error occurred attempting to execute a workload: " << error.what();
1890  executionSucceeded = false;
1891  };
1892  ProfilingDynamicGuid workloadInferenceID(0);
1893 
1894  try
1895  {
1896  for (unsigned int i = 0; i < m_WorkloadQueue.size(); ++i)
1897  {
1898  auto& workload = m_WorkloadQueue[i];
1899  if (timelineUtils)
1900  {
1901  workloadInferenceID = timelineUtils->RecordWorkloadInferenceAndStartOfLifeEvent(workload->GetGuid(),
1902  inferenceGuid);
1903  }
1904 
1905  workload->ExecuteAsync(workingMemHandle.GetExecutionDataAt(i).second);
1906 
1907  if (timelineUtils)
1908  {
1909  timelineUtils->RecordEndOfLifeEvent(workloadInferenceID);
1910  }
1911  }
1912  }
1913  catch (const RuntimeException& error)
1914  {
1915  resetMemHandle();
1916  Fail(error);
1917  }
1918  catch (const std::runtime_error& error)
1919  {
1920  resetMemHandle();
1921  Fail(error);
1922  }
1923  catch (...)
1924  {
1925  resetMemHandle();
1926  throw;
1927  }
1928 
1929  if (!m_NetworkProperties.m_ExportEnabled)
1930  {
1931  for (auto pair: outputTensors)
1932  {
1933  CopyToOutputTensor(pair.second, workingMemHandle.GetOutputHandle(pair.first));
1934  }
1935  }
1936  else
1937  {
1938  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "SyncMemGeneric_Execute");
1939  workingMemHandle.MemSyncOutputs();
1940  }
1941 
1942  resetMemHandle();
1943 
1944  return executionSucceeded ? Status::Success : Status::Failure;
1945 }
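
A sketch of driving this thread-safe entry point through the experimental API,
assuming the network was loaded with an INetworkProperties whose asyncEnabled
flag is true and with inputTensors/outputTensors prepared as in the earlier
sketches:

    // Sketch: one IWorkingMemHandle owns all intermediate tensors for one
    // in-flight inference, so Execute() can run concurrently with others.
    std::unique_ptr<armnn::experimental::IWorkingMemHandle> memHandle =
        runtime->CreateWorkingMemHandle(networkId);

    armnn::Status status =
        runtime->Execute(*memHandle, inputTensors, outputTensors);
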
1946 
1947 /// Create a new unique WorkingMemHandle object. Create multiple handles if you wish to have
1948 /// overlapped Execution by calling this function from different threads.
1949 std::unique_ptr<IWorkingMemHandle> LoadedNetwork::CreateWorkingMemHandle(NetworkId networkId)
1950 {
1951  Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
1952 
1953  // Tensors that will need to be allocated internally within armnn
1954  std::vector<std::unique_ptr<ITensorHandle>> managedTensorHandles;
1955  // Tensors that will be allocated externally by the user
1956  std::vector<std::unique_ptr<ITensorHandle>> unmanagedTensorHandles;
1957 
1958  std::vector<WorkingMemDescriptor> workingMemDescriptors;
1959  std::vector<std::pair<BackendId, ExecutionData>> executionDataVec;
1960 
1961  auto GetTensorHandle = [&](Layer* layer, const OutputSlot& outputSlot)
1962  {
1963  ITensorHandleFactory::FactoryId factoryId = outputSlot.GetTensorHandleFactoryId();
1964  const TensorInfo& tensorInfo = outputSlot.GetTensorInfo();
1965 
1966  if (factoryId == ITensorHandleFactory::LegacyFactoryId)
1967  {
1968  BackendId id = layer->GetBackendId();
1969  ARMNN_NO_DEPRECATE_WARN_BEGIN
1970  return m_WorkloadFactories.at(id)->CreateTensorHandle(tensorInfo, false);
1971  ARMNN_NO_DEPRECATE_WARN_END
1972  }
1973  else
1974  {
1975  ITensorHandleFactory* handleFactory = m_TensorHandleFactoryRegistry.GetFactory(factoryId);
1976  ARMNN_ASSERT(handleFactory);
1977  return handleFactory->CreateTensorHandle(tensorInfo, false);
1978  }
1979  };
1980 
1981  struct HandleInfo
1982  {
1983  ITensorHandle* m_TensorHandle;
1984 
1985  bool m_IsInputLayerHandle = false;
1986  bool m_IsOutputLayerHandle = false;
1987 
1988  WorkingMemHandle::InputMemDescriptorCoords m_InputMemDescriptorCoords;
1989  WorkingMemHandle::OutputMemDescriptorCoords m_OutputMemDescriptorCoords;
1990  };
1991 
1992  std::unordered_map<const OutputSlot*, HandleInfo> outputToHandleInfoMap;
1993 
1994  unsigned int layerIndex = 0;
1995  for (auto&& layer : order)
1996  {
1997  // Constant layers' execution and management are handled during loaded network construction
1998  if (layer->GetType() == LayerType::Constant)
1999  {
2000  continue;
2001  }
2002 
2003  WorkingMemDescriptor workingMemDescriptor;
2004 
2005  bool isMemoryManaged = true;
2006  bool isInputLayer = false;
2007  bool isOutputLayer = false;
2008  bool isConnectedToOutputLayer = false;
2009 
2010  if (layer->GetType() == LayerType::Input || layer->GetType() == LayerType::MemImport)
2011  {
2012  // Input layers/workloads will not be executed, so the descriptor is not added to workingMemDescriptors
2013  // However we will still need to manage the tensorHandle
2014  isInputLayer = true;
2015  isMemoryManaged = !m_NetworkProperties.m_ImportEnabled;
2016  }
2017  else if (layer->GetType() == LayerType::Output)
2018  {
2019  isOutputLayer = true;
2020  }
2021 
2022  unsigned int slotIndex = 0;
2023  // Create a tensor handle for each output slot of a layer
2024  // Once we create it, we start managing its lifetime
2025  for (auto& slot : layer->GetOutputSlots())
2026  {
2027  for (unsigned int i = 0; i < slot.GetNumConnections(); ++i)
2028  {
2029  if ((slot.GetConnection(i)->GetOwningLayer().GetType() == LayerType::Output))
2030  {
2031  if (!isConnectedToOutputLayer)
2032  {
2033  isConnectedToOutputLayer = true;
2034  // If export is enabled, disable memory management so we can export; otherwise we do a copy
2035  isMemoryManaged = !m_NetworkProperties.m_ExportEnabled;
2036  }
2037  else
2038  {
2039  // Importing in this case would likely cause unexpected behaviour, so we disallow it.
2040  ARMNN_LOG(warning) <<
2041  fmt::format("Layer name: '{0}' guid: '{1}' has two or more OutputLayers connected to it. "
2042  "This will prevent importing on the connected OutputLayers.",
2043  layer->GetName(), layer->GetGuid());
2044  isMemoryManaged = true;
2045  }
2046  }
2047  }
2048 
2049  ITensorHandle* tensorHandle;
2050  if (isMemoryManaged)
2051  {
2052  managedTensorHandles.emplace_back(GetTensorHandle(layer, slot));
2053  tensorHandle = managedTensorHandles.back().get();
2054  }
2055  else
2056  {
2057  unmanagedTensorHandles.emplace_back(GetTensorHandle(layer, slot));
2058  tensorHandle = unmanagedTensorHandles.back().get();
2059  }
2060 
2061  workingMemDescriptor.m_Outputs.push_back(tensorHandle);
2062 
2063  HandleInfo& handleInfo = outputToHandleInfoMap[&slot];
2064  handleInfo.m_TensorHandle = tensorHandle;
2065 
2066  // Store the coordinates of the current layer's OutputSlot that is connected to the OutputLayer
2067  if (isConnectedToOutputLayer)
2068  {
2069  handleInfo.m_IsOutputLayerHandle = true;
2070  handleInfo.m_OutputMemDescriptorCoords.m_OutputSlotCoords = {layerIndex, slotIndex};
2071  }
2072  // Store the LayerBindingId of the InputLayer
2073  if (isInputLayer)
2074  {
2075  handleInfo.m_IsInputLayerHandle = true;
2076  LayerBindingId bindingId = static_cast<BindableLayer*>(layer)->GetBindingId();
2077  handleInfo.m_InputMemDescriptorCoords.m_LayerBindingId = bindingId;
2078  }
2079  slotIndex++;
2080  }
2081  // Loop through the input slots in the same layer and decrement the reference counter associated
2082  // with each tensor handle we encounter.
2083  // Once it reaches zero, the lifetime of the tensor handle has ended, and we mark its memory as available
2084  // so that the next tensor handle with a non-overlapping lifetime can share its memory.
2085  for (auto& slot : layer->GetInputSlots())
2086  {
2087  ARMNN_ASSERT(slot.GetConnection());
2088  auto outputSlot = slot.GetConnectedOutputSlot();
2089  auto key = outputSlot->GetOwningLayer().GetGuid();
2090 
2091  // Constant layers' execution and management are handled during loaded network construction
2092  auto found = m_ConstantTensorHandles.find(key);
2093  if (found != m_ConstantTensorHandles.end())
2094  {
2095  ITensorHandle* tensorHandle = found->second;
2096  workingMemDescriptor.m_Inputs.push_back(tensorHandle);
2097 
2098  // Odd case where a constant layer is connected to an output layer
2099  // We will need to create a HandleInfo to track it
2100  if (isOutputLayer)
2101  {
2102  LayerBindingId bindingId = static_cast<BindableLayer*>(layer)->GetBindingId();
2103 
2104  HandleInfo& handleInfo = outputToHandleInfoMap[outputSlot];
2105  handleInfo.m_TensorHandle = tensorHandle;
2106  handleInfo.m_IsOutputLayerHandle = true;
2107  handleInfo.m_OutputMemDescriptorCoords.m_LayerBindingIds.push_back(bindingId);
2108  handleInfo.m_OutputMemDescriptorCoords.m_InputSlotCoords.push_back({layerIndex, 0});
2109  }
2110  continue;
2111  }
2112 
2113  HandleInfo& handleInfo = outputToHandleInfoMap.at(outputSlot);
2114 
2115  ITensorHandle* inputTensorHandle = handleInfo.m_TensorHandle;
2116  workingMemDescriptor.m_Inputs.push_back(inputTensorHandle);
2117 
2118  // Store the LayerBindingId of the OutputLayer
2119  if (isOutputLayer)
2120  {
2121  LayerBindingId bindingId = static_cast<BindableLayer*>(layer)->GetBindingId();
2122  handleInfo.m_OutputMemDescriptorCoords.m_LayerBindingIds.push_back(bindingId);
2123  handleInfo.m_OutputMemDescriptorCoords.m_InputSlotCoords.push_back({layerIndex, 0});
2124  }
2125  // In this case the layer is not an OutputLayer but shares its input tensorhandle with an OutputLayer.
2126  // It will need to be updated as well if we swap out the tensorhandle.
2127  else if (handleInfo.m_IsOutputLayerHandle)
2128  {
2129  handleInfo.m_OutputMemDescriptorCoords.m_InputSlotCoords.push_back({layerIndex, slot.GetSlotIndex()});
2130  }
2131 
2132  // Store the coordinates of the InputSlots connected to the InputLayer
2133  // There can be more than one InputSlot connected to an InputLayer, so we use a vector
2134  if (handleInfo.m_IsInputLayerHandle)
2135  {
2136  std::pair<LayerGuid, unsigned int> connectionLocation{layerIndex, slot.GetSlotIndex()};
2137  handleInfo.m_InputMemDescriptorCoords.m_InputSlotCoords.emplace_back(connectionLocation);
2138  }
2139  }
2140 
2141  // Input/Output layers/workloads will not be executed, so the descriptor is not added to workingMemDescriptors
2142  // However we will still need to manage the tensorHandle
2143  if (!isInputLayer)
2144  {
2145  // Simply auto initialise ExecutionData here, so it's added only for the layers that require execution.
2146  // The memory and data will be allocated/assigned for the void* in WorkingMemHandle::Allocate.
2147  std::pair<BackendId, ExecutionData> dataPair;
2148  dataPair.first = layer->GetBackendId();
2149 
2150  executionDataVec.push_back(dataPair);
2151  workingMemDescriptors.push_back(workingMemDescriptor);
2152 
2153  layerIndex++;
2154  }
2155  }
2156 
2157  std::vector<std::pair<std::shared_ptr<TensorMemory>, MemorySource>> tensorMemory;
2158 
2159  auto externalMemoryManager = CreateExternalMemoryManger(tensorMemory);
2160 
2161  // Sort m_TensorMemory so its order matches the outputSlot order
2162  std::sort(tensorMemory.begin(), tensorMemory.end(),
2163  [](const std::pair<std::shared_ptr<TensorMemory>, MemorySource>& lhs,
2164  const std::pair<std::shared_ptr<TensorMemory>, MemorySource>& rhs)
2165  {
2166  return lhs.first->m_OutputSlotId < rhs.first->m_OutputSlotId;
2167  });
2168 
2169  std::vector<WorkingMemHandle::InputMemDescriptorCoords> inputConnectionsInfo;
2170  std::vector<WorkingMemHandle::OutputMemDescriptorCoords> outputConnectionsInfo;
2171 
2172  for (const auto& handleInfo: outputToHandleInfoMap)
2173  {
2174  if (handleInfo.second.m_IsOutputLayerHandle)
2175  {
2176  outputConnectionsInfo.emplace_back(handleInfo.second.m_OutputMemDescriptorCoords);
2177  }
2178 
2179  if (handleInfo.second.m_IsInputLayerHandle)
2180  {
2181  inputConnectionsInfo.emplace_back(handleInfo.second.m_InputMemDescriptorCoords);
2182  }
2183  }
2184 
2185  return std::make_unique<WorkingMemHandle>(networkId,
2186  inputConnectionsInfo,
2187  outputConnectionsInfo,
2188  workingMemDescriptors,
2189  std::move(externalMemoryManager),
2190  std::move(tensorMemory),
2191  std::move(managedTensorHandles),
2192  std::move(unmanagedTensorHandles),
2193  executionDataVec,
2194  &m_Backends);
2195 }
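
As the comment above this function notes, overlapped execution needs one
handle per thread; a sketch with error handling omitted and inputsA/outputsA,
inputsB/outputsB prepared by the caller:

    #include <thread>

    // Sketch: each thread gets its own working memory, so the two in-flight
    // inferences never share intermediate tensors.
    auto handleA = runtime->CreateWorkingMemHandle(networkId);
    auto handleB = runtime->CreateWorkingMemHandle(networkId);

    std::thread t1([&] { runtime->Execute(*handleA, inputsA, outputsA); });
    std::thread t2([&] { runtime->Execute(*handleB, inputsB, outputsB); });
    t1.join();
    t2.join();
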
2196 
2197 void LoadedNetwork::RegisterDebugCallback(const DebugCallbackFunction& func)
2198 {
2199  for (auto&& workloadPtr: m_WorkloadQueue)
2200  {
2201  workloadPtr->RegisterDebugCallback(func);
2202  }
2203 }
2204 
2205 
2206 void LoadedNetwork::CreateMemoryProfileAsync()
2207 {
2208  struct PartialBlock
2209  {
2210  unsigned int m_StartOfLife;
2211  unsigned int m_Lifetime;
2212 
2213  size_t m_MemSize;
2214  unsigned int m_Index;
2215 
2216  BackendId m_BackendId;
2217  };
2218 
2219  auto align = [](size_t numToAlign)
2220  {
2221  const size_t alignment = sizeof(float);
2222  return ((numToAlign + alignment - 1) / alignment) * alignment;
2223  };
2224 
2225  std::unordered_map<const OutputSlot*, PartialBlock> memBlockTrackerMap;
2226 
2227  const bool inputImportingEnabled = m_NetworkProperties.m_InputSource != MemorySource::Undefined;
2228  const bool outputImportingEnabled = m_NetworkProperties.m_OutputSource != MemorySource::Undefined;
2229 
2230  unsigned int timestep = 0;
2231  unsigned int outputIndex = 0;
2232  Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
2233 
2234  for (auto&& layer : order)
2235  {
2236  const LayerType& layerType = layer->GetType();
2237  // Don't manage memory if importing.
2238  if (layerType == LayerType::Input && inputImportingEnabled)
2239  {
2240  continue;
2241  }
2242  // Don't manage memory if importing.
2243  if (layerType == LayerType::Output && outputImportingEnabled
2244  && layer->GetInputSlot(0).GetConnectedOutputSlot()->GetNumConnections() == 1)
2245  {
2246  continue;
2247  }
2248  // Because constant layer memory cannot be shared, it must persist for the lifetime of execution;
2249  // its management is handled separately.
2250  if (layerType == LayerType::Constant)
2251  {
2252  continue;
2253  }
2254 
2255  BackendId backendId = layer->GetBackendId();
2256  for (auto& outputSlot : layer->GetOutputSlots())
2257  {
2258  if (!m_SupportsExternallyManagedMemory[backendId])
2259  {
2260  continue;
2261  }
2262 
2263  PartialBlock partialBlock;
2264 
2265  partialBlock.m_StartOfLife = timestep;
2266 
2267  size_t alignedSize = align(outputSlot.GetOutputHandler().GetTensorInfo().GetNumBytes());
2268  partialBlock.m_MemSize = alignedSize;
2269  partialBlock.m_Index = outputIndex++;
2270  partialBlock.m_Lifetime = outputSlot.GetNumConnections();
2271  partialBlock.m_BackendId = backendId;
2272 
2273  if (partialBlock.m_Lifetime == 0)
2274  {
2275  m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
2276  partialBlock.m_StartOfLife,
2277  partialBlock.m_MemSize,
2278  0,
2279  partialBlock.m_Index);
2280  }
2281  else
2282  {
2283  memBlockTrackerMap[&outputSlot] = partialBlock;
2284  }
2285  }
2286 
2287  for (auto& inputSlot : layer->GetInputSlots())
2288  {
2289  const Layer& connectedInputLayer = inputSlot.GetConnectedOutputSlot()->GetOwningLayer();
2290  const LayerType& owningLayerType = connectedInputLayer.GetType();
2291 
2292  if (owningLayerType == LayerType::Constant)
2293  {
2294  continue;
2295  }
2296  if (inputImportingEnabled && owningLayerType == LayerType::Input)
2297  {
2298  continue;
2299  }
2300 
2301  auto outputSlot = inputSlot.GetConnectedOutputSlot();
2302 
2303  PartialBlock& partialBlock = memBlockTrackerMap.at(outputSlot);
2304 
2305  auto& lifetime = partialBlock.m_Lifetime;
2306  --lifetime;
2307 
2308  if (lifetime == 0)
2309  {
2310  m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
2311  timestep,
2312  partialBlock.m_MemSize,
2313  0,
2314  partialBlock.m_Index);
2315  }
2316  }
2317  ++timestep;
2318  }
2319 }
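
A short worked example of the bookkeeping above: align() rounds each tensor's
byte size up to a multiple of sizeof(float), so a 10-byte tensor occupies
((10 + 4 - 1) / 4) * 4 = 12 bytes. A block opens at the timestep of the layer
producing the output slot, m_Lifetime starts at the slot's connection count,
each consuming input slot decrements it, and when it reaches zero the interval
[m_StartOfLife, timestep] is recorded in m_MemBlockMap so that blocks with
disjoint lifetimes can later share the same memory.
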
2320 
2321 void LoadedNetwork::CreateMemoryProfile()
2322 {
2323  // Finds the first TensorHandle ancestor of a SubTensorHandle. If the ITensorHandle provided
2324  // is a TensorHandle, the function just returns it
2325  auto TraceSubTensorHandleAncestry = [](ITensorHandle* const subTensorHandle)
2326  {
2327  ITensorHandle* ancestor = subTensorHandle;
2328  while (ancestor && ancestor->GetParent())
2329  {
2330  ancestor = ancestor->GetParent();
2331  }
2332  return ancestor;
2333  };
2334 
2335  struct PartialBlock
2336  {
2337  unsigned int m_StartOfLife;
2338  unsigned int m_Lifetime;
2339 
2340  size_t m_MemSize;
2341  unsigned int m_Index;
2342 
2343  BackendId m_BackendId;
2344  };
2345 
2346  auto align = [](size_t numToAlign)
2347  {
2348  const size_t alignment = sizeof(float);
2349  return ((numToAlign + alignment - 1) / alignment) * alignment;
2350  };
2351 
2352  std::unordered_map<ITensorHandle*, PartialBlock> memBlockTrackerMap;
2353 
2354  const bool inputImportingEnabled = m_NetworkProperties.m_InputSource != MemorySource::Undefined;
2355  const bool outputImportingEnabled = m_NetworkProperties.m_OutputSource != MemorySource::Undefined;
2356 
2357  unsigned int timestep = 0;
2358  unsigned int outputIndex = 0;
2359  Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
2360 
2361  for (auto&& layer : order)
2362  {
2363  const LayerType& layerType = layer->GetType();
2364  // Don't manage memory if importing.
2365  if (layerType == LayerType::Input && inputImportingEnabled)
2366  {
2367  continue;
2368  }
2369  // Don't manage memory if importing.
2370  if (layerType == LayerType::Output && outputImportingEnabled
2371  && layer->GetInputSlot(0).GetConnectedOutputSlot()->GetNumConnections() == 1)
2372  {
2373  continue;
2374  }
2375  // Because constant layer memory cannot be shared, it must persist for the lifetime of execution;
2376  // its management is handled separately.
2377  if (layerType == LayerType::Constant)
2378  {
2379  continue;
2380  }
2381 
2382  BackendId backendId = layer->GetBackendId();
2383  for (auto& outputSlot : layer->GetOutputSlots())
2384  {
2385  if (!m_SupportsExternallyManagedMemory[backendId])
2386  {
2387  continue;
2388  }
2389 
2390  ITensorHandle* tensorHandle = outputSlot.GetOutputHandler().GetData();
2391  tensorHandle = TraceSubTensorHandleAncestry(tensorHandle);
2392 
2393  if (memBlockTrackerMap.find(tensorHandle) == memBlockTrackerMap.end())
2394  {
2395  PartialBlock partialBlock;
2396 
2397  partialBlock.m_StartOfLife = timestep;
2398 
2399  size_t alignedSize = align(outputSlot.GetOutputHandler().GetTensorInfo().GetNumBytes());
2400  partialBlock.m_MemSize = alignedSize;
2401  partialBlock.m_Index = outputIndex++;
2402  partialBlock.m_Lifetime = outputSlot.GetNumConnections();
2403  partialBlock.m_BackendId = backendId;
2404 
2405  if (partialBlock.m_Lifetime == 0)
2406  {
2407  m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
2408  partialBlock.m_StartOfLife,
2409  partialBlock.m_MemSize,
2410  0,
2411  partialBlock.m_Index);
2412  }
2413  else
2414  {
2415  memBlockTrackerMap[tensorHandle] = partialBlock;
2416  }
2417  m_Tensorhandles.push_back(tensorHandle);
2418 
2419  }
2420  else
2421  {
2422  memBlockTrackerMap.at(tensorHandle).m_Lifetime += outputSlot.GetNumConnections();
2423  }
2424  }
2425 
2426  for (auto& inputSlot : layer->GetInputSlots())
2427  {
2428  const Layer& connectedInputLayer = inputSlot.GetConnectedOutputSlot()->GetOwningLayer();
2429  const LayerType& owningLayerType = connectedInputLayer.GetType();
2430 
2431  if (owningLayerType == LayerType::Constant)
2432  {
2433  continue;
2434  }
2435  if (inputImportingEnabled && owningLayerType == LayerType::Input)
2436  {
2437  continue;
2438  }
2439  if (!m_SupportsExternallyManagedMemory[connectedInputLayer.GetBackendId()])
2440  {
2441  continue;
2442  }
2443 
2444  auto outputSlot = inputSlot.GetConnectedOutputSlot();
2445 
2446  ITensorHandle* tensorHandle = outputSlot->GetOutputHandler().GetData();
2447  tensorHandle = TraceSubTensorHandleAncestry(tensorHandle);
2448 
2449  PartialBlock& partialBlock = memBlockTrackerMap.at(tensorHandle);
2450 
2451  auto& lifetime = partialBlock.m_Lifetime;
2452  --lifetime;
2453 
2454  if (lifetime == 0)
2455  {
2456  m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
2457  timestep,
2458  partialBlock.m_MemSize,
2459  0,
2460  partialBlock.m_Index);
2461  }
2462  }
2463  ++timestep;
2464  }
2465 
2466 }
2467 
2468 std::unique_ptr<MemoryManager> LoadedNetwork::CreateExternalMemoryManger(
2469  std::vector<std::pair<std::shared_ptr<TensorMemory>, MemorySource>>& tensorMemoryVec)
2470 {
2471  std::unique_ptr<MemoryManager> memoryManager = std::make_unique<MemoryManager>();
2472  auto allocatorMap = BackendRegistryInstance().GetAllocators();
2473 
2474  for (auto& backend : m_MemBinMap)
2475  {
2476  std::vector<BufferStorage> bufferStorageVec;
2477 
2478  std::shared_ptr<ICustomAllocator> backendAllocator;
2479  if (allocatorMap.find(backend.first) != allocatorMap.end())
2480  {
2481  backendAllocator = allocatorMap[backend.first];
2482  }
2483  else
2484  {
2485  backendAllocator = m_Backends[backend.first]->GetDefaultAllocator();
2486  }
2487 
2488  for (auto& memBin : backend.second)
2489  {
2490  BufferStorage bufferStorage;
2491  bufferStorage.m_BufferSize = memBin.m_MemSize;
2492  bufferStorage.m_TensorMemoryVector.reserve(memBin.m_MemBlocks.size());
2493 
2494  for (auto& memBlock : memBin.m_MemBlocks)
2495  {
2496  auto tensorMemory = std::make_shared<TensorMemory>(TensorMemory{memBlock.m_Offset, memBlock.m_Index});
2497 
2498  tensorMemoryVec.emplace_back(tensorMemory, backendAllocator->GetMemorySourceType());
2499  bufferStorage.m_TensorMemoryVector.emplace_back(tensorMemory);
2500  }
2501 
2502  bufferStorageVec.emplace_back(std::move(bufferStorage));
2503  }
2504 
2505  memoryManager->StoreMemToAllocate(bufferStorageVec, backendAllocator, 4);  // third argument is the type alignment in bytes
2506  }
2507 
2508  return memoryManager;
2509 }
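
The allocator picked above comes either from the BackendRegistry allocator map
(populated when the user supplies a custom allocator, for example through
IRuntime::CreationOptions::m_CustomAllocatorMap) or from the backend's default
allocator. A minimal malloc-backed sketch of the ICustomAllocator interface
consumed here:

    #include <armnn/backends/ICustomAllocator.hpp>
    #include <cstddef>
    #include <cstdlib>

    class SimpleAllocator : public armnn::ICustomAllocator
    {
    public:
        void* allocate(size_t size, size_t alignment) override
        {
            // std::aligned_alloc requires size to be a multiple of alignment.
            alignment = alignment == 0 ? alignof(std::max_align_t) : alignment;
            size = ((size + alignment - 1) / alignment) * alignment;
            return std::aligned_alloc(alignment, size);
        }
        void free(void* ptr) override { std::free(ptr); }
        armnn::MemorySource GetMemorySourceType() override
        {
            return armnn::MemorySource::Malloc;
        }
    };
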
2510 
2511 LayerBindingId LoadedNetwork::ValidateImportedInputID(ImportedInputId id)
2512 {
2513  try
2514  {
2515  const auto& importedTensorHandlePin = m_PreImportedInputHandles.at(id);
2516  if (!importedTensorHandlePin.m_TensorHandle)
2517  {
2518  throw InvalidArgumentException(fmt::format("LoadedNetwork::Execute:"
2519  "PreImportedInput: {} has been deleted", id));
2520  }
2521  return importedTensorHandlePin.m_LayerBindingId;
2522  }
2523  catch (const std::out_of_range&)
2524  {
2525  throw InvalidArgumentException(fmt::format("LoadedNetwork::Execute: Unknown ImportedInputId: {}", id));
2526  }
2527 }
2528 
2529 LayerBindingId LoadedNetwork::ValidateImportedOutputID(ImportedOutputId id)
2530 {
2531  try
2532  {
2533  const auto& importedTensorHandlePin = m_PreImportedOutputHandles.at(id);
2534  if (!importedTensorHandlePin.m_TensorHandle)
2535  {
2536  throw InvalidArgumentException(fmt::format("LoadedNetwork::Execute: "
2537  "PreImportedOutput: {} has been deleted", id));
2538  }
2539  return importedTensorHandlePin.m_LayerBindingId;
2540  }
2541  catch (const std::out_of_range&)
2542  {
2543  throw InvalidArgumentException(fmt::format("LoadedNetwork::Execute: Unknown ImportedOutputId: {}", id));
2544  }
2545 }
2546 
2547 }
IMemoryManager.hpp
armnn::GetTensorInfo
const TensorInfo & GetTensorInfo(const ITensorHandle *tensorHandle)
float32 helpers
Definition: RefWorkloadUtils.hpp:27
armnn::BackendId
Definition: BackendId.hpp:75
armnn::Graph::GetNumOutputs
size_t GetNumOutputs() const
Definition: Graph.hpp:188
armnn::BackendCapabilityException
Definition: Exceptions.hpp:152
armnn::experimental::IWorkingMemHandle
Definition: IWorkingMemHandle.hpp:20
armnn::BackendRegistryInstance
BackendRegistry & BackendRegistryInstance()
Definition: BackendRegistry.cpp:15
armnn::LoadedNetwork::MakeLoadedNetwork
static std::unique_ptr< LoadedNetwork > MakeLoadedNetwork(std::unique_ptr< IOptimizedNetwork > net, std::string &errorMessage, const INetworkProperties &networkProperties, arm::pipe::IProfilingService *profilingService)
Definition: LoadedNetwork.cpp:170
armnn::Layer::GetNumOutputSlots
unsigned int GetNumOutputSlots() const override
Returns the number of connectable output slots.
Definition: Layer.hpp:322
armnn::OutputHandler
Definition: OutputHandler.hpp:28
armnn::MemorySource::Undefined
@ Undefined
armnn::BackendId::Get
const std::string & Get() const
Definition: BackendId.hpp:138
armnn::OutputSlot
Definition: Layer.hpp:87
IBackendInternal.hpp
armnn::LoadedNetwork::ClearImportedInputs
void ClearImportedInputs(const std::vector< ImportedInputId > inputIds)
Definition: LoadedNetwork.cpp:1685
armnn::experimental::WorkingMemHandle::GetOutputHandle
ITensorHandle * GetOutputHandle(LayerBindingId layerBindingId) const
Definition: WorkingMemHandle.hpp:102
MemSyncWorkload.hpp
armnn::OutputHandler::GetTensorInfo
const TensorInfo & GetTensorInfo() const
Gets the matching TensorInfo for the output.
Definition: OutputHandler.hpp:42
armnn::Exception
Base class for all ArmNN exceptions so that users can filter to just those.
Definition: Exceptions.hpp:46
armnn::InputSlot
Definition: Layer.hpp:42
armnn::InputSlot::GetConnectedOutputSlot
const OutputSlot * GetConnectedOutputSlot() const
Definition: Layer.hpp:56
armnn::Graph::GetInputLayers
InputLayersAccessor GetInputLayers() const
Returns a wrapper object with begin(), end() methods to iterate over the input layers in a range-base...
Definition: Graph.hpp:192
CHECK_LOCATION
#define CHECK_LOCATION()
Definition: Exceptions.hpp:203
armnn::CopyToOutputTensor
void CopyToOutputTensor(const Tensor &outputTensor, ITensorHandle *outputTensorHandle)
Definition: LoadedNetwork.cpp:1376
armnn::LayerType::Input
@ Input
armnn::TensorHandleFactoryRegistry::AquireMemory
void AquireMemory()
Aquire memory required for inference.
Definition: TensorHandleFactoryRegistry.cpp:78
armnn::INetworkProperties::m_InputSource
const MemorySource m_InputSource
Definition: IRuntime.hpp:72
armnn::LayerBindingId
int LayerBindingId
Type of identifiers for bindable layers (inputs, outputs).
Definition: Types.hpp:290
armnn::ITensorHandleFactory::LegacyFactoryId
static const FactoryId LegacyFactoryId
Definition: ITensorHandleFactory.hpp:50
armnn::experimental::WorkingMemHandle::MemSyncOutputs
void MemSyncOutputs()
Definition: WorkingMemHandle.cpp:136
armnn::Graph::GetNumLayers
size_t GetNumLayers() const
Definition: Graph.hpp:198
armnn::GetInputTensor
const armnn::ConstTensor GetInputTensor(const LayerBindingId layerId, const InputTensors &inputTensors)
Definition: LoadedNetwork.cpp:1392
armnn::experimental::WorkingMemDescriptor::m_Inputs
std::vector< ITensorHandle * > m_Inputs
Definition: WorkingMemDescriptor.hpp:20
armnn::BackendRegistry::GetFactory
FactoryFunction GetFactory(const BackendId &id) const
Definition: BackendRegistry.cpp:57
armnn::LoadedNetwork
Definition: LoadedNetwork.hpp:42
armnn::LoadedNetwork::FreeWorkingMemory
void FreeWorkingMemory()
Definition: LoadedNetwork.cpp:1222
BackendHelper.hpp
armnn::INetworkProperties::m_OutputNetworkDetailsMethod
const ProfilingDetailsMethod m_OutputNetworkDetailsMethod
Definition: IRuntime.hpp:70
armnn::IBackendInternal::IWorkloadFactoryPtr
std::unique_ptr< IWorkloadFactory > IWorkloadFactoryPtr
Definition: IBackendInternal.hpp:89
armnn::ConstTensor
A tensor defined by a TensorInfo (shape and data type) and an immutable backing store.
Definition: Tensor.hpp:327
armnn::BackendOptions
Struct for the users to pass backend specific options.
Definition: BackendOptions.hpp:22
armnn::ImportedInputId
unsigned int ImportedInputId
Definition: Types.hpp:291
armnn::Layer
Definition: Layer.hpp:217
armnn::Graph::SetLayersOutOfOrder
void SetLayersOutOfOrder()
Definition: Graph.cpp:661
armnn::experimental::WorkingMemHandle::GetInputConnections
const std::vector< std::vector< ITensorHandle * >::iterator > & GetInputConnections(LayerBindingId layerBindingId) const
Definition: WorkingMemHandle.hpp:107
armnn::MemoryImportException
Definition: Exceptions.hpp:125
armnn::IgnoreUnused
void IgnoreUnused(Ts &&...)
Definition: IgnoreUnused.hpp:14
armnn::LoadedNetwork::ImportOutputs
std::vector< ImportedOutputId > ImportOutputs(const OutputTensors &outputTensors, MemorySource forceImportMemorySource=MemorySource::Undefined)
Definition: LoadedNetwork.cpp:1560
Assert.hpp
armnn::INetworkProperties::m_ProfilingEnabled
const bool m_ProfilingEnabled
Definition: IRuntime.hpp:68
armnn::Exception::what
virtual const char * what() const noexcept override
Definition: Exceptions.cpp:32
TensorHandle.hpp
armnn::IWorkloadFactory
Definition: WorkloadFactory.hpp:22
armnn::DebugCallbackFunction
std::function< void(LayerGuid guid, unsigned int slotIndex, ITensorHandle *tensorHandle)> DebugCallbackFunction
Define the type of callback for the Debug layer to call.
Definition: Types.hpp:379
armnn::experimental::WorkingMemHandle::InputMemDescriptorCoords
Definition: WorkingMemHandle.hpp:33
armnn::experimental::WorkingMemDescriptor
Definition: WorkingMemDescriptor.hpp:18
armnn::Graph::InputLayersAccessor::end
ConstIteratorInputs end() const
Definition: Graph.hpp:70
armnn::GetOutputTensor
const armnn::Tensor GetOutputTensor(const LayerBindingId layerId, const OutputTensors &outputTensors)
Definition: LoadedNetwork.cpp:1405
armnn::LoadedNetwork::WorkloadQueue
std::vector< std::unique_ptr< IWorkload > > WorkloadQueue
Definition: LoadedNetwork.hpp:45
armnn::OutputHandler::GetData
ITensorHandle * GetData() const
Gets the allocated tensor memory.
Definition: OutputHandler.hpp:46
armnn::Graph::GetOutputLayers
OutputLayersAccessor GetOutputLayers() const
Returns a wrapper object with begin(), end() methods to iterate over the output layers in a range-bas...
Definition: Graph.hpp:196
ARMNN_LOG
#define ARMNN_LOG(severity)
Definition: Logging.hpp:212
armnn::experimental::WorkingMemHandle::OutputMemDescriptorCoords
Definition: WorkingMemHandle.hpp:40
armnn
Copyright (c) 2021 ARM Limited and Contributors.
Definition: 01_00_quick_start.dox:6
armnn::MemSyncQueueDescriptor
Definition: WorkloadData.hpp:99
armnn::OutputSlot::GetTensorInfo
const TensorInfo & GetTensorInfo() const override
Definition: Layer.cpp:92
armnn::BoostLogSeverityMapping::warning
@ warning
armnn::RuntimeException
Definition: Exceptions.hpp:120
armnn::ITensorHandle
Definition: ITensorHandle.hpp:15
armnn::OutputSlot::GetTensorHandleFactoryId
ITensorHandleFactory::FactoryId GetTensorHandleFactoryId() const
Definition: Layer.cpp:205
armnn::INetworkProperties
Definition: IRuntime.hpp:43
armnn::LoadedNetwork::GetOutputTensorInfo
TensorInfo GetOutputTensorInfo(LayerBindingId layerId) const
Definition: LoadedNetwork.cpp:711
armnn::Layer::GetNameStr
const std::string & GetNameStr() const
Definition: Layer.hpp:227
armnn::LayerType
LayerType
When adding a new layer, adapt also the LastLayer enum value in the enum class LayerType below.
Definition: Types.hpp:466
armnn::ITensorHandle::Import
virtual bool Import(void *memory, MemorySource source)
Import externally allocated memory.
Definition: ITensorHandle.hpp:75
armnn::ITensorHandle::Map
virtual const void * Map(bool blocking=true) const =0
Map the tensor data for access.
armnn::experimental::WorkingMemHandle::Allocate
void Allocate() override
Allocate the backing memory required for execution.
Definition: WorkingMemHandle.cpp:100
armnn::InputTensors
std::vector< std::pair< LayerBindingId, class ConstTensor > > InputTensors
Definition: Tensor.hpp:392
armnn::LoadedNetwork::SendNetworkStructure
void SendNetworkStructure(arm::pipe::IProfilingService &profilingService)
Definition: LoadedNetwork.cpp:654
armnn::Layer::GetOutputHandler
const OutputHandler & GetOutputHandler(unsigned int i=0) const
Definition: Layer.hpp:232
armnn::Graph::OutputLayersAccessor::begin
ConstIteratorOutputs begin() const
Definition: Graph.hpp:84
armnn::INetworkProperties::m_AsyncEnabled
const bool m_AsyncEnabled
Definition: IRuntime.hpp:66
armnn::Status::Failure
@ Failure
armnn::Layer::GetType
LayerType GetType() const override
Returns the armnn::LayerType of this layer.
Definition: Layer.hpp:273
armnn::Layer::GetOutputSlots
const std::vector< OutputSlot > & GetOutputSlots() const
Definition: Layer.hpp:246
armnn::InputQueueDescriptor
MemCopyQueueDescriptor InputQueueDescriptor
Definition: WorkloadData.hpp:91
ARMNN_SCOPED_PROFILING_EVENT
#define ARMNN_SCOPED_PROFILING_EVENT(backendId, name)
Definition: Profiling.hpp:220
armnn::TensorHandleFactoryRegistry::ReleaseMemory
void ReleaseMemory()
Release memory required for inference.
Definition: TensorHandleFactoryRegistry.cpp:86
armnn::Graph::InputLayersAccessor::begin
ConstIteratorInputs begin() const
Definition: Graph.hpp:65
armnn::MemorySource
MemorySource
Define the Memory Source to reduce copies.
Definition: Types.hpp:230
armnn::LoadedNetwork::CreateWorkingMemHandle
std::unique_ptr< IWorkingMemHandle > CreateWorkingMemHandle(NetworkId networkId)
Create a new unique WorkingMemHandle object.
Definition: LoadedNetwork.cpp:1949
armnn::experimental::WorkingMemHandle::GetOutputConnection
const std::vector< std::vector< ITensorHandle * >::iterator > & GetOutputConnection(LayerBindingId layerBindingId) const
Definition: WorkingMemHandle.hpp:112
armnn::LoadedNetwork::Execute
Status Execute(const InputTensors &inputTensors, const OutputTensors &outputTensors, IWorkingMemHandle &workingMemHandle, std::vector< ImportedInputId > preImportedInputs={}, std::vector< ImportedOutputId > preImportedOutputs={})
Thread safe execution of the loaded network.
Definition: LoadedNetwork.cpp:1727
armnn::INetworkProperties::m_OutputSource
const MemorySource m_OutputSource
Definition: IRuntime.hpp:73
armnn::BindableLayer
Definition: Layer.hpp:452
armnn::LoadedNetwork::GetInputTensorInfo
TensorInfo GetInputTensorInfo(LayerBindingId layerId) const
Definition: LoadedNetwork.cpp:697
armnn::LoadedNetwork::GetNetworkGuid
arm::pipe::ProfilingGuid GetNetworkGuid()
Definition: LoadedNetwork.cpp:692
armnn::LoadedNetwork::EnqueueWorkload
Status EnqueueWorkload(const InputTensors &inputTensors, const OutputTensors &outputTensors, std::vector< ImportedInputId > preImportedInputIds={}, std::vector< ImportedOutputId > preImportedOutputIds={})
Single thread execution of the loaded network.
Definition: LoadedNetwork.cpp:839
armnn::TensorInfo
Definition: Tensor.hpp:152
armnn::LayerType::Constant
@ Constant
armnn::ProfilerManager::GetInstance
static ProfilerManager & GetInstance()
Definition: Profiling.cpp:593
armnn::ITensorHandleFactory::CreateTensorHandle
virtual std::unique_ptr< ITensorHandle > CreateTensorHandle(const TensorInfo &tensorInfo) const =0
armnn::Graph::GetNumInputs
size_t GetNumInputs() const
Definition: Graph.hpp:187
armnn::Layer::GetBackendId
const BackendId & GetBackendId() const
Definition: Layer.hpp:277
Layer.hpp
ArmNNProfiling.hpp
armnn::experimental::WorkingMemHandle::IsAllocated
bool IsAllocated() override
IsAllocated returns true if the backing memory is currently allocated.
Definition: WorkingMemHandle.hpp:77
armnn::NetworkId
int NetworkId
Definition: IRuntime.hpp:35
armnn::BackendOptions::BackendOption
Definition: BackendOptions.hpp:215
armnn::Status::Success
@ Success
armnn::experimental::WorkingMemHandle::GetInputHandle
ITensorHandle * GetInputHandle(LayerBindingId layerBindingId) const
Definition: WorkingMemHandle.hpp:97
armnn::experimental::WorkingMemHandle::GetExecutionDataAt
std::pair< BackendId, ExecutionData > & GetExecutionDataAt(unsigned int id) override
Get the ExecutionData at an index.
Definition: WorkingMemHandle.hpp:92
armnn::Status
Status
Definition: Types.hpp:42
armnn::LoadedNetwork::ImportInputs
std::vector< ImportedInputId > ImportInputs(const InputTensors &inputTensors, MemorySource forceImportMemorySource=MemorySource::Undefined)
Definition: LoadedNetwork.cpp:1418
armnn::INetworkProperties::m_ImportEnabled
const bool m_ImportEnabled
Deprecated and will be removed in future release.
Definition: IRuntime.hpp:62
armnn::TensorHandleFactoryRegistry::GetFactory
ITensorHandleFactory * GetFactory(ITensorHandleFactory::FactoryId id) const
Find a TensorHandleFactory by Id Returns nullptr if not found.
Definition: TensorHandleFactoryRegistry.cpp:39
armnn::Compute::Undefined
@ Undefined
armnn::INetworkProperties::m_ExportEnabled
const bool m_ExportEnabled
Deprecated and will be removed in future release.
Definition: IRuntime.hpp:64
armnn::Layer::GetInputSlots
const std::vector< InputSlot > & GetInputSlots() const
Definition: Layer.hpp:245
ARMNN_ASSERT_MSG
#define ARMNN_ASSERT_MSG(COND, MSG)
Definition: Assert.hpp:15
BackendRegistry.hpp
HeapProfiling.hpp
armnn::CheckFlag
bool CheckFlag(MemorySourceFlags flags, MemorySource source)
Definition: MemorySources.hpp:41
armnn::LoadedNetwork::ClearImportedOutputs
void ClearImportedOutputs(const std::vector< ImportedOutputId > outputIds)
Definition: LoadedNetwork.cpp:1706
armnn::WorkloadInfo
Contains information about TensorInfos of a layer.
Definition: WorkloadInfo.hpp:16
ARMNN_NO_DEPRECATE_WARN_BEGIN
#define ARMNN_NO_DEPRECATE_WARN_BEGIN
Definition: Deprecated.hpp:33
armnn::BackendRegistry::GetMemoryOptimizerStrategies
MemoryOptimizerStrategiesMapRef GetMemoryOptimizerStrategies()
Definition: BackendRegistry.cpp:150
armnn::Layer::GetGuid
LayerGuid GetGuid() const final
Returns the unique id of the layer.
Definition: Layer.hpp:330
armnn::Graph
Definition: Graph.hpp:30
armnn::ITensorHandleFactory
Definition: ITensorHandleFactory.hpp:46
ARMNN_ASSERT
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14
armnn::OutputTensors
std::vector< std::pair< LayerBindingId, class Tensor > > OutputTensors
Definition: Tensor.hpp:393
armnn::experimental::WorkingMemHandle::GetBindingIdVector
std::vector< LayerBindingId > & GetBindingIdVector()
Definition: WorkingMemHandle.hpp:119
WorkingMemHandle.hpp
armnn::ITensorHandle::GetImportFlags
virtual unsigned int GetImportFlags() const
Get flags describing supported import sources.
Definition: ITensorHandle.hpp:69
armnn::experimental::WorkingMemDescriptor::m_Outputs
std::vector< ITensorHandle * > m_Outputs
Definition: WorkingMemDescriptor.hpp:21
armnn::experimental::WorkingMemHandle::ValidateBindingIds
void ValidateBindingIds()
Definition: WorkingMemHandle.cpp:145
armnn::CopyTensorContentsGeneric
void CopyTensorContentsGeneric(const ITensorHandle *srcTensor, ITensorHandle *dstTensor, CopyFunc copy)
Definition: WorkloadUtils.hpp:46
armnn::ValidateSourcesMatchOptimizedNetwork
void ValidateSourcesMatchOptimizedNetwork(std::vector< BackendOptions > optimizedOptions, const INetworkProperties &networkProperties)
This function performs a sanity check to ensure that the combination of input and output memory sourc...
Definition: LoadedNetwork.cpp:98
Logging.hpp
armnn::TensorHandleFactoryRegistry::GetMatchingImportFactoryId
ITensorHandleFactory::FactoryId GetMatchingImportFactoryId(ITensorHandleFactory::FactoryId copyFactoryId)
Get a matching TensorHandleFatory Id for Memory Import given TensorHandleFactory Id for Memory Copy.
Definition: TensorHandleFactoryRegistry.cpp:72
armnn::LoadedNetwork::RegisterDebugCallback
void RegisterDebugCallback(const DebugCallbackFunction &func)
Definition: LoadedNetwork.cpp:2197
ARMNN_SCOPED_HEAP_PROFILING
#define ARMNN_SCOPED_HEAP_PROFILING(TAG)
Definition: HeapProfiling.hpp:45
LoadedNetwork.hpp
armnn::Layer::GetName
const char * GetName() const override
Returns the name of the layer.
Definition: Layer.hpp:319
arm::pipe
Definition: BackendRegistry.hpp:17
armnn::Tensor
A tensor defined by a TensorInfo (shape and data type) and a mutable backing store.
Definition: Tensor.hpp:319
std
Definition: BackendId.hpp:149
armnn::ITensorHandleFactory::FactoryId
std::string FactoryId
Definition: ITensorHandleFactory.hpp:49
armnn::LayerType::Output
@ Output
armnn::experimental::WorkingMemHandle
Definition: WorkingMemHandle.hpp:29
armnn::MemorySourceFlags
unsigned int MemorySourceFlags
Definition: MemorySources.hpp:15
armnn::BaseTensor::GetMemoryArea
MemoryType GetMemoryArea() const
Definition: Tensor.hpp:305
armnn::InvalidArgumentException
Definition: Exceptions.hpp:80
armnn::LayerType::MemImport
@ MemImport
ExecutionData.hpp
armnn::BoostLogSeverityMapping::error
@ error
armnn::ITensorHandle::Unmap
virtual void Unmap() const =0
Unmap the tensor data.
Graph.hpp
armnn::Graph::OutputLayersAccessor::end
ConstIteratorOutputs end() const
Definition: Graph.hpp:90
armnn::Graph::TopologicalSort
Graph & TopologicalSort()
Sorts layers in topological order and return this.
Definition: Graph.hpp:184
armnn::ImportedOutputId
unsigned int ImportedOutputId
Definition: Types.hpp:292
armnn::OutputQueueDescriptor
MemCopyQueueDescriptor OutputQueueDescriptor
Definition: WorkloadData.hpp:92
armnn::BaseTensor::GetInfo
const TensorInfo & GetInfo() const
Definition: Tensor.hpp:295
armnn::ProfilerManager::RegisterProfiler
void RegisterProfiler(IProfiler *profiler)
Definition: Profiling.cpp:600
armnn::HasCapability
bool HasCapability(const std::string &name, const BackendCapabilities &capabilities)
Convenience function to check if a capability exists in a BackendCapabilites struct.
Definition: BackendHelper.cpp:65
armnn::QueueDescriptor::m_Inputs
std::vector< ITensorHandle * > m_Inputs
Definition: WorkloadData.hpp:26
armnn::BoostLogSeverityMapping::info
@ info
MemCopyWorkload.hpp
Profiling.hpp
armnn::BackendRegistry::GetAllocators
std::unordered_map< BackendId, std::shared_ptr< ICustomAllocator > > GetAllocators()
Definition: BackendRegistry.cpp:128
ARMNN_NO_DEPRECATE_WARN_END
#define ARMNN_NO_DEPRECATE_WARN_END
Definition: Deprecated.hpp:34