ArmNN
 22.05
LoadedNetwork.cpp
1 //
2 // Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 
6 #include "LoadedNetwork.hpp"
7 #include "Layer.hpp"
8 #include "Graph.hpp"
9 #include "Profiling.hpp"
10 #include "HeapProfiling.hpp"
11 #include "WorkingMemHandle.hpp"
12 
13 #include <armnn/BackendHelper.hpp>
15 #include <armnn/Logging.hpp>
16 
20 
22 
23 #include <armnn/utility/Assert.hpp>
24 
26 
27 #include <common/include/Processes.hpp>
28 
29 #include <fmt/format.h>
30 
31 namespace armnn
32 {
33 
34 using namespace std;
35 using namespace arm::pipe;
36 
37 namespace
38 {
39 
40 template <typename ExceptionType>
41 std::string ToErrorMessage(const char * prefix, const ExceptionType & error)
42 {
43  std::stringstream ss;
44  ss << prefix << " " << error.what();
45  return ss.str();
46 }
47 
48 void AddLayerStructure(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
49  const Layer& layer,
50  ProfilingGuid networkGuid)
51 {
52  // Add layer to the post-optimisation network structure
53  std::string layerName = layer.GetNameStr().empty() ? "<Unnamed>" : layer.GetNameStr();
54  timelineUtils->CreateNamedTypedChildEntity(layer.GetGuid(),
55  networkGuid,
56  layerName,
57  LabelsAndEventClasses::LAYER_GUID);
58  for (auto&& input : layer.GetInputSlots())
59  {
60  const IOutputSlot* source = input.GetConnectedOutputSlot();
61  ARMNN_ASSERT(source != NULL);
62  timelineUtils->CreateConnectionRelationship(ProfilingRelationshipType::RetentionLink,
63  source->GetOwningLayerGuid(),
64  layer.GetGuid());
65  }
66 }
67 
68 void AddWorkloadStructure(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
69  std::unique_ptr<IWorkload>& workload,
70  const Layer& layer)
71 {
72  // Add workload to the post-optimisation network structure
73  timelineUtils->CreateTypedEntity(workload->GetGuid(), LabelsAndEventClasses::WORKLOAD_GUID);
74  timelineUtils->MarkEntityWithLabel(workload->GetGuid(),
75  layer.GetBackendId().Get(),
76  LabelsAndEventClasses::BACKENDID_GUID);
77 
78  // Link the workload to the layer
79  timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
80  layer.GetGuid(),
81  workload->GetGuid(),
82  LabelsAndEventClasses::CHILD_GUID);
83 }
84 
85 } // anonymous
86 
87 std::unique_ptr<LoadedNetwork> LoadedNetwork::MakeLoadedNetwork(std::unique_ptr<IOptimizedNetwork> net,
88  std::string& errorMessage,
89  const INetworkProperties& networkProperties,
90  arm::pipe::IProfilingService* profilingService)
91 {
92  std::unique_ptr<LoadedNetwork> loadedNetwork;
93 
94  auto Fail = [&](const std::exception& error) -> std::unique_ptr<LoadedNetwork>
95  {
96  errorMessage = ToErrorMessage("An error occurred when preparing the network workloads: ", error);
97  ARMNN_LOG(error) << errorMessage;
98 
99  return std::unique_ptr<LoadedNetwork>();
100  };
101 
102  try
103  {
104  loadedNetwork.reset(new LoadedNetwork(std::move(net), networkProperties, profilingService));
105  }
106  catch (const armnn::RuntimeException& error)
107  {
108  return Fail(error);
109  }
110  catch (const armnn::Exception& error)
111  {
112  return Fail(error);
113  }
114  catch (const std::runtime_error& error)
115  {
116  return Fail(error);
117  }
118 
119  return loadedNetwork;
120 }
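// Illustrative sketch (not part of this file): MakeLoadedNetwork is normally reached through the
// public runtime API rather than called directly. Assuming the usual workflow, where "network" is
// an already-constructed armnn::INetworkPtr, a caller would do something like:
//
//   armnn::IRuntime::CreationOptions options;
//   armnn::IRuntimePtr runtime = armnn::IRuntime::Create(options);
//   armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(*network, {armnn::Compute::CpuRef},
//                                                        runtime->GetDeviceSpec());
//   armnn::NetworkId networkId;
//   runtime->LoadNetwork(networkId, std::move(optNet)); // constructs a LoadedNetwork internally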
121 
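// The constructor below proceeds in several phases:
//  1. Create a backend, workload factory and (internal or external) memory manager per BackendId.
//  2. Create tensor handles for every layer (synchronous networks only).
//  3. Create the workloads and, where profiling is enabled, the timeline entities.
//  4. For synchronous networks, record which workloads touch each input/output binding so their
//     tensor handles can later be replaced by pre-imported ones.
//  5. Set up memory (external memory profile or internal memory manager) and run constant workloads.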
122 LoadedNetwork::LoadedNetwork(std::unique_ptr<IOptimizedNetwork> net,
123  const INetworkProperties& networkProperties,
124  arm::pipe::IProfilingService* profilingService) :
125  m_OptimizedNetwork(std::move(net)),
126  m_NetworkProperties(networkProperties),
127  m_TensorHandleFactoryRegistry(),
128  m_ProfilingService(profilingService)
129 {
131  // Get the profiler and register it for the current thread.
132  const std::shared_ptr<IProfiler>& profiler = m_OptimizedNetwork->GetProfiler();
133  ProfilerManager::GetInstance().RegisterProfiler(profiler.get());
134 
135  profiler->EnableProfiling(networkProperties.m_ProfilingEnabled);
136 
137  profiler->EnableNetworkDetailsToStdOut(networkProperties.m_OutputNetworkDetailsMethod);
138 
139  // First create tensor handlers, backends and workload factories.
140  // Handlers are created before workloads are,
141  // because workload creation can modify some of the handlers
142  // (for example, the splitter and concat layers).
143 
144  bool useExternalMemoryManager = false;
145  bool useInternalMemoryManager = false;
146  Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
147 
148  if (!networkProperties.m_AsyncEnabled)
149  {
150  m_IsInputImported = std::vector<bool>(order.GetNumInputs(), false);
151  m_IsOutputImported = std::vector<bool>(order.GetNumOutputs(), false);
152  }
153 
154  for (auto&& layer : order)
155  {
156  auto const& backendId = layer->GetBackendId();
157  if (m_Backends.count(backendId) == 0)
158  {
159  auto createBackend = BackendRegistryInstance().GetFactory(backendId);
160  auto it = m_Backends.emplace(std::make_pair(backendId, createBackend()));
161 
162  IBackendInternal* backend = it.first->second.get();
163 
164  if (networkProperties.m_AsyncEnabled &&
165  !HasCapability(BackendOptions::BackendOption{"AsyncExecution", true}, backend->GetCapabilities()))
166  {
167  std::string er = backend->GetId();
168  er += " does not support AsyncExecution";
169  throw BackendCapabilityException(er);
170  }
171 
172  if (networkProperties.m_AsyncEnabled &&
173  !HasCapability(BackendOptions::BackendOption{"ExternallyManagedMemory", true},
174  backend->GetCapabilities()))
175  {
176  std::string er = backend->GetId();
177  er += " does not support ExternallyManagedMemory\n";
178  er += "AsyncEnabled networks require all backends to support ExternallyManagedMemory";
179  throw BackendCapabilityException(er);
180  }
181 
182  if (HasCapability(BackendOptions::BackendOption{"ExternallyManagedMemory", true},backend->GetCapabilities())
183  && (m_NetworkProperties.m_ExternalMemoryManagementEnabled || m_NetworkProperties.m_AsyncEnabled))
184  {
185  m_SupportsExternallyManagedMemory[backend->GetId()] = true;
186  useExternalMemoryManager = true;
187  }
188  else
189  {
190  m_SupportsExternallyManagedMemory[backend->GetId()] = false;
191  useInternalMemoryManager = true;
192  }
193 
194  IBackendInternal::IWorkloadFactoryPtr workloadFactory;
195  if (backend->SupportsTensorAllocatorAPI())
196  {
197  workloadFactory = backend->CreateWorkloadFactory(
198  m_TensorHandleFactoryRegistry,
199  m_OptimizedNetwork->pOptimizedNetworkImpl->GetModelOptions(),
200  static_cast<MemorySourceFlags>(m_NetworkProperties.m_InputSource),
201  static_cast<MemorySourceFlags>(m_NetworkProperties.m_OutputSource));
202  }
203  else
204  {
205  m_BackendMemoryMangers.emplace_back(backend->CreateMemoryManager());
206  workloadFactory = backend->CreateWorkloadFactory(
207  m_BackendMemoryMangers.back(), m_OptimizedNetwork->pOptimizedNetworkImpl->GetModelOptions());
208  }
209  m_WorkloadFactories[backendId] = std::move(workloadFactory);
210  }
211  }
212 
213  if (!networkProperties.m_AsyncEnabled)
214  {
215  for (auto&& layer : order)
216  {
217  auto& workloadFactory = GetWorkloadFactory(*layer);
218  bool supportsExternalManager = m_SupportsExternallyManagedMemory[layer->GetBackendId()];
219 
220  switch (layer->GetType())
221  {
222  case LayerType::Input:
223  case LayerType::MemImport:
224  {
225  // If IsImportEnabled is true then we need to set IsMemoryManaged
226  // to false when creating TensorHandles
227  layer->CreateTensorHandles(m_TensorHandleFactoryRegistry,
228  workloadFactory,
229  !supportsExternalManager && !m_NetworkProperties.m_ImportEnabled);
230  break;
231  }
232  case LayerType::Constant:
233  {
234  layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory, true);
235  break;
236  }
237  default:
238  {
239  // Look for a layer with 1 OutputSlot which has 1 connection and that connection is an Output Layer
240  // If Export is enabled disable memory management so we can export, otherwise we do a copy
241  if ((layer->GetNumOutputSlots() == 1) &&
242  (layer->GetOutputSlots()[0].GetNumConnections() == 1) &&
243  (layer->GetOutputSlots()[0].GetConnection(0)->GetOwningLayer().GetType() == LayerType::Output))
244  {
245  layer->CreateTensorHandles(m_TensorHandleFactoryRegistry,
246  workloadFactory,
247  !supportsExternalManager && !m_NetworkProperties.m_ExportEnabled);
248  }
249  else
250  {
251  layer->CreateTensorHandles(m_TensorHandleFactoryRegistry,
252  workloadFactory,
253  !supportsExternalManager);
254  }
255  }
256  }
257  }
258  }
259 
260  ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
261  std::unique_ptr<TimelineUtilityMethods> timelineUtils =
262  TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
263  if (timelineUtils)
264  {
265  timelineUtils->CreateTypedEntity(networkGuid, LabelsAndEventClasses::NETWORK_GUID);
266  // Mark the network with a start of life event
267  timelineUtils->RecordEvent(networkGuid, LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
268  // and with the process ID
269  int processID = arm::pipe::GetCurrentProcessId();
270  std::stringstream ss;
271  ss << processID;
272  timelineUtils->MarkEntityWithLabel(networkGuid, ss.str(), LabelsAndEventClasses::PROCESS_ID_GUID);
273  }
274 
275  std::vector<IWorkload*> ConstWorkloads;
276 
277  //Then create workloads.
278  {
279  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "LoadNetwork_CreateWorkloads");
280  for (auto&& layer: order)
281  {
282  if (timelineUtils)
283  {
284  // Add layer to the post-optimisation network structure
285  AddLayerStructure(timelineUtils, *layer, networkGuid);
286  }
287 
288  const IWorkloadFactory& workloadFactory = GetWorkloadFactory(*layer);
289 
290  switch (layer->GetType())
291  {
292  case LayerType::Input:
293  case LayerType::Output:
294  {
295  // Inputs and outputs are treated in a special way - see EnqueueInput() and EnqueueOutput().
296  break;
297  }
298  default:
299  {
300  auto workload = layer->CreateWorkload(workloadFactory);
301 
302  if (!workload)
303  {
304  const char* const layerName =
305  layer->GetNameStr().length() != 0 ? layer->GetName() : "<Unnamed>";
306  throw InvalidArgumentException(
307  fmt::format("No workload created for layer (name: '{0}' type: '{1}') (compute '{2}')",
308  layerName, static_cast<int>(layer->GetType()), layer->GetBackendId().Get()
309  ));
310  }
311 
312  if (timelineUtils)
313  {
314  // Add workload to the post-optimisation network structure
315  AddWorkloadStructure(timelineUtils, workload, *layer);
316  }
317 
318  // For async networks ConstantWorkloads are managed exclusively by LoadedNetwork
319  // and are separated out from the other workloads
320  if((networkProperties.m_AsyncEnabled || useExternalMemoryManager) &&
321  layer->GetType() == LayerType::Constant)
322  {
323  m_ConstantTensorHandles[layer->GetGuid()] =
324  layer->GetOutputSlot(0).GetOutputHandler().GetData();
325  m_ConstantWorkloads[layer->GetGuid()] = std::move(workload);
326  }
327  else
328  {
329  m_WorkloadQueue.push_back(std::move(workload));
330 
331  if (layer->GetType() == LayerType::Constant)
332  {
333  // Place the Constant Workloads into a queue so that they can be executed first
334  ConstWorkloads.push_back(m_WorkloadQueue.back().get());
335  }
336  }
337  // Release the constant data in the layer.
338  layer->ReleaseConstantData();
339  break;
340  }
341  }
342  }
343  }
344 
345  // Gather information about workloads for inputs & outputs
346  if (!networkProperties.m_AsyncEnabled && m_WorkloadQueue.size() != 0)
347  {
348  const int noOfInputs = armnn::numeric_cast<int>(order.GetNumInputs());
349 
350  // Get indices of all workloads connected to each input and
351  // check if they support tensor handle replacement
352  for (const BindableLayer* layer: order.GetInputLayers())
353  {
354  const auto bindingId = layer->GetBindingId();
355 
356  bool supportsReplacement = true;
357 
358  for (const auto inputSlot: layer->GetOutputSlot(0).GetConnections())
359  {
360  auto workloadIndex = std::distance(order.begin(), order.GetPosInGraph(inputSlot->GetOwningLayer()));
361  workloadIndex -= noOfInputs;
362 
363  m_InputWorkloadSlotPairs[bindingId].emplace_back(WorkloadIndices{
364  armnn::numeric_cast<unsigned int>(workloadIndex), inputSlot->GetSlotIndex()});
365 
366  auto workload = m_WorkloadQueue[m_InputWorkloadSlotPairs[bindingId].back().m_WorkloadIndex].get();
367  supportsReplacement &= workload->SupportsTensorHandleReplacement();
368  }
369 
370  ITensorHandleFactory::FactoryId factoryId = layer->GetOutputSlot(0).GetTensorHandleFactoryId();
371  // Get matching import factory Id
372  ITensorHandleFactory::FactoryId importFactoryId =
373  m_TensorHandleFactoryRegistry.GetMatchingImportFactoryId(factoryId);
374 
375  ITensorHandleFactory *importFactory = m_TensorHandleFactoryRegistry.GetFactory(importFactoryId);
376 
377  if (supportsReplacement && importFactory)
378  {
379  m_PreImportedInputHandles.emplace_back(
380  bindingId, importFactory->CreateTensorHandle(layer->GetOutputSlot(0).GetTensorInfo(), false));
381  }
382  else
383  {
384  m_PreImportedInputHandles.emplace_back(bindingId, nullptr);
385  }
386  }
387 
388  // Get indices of all workloads connected to each output and
389  // check if they support tensor handle replacement
390  for (const BindableLayer* layer: order.GetOutputLayers())
391  {
392  const auto bindingId = layer->GetBindingId();
393 
394  const auto outputSlot = layer->GetInputSlot(0).GetConnectedOutputSlot();
395  auto& indices = m_OutputWorkloadSlotPairs[bindingId];
396 
397  auto workloadIndex = std::distance(order.begin(), order.GetPosInGraph(outputSlot->GetOwningLayer()));
398  workloadIndex -= noOfInputs;
399 
400  indices.m_OutputSlotIndices = WorkloadIndices{numeric_cast<unsigned int>(workloadIndex),
401  outputSlot->CalculateIndexOnOwner()};
402 
403  bool supportsReplacement = true;
404  auto outputWorkload = m_WorkloadQueue[indices.m_OutputSlotIndices.m_WorkloadIndex].get();
405  supportsReplacement &= outputWorkload->SupportsTensorHandleReplacement();
406 
407  for (auto &inputSlot: outputSlot->GetConnections())
408  {
409  if(inputSlot->GetOwningLayer().GetType() != LayerType::Output)
410  {
411  auto inWorkloadIndex = std::distance(order.begin(),
412  order.GetPosInGraph(inputSlot->GetOwningLayer()));
413  inWorkloadIndex -= noOfInputs;
414  indices.m_InputSlotIndices.emplace_back(WorkloadIndices{numeric_cast<unsigned int>(inWorkloadIndex),
415  inputSlot->GetSlotIndex()});
416  auto inputWorkload = m_WorkloadQueue[indices.m_InputSlotIndices.back().m_WorkloadIndex].get();
417  supportsReplacement &= inputWorkload->SupportsTensorHandleReplacement();
418  }
419  }
420 
421  ITensorHandleFactory::FactoryId factoryId = outputSlot->GetTensorHandleFactoryId();
422  // Get matching import factory Id
423  ITensorHandleFactory::FactoryId importFactoryId =
424  m_TensorHandleFactoryRegistry.GetMatchingImportFactoryId(factoryId);
425  ITensorHandleFactory *importFactory = m_TensorHandleFactoryRegistry.GetFactory(importFactoryId);
426 
427  if (supportsReplacement && importFactory)
428  {
429  m_PreImportedOutputHandles.emplace_back(
430  bindingId, importFactory->CreateTensorHandle(outputSlot->GetTensorInfo(), false));
431  }
432  else
433  {
434  m_PreImportedOutputHandles.emplace_back(bindingId, nullptr);
435  }
436  }
437  }
438 
439  for (auto&& workloadFactory : m_WorkloadFactories)
440  {
441  workloadFactory.second->AfterWorkloadsCreated();
442  }
443 
444  if (timelineUtils)
445  {
446  // Commit to send the post-optimisation network structure
447  timelineUtils->Commit();
448  }
449 
450  if (useExternalMemoryManager)
451  {
452  if (networkProperties.m_AsyncEnabled)
453  {
454  CreateMemoryProfileAsync();
455  }
456  else
457  {
458  CreateMemoryProfile();
459  }
460 
461  auto backendStrategyMap = BackendRegistryInstance().GetMemoryOptimizerStrategies();
462  for (auto& backendMemoryProfile : m_MemBlockMap)
463  {
464  const BackendId& backendId = backendMemoryProfile.first;
465  if (backendStrategyMap.find(backendId) != backendStrategyMap.end())
466  {
467  m_MemBinMap[backendId] = backendStrategyMap[backendId]->Optimize(backendMemoryProfile.second);
468  }
469  else
470  {
471  m_MemBinMap[backendId] = m_ConstantStrategy->Optimize(backendMemoryProfile.second);
472  }
473  }
474 
475  if (!networkProperties.m_AsyncEnabled)
476  {
477  m_ExternalMemoryManager = CreateExternalMemoryManger(m_TensorMemory);
478 
479  // Sort m_TensorMemory so its order matches m_Tensorhandles
480  std::sort(m_TensorMemory.begin(), m_TensorMemory.end(),
481  [](const std::pair<std::shared_ptr<TensorMemory>, MemorySource>& lhs,
482  const std::pair<std::shared_ptr<TensorMemory>, MemorySource>& rhs)
483  {
484  return lhs.first->m_OutputSlotId < rhs.first->m_OutputSlotId;
485  });
486  }
487  }
488 
489  // Now that the intermediate tensor memory has been set-up,
490  // do any post allocation configuration for each workload.
491  if (!networkProperties.m_AsyncEnabled)
492  {
493  if (useInternalMemoryManager)
494  {
495  // Set up memory.
496  m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().AllocateDynamicBuffers();
497  }
498 
499  for (auto &workload : m_WorkloadQueue)
500  {
501  workload->PostAllocationConfigure();
502  }
503  }
504 
505  if (useExternalMemoryManager)
506  {
507  if (!networkProperties.m_AsyncEnabled)
508  {
509  AllocateAndExecuteConstantWorkloads();
510  }
511  else
512  {
513  AllocateAndExecuteConstantWorkloadsAsync();
514  }
515  }
516  // If synchronous, execute all constant layer workloads
517  if (!networkProperties.m_AsyncEnabled)
518  {
519  for (auto workload: ConstWorkloads)
520  {
521  workload->Execute();
522  }
523  }
524 }
525 
526 void LoadedNetwork::AllocateAndExecuteConstantWorkloads()
527 {
528  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "LoadNetwork_AllocateAndExecuteConstants");
529  for (auto& pair : m_ConstantWorkloads)
530  {
531  auto tensorHandle = m_ConstantTensorHandles[pair.first];
532  tensorHandle->Allocate();
533  pair.second->Execute();
534  }
535 }
536 
537 void LoadedNetwork::AllocateAndExecuteConstantWorkloadsAsync()
538 {
539  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "LoadNetwork_AllocateAndExecuteConstants");
540  Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
541  for (auto&& layer : order)
542  {
543  if (layer->GetType() == LayerType::Constant)
544  {
545  const auto& outSlot = layer->GetOutputSlots()[0];
546  const auto factoryId = outSlot.GetTensorHandleFactoryId();
548  auto& workloadFactory = GetWorkloadFactory(*layer);
549 
550  layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory);
551  ITensorHandle* tensorHandle = outSlot.GetOutputHandler().GetData();
552 
553  m_ConstantTensorHandles[layer->GetGuid()] = tensorHandle;
554  tensorHandle->Allocate();
555 
556  WorkingMemDescriptor memDesc;
557  memDesc.m_Outputs.push_back(tensorHandle);
558  m_ConstantWorkloads[layer->GetGuid()]->ExecuteAsync(memDesc);
559  }
560  }
561 }
562 
563 void LoadedNetwork::SendNetworkStructure(arm::pipe::IProfilingService& profilingService)
564 {
565  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "LoadNetwork_SendNetworkStructure");
566  Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
567  ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
568 
569  std::unique_ptr<TimelineUtilityMethods> timelineUtils =
570  TimelineUtilityMethods::GetTimelineUtils(profilingService);
571 
572  timelineUtils->CreateTypedEntity(networkGuid, LabelsAndEventClasses::NETWORK_GUID);
573 
574  for (auto&& layer : order)
575  {
576  // Add layer to the post-optimisation network structure
577  AddLayerStructure(timelineUtils, *layer, networkGuid);
578  switch (layer->GetType())
579  {
580  case LayerType::Input:
581  case LayerType::Output:
582  {
583  // Inputs and outputs are treated in a special way - see EnqueueInput() and EnqueueOutput().
584  break;
585  }
586  default:
587  {
588  for (auto& workload : m_WorkloadQueue)
589  {
590  // Add workload to the post-optimisation network structure
591  AddWorkloadStructure(timelineUtils, workload, *layer);
592  }
593  break;
594  }
595  }
596  }
597  // Commit to send the post-optimisation network structure
598  timelineUtils->Commit();
599 }
600 
601 ProfilingGuid LoadedNetwork::GetNetworkGuid()
602 {
603  return m_OptimizedNetwork->GetGuid();
604 }
605 
606 TensorInfo LoadedNetwork::GetInputTensorInfo(LayerBindingId layerId) const
607 {
608  for (auto&& inputLayer : m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetInputLayers())
609  {
610  ARMNN_ASSERT_MSG(inputLayer->GetNumOutputSlots() == 1, "Input layer should have exactly 1 output slot");
611  if (inputLayer->GetBindingId() == layerId)
612  {
613  return inputLayer->GetOutputSlot(0).GetTensorInfo();
614  }
615  }
616 
617  throw InvalidArgumentException(fmt::format("No input layer is associated with id {}", layerId));
618 }
619 
620 TensorInfo LoadedNetwork::GetOutputTensorInfo(LayerBindingId layerId) const
621 {
622  for (auto&& outputLayer : m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetOutputLayers())
623  {
624  ARMNN_ASSERT_MSG(outputLayer->GetNumInputSlots() == 1, "Output layer should have exactly 1 input slot");
625  ARMNN_ASSERT_MSG(outputLayer->GetInputSlot(0).GetConnection(), "Input slot on Output layer must be connected");
626  if (outputLayer->GetBindingId() == layerId)
627  {
628  return outputLayer->GetInputSlot(0).GetConnection()->GetTensorInfo();
629  }
630  }
631 
632  throw InvalidArgumentException(fmt::format("No output layer is associated with id {}", layerId));
633 }
634 
635 const IWorkloadFactory& LoadedNetwork::GetWorkloadFactory(const Layer& layer) const
636 {
637  const IWorkloadFactory* workloadFactory = nullptr;
638 
639  auto it = m_WorkloadFactories.find(layer.GetBackendId());
640  if (it == m_WorkloadFactories.end())
641  {
642  throw RuntimeException(fmt::format("No workload factory for {0} to be used for layer: {1}",
643  layer.GetBackendId().Get(),
644  layer.GetNameStr()),
645  CHECK_LOCATION());
646  }
647 
648  workloadFactory = it->second.get();
649 
650  ARMNN_ASSERT_MSG(workloadFactory, "No workload factory");
651 
652  std::string reasonIfUnsupported;
653  ARMNN_ASSERT_MSG(IWorkloadFactory::IsLayerSupported(layer,
654  {},
655  reasonIfUnsupported,
656  m_OptimizedNetwork->pOptimizedNetworkImpl->GetModelOptions()),
657  "Factory does not support layer");
658  IgnoreUnused(reasonIfUnsupported);
659  return *workloadFactory;
660 }
661 
662 namespace {
663 
664 // Non-copyable class owning accelerator-specific tensor data.
665 class TensorPin
666 {
667 public:
668  TensorPin(std::unique_ptr<ITensorHandle> handle, const TensorInfo& info, LayerBindingId id)
669  : m_TensorHandle(std::move(handle))
670  , m_TensorInfo(info)
671  , m_Id(id)
672  {
673  }
674 
675  ITensorHandle* GetTensorHandle() const { return m_TensorHandle.get(); }
676  const TensorInfo& GetTensorInfo() const { return m_TensorInfo; }
677  LayerBindingId GetBindingId() const { return m_Id; }
678 
679 private:
680  std::unique_ptr<ITensorHandle> m_TensorHandle;
681  TensorInfo m_TensorInfo;
682  LayerBindingId m_Id;
683 };
684 
685 static const TensorPin& GetTensorPin(LayerBindingId id,
686  const std::vector<TensorPin>& pins,
687  char const* bindingPointDesc)
688 {
689  auto it = std::find_if(pins.begin(), pins.end(),
690  [id](const TensorPin& pin)
691  {
692  return pin.GetBindingId() == id;
693  });
694 
695  if (it != pins.end())
696  {
697  return *it;
698  }
699  else
700  {
701  throw InvalidArgumentException(fmt::format("No tensor supplied for {0} {1}", bindingPointDesc, id));
702  }
703 }
704 
705 // Stores data that needs to be kept accessible for the entire execution of a workload.
706 class WorkloadData
707 {
708 public:
709  WorkloadData(const InputTensors& inputTensors, const OutputTensors& outputTensors)
710  {
711  m_InputTensorPins.reserve(inputTensors.size());
712  m_OutputTensorPins.reserve(outputTensors.size());
713 
714  for (auto inputTensorPair : inputTensors)
715  {
716  auto inputTensor = inputTensorPair.second;
717 
718  std::unique_ptr<ITensorHandle> tensorHandle =
719  std::make_unique<ConstPassthroughTensorHandle>(inputTensor.GetInfo(),inputTensor.GetMemoryArea());
720  LayerBindingId layerId = inputTensorPair.first;
721 
722  m_InputTensorPins.emplace_back(std::move(tensorHandle), inputTensor.GetInfo(), layerId);
723  }
724 
725  for (auto outputTensorPair : outputTensors)
726  {
727  auto outputTensor = outputTensorPair.second;
728 
729  std::unique_ptr<ITensorHandle> tensorHandle =
730  std::make_unique<PassthroughTensorHandle>(outputTensor.GetInfo(), outputTensor.GetMemoryArea());
731  LayerBindingId layerId = outputTensorPair.first;
732 
733  m_OutputTensorPins.emplace_back(std::move(tensorHandle), outputTensor.GetInfo(), layerId);
734  }
735  }
736 
737  const TensorPin& GetInputTensorPin(LayerBindingId id) const
738  {
739  return GetTensorPin(id, m_InputTensorPins, "input");
740  }
741 
742  const TensorPin& GetOutputTensorPin(LayerBindingId id) const
743  {
744  return GetTensorPin(id, m_OutputTensorPins, "output");
745  }
746 
747 private:
748 
749  std::vector<TensorPin> m_InputTensorPins;
750  std::vector<TensorPin> m_OutputTensorPins;
751 };
752 
753 }
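// Illustrative sketch (not part of this file): the InputTensors/OutputTensors consumed by
// WorkloadData and EnqueueWorkload() below are vectors of (LayerBindingId, tensor) pairs that wrap
// caller-owned memory. Assuming binding id 0 for both input and output, and that "runtime" and
// "networkId" come from the usual IRuntime::LoadNetwork flow, a caller might build them as:
//
//   armnn::TensorInfo inputInfo = runtime->GetInputTensorInfo(networkId, 0);
//   inputInfo.SetConstant(true); // recent releases expect a constant TensorInfo for ConstTensor
//   armnn::TensorInfo outputInfo = runtime->GetOutputTensorInfo(networkId, 0);
//   std::vector<float> inputData(inputInfo.GetNumElements());
//   std::vector<float> outputData(outputInfo.GetNumElements());
//   armnn::InputTensors inputTensors{{0, armnn::ConstTensor(inputInfo, inputData.data())}};
//   armnn::OutputTensors outputTensors{{0, armnn::Tensor(outputInfo, outputData.data())}};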
754 
755 Status LoadedNetwork::EnqueueWorkload(const InputTensors& inputTensors,
756  const OutputTensors& outputTensors,
757  std::vector<ImportedInputId> preImportedInputIds,
758  std::vector<ImportedOutputId> preImportedOutputIds)
759 {
760  const Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
761 
762  // Walk graph to determine the order of execution.
763  if (graph.GetNumLayers() < 2)
764  {
765  ARMNN_LOG(warning) << "IRuntime::EnqueueWorkload()::Less than two nodes in graph";
766  return Status::Failure;
767  }
768 
769  // Data that must be kept alive for the entire execution of the workload.
770  WorkloadData workloadData(inputTensors, outputTensors);
771 
772  if (graph.GetNumInputs() != inputTensors.size())
773  {
774  throw InvalidArgumentException("Number of inputs provided does not match network.");
775  }
776 
777  // For each input to the network, call EnqueueInput with the data passed by the user.
778  {
780  m_InputQueue.clear();
781  m_InputQueue.reserve(graph.GetNumInputs());
782 
783  if (preImportedInputIds.size() > graph.GetNumInputs())
784  {
785  throw InvalidArgumentException("Invalid number of preImportedInputIds");
786  }
787 
788  unsigned int inputIndex = 0;
789  unsigned int importedInputIdIndex = 0;
790  std::sort(preImportedInputIds.begin(), preImportedInputIds.end());
791  for (const BindableLayer* inputLayer : graph.GetInputLayers())
792  {
793  if (importedInputIdIndex < preImportedInputIds.size() &&
794  inputIndex == preImportedInputIds[importedInputIdIndex])
795  {
796  // Only replace tensorhandles if they have not already been replaced
797  if (!m_IsInputImported[inputIndex])
798  {
799  auto outputTensorHandle = m_PreImportedInputHandles[inputIndex].m_TensorHandle.get();
800 
801  for (const auto& workloadInfo: m_InputWorkloadSlotPairs[inputLayer->GetBindingId()])
802  {
803  auto workload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
804  workload->ReplaceInputTensorHandle(outputTensorHandle, workloadInfo.m_SlotIndex);
805  }
806  m_IsInputImported[inputIndex] = true;
807  }
808  importedInputIdIndex++;
809  }
810  else
811  {
812  if (m_IsInputImported[inputIndex])
813  {
814  OutputHandler& handler = const_cast<OutputHandler&>(inputLayer->GetOutputHandler(0));
815 
816  for (const auto& workloadInfo: m_InputWorkloadSlotPairs[inputLayer->GetBindingId()])
817  {
818  auto workload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
819  workload->ReplaceInputTensorHandle(handler.GetData(), workloadInfo.m_SlotIndex);
820  }
821 
822  m_IsInputImported[inputIndex] = false;
823  }
824 
825  // InputTensorHandle is not imported yet, proceed to enqueue the input
826  const TensorPin& pin = workloadData.GetInputTensorPin(inputLayer->GetBindingId());
827  EnqueueInput(*inputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
828  }
829  inputIndex++;
830  }
831  }
832  // For each output to the network, call EnqueueOutput with the data passed by the user.
833  {
835  m_OutputQueue.clear();
836  m_OutputQueue.reserve(graph.GetNumOutputs());
837 
838  if (preImportedOutputIds.size() > graph.GetNumOutputs())
839  {
840  throw InvalidArgumentException("Invalid number of preImportedOutputIds");
841  }
842 
843  unsigned int outputIndex = 0;
844  unsigned int importedOutputIdIndex = 0;
845  std::sort(preImportedOutputIds.begin(), preImportedOutputIds.end());
846  for (const BindableLayer* outputLayer : graph.GetOutputLayers())
847  {
848  if (importedOutputIdIndex < preImportedOutputIds.size() &&
849  outputIndex == preImportedOutputIds[importedOutputIdIndex])
850  {
851  // Only replace tensorhandles if they have not already been replaced
852  ITensorHandle* inputTensorHandle = m_PreImportedOutputHandles[outputIndex].m_TensorHandle.get();
853 
854  if (!m_IsOutputImported[outputIndex])
855  {
856  const auto bindingId = outputLayer->GetBindingId();
857  const auto& indices = m_OutputWorkloadSlotPairs[bindingId];
858 
859  auto outputWorkload = m_WorkloadQueue[indices.m_OutputSlotIndices.m_WorkloadIndex].get();
860 
861  outputWorkload->ReplaceOutputTensorHandle(inputTensorHandle,
862  indices.m_OutputSlotIndices.m_SlotIndex);
863 
864  for (const auto& workloadInfo: indices.m_InputSlotIndices)
865  {
866  auto inputWorkload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
867  inputWorkload->ReplaceInputTensorHandle(inputTensorHandle, workloadInfo.m_SlotIndex);
868  }
869  m_IsOutputImported[outputIndex] = true;
870  }
871 
872  ARMNN_ASSERT_MSG(inputTensorHandle != nullptr, "Data should have been allocated.");
873  MemSyncQueueDescriptor syncDesc;
874  syncDesc.m_Inputs.push_back(inputTensorHandle);
875  WorkloadInfo info;
876  info.m_InputTensorInfos.push_back(
877  outputLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo());
878  auto syncWorkload = std::make_unique<SyncMemGenericWorkload>(syncDesc, info);
879  ARMNN_ASSERT_MSG(syncWorkload, "No sync workload created");
880  m_OutputQueue.push_back(move(syncWorkload));
881  importedOutputIdIndex++;
882  }
883  else
884  {
885  if (m_IsOutputImported[outputIndex])
886  {
887  const auto bindingId = outputLayer->GetBindingId();
888  const auto& indices = m_OutputWorkloadSlotPairs[bindingId];
889 
890  auto outputWorkload = m_WorkloadQueue[indices.m_OutputSlotIndices.m_WorkloadIndex].get();
891  const OutputHandler& outputHandler =
892  outputLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetOutputHandler();
893 
894  outputWorkload->ReplaceOutputTensorHandle(
895  outputHandler.GetData(), indices.m_OutputSlotIndices.m_SlotIndex);
896 
897  for (const auto& workloadInfo: indices.m_InputSlotIndices)
898  {
899  auto inputWorkload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
900  inputWorkload->ReplaceInputTensorHandle(outputHandler.GetData(), workloadInfo.m_SlotIndex);
901  }
902  m_IsOutputImported[outputIndex] = false;
903  }
904 
905  const TensorPin& pin = workloadData.GetOutputTensorPin(outputLayer->GetBindingId());
906  // OutputTensorHandle is not imported yet, proceed to enqueue the output
907  EnqueueOutput(*outputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
908  }
909  outputIndex++;
910  }
911  }
912 
913  std::unique_ptr<TimelineUtilityMethods> timelineUtils =
914  TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
915  ProfilingGuid inferenceGuid = m_ProfilingService->GetNextGuid();
916  if (timelineUtils)
917  {
918  // Add inference timeline trace if profiling is enabled.
919  ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
920  timelineUtils->CreateTypedEntity(inferenceGuid, LabelsAndEventClasses::INFERENCE_GUID);
921  timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
922  networkGuid,
923  inferenceGuid,
924  LabelsAndEventClasses::EXECUTION_OF_GUID);
925  timelineUtils->RecordEvent(inferenceGuid, LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
926  }
927 
928  bool executionSucceeded = true;
929 
930  {
931  if (m_ProfilingService->IsProfilingEnabled())
932  {
933  m_ProfilingService->IncrementCounterValue(INFERENCES_RUN);
934  }
936  ARMNN_SCOPED_HEAP_PROFILING("Executing");
937  executionSucceeded = Execute(timelineUtils, inferenceGuid);
938  }
939 
940  if (timelineUtils)
941  {
942  // Add end of life of the inference timeline if profiling is enabled.
943  timelineUtils->RecordEvent(inferenceGuid, LabelsAndEventClasses::ARMNN_PROFILING_EOL_EVENT_CLASS);
944  timelineUtils->Commit();
945  }
946 
947  return executionSucceeded ? Status::Success : Status::Failure;
948 }
949 
950 void LoadedNetwork::EnqueueInput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo)
951 {
952  if (layer.GetType() != LayerType::Input)
953  {
954  throw InvalidArgumentException("EnqueueInput: given layer not an InputLayer");
955  }
956 
957  if (tensorHandle == nullptr)
958  {
959  throw InvalidArgumentException("EnqueueInput: tensorHandle must not be NULL");
960  }
961 
962  InputQueueDescriptor inputQueueDescriptor;
963  WorkloadInfo info;
964 
965  inputQueueDescriptor.m_Inputs.push_back(tensorHandle);
966  info.m_InputTensorInfos.push_back(tensorInfo);
967 
968  ARMNN_ASSERT_MSG(layer.GetNumOutputSlots() == 1, "Can only handle Input Layer with one output");
969  const OutputHandler& handler = layer.GetOutputHandler();
970  const TensorInfo& outputTensorInfo = handler.GetTensorInfo();
971  ITensorHandle* outputTensorHandle = handler.GetData();
972  ARMNN_ASSERT_MSG(outputTensorHandle != nullptr,
973  "Data should have been allocated.");
974  inputQueueDescriptor.m_Outputs.push_back(outputTensorHandle);
975  info.m_OutputTensorInfos.push_back(outputTensorInfo);
976 
977  MemorySourceFlags importFlags = outputTensorHandle->GetImportFlags();
978  bool needMemCopy = true;
979  if (m_NetworkProperties.m_ImportEnabled) // Try import the input tensor
980  {
981  if(CheckFlag(importFlags, m_NetworkProperties.m_InputSource))
982  {
983  needMemCopy = false;
984  // This assumes a CPU Tensor handle
985  void* mem = tensorHandle->Map(false);
986  if (outputTensorHandle->Import(mem, m_NetworkProperties.m_InputSource))
987  {
988  tensorHandle->Unmap();
989  return; // No need for a workload since the import has been done.
990  }
991  tensorHandle->Unmap();
992  throw MemoryImportException("EnqueueInput: Memory Import failed");
993  }
994  }
995  if (needMemCopy)
996  {
997  // Create a mem copy workload for input since we did not import
998  std::unique_ptr<IWorkload> inputWorkload = std::make_unique<CopyMemGenericWorkload>(inputQueueDescriptor, info);
999 
1000  ARMNN_ASSERT_MSG(inputWorkload, "No input workload created");
1001 
1002  std::unique_ptr<TimelineUtilityMethods> timelineUtils =
1003  TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
1004  if (timelineUtils)
1005  {
1006  // Add Input Workload to the post-optimisation network structure
1007  AddWorkloadStructure(timelineUtils, inputWorkload, layer);
1008  timelineUtils->Commit();
1009  }
1010 
1011  m_InputQueue.push_back(move(inputWorkload));
1012  }
1013 }
1014 
1015 void LoadedNetwork::EnqueueOutput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo)
1016 {
1017  if (layer.GetType() != LayerType::Output)
1018  {
1019  throw InvalidArgumentException("EnqueueOutput: given layer not an OutputLayer");
1020  }
1021 
1022  if (tensorHandle == nullptr)
1023  {
1024  throw InvalidArgumentException("EnqueueOutput: tensorHandle must not be NULL");
1025  }
1026 
1027  OutputQueueDescriptor outputQueueDescriptor;
1028  WorkloadInfo info;
1029 
1030  outputQueueDescriptor.m_Outputs.push_back(tensorHandle);
1031  info.m_OutputTensorInfos.push_back(tensorInfo);
1032 
1033  ARMNN_ASSERT_MSG(layer.GetNumInputSlots() == 1, "Output Layer should have exactly one input.");
1034 
1035  // Gets the output handler from the previous node.
1036  const OutputHandler& outputHandler = layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOutputHandler();
1037 
1038  const TensorInfo& inputTensorInfo = outputHandler.GetTensorInfo();
1039  ITensorHandle* inputTensorHandle = outputHandler.GetData();
1040  ARMNN_ASSERT_MSG(inputTensorHandle != nullptr, "Data should have been allocated.");
1041 
1042  // Try import the output tensor.
1043  // Note: We can only import the output pointer if all of the following hold true:
1044  // a) The imported pointer is aligned sufficiently
1045  // b) The tensor has zero padding
1046  // c) There is only one connection to the OutputSlot and it is to an OutputLayer.
1047  // d) The output pointer is allocated via malloc. (Other types will be supported in a later release)
1048  // e) m_IsExportEnabled must be set to true
1049  bool needMemCopy = true;
1050  if (m_NetworkProperties.m_ExportEnabled &&
1051  (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetNumConnections() == 1))
1052  {
1053  if(layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOwningLayer().GetType() != LayerType::Input)
1054  {
1055  MemorySourceFlags importFlags = inputTensorHandle->GetImportFlags();
1056  if (CheckFlag(importFlags, m_NetworkProperties.m_OutputSource))
1057  {
1058  needMemCopy = false;
1059  void *mem = tensorHandle->Map(false);
1060  bool importOk = inputTensorHandle->Import(mem, m_NetworkProperties.m_OutputSource);
1061  tensorHandle->Unmap();
1062 
1063  if (importOk)
1064  {
1065  // Insert synchronization workload
1066  MemSyncQueueDescriptor syncDesc;
1067  syncDesc.m_Inputs.push_back(inputTensorHandle);
1068  info.m_InputTensorInfos.push_back(inputTensorInfo);
1069  auto syncWorkload = std::make_unique<SyncMemGenericWorkload>(syncDesc, info);
1070  ARMNN_ASSERT_MSG(syncWorkload, "No sync workload created");
1071  m_OutputQueue.push_back(move(syncWorkload));
1072  }
1073  else
1074  {
1075  throw MemoryExportException("EnqueueOutput: Memory Export failed");
1076  }
1077  }
1078  }
1079  }
1080  if (needMemCopy)
1081  {
1082  // If we got here then we didn't export the memory, so add an output workload which performs a memcopy.
1083  outputQueueDescriptor.m_Inputs.push_back(inputTensorHandle);
1084  info.m_InputTensorInfos.push_back(inputTensorInfo);
1085 
1086  std::unique_ptr<IWorkload> outputWorkload =
1087  std::make_unique<CopyMemGenericWorkload>(outputQueueDescriptor, info);
1088  ARMNN_ASSERT_MSG(outputWorkload, "No output workload created");
1089 
1090  std::unique_ptr<TimelineUtilityMethods> timelineUtils =
1091  TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
1092  if (timelineUtils)
1093  {
1094  // Add Output Workload to the post-optimisation network structure
1095  AddWorkloadStructure(timelineUtils, outputWorkload, layer);
1096  timelineUtils->Commit();
1097  }
1098 
1099  m_OutputQueue.push_back(move(outputWorkload));
1100  }
1101 }
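// Illustrative sketch (not part of this file): the m_ImportEnabled / m_ExportEnabled flags used in
// EnqueueInput()/EnqueueOutput() above are derived from the MemorySources passed in INetworkProperties
// when the network is loaded. Assuming the 22.05 constructor ordering
// (asyncEnabled, inputSource, outputSource, ...), enabling zero-copy input/output looks roughly like:
//
//   armnn::INetworkProperties props(/*asyncEnabled=*/false,
//                                   /*inputSource=*/armnn::MemorySource::Malloc,
//                                   /*outputSource=*/armnn::MemorySource::Malloc);
//   std::string errorMsg;
//   runtime->LoadNetwork(networkId, std::move(optNet), errorMsg, props);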
1102 
1103 void LoadedNetwork::AllocateWorkingMemory(
1104 #if !defined(ARMNN_DISABLE_THREADS)
1105  std::lock_guard<std::mutex>& lock
1106 #endif
1107  )
1108 {
1109  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Working Memory Allocation");
1110 
1111 #if !defined(ARMNN_DISABLE_THREADS)
1112  // this unused parameter makes sure we can only call this function with a valid lock
1113  IgnoreUnused(lock);
1114 #endif
1115  if (m_IsWorkingMemAllocated)
1116  {
1117  return;
1118  }
1119 
1120  if (m_ExternalMemoryManager)
1121  {
1122  m_ExternalMemoryManager->Allocate();
1123 
1124  for (unsigned int i = 0; i < m_TensorMemory.size(); ++i)
1125  {
1126  m_Tensorhandles[i]->Import(m_TensorMemory[i].first->m_Data, m_TensorMemory[i].second);
1127  }
1128  }
1129 
1130  for (auto&& memoryManager : m_BackendMemoryMangers)
1131  {
1132  if (memoryManager)
1133  {
1134  memoryManager->Acquire();
1135  }
1136  }
1137  m_TensorHandleFactoryRegistry.AquireMemory();
1138  m_IsWorkingMemAllocated = true;
1139 }
1140 
1141 void LoadedNetwork::FreeWorkingMemory()
1142 {
1143 #if !defined(ARMNN_DISABLE_THREADS)
1144  std::lock_guard<std::mutex> lockGuard(m_WorkingMemMutex);
1145 #endif
1146 
1147  if (!m_IsWorkingMemAllocated)
1148  {
1149  return;
1150  }
1151 
1152  if (m_ExternalMemoryManager)
1153  {
1154  m_ExternalMemoryManager->Deallocate();
1155  }
1156 
1157  // Inform the memory managers to release memory in their respective memory groups
1158  for (auto&& memoryManager : m_BackendMemoryMangers)
1159  {
1160  if (memoryManager)
1161  {
1162  memoryManager->Release();
1163  }
1164  }
1165  m_TensorHandleFactoryRegistry.ReleaseMemory();
1166  m_IsWorkingMemAllocated = false;
1167 }
1168 
1169 bool LoadedNetwork::Execute(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
1170  ProfilingGuid inferenceGuid)
1171 {
1172  bool success = true;
1173 
1174  auto Fail = [&](const std::exception& error)
1175  {
1176  ARMNN_LOG(error) << "An error occurred attempting to execute a workload: " << error.what();
1177  success = false;
1178  };
1179 
1180  try
1181  {
1182 #if !defined(ARMNN_DISABLE_THREADS)
1183  std::lock_guard<std::mutex> lockGuard(m_WorkingMemMutex);
1184  AllocateWorkingMemory(lockGuard);
1185 #else
1186  AllocateWorkingMemory();
1187 #endif
1188 
1189  ProfilingDynamicGuid workloadInferenceID(0);
1190  auto ExecuteQueue = [&timelineUtils, &workloadInferenceID, &inferenceGuid](WorkloadQueue& queue)
1191  {
1192  for (auto& workload : queue)
1193  {
1194  if(timelineUtils)
1195  {
1196  workloadInferenceID = timelineUtils->RecordWorkloadInferenceAndStartOfLifeEvent(workload->GetGuid(),
1197  inferenceGuid);
1198  }
1199  workload->Execute();
1200  if(timelineUtils)
1201  {
1202  timelineUtils->RecordEndOfLifeEvent(workloadInferenceID);
1203  }
1204  }
1205  };
1206 
1207  ExecuteQueue(m_InputQueue);
1208  ExecuteQueue(m_WorkloadQueue);
1209  ExecuteQueue(m_OutputQueue);
1210  }
1211  catch (const RuntimeException& error)
1212  {
1213  Fail(error);
1214  }
1215  catch (const std::runtime_error& error)
1216  {
1217  Fail(error);
1218  }
1219 
1220  return success;
1221 }
1222 
1223 void LoadedNetwork::EnqueueInput(const ConstTensor& inputTensor, ITensorHandle* inputTensorHandle)
1224 {
1225  if (m_NetworkProperties.m_ImportEnabled) // Try import the input tensor
1226  {
1227  MemorySourceFlags importFlags = inputTensorHandle->GetImportFlags();
1228  if (CheckFlag(importFlags, m_NetworkProperties.m_InputSource) )
1229  {
1230  std::unique_ptr<ITensorHandle> tensorHandle =
1231  std::make_unique<ConstPassthroughTensorHandle>(inputTensor.GetInfo(),
1232  inputTensor.GetMemoryArea());
1233  void* mem = tensorHandle->Map(false);
1234 
1235  if (inputTensorHandle->Import(mem, m_NetworkProperties.m_InputSource))
1236  {
1237  tensorHandle->Unmap();
1238  return;
1239  }
1240  tensorHandle->Unmap();
1241  throw MemoryImportException("EnqueueInput: Memory Import failed");
1242  }
1243  else
1244  {
1245  throw MemoryImportException("EnqueueInput: Memory Import failed, backend does not support Import");
1246  }
1247  }
1248  else
1249  {
1250  std::unique_ptr<ITensorHandle> tensorHandle =
1251  std::make_unique<ConstPassthroughTensorHandle>(inputTensor.GetInfo(), inputTensor.GetMemoryArea());
1252 
1253  auto copyFunc = [](void* dst, const void* src, size_t size)
1254  {
1255  memcpy(dst, src, size);
1256  };
1257 
1258  CopyTensorContentsGeneric(tensorHandle.get(), inputTensorHandle, copyFunc);
1259  }
1260 }
1261 
1262 // Note: We can only import the output pointer if all of the following hold true:
1263 // a) The imported pointer is aligned sufficiently
1264 // b) The tensor has zero padding
1265 // c) There is only one connection to the OutputSlot and it is to an OutputLayer.
1266 // d) The output pointer is allocated via malloc. (Other types will be supported in a later release)
1267 // e) m_IsExportEnabled must be set to true
1268 void LoadedNetwork::ImportOutputTensor(const Tensor& outputTensor, ITensorHandle* outputTensorHandle)
1269 {
1270  ARMNN_ASSERT_MSG(outputTensorHandle != nullptr, "Data should have been allocated.");
1271  MemorySourceFlags importFlags = outputTensorHandle->GetImportFlags();
1272  if (CheckFlag(importFlags, m_NetworkProperties.m_OutputSource))
1273  {
1274  std::unique_ptr<ITensorHandle> tensorHandle =
1275  std::make_unique<PassthroughTensorHandle>(outputTensor.GetInfo(),
1276  outputTensor.GetMemoryArea());
1277 
1278  void* mem = tensorHandle->Map(false);
1279  bool importOk = outputTensorHandle->Import(mem, m_NetworkProperties.m_OutputSource);
1280  tensorHandle->Unmap();
1281 
1282  if (!importOk)
1283  {
1284  throw MemoryExportException("ImportOutputTensor: Memory Export failed");
1285  }
1286  }
1287  else
1288  {
1289  throw MemoryExportException("ImportOutputTensor: Memory Export failed, attempting to export Input Layer");
1290  }
1291 
1292 }
1293 
1294 void CopyToOutputTensor(const Tensor& outputTensor, ITensorHandle* outputTensorHandle)
1295 {
1296  auto copyFunc = [](void* dst, const void* src, size_t size)
1297  {
1298  memcpy(dst, src, size);
1299  };
1300 
1301  std::unique_ptr<ITensorHandle> tensorHandle =
1302  std::make_unique<PassthroughTensorHandle>(outputTensor.GetInfo(),
1303  outputTensor.GetMemoryArea());
1304 
1305  CopyTensorContentsGeneric(outputTensorHandle, tensorHandle.get(), copyFunc);
1306 }
1307 
1308 
1309 const armnn::ConstTensor GetInputTensor(const LayerBindingId layerId, const InputTensors& inputTensors)
1310 {
1311  for (auto inputTensorPair : inputTensors)
1312  {
1313  LayerBindingId id = inputTensorPair.first;
1314  if (id == layerId)
1315  {
1316  return inputTensorPair.second;
1317  }
1318  }
1319  throw InvalidArgumentException("Input does not exist.");
1320 }
1321 
1322 const armnn::Tensor GetOutputTensor(const LayerBindingId layerId, const OutputTensors& outputTensors)
1323 {
1324  for (auto outputTensorPair : outputTensors)
1325  {
1326  LayerBindingId id = outputTensorPair.first;
1327  if (id == layerId)
1328  {
1329  return outputTensorPair.second;
1330  }
1331  }
1332  throw InvalidArgumentException("Output does not exist.");
1333 }
1334 
1335 std::vector<ImportedInputId> LoadedNetwork::ImportInputs(const InputTensors& inputTensors,
1336  MemorySource forceImportMemorySource)
1337 {
1338  if (!m_NetworkProperties.m_AsyncEnabled)
1339  {
1340  // Cannot import if import is not enabled and forceImportMemorySource is undefined
1341  if (forceImportMemorySource == MemorySource::Undefined)
1342  {
1343  throw MemoryImportException("ImportInputs: Memory Import failed, NetworkProperties.m_ImportEnabled");
1344  }
1345  if (inputTensors.size() != m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetNumInputs())
1346  {
1347  throw MemoryImportException("ImportInputs: Force Import failed, incorrect number of tensors");
1348  }
1349 
1350  std::vector<ImportedInputId> importedInputs;
1351  Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
1352  unsigned int inputIndex = 0;
1353  for (const BindableLayer* inputLayer : graph.GetInputLayers())
1354  {
1355  auto outputTensorHandle = m_PreImportedInputHandles[inputIndex].m_TensorHandle.get();
1356 
1357  if (!outputTensorHandle)
1358  {
1359  inputIndex++;
1360  continue;
1361  }
1362 
1363  auto layerBindingId = inputLayer->GetBindingId();
1364  auto it = std::find_if(inputTensors.begin(), inputTensors.end(), [=](const auto& inputTensor)
1365  {
1366  return inputTensor.first == layerBindingId;
1367  });
1368 
1369  if (it == inputTensors.end())
1370  {
1371  inputIndex++;
1372  continue;
1373  }
1374 
1375  const auto& inputTensor = *it;
1376  std::unique_ptr<ITensorHandle> passThroughTensorHandle =
1377  std::make_unique<ConstPassthroughTensorHandle>(inputTensor.second.GetInfo(),
1378  inputTensor.second.GetMemoryArea());
1379 
1380  if (outputTensorHandle->CanBeImported(passThroughTensorHandle->Map(), forceImportMemorySource)
1381  && (outputTensorHandle->Import(passThroughTensorHandle->Map(), forceImportMemorySource)))
1382  {
1383  importedInputs.push_back(inputIndex);
1384  }
1385  passThroughTensorHandle->Unmap();
1386 
1387  inputIndex++;
1388  }
1389 
1390  return importedInputs;
1391  }
1392  else
1393  {
1394  // Import when import is enabled in the network properties
1395  std::vector<ImportedInputId> importedInputs;
1396  Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
1397 
1398  for (auto inputTensor : inputTensors)
1399  {
1400  auto layerBindingId = inputTensor.first;
1401  auto it = std::find_if(graph.GetInputLayers().begin(), graph.GetInputLayers().end(), [=](auto* layer)
1402  {
1403  return layer->GetBindingId() == layerBindingId;
1404  });
1405 
1406  if (it == graph.GetInputLayers().end())
1407  {
1408  throw MemoryImportException(fmt::format(
1409  "ImportInputs: Memory Import failed, unknown LayerBindingId: {}", layerBindingId));
1410  }
1411 
1412  const Layer* layer = *it;
1413  if (layer->GetType() != LayerType::Input)
1414  {
1415  throw InvalidArgumentException("ImportInputs: given layer not an InputLayer");
1416  }
1417 
1418  auto& backend = m_Backends.at(layer->GetBackendId());
1419  if (!HasCapability(BackendOptions::BackendOption{"PreImportIOTensors", true}, backend->GetCapabilities()))
1420  {
1421  std::string er = backend->GetId();
1422  er += " does not have PreImportIOTensors capability";
1423  throw BackendCapabilityException(er);
1424  }
1425 
1426  const OutputSlot& outputSlot = layer->GetOutputSlots()[0];
1427 
1428  ITensorHandleFactory::FactoryId factoryId = outputSlot.GetTensorHandleFactoryId();
1429  const TensorInfo& tensorInfo = outputSlot.GetTensorInfo();
1430 
1431  ITensorHandleFactory* handleFactory = m_TensorHandleFactoryRegistry.GetFactory(factoryId);
1432  ARMNN_ASSERT(handleFactory);
1433 
1434  ImportedTensorHandlePin importedTensorHandlePin{layerBindingId,
1435  handleFactory->CreateTensorHandle(tensorInfo, false)};
1436 
1437  ITensorHandle* tensorHandle = importedTensorHandlePin.m_TensorHandle.get();
1438 
1439  if (!CheckFlag(tensorHandle->GetImportFlags(), m_NetworkProperties.m_InputSource))
1440  {
1441  throw MemoryImportException(
1442  fmt::format("ImportInputs: Memory Import failed, backend: "
1443  "{} does not support importing from source {}"
1444  , factoryId, m_NetworkProperties.m_InputSource));
1445  }
1446 
1447  std::unique_ptr<ITensorHandle> passThroughTensorHandle =
1448  std::make_unique<ConstPassthroughTensorHandle>(inputTensor.second.GetInfo(),
1449  inputTensor.second.GetMemoryArea());
1450 
1451  if (tensorHandle->Import(passThroughTensorHandle->Map(), m_NetworkProperties.m_InputSource))
1452  {
1453  importedInputs.push_back(m_CurImportedInputId++);
1454  passThroughTensorHandle->Unmap();
1455  }
1456  else
1457  {
1458  passThroughTensorHandle->Unmap();
1459  throw MemoryImportException("ImportInputs: Memory Import failed");
1460  }
1461 
1462  m_PreImportedInputHandles.push_back(std::move(importedTensorHandlePin));
1463  }
1464  return importedInputs;
1465  }
1466 }
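// Illustrative sketch (not part of this file): pre-imported inputs returned by ImportInputs() are
// meant to be forwarded to EnqueueWorkload() so the per-inference copy/import step can be skipped.
// In the synchronous force-import flow the same inputTensors are passed to both calls (the input
// count check in EnqueueWorkload still applies). Assuming the matching IRuntime wrappers:
//
//   std::vector<armnn::ImportedInputId> importedIds =
//       runtime->ImportInputs(networkId, inputTensors, armnn::MemorySource::Malloc);
//   runtime->EnqueueWorkload(networkId, inputTensors, outputTensors, importedIds);
//   runtime->ClearImportedInputs(networkId, importedIds); // release the imports when done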
1467 
1468 std::vector<ImportedOutputId> LoadedNetwork::ImportOutputs(const OutputTensors& outputTensors,
1469  MemorySource forceImportMemorySource)
1470 {
1471  if (!m_NetworkProperties.m_AsyncEnabled)
1472  {
1473  // Cannot import if import is not enabled and forceImportMemorySource is undefined
1474  if (forceImportMemorySource == MemorySource::Undefined)
1475  {
1476  throw MemoryImportException("ImportOutputs: Memory Import failed, NetworkProperties.m_ImportEnabled");
1477  }
1478  // If forceImportMemorySource is defined, try import if memory is aligned
1479  if (outputTensors.size() != m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetNumOutputs())
1480  {
1481  throw MemoryImportException("ImportOutputs: Force Import failed, incorrect number of tensors");
1482  }
1483  std::vector<ImportedOutputId> importedOutputs;
1484  Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
1485 
1486  unsigned int outputIndex = 0;
1487  for (const BindableLayer* const outputLayer : graph.GetOutputLayers())
1488  {
1489  auto inputTensorHandle = m_PreImportedOutputHandles[outputIndex].m_TensorHandle.get();
1490 
1491  if (!inputTensorHandle)
1492  {
1493  outputIndex++;
1494  continue;
1495  }
1496 
1497  auto layerBindingId = outputLayer->GetBindingId();
1498  auto it = std::find_if(outputTensors.begin(), outputTensors.end(), [=] (const auto& outputTensor)
1499  {
1500  return outputTensor.first == layerBindingId;
1501  });
1502 
1503  if (it == outputTensors.end())
1504  {
1505  outputIndex++;
1506  continue;
1507  }
1508 
1509  const auto outputTensor = *it;
1510  // Check if the output memory can be imported
1511  if (inputTensorHandle->CanBeImported(outputTensor.second.GetMemoryArea(), forceImportMemorySource)
1512  && inputTensorHandle->Import(outputTensor.second.GetMemoryArea(), forceImportMemorySource))
1513  {
1514  importedOutputs.push_back(outputIndex);
1515  }
1516  outputIndex++;
1517  }
1518  return importedOutputs;
1519  }
1520 
1521  std::vector<ImportedOutputId> importedOutputs;
1522  Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
1523 
1524  for (const auto& outputTensor : outputTensors)
1525  {
1526  auto layerBindingId = outputTensor.first;
1527  auto it = std::find_if(graph.GetOutputLayers().begin(), graph.GetOutputLayers().end(), [=](auto* layer)
1528  {
1529  return layer->GetBindingId() == layerBindingId;
1530  });
1531 
1532  if (it == graph.GetOutputLayers().end())
1533  {
1534  throw MemoryImportException(fmt::format("ImportOutputs: Memory Import failed, unknown LayerBindingId: {}",
1535  layerBindingId));
1536  }
1537 
1538  const Layer* layer = *it;
1539  if (layer->GetType() != LayerType::Output)
1540  {
1541  throw InvalidArgumentException("ImportOutputs: given layer not an OutputLayer");
1542  }
1543 
1544  auto& backend = m_Backends.at(layer->GetBackendId());
1545  if (!HasCapability(BackendOptions::BackendOption{"PreImportIOTensors", true}, backend->GetCapabilities()))
1546  {
1547  std::string er = backend->GetId();
1548  er += " does not have PreImportIOTensors capability";
1549  throw BackendCapabilityException(er);
1550  }
1551 
1552  const InputSlot& inputSlot = layer->GetInputSlots()[0];
1553  ITensorHandleFactory::FactoryId factoryId = inputSlot.GetConnectedOutputSlot()->GetTensorHandleFactoryId();
1554  const TensorInfo& tensorInfo = inputSlot.GetConnectedOutputSlot()->GetTensorInfo();
1555 
1556  ITensorHandleFactory* handleFactory = m_TensorHandleFactoryRegistry.GetFactory(factoryId);
1557  ARMNN_ASSERT(handleFactory);
1558 
1559  ImportedTensorHandlePin importedTensorHandlePin{layerBindingId,
1560  handleFactory->CreateTensorHandle(tensorInfo, false)};
1561 
1562  ITensorHandle* tensorHandle = importedTensorHandlePin.m_TensorHandle.get();
1563 
1564  if (!CheckFlag(tensorHandle->GetImportFlags(), m_NetworkProperties.m_OutputSource))
1565  {
1566  throw MemoryImportException(fmt::format("ImportInputs: Memory Import failed, backend: "
1567  "{} does not support importing from source {}"
1568  , factoryId, m_NetworkProperties.m_OutputSource));
1569  }
1570 
1571  if (tensorHandle->Import(outputTensor.second.GetMemoryArea(), m_NetworkProperties.m_OutputSource))
1572  {
1573  importedOutputs.push_back(m_CurImportedOutputId++);
1574  }
1575  else
1576  {
1577  throw MemoryImportException("ImportInputs: Memory Import failed");
1578  }
1579 
1580  m_PreImportedOutputHandles.push_back(std::move(importedTensorHandlePin));
1581  }
1582 
1583  return importedOutputs;
1584 }
1585 
1586 void LoadedNetwork::ClearImportedInputs(const std::vector<ImportedInputId> inputIds)
1587 {
1588  for (auto id : inputIds)
1589  {
1590  if (id > m_PreImportedInputHandles.size())
1591  {
1592  throw InvalidArgumentException(fmt::format("ClearImportedInputs::Unknown ImportedInputId: {}", id));
1593  }
1594 
1595  auto& importedTensorHandle = m_PreImportedInputHandles[id].m_TensorHandle;
1596  if (!importedTensorHandle)
1597  {
1598  throw InvalidArgumentException(
1599  fmt::format("ClearImportedInputs::ImportedInput with id: {} has already been deleted", id));
1600  }
1601  // Call Unimport then destroy the tensorHandle
1602  importedTensorHandle->Unimport();
1603  importedTensorHandle = {};
1604  }
1605 }
1606 
1607 void LoadedNetwork::ClearImportedOutputs(const std::vector<ImportedOutputId> outputIds)
1608 {
1609  for (auto id : outputIds)
1610  {
1611  if (id > m_PreImportedOutputHandles.size())
1612  {
1613  throw InvalidArgumentException(fmt::format("ClearImportedOutputs::Unknown ImportedOutputId: {}", id));
1614  }
1615 
1616  auto& importedTensorHandle = m_PreImportedOutputHandles[id].m_TensorHandle;
1617  if (!importedTensorHandle)
1618  {
1619  throw InvalidArgumentException(
1620  fmt::format("ClearImportedOutputs::ImportedOutput with id: {} has already been deleted", id));
1621  }
1622  // Call Unimport then destroy the tensorHandle
1623  importedTensorHandle->Unimport();
1624  importedTensorHandle = {};
1625  }
1626 }
1627 
1628 Status LoadedNetwork::Execute(const InputTensors& inputTensors,
1629  const OutputTensors& outputTensors,
1630  IWorkingMemHandle& iWorkingMemHandle,
1631  std::vector<ImportedInputId> preImportedInputs,
1632  std::vector<ImportedOutputId> preImportedOutputs)
1633 {
1634  const Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
1635 
1636  if (inputTensors.size() + preImportedInputs.size() != graph.GetNumInputs())
1637  {
1638  if (preImportedInputs.empty())
1639  {
1640  throw InvalidArgumentException("LoadedNetwork::Execute: Number of inputs provided does not match network.");
1641  }
1642  else
1643  {
1644  throw InvalidArgumentException("LoadedNetwork::Execute: "
1645  "Number of inputs + preImportedInputs provided does not match network.");
1646  }
1647  }
1648 
1649  if (outputTensors.size() + preImportedOutputs.size() != graph.GetNumOutputs())
1650  {
1651  if (preImportedOutputs.empty())
1652  {
1653  throw InvalidArgumentException("LoadedNetwork::Execute: "
1654  "Number of outputs provided does not match network.");
1655  }
1656  else
1657  {
1658  throw InvalidArgumentException("LoadedNetwork::Execute: "
1659  "Number of outputs + preImportedOutputs provided does not match network.");
1660  }
1661  }
1662 
1663  WorkingMemHandle& workingMemHandle = dynamic_cast<WorkingMemHandle&>(iWorkingMemHandle);
1664  // Collect all the given LayerBindingIds and check them for duplicates and unknowns.
1665  std::vector<LayerBindingId>& bindingIds = workingMemHandle.GetBindingIdVector();
1666  unsigned int index = 0;
1667  for (auto pair : inputTensors)
1668  {
1669  bindingIds[index++] = pair.first;
1670  }
1671  for (ImportedInputId id : preImportedInputs)
1672  {
1673  bindingIds[index++] = ValidateImportedInputID(id);
1674  }
1675  for (auto pair : outputTensors)
1676  {
1677  bindingIds[index++] = pair.first;
1678  }
1679  for (ImportedOutputId id : preImportedOutputs)
1680  {
1681  bindingIds[index++] = ValidateImportedOutputID(id);
1682  }
1683 
1684  workingMemHandle.ValidateBindingIds();
1685 
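 // resetMemHandle restores the WorkingMemHandle's own input/output tensor handles in every
 // connection that this call temporarily points at a pre-imported handle, so the handle can be
 // reused by the next Execute; it runs on both the normal and the exception paths further down.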
1686  auto resetMemHandle = [&]()
1687  {
1688  for (ImportedInputId id: preImportedInputs)
1689  {
1690  const LayerBindingId layerBindingId = m_PreImportedInputHandles[id].m_LayerBindingId;
1691 
1692  auto inputHandle = workingMemHandle.GetInputHandle(layerBindingId);
1693  auto inputConnections = workingMemHandle.GetInputConnections(layerBindingId);
1694  for (auto it : inputConnections)
1695  {
1696  *it = inputHandle;
1697  }
1698  }
1699 
1700  for (ImportedOutputId id: preImportedOutputs)
1701  {
1702  const LayerBindingId layerBindingId = m_PreImportedOutputHandles[id].m_LayerBindingId;
1703 
1704  auto outputHandle = workingMemHandle.GetOutputHandle(layerBindingId);
1705  auto outputConnections = workingMemHandle.GetOutputConnection(layerBindingId);
1706 
1707  for (auto it : outputConnections)
1708  {
1709  *it = outputHandle;
1710  }
1711  }
1712  };
1713 
1714  std::unique_ptr<TimelineUtilityMethods> timelineUtils =
1715  TimelineUtilityMethods::GetTimelineUtils(*m_ProfilingService);
1716  ProfilingGuid inferenceGuid = m_ProfilingService->GetNextGuid();
1717  if (timelineUtils)
1718  {
1719  // Add inference timeline trace if profiling is enabled.
1720  ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
1721  timelineUtils->CreateTypedEntity(inferenceGuid,LabelsAndEventClasses::INFERENCE_GUID);
1722  timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
1723  networkGuid,
1724  inferenceGuid,
1725  LabelsAndEventClasses::EXECUTION_OF_GUID);
1726  timelineUtils->RecordEvent(inferenceGuid,LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
1727  }
1728 
1729  bool executionSucceeded = true;
1730 
1731  if (timelineUtils)
1732  {
1733  // Add end of life of the inference timeline if profiling is enabled.
1734  timelineUtils->RecordEvent(inferenceGuid,LabelsAndEventClasses::ARMNN_PROFILING_EOL_EVENT_CLASS);
1735  timelineUtils->Commit();
1736  }
1737 
1738  if (!workingMemHandle.IsAllocated())
1739  {
1740  workingMemHandle.Allocate();
1741  }
1742 
1743  {
1744  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "PrepareInputs");
1745  for (auto pair : inputTensors)
1746  {
1747  EnqueueInput(pair.second, workingMemHandle.GetInputHandle(pair.first));
1748  }
1749 
1750  // Swap in the pre-imported inputs if any
1751  for (ImportedInputId id : preImportedInputs)
1752  {
1753  const ImportedTensorHandlePin& importedInputPin = m_PreImportedInputHandles[id];
1754  const LayerBindingId layerBindingId = m_PreImportedInputHandles[id].m_LayerBindingId;
1755  const auto& preimportedHandle = importedInputPin.m_TensorHandle;
1756 
1757  auto inputConnections = workingMemHandle.GetInputConnections(layerBindingId);
1758  for (auto it : inputConnections)
1759  {
1760  *it = preimportedHandle.get();
1761  }
1762  }
1763  }
1764  {
1765  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "PrepareOutputs");
1766  if (m_NetworkProperties.m_ExportEnabled)
1767  {
1768  for (auto pair: outputTensors)
1769  {
1770  ImportOutputTensor(pair.second, workingMemHandle.GetOutputHandle(pair.first));
1771  }
1772  }
1773 
1774  for (ImportedOutputId id : preImportedOutputs)
1775  {
1776  const ImportedTensorHandlePin& importedOutputPin = m_PreImportedOutputHandles[id];
1777  const LayerBindingId layerBindingId = m_PreImportedOutputHandles[id].m_LayerBindingId;
1778  const auto& preimportedHandle = importedOutputPin.m_TensorHandle;
1779 
1780  auto outputConnections = workingMemHandle.GetOutputConnection(layerBindingId);
1781 
1782  for (auto it : outputConnections)
1783  {
1784  *it = preimportedHandle.get();
1785  }
1786  }
1787  }
1788 
1789  auto Fail = [&](const std::exception& error)
1790  {
1791  ARMNN_LOG(error) << "An error occurred attempting to execute a workload: " << error.what();
1792  executionSucceeded = false;
1793  };
1794  ProfilingDynamicGuid workloadInferenceID(0);
1795 
1796  try
1797  {
1798  for (unsigned int i = 0; i < m_WorkloadQueue.size(); ++i)
1799  {
1800  auto& workload = m_WorkloadQueue[i];
1801  if (timelineUtils)
1802  {
1803  workloadInferenceID = timelineUtils->RecordWorkloadInferenceAndStartOfLifeEvent(workload->GetGuid(),
1804  inferenceGuid);
1805  }
1806  workload->ExecuteAsync(workingMemHandle.GetWorkingMemDescriptorAt(i));
1807 
1808  if (timelineUtils)
1809  {
1810  timelineUtils->RecordEndOfLifeEvent(workloadInferenceID);
1811  }
1812  }
1813  }
1814  catch (const RuntimeException& error)
1815  {
1816  resetMemHandle();
1817  Fail(error);
1818  }
1819  catch (const std::runtime_error& error)
1820  {
1821  resetMemHandle();
1822  Fail(error);
1823  }
1824  catch (...)
1825  {
1826  resetMemHandle();
1827  throw;
1828  }
1829 
1830  if (!m_NetworkProperties.m_ExportEnabled)
1831  {
1832  for (auto pair: outputTensors)
1833  {
1834  CopyToOutputTensor(pair.second, workingMemHandle.GetOutputHandle(pair.first));
1835  }
1836  }
1837  else
1838  {
1839  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "SyncMemGeneric_Execute");
1840  workingMemHandle.MemSyncOutputs();
1841  }
1842 
1843  resetMemHandle();
1844 
1845  return executionSucceeded ? Status::Success : Status::Failure;
1846 }
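A minimal calling sketch for the asynchronous execution path above; loadedNetwork, networkId, inputTensors and outputTensors are assumed to exist:

    std::unique_ptr<armnn::IWorkingMemHandle> memHandle = loadedNetwork.CreateWorkingMemHandle(networkId);
    armnn::Status status = loadedNetwork.Execute(inputTensors, outputTensors, *memHandle);
    if (status != armnn::Status::Success)
    {
        // A workload threw during execution; the error has already been logged via ARMNN_LOG(error).
    }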
1847 
1848 /// Create a new unique WorkingMemHandle object. Create multiple handles if you wish to have
1849 /// overlapped Execution by calling this function from different threads.
1850 std::unique_ptr<IWorkingMemHandle> LoadedNetwork::CreateWorkingMemHandle(NetworkId networkId)
1851 {
1852  Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
1853 
1854  // Tensors that will need to be allocated internally within armnn
1855  std::vector<std::unique_ptr<ITensorHandle>> managedTensorHandles;
1856  // Tensors that will be allocated externally by the user
1857  std::vector<std::unique_ptr<ITensorHandle>> unmanagedTensorHandles;
1858 
1859  std::vector<WorkingMemDescriptor> workingMemDescriptors;
1860  std::unordered_map<LayerGuid, WorkingMemDescriptor> workingMemDescriptorMap;
1861 
1862  auto GetTensorHandle = [&](Layer* layer, const OutputSlot& outputSlot)
1863  {
1864  ITensorHandleFactory::FactoryId factoryId = outputSlot.GetTensorHandleFactoryId();
1865  const TensorInfo& tensorInfo = outputSlot.GetTensorInfo();
1866 
1867  if (factoryId == ITensorHandleFactory::LegacyFactoryId)
1868  {
1869  BackendId id = layer->GetBackendId();
1870  ARMNN_NO_DEPRECATE_WARN_BEGIN
1871  return m_WorkloadFactories.at(id)->CreateTensorHandle(tensorInfo, false);
1872  ARMNN_NO_DEPRECATE_WARN_END
1873  }
1874  else
1875  {
1876  ITensorHandleFactory* handleFactory = m_TensorHandleFactoryRegistry.GetFactory(factoryId);
1877  ARMNN_ASSERT(handleFactory);
1878  return handleFactory->CreateTensorHandle(tensorInfo, false);
1879  }
1880  };
1881 
1882  struct HandleInfo
1883  {
1884  ITensorHandle* m_TensorHandle;
1885 
1886  bool m_IsInputLayerHandle = false;
1887  bool m_IsOutputLayerHandle = false;
1888 
1889  WorkingMemHandle::InputMemDescriptorCoords m_InputMemDescriptorCoords;
1890  WorkingMemHandle::OutputMemDescriptorCoords m_OutputMemDescriptorCoords;
1891  };
1892 
1893  std::unordered_map<const OutputSlot*, HandleInfo> outputToHandleInfoMap;
1894 
1895  unsigned int layerIndex = 0;
1896  for (auto&& layer : order)
1897  {
1898  // Constant layers' execution and management are handled during loaded network construction
1899  if (layer->GetType() == LayerType::Constant)
1900  {
1901  continue;
1902  }
1903 
1904  WorkingMemDescriptor workingMemDescriptor;
1905 
1906  bool isMemoryManaged = true;
1907  bool isInputLayer = false;
1908  bool isOutputLayer = false;
1909  bool isConnectedToOutputLayer = false;
1910 
1911  if (layer->GetType() == LayerType::Input || layer->GetType() == LayerType::MemImport)
1912  {
1913  // Input layers/workloads will not be executed so the descriptor is not added to workingMemDescriptors
1914  // However we will still need to manage the tensorHandle
1915  isInputLayer = true;
1916  isMemoryManaged = !m_NetworkProperties.m_ImportEnabled;
1917  }
1918  else if (layer->GetType() == LayerType::Output)
1919  {
1920  isOutputLayer = true;
1921  }
1922 
1923  unsigned int slotIndex = 0;
1924  // Create a tensor handle for each output slot of a layer
1925  // Once we create it, we start managing its lifetime
1926  for (auto& slot : layer->GetOutputSlots())
1927  {
1928  for (unsigned int i = 0; i < slot.GetNumConnections(); ++i)
1929  {
1930  if ((slot.GetConnection(i)->GetOwningLayer().GetType() == LayerType::Output))
1931  {
1932  if (!isConnectedToOutputLayer)
1933  {
1934  isConnectedToOutputLayer = true;
1935  // If Export is enabled, disable memory management so we can export; otherwise we do a copy
1936  isMemoryManaged = !m_NetworkProperties.m_ExportEnabled;
1937  }
1938  else
1939  {
1940  // Importing in this case would likely cause unexpected behaviour, so we disallow it.
1941  ARMNN_LOG(warning) <<
1942  fmt::format("Layer name: '{0}' guid: '{1}' has two or more OutputLayers connected to it. "
1943  "This will prevent importing on the connected OutputLayers.",
1944  layer->GetName(), layer->GetGuid());
1945  isMemoryManaged = true;
1946  }
1947  }
1948  }
1949 
1950  ITensorHandle* tensorHandle;
1951  if (isMemoryManaged)
1952  {
1953  managedTensorHandles.emplace_back(GetTensorHandle(layer, slot));
1954  tensorHandle = managedTensorHandles.back().get();
1955  }
1956  else
1957  {
1958  unmanagedTensorHandles.emplace_back(GetTensorHandle(layer, slot));
1959  tensorHandle = unmanagedTensorHandles.back().get();
1960  }
1961 
1962  workingMemDescriptor.m_Outputs.push_back(tensorHandle);
1963 
1964  HandleInfo& handleInfo = outputToHandleInfoMap[&slot];
1965  handleInfo.m_TensorHandle = tensorHandle;
1966 
1967  // Store the coordinates of the current layer's OutputSlot that is connected to the OutputLayer
1968  if (isConnectedToOutputLayer)
1969  {
1970  handleInfo.m_IsOutputLayerHandle = true;
1971  handleInfo.m_OutputMemDescriptorCoords.m_OutputSlotCoords = {layerIndex, slotIndex};
1972  }
1973  // Store the LayerBindingId of the InputLayer
1974  if (isInputLayer)
1975  {
1976  handleInfo.m_IsInputLayerHandle = true;
1977  LayerBindingId bindingId = static_cast<BindableLayer*>(layer)->GetBindingId();
1978  handleInfo.m_InputMemDescriptorCoords.m_LayerBindingId = bindingId;
1979  }
1980  slotIndex++;
1981  }
1982  // Loop through the input slots in the same layer and decrement the reference counter associated
1983  // with each tensor handle we encounter.
1984  // Once it reaches zero, the lifetime of the tensor handle has ended, and we mark its memory as available
1985  // so that the next tensor handle with a non-overlapping lifetime can share its memory.
1986  for (auto& slot : layer->GetInputSlots())
1987  {
1988  ARMNN_ASSERT(slot.GetConnection());
1989  auto outputSlot = slot.GetConnectedOutputSlot();
1990  auto key = outputSlot->GetOwningLayer().GetGuid();
1991 
1992  // Constant layers' execution and management are handled during loaded network construction
1993  auto found = m_ConstantTensorHandles.find(key);
1994  if (found != m_ConstantTensorHandles.end())
1995  {
1996  ITensorHandle* tensorHandle = found->second;
1997  workingMemDescriptor.m_Inputs.push_back(tensorHandle);
1998 
1999  // Odd case where a constant layer is connected to an output layer
2000  // We will need to create a HandleInfo to track it
2001  if (isOutputLayer)
2002  {
2003  LayerBindingId bindingId = static_cast<BindableLayer*>(layer)->GetBindingId();
2004 
2005  HandleInfo& handleInfo = outputToHandleInfoMap[outputSlot];
2006  handleInfo.m_TensorHandle = tensorHandle;
2007  handleInfo.m_IsOutputLayerHandle = true;
2008  handleInfo.m_OutputMemDescriptorCoords.m_LayerBindingIds.push_back(bindingId);
2009  handleInfo.m_OutputMemDescriptorCoords.m_InputSlotCoords.push_back({layerIndex, 0});
2010  }
2011  continue;
2012  }
2013 
2014  HandleInfo& handleInfo = outputToHandleInfoMap.at(outputSlot);
2015 
2016  ITensorHandle* inputTensorHandle = handleInfo.m_TensorHandle;
2017  workingMemDescriptor.m_Inputs.push_back(inputTensorHandle);
2018 
2019  // Store the LayerBindingId of the OutputLayer
2020  if (isOutputLayer)
2021  {
2022  LayerBindingId bindingId = static_cast<BindableLayer*>(layer)->GetBindingId();
2023  handleInfo.m_OutputMemDescriptorCoords.m_LayerBindingIds.push_back(bindingId);
2024  handleInfo.m_OutputMemDescriptorCoords.m_InputSlotCoords.push_back({layerIndex, 0});
2025  }
2026  // In this case the layer is not an Output Layer but shares its input tensorhandle with an OutputLayer
2027  // It will need to be updated as well, if we swap out the tensorhandle
2028  else if (handleInfo.m_IsOutputLayerHandle)
2029  {
2030  handleInfo.m_OutputMemDescriptorCoords.m_InputSlotCoords.push_back({layerIndex, slot.GetSlotIndex()});
2031  }
2032 
2033  // Store the coordinates of the InputSlots connected to the InputLayer
2034  // There can be more than one InputSlot connected to an InputLayer, so we use a vector
2035  if (handleInfo.m_IsInputLayerHandle)
2036  {
2037  std::pair<LayerGuid, unsigned int> connectionLocation{layerIndex, slot.GetSlotIndex()};
2038  handleInfo.m_InputMemDescriptorCoords.m_InputSlotCoords.emplace_back(connectionLocation);
2039  }
2040  }
2041  workingMemDescriptorMap.insert({layer->GetGuid(), workingMemDescriptor});
2042 
2043  // Input/Output layers/workloads will not be executed, so the descriptor is not added to workingMemDescriptors
2044  // However we will still need to manage the tensorHandle
2045  if (!isInputLayer)
2046  {
2047  workingMemDescriptors.push_back(workingMemDescriptor);
2048  layerIndex++;
2049  }
2050  }
2051 
2052  std::vector<std::pair<std::shared_ptr<TensorMemory>, MemorySource>> tensorMemory;
2053 
2054  auto externalMemoryManager = CreateExternalMemoryManger(tensorMemory);
2055 
2056  // Sort m_TensorMemory, so its order matches the outputSlot order
2057  std::sort(tensorMemory.begin(), tensorMemory.end(),
2058  [](const std::pair<std::shared_ptr<TensorMemory>, MemorySource>& lhs,
2059  const std::pair<std::shared_ptr<TensorMemory>, MemorySource>& rhs)
2060  {
2061  return lhs.first->m_OutputSlotId < rhs.first->m_OutputSlotId;
2062  });
2063 
2064  std::vector<WorkingMemHandle::InputMemDescriptorCoords> inputConnectionsInfo;
2065  std::vector<WorkingMemHandle::OutputMemDescriptorCoords> outputConnectionsInfo;
2066 
2067  for (const auto& handleInfo: outputToHandleInfoMap)
2068  {
2069  if (handleInfo.second.m_IsOutputLayerHandle)
2070  {
2071  outputConnectionsInfo.emplace_back(handleInfo.second.m_OutputMemDescriptorCoords);
2072  }
2073 
2074  if (handleInfo.second.m_IsInputLayerHandle)
2075  {
2076  inputConnectionsInfo.emplace_back(handleInfo.second.m_InputMemDescriptorCoords);
2077  }
2078  }
2079 
2080  return std::make_unique<WorkingMemHandle>(networkId,
2081  inputConnectionsInfo,
2082  outputConnectionsInfo,
2083  workingMemDescriptors,
2084  workingMemDescriptorMap,
2085  std::move(externalMemoryManager),
2086  std::move(tensorMemory),
2087  std::move(managedTensorHandles),
2088  std::move(unmanagedTensorHandles));
2089 }
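A sketch of the overlapped execution pattern described in the comment above CreateWorkingMemHandle; loadedNetwork, networkId and the per-thread tensor containers are assumptions, and <thread> is required:

    auto handle0 = loadedNetwork.CreateWorkingMemHandle(networkId);
    auto handle1 = loadedNetwork.CreateWorkingMemHandle(networkId);

    // Each thread owns its WorkingMemHandle, so the two Execute calls may overlap safely.
    std::thread t0([&] { loadedNetwork.Execute(inputs0, outputs0, *handle0); });
    std::thread t1([&] { loadedNetwork.Execute(inputs1, outputs1, *handle1); });
    t0.join();
    t1.join();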
2090 
2091 void LoadedNetwork::RegisterDebugCallback(const DebugCallbackFunction& func)
2092 {
2093  for (auto&& workloadPtr: m_WorkloadQueue)
2094  {
2095  workloadPtr.get()->RegisterDebugCallback(func);
2096  }
2097 }
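A sketch of registering a debug callback; the lambda matches the DebugCallbackFunction signature and loadedNetwork is assumed to exist:

    loadedNetwork.RegisterDebugCallback(
        [](armnn::LayerGuid guid, unsigned int slotIndex, armnn::ITensorHandle* tensorHandle)
        {
            // Called by Debug workloads; map the handle for read-only inspection of the tensor.
            const void* data = tensorHandle->Map();
            armnn::IgnoreUnused(guid, slotIndex, data);
            tensorHandle->Unmap();
        });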
2098 
2099 
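 // Builds the per-backend memory-block profile for the async (WorkingMemHandle) execution path:
 // each eligible OutputSlot becomes a block whose lifetime starts at the timestep its layer runs
 // and ends once every consuming InputSlot has been visited (tracked by counting connections down).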
2100 void LoadedNetwork::CreateMemoryProfileAsync()
2101 {
2102  struct PartialBlock
2103  {
2104  unsigned int m_StartOfLife;
2105  unsigned int m_Lifetime;
2106 
2107  size_t m_MemSize;
2108  unsigned int m_Index;
2109 
2110  BackendId m_BackendId;
2111  };
2112 
2113  auto align = [](size_t numToAlign)
2114  {
2115  const size_t alignment = sizeof(float);
2116  return ((numToAlign + alignment - 1) / alignment) * alignment;
2117  };
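 // For example, with alignment == sizeof(float) == 4 on typical platforms:
 // align(5) == 8, align(8) == 8, align(10) == 12.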
2118 
2119  std::unordered_map<const OutputSlot*, PartialBlock> memBlockTrackerMap;
2120 
2121  const bool inputImportingEnabled = m_NetworkProperties.m_InputSource != MemorySource::Undefined;
2122  const bool outputImportingEnabled = m_NetworkProperties.m_OutputSource != MemorySource::Undefined;
2123 
2124  unsigned int timestep = 0;
2125  unsigned int outputIndex = 0;
2126  Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
2127 
2128  for (auto&& layer : order)
2129  {
2130  const LayerType& layerType = layer->GetType();
2131  // Don't manage memory if importing.
2132  if (layerType == LayerType::Input && inputImportingEnabled)
2133  {
2134  continue;
2135  }
2136  // Don't manage memory if importing.
2137  if (layerType == LayerType::Output && outputImportingEnabled
2138  && layer->GetInputSlot(0).GetConnectedOutputSlot()->GetNumConnections() == 1)
2139  {
2140  continue;
2141  }
2142  // Because Constant Layer memory cannot be shared, the memory must persist for the lifetime of execution;
2143  // its management is done separately.
2144  if (layerType == LayerType::Constant)
2145  {
2146  continue;
2147  }
2148 
2149  BackendId backendId = layer->GetBackendId();
2150  for (auto& outputSlot : layer->GetOutputSlots())
2151  {
2152  if (!m_SupportsExternallyManagedMemory[backendId])
2153  {
2154  continue;
2155  }
2156 
2157  PartialBlock partialBlock;
2158 
2159  partialBlock.m_StartOfLife = timestep;
2160 
2161  size_t alignedSize = align(outputSlot.GetOutputHandler().GetTensorInfo().GetNumBytes());
2162  partialBlock.m_MemSize = alignedSize;
2163  partialBlock.m_Index = outputIndex++;
2164  partialBlock.m_Lifetime = outputSlot.GetNumConnections();
2165  partialBlock.m_BackendId = backendId;
2166 
2167  if (partialBlock.m_Lifetime == 0)
2168  {
2169  m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
2170  partialBlock.m_StartOfLife,
2171  partialBlock.m_MemSize,
2172  0,
2173  partialBlock.m_Index);
2174  }
2175  else
2176  {
2177  memBlockTrackerMap[&outputSlot] = partialBlock;
2178  }
2179  }
2180 
2181  for (auto& inputSlot : layer->GetInputSlots())
2182  {
2183  const Layer& connectedInputLayer = inputSlot.GetConnectedOutputSlot()->GetOwningLayer();
2184  const LayerType& owningLayerType = connectedInputLayer.GetType();
2185 
2186  if (owningLayerType == LayerType::Constant)
2187  {
2188  continue;
2189  }
2190  if (inputImportingEnabled && owningLayerType == LayerType::Input)
2191  {
2192  continue;
2193  }
2194 
2195  auto outputSlot = inputSlot.GetConnectedOutputSlot();
2196 
2197  PartialBlock& partialBlock = memBlockTrackerMap.at(outputSlot);
2198 
2199  auto& lifetime = partialBlock.m_Lifetime;
2200  --lifetime;
2201 
2202  if (lifetime == 0)
2203  {
2204  m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
2205  timestep,
2206  partialBlock.m_MemSize,
2207  0,
2208  partialBlock.m_Index);
2209  }
2210  }
2211  ++timestep;
2212  }
2213 }
2214 
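 // Synchronous-path counterpart of CreateMemoryProfileAsync above: blocks are keyed on the root
 // ITensorHandle (via TraceSubTensorHandleAncestry), so sub-tensors extend the lifetime of their
 // parent's block instead of creating separate ones.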
2215 void LoadedNetwork::CreateMemoryProfile()
2216 {
2217  // Finds the first TensorHandle ancestor of a SubTensorHandle. If the ITensorHandle provided
2218  // is a TensorHandle, the function just returns it
2219  auto TraceSubTensorHandleAncestry = [](ITensorHandle* const subTensorHandle)
2220  {
2221  ITensorHandle* ancestor = subTensorHandle;
2222  while (ancestor && ancestor->GetParent())
2223  {
2224  ancestor = ancestor->GetParent();
2225  }
2226  return ancestor;
2227  };
2228 
2229  struct PartialBlock
2230  {
2231  unsigned int m_StartOfLife;
2232  unsigned int m_Lifetime;
2233 
2234  size_t m_MemSize;
2235  unsigned int m_Index;
2236 
2237  BackendId m_BackendId;
2238  };
2239 
2240  auto align = [](size_t numToAlign)
2241  {
2242  const size_t alignment = sizeof(float);
2243  return ((numToAlign + alignment - 1) / alignment) * alignment;
2244  };
2245 
2246  std::unordered_map<ITensorHandle*, PartialBlock> memBlockTrackerMap;
2247 
2248  const bool inputImportingEnabled = m_NetworkProperties.m_InputSource != MemorySource::Undefined;
2249  const bool outputImportingEnabled = m_NetworkProperties.m_OutputSource != MemorySource::Undefined;
2250 
2251  unsigned int timestep = 0;
2252  unsigned int outputIndex = 0;
2253  Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
2254 
2255  for (auto&& layer : order)
2256  {
2257  const LayerType& layerType = layer->GetType();
2258  // Don't manage memory if importing.
2259  if (layerType == LayerType::Input && inputImportingEnabled)
2260  {
2261  continue;
2262  }
2263  // Don't manage memory if importing.
2264  if (layerType == LayerType::Output && outputImportingEnabled
2265  && layer->GetInputSlot(0).GetConnectedOutputSlot()->GetNumConnections() == 1)
2266  {
2267  continue;
2268  }
2269  // Because Constant Layer memory cannot be shared, the memory must persist for the lifetime of execution;
2270  // its management is done separately.
2271  if (layerType == LayerType::Constant)
2272  {
2273  continue;
2274  }
2275 
2276  BackendId backendId = layer->GetBackendId();
2277  for (auto& outputSlot : layer->GetOutputSlots())
2278  {
2279  if (!m_SupportsExternallyManagedMemory[backendId])
2280  {
2281  continue;
2282  }
2283 
2284  ITensorHandle* tensorHandle = outputSlot.GetOutputHandler().GetData();
2285  tensorHandle = TraceSubTensorHandleAncestry(tensorHandle);
2286 
2287  if (memBlockTrackerMap.find(tensorHandle) == memBlockTrackerMap.end())
2288  {
2289  PartialBlock partialBlock;
2290 
2291  partialBlock.m_StartOfLife = timestep;
2292 
2293  size_t alignedSize = align(outputSlot.GetOutputHandler().GetTensorInfo().GetNumBytes());
2294  partialBlock.m_MemSize = alignedSize;
2295  partialBlock.m_Index = outputIndex++;
2296  partialBlock.m_Lifetime = outputSlot.GetNumConnections();
2297  partialBlock.m_BackendId = backendId;
2298 
2299  if (partialBlock.m_Lifetime == 0)
2300  {
2301  m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
2302  partialBlock.m_StartOfLife,
2303  partialBlock.m_MemSize,
2304  0,
2305  partialBlock.m_Index);
2306  }
2307  else
2308  {
2309  memBlockTrackerMap[tensorHandle] = partialBlock;
2310  }
2311  m_Tensorhandles.push_back(tensorHandle);
2312 
2313  }
2314  else
2315  {
2316  memBlockTrackerMap.at(tensorHandle).m_Lifetime += outputSlot.GetNumConnections();
2317  }
2318  }
2319 
2320  for (auto& inputSlot : layer->GetInputSlots())
2321  {
2322  const Layer& connectedInputLayer = inputSlot.GetConnectedOutputSlot()->GetOwningLayer();
2323  const LayerType& owningLayerType = connectedInputLayer.GetType();
2324 
2325  if (owningLayerType == LayerType::Constant)
2326  {
2327  continue;
2328  }
2329  if (inputImportingEnabled && owningLayerType == LayerType::Input)
2330  {
2331  continue;
2332  }
2333  if (!m_SupportsExternallyManagedMemory[connectedInputLayer.GetBackendId()])
2334  {
2335  continue;
2336  }
2337 
2338  auto outputSlot = inputSlot.GetConnectedOutputSlot();
2339 
2340  ITensorHandle* tensorHandle = outputSlot->GetOutputHandler().GetData();
2341  tensorHandle = TraceSubTensorHandleAncestry(tensorHandle);
2342 
2343  PartialBlock& partialBlock = memBlockTrackerMap.at(tensorHandle);
2344 
2345  auto& lifetime = partialBlock.m_Lifetime;
2346  --lifetime;
2347 
2348  if (lifetime == 0)
2349  {
2350  m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
2351  timestep,
2352  partialBlock.m_MemSize,
2353  0,
2354  partialBlock.m_Index);
2355  }
2356  }
2357  ++timestep;
2358  }
2359 
2360 }
2361 
2362 std::unique_ptr<MemoryManager> LoadedNetwork::CreateExternalMemoryManger(
2363  std::vector<std::pair<std::shared_ptr<TensorMemory>, MemorySource>>& tensorMemoryVec)
2364 {
2365  std::unique_ptr<MemoryManager> memoryManager = std::make_unique<MemoryManager>();
2366  auto allocatorMap = BackendRegistryInstance().GetAllocators();
2367 
2368  for (auto& backend : m_MemBinMap)
2369  {
2370  std::vector<BufferStorage> bufferStorageVec;
2371 
2372  std::shared_ptr<ICustomAllocator> backendAllocator;
2373  if (allocatorMap.find(backend.first) != allocatorMap.end())
2374  {
2375  backendAllocator = allocatorMap[backend.first];
2376  }
2377  else
2378  {
2379  backendAllocator = m_Backends[backend.first]->GetDefaultAllocator();
2380  }
2381 
2382  for (auto& memBin : backend.second)
2383  {
2384  BufferStorage bufferStorage;
2385  bufferStorage.m_BufferSize = memBin.m_MemSize;
2386  bufferStorage.m_TensorMemoryVector.reserve(memBin.m_MemBlocks.size());
2387 
2388  for (auto& memBlock : memBin.m_MemBlocks)
2389  {
2390  auto tensorMemory = std::make_shared<TensorMemory>(TensorMemory{memBlock.m_Offset, memBlock.m_Index});
2391 
2392  tensorMemoryVec.emplace_back(tensorMemory, backendAllocator->GetMemorySourceType());
2393  bufferStorage.m_TensorMemoryVector.emplace_back(tensorMemory);
2394  }
2395 
2396  bufferStorageVec.emplace_back(std::move(bufferStorage));
2397  }
2398 
2399  memoryManager->StoreMemToAllocate(bufferStorageVec, backendAllocator, 4);
2400  }
2401 
2402  return memoryManager;
2403 }
2404 
2405 LayerBindingId LoadedNetwork::ValidateImportedInputID(ImportedInputId id)
2406 {
2407  try
2408  {
2409  const auto& importedTensorHandlePin = m_PreImportedInputHandles.at(id);
2410  if (!importedTensorHandlePin.m_TensorHandle)
2411  {
2412  throw InvalidArgumentException(fmt::format("LoadedNetwork::Execute:"
2413  "PreImportedInput: {} has been deleted", id));
2414  }
2415  return importedTensorHandlePin.m_LayerBindingId;
2416  }
2417  catch (const std::out_of_range&)
2418  {
2419  throw InvalidArgumentException(fmt::format("LoadedNetwork::Execute: Unknown ImportedInputId: {}", id));
2420  }
2421 }
2422 
2423 LayerBindingId LoadedNetwork::ValidateImportedOutputID(ImportedOutputId id)
2424 {
2425  try
2426  {
2427  const auto& importedTensorHandlePin = m_PreImportedOutputHandles.at(id);
2428  if (!importedTensorHandlePin.m_TensorHandle)
2429  {
2430  throw InvalidArgumentException(fmt::format("LoadedNetwork::Execute: "
2431  "PreImportedOutput: {} has been deleted", id));
2432  }
2433  return importedTensorHandlePin.m_LayerBindingId;
2434  }
2435  catch (const std::out_of_range&)
2436  {
2437  throw InvalidArgumentException(fmt::format("LoadedNetwork::Execute: Unknown ImportedOutputId: {}", id));
2438  }
2439 }
2440 
2441 }