ArmNN 22.02
LoadedNetwork.cpp
//
// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "LoadedNetwork.hpp"
#include "Layer.hpp"
#include "Graph.hpp"
#include <Processes.hpp>
#include "Profiling.hpp"
#include "HeapProfiling.hpp"
#include "WorkingMemHandle.hpp"

#include <armnn/BackendRegistry.hpp>
#include <armnn/Logging.hpp>
#include <armnn/utility/Assert.hpp>

#include <backendsCommon/MemCopyWorkload.hpp>
#include <backendsCommon/MemSyncWorkload.hpp>
#include <backendsCommon/memoryOptimizerStrategyLibrary/MemoryOptimizerStrategyLibrary.hpp>

#include <armnn/BackendHelper.hpp>

#include <fmt/format.h>
namespace armnn
{

using namespace std;
using namespace armnn::profiling;

namespace
{

template <typename ExceptionType>
std::string ToErrorMessage(const char* prefix, const ExceptionType& error)
{
    std::stringstream ss;
    ss << prefix << " " << error.what();
    return ss.str();
}

void AddLayerStructure(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
                       const Layer& layer,
                       ProfilingGuid networkGuid)
{
    // Add layer to the post-optimisation network structure
    std::string layerName = layer.GetNameStr().empty() ? "<Unnamed>" : layer.GetNameStr();
    timelineUtils->CreateNamedTypedChildEntity(layer.GetGuid(),
                                               networkGuid,
                                               layerName,
                                               LabelsAndEventClasses::LAYER_GUID);
    for (auto&& input : layer.GetInputSlots())
    {
        const IOutputSlot* source = input.GetConnectedOutputSlot();
        ARMNN_ASSERT(source != nullptr);
        timelineUtils->CreateConnectionRelationship(ProfilingRelationshipType::RetentionLink,
                                                    source->GetOwningLayerGuid(),
                                                    layer.GetGuid());
    }
}

void AddWorkloadStructure(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
                          std::unique_ptr<IWorkload>& workload,
                          const Layer& layer)
{
    // Add workload to the post-optimisation network structure
    timelineUtils->CreateTypedEntity(workload->GetGuid(), LabelsAndEventClasses::WORKLOAD_GUID);
    timelineUtils->MarkEntityWithLabel(workload->GetGuid(),
                                       layer.GetBackendId().Get(),
                                       LabelsAndEventClasses::BACKENDID_GUID);

    // Link the workload to the layer
    timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
                                      layer.GetGuid(),
                                      workload->GetGuid(),
                                      LabelsAndEventClasses::CHILD_GUID);
}

} // anonymous

std::unique_ptr<LoadedNetwork> LoadedNetwork::MakeLoadedNetwork(std::unique_ptr<IOptimizedNetwork> net,
                                                                std::string& errorMessage,
                                                                const INetworkProperties& networkProperties,
                                                                profiling::ProfilingService& profilingService)
{
    std::unique_ptr<LoadedNetwork> loadedNetwork;

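    // Load errors are reported through the errorMessage out-parameter rather than
    // rethrown: on failure the caller gets a null LoadedNetwork plus a description.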
    auto Fail = [&](const std::exception& error) -> std::unique_ptr<LoadedNetwork>
    {
        errorMessage = ToErrorMessage("An error occurred when preparing the network workloads: ", error);
        ARMNN_LOG(error) << errorMessage;

        return std::unique_ptr<LoadedNetwork>();
    };

    try
    {
        loadedNetwork.reset(new LoadedNetwork(std::move(net), networkProperties, profilingService));
    }
    catch (const armnn::RuntimeException& error)
    {
        return Fail(error);
    }
    catch (const armnn::Exception& error)
    {
        return Fail(error);
    }
    catch (const std::runtime_error& error)
    {
        return Fail(error);
    }

    return loadedNetwork;
}

LoadedNetwork::LoadedNetwork(std::unique_ptr<IOptimizedNetwork> net,
                             const INetworkProperties& networkProperties,
                             profiling::ProfilingService& profilingService) :
                             m_OptimizedNetwork(std::move(net)),
                             m_NetworkProperties(networkProperties),
                             m_TensorHandleFactoryRegistry(),
                             m_ProfilingService(profilingService)
{
    ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "LoadedNetwork");
    // Get the profiler and register it for the current thread.
    const std::shared_ptr<IProfiler>& profiler = m_OptimizedNetwork->GetProfiler();
    ProfilerManager::GetInstance().RegisterProfiler(profiler.get());

    profiler->EnableProfiling(networkProperties.m_ProfilingEnabled);

    profiler->EnableNetworkDetailsToStdOut(networkProperties.m_OutputNetworkDetailsMethod);

    // First create tensor handlers, backends and workload factories.
    // Handlers are created before workloads because workload creation can modify
    // some of the handlers (for example the splitter and concat layers).

    bool useExternalMemoryManager = false;
    bool useInternalMemoryManager = false;
    Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();

    if (!networkProperties.m_AsyncEnabled)
    {
        m_IsInputImported = std::vector<bool>(order.GetNumInputs(), false);
        m_IsOutputImported = std::vector<bool>(order.GetNumOutputs(), false);
    }

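    // For each distinct backend in the graph: create the backend once, check that it
    // supports the requested execution mode (async execution and externally managed
    // memory are explicit backend capabilities), then create its workload factory,
    // either through the tensor allocator API or with a backend-owned memory manager.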
    for (auto&& layer : order)
    {
        auto const& backendId = layer->GetBackendId();
        if (m_Backends.count(backendId) == 0)
        {
            auto createBackend = BackendRegistryInstance().GetFactory(backendId);
            auto it = m_Backends.emplace(std::make_pair(backendId, createBackend()));

            IBackendInternal* backend = it.first->second.get();

            if (networkProperties.m_AsyncEnabled &&
                !HasCapability(BackendOptions::BackendOption{"AsyncExecution", true}, backend->GetCapabilities()))
            {
                std::string er = backend->GetId();
                er += " does not support AsyncExecution";
                throw BackendCapabilityException(er);
            }

            if (networkProperties.m_AsyncEnabled &&
                !HasCapability(BackendOptions::BackendOption{"ExternallyManagedMemory", true},
                               backend->GetCapabilities()))
            {
                std::string er = backend->GetId();
                er += " does not support ExternallyManagedMemory\n";
                er += "AsyncEnabled networks require all backends to support ExternallyManagedMemory";
                throw BackendCapabilityException(er);
            }

            if (HasCapability(BackendOptions::BackendOption{"ExternallyManagedMemory", true},
                              backend->GetCapabilities()) &&
                (m_NetworkProperties.m_ExternalMemoryManagementEnabled || m_NetworkProperties.m_AsyncEnabled))
            {
                m_SupportsExternallyManagedMemory[backend->GetId()] = true;
                useExternalMemoryManager = true;
            }
            else
            {
                m_SupportsExternallyManagedMemory[backend->GetId()] = false;
                useInternalMemoryManager = true;
            }

            IBackendInternal::IWorkloadFactoryPtr workloadFactory;
            if (backend->SupportsTensorAllocatorAPI())
            {
                workloadFactory = backend->CreateWorkloadFactory(
                    m_TensorHandleFactoryRegistry,
                    m_OptimizedNetwork->pOptimizedNetworkImpl->GetModelOptions(),
                    static_cast<MemorySourceFlags>(m_NetworkProperties.m_InputSource),
                    static_cast<MemorySourceFlags>(m_NetworkProperties.m_OutputSource));
            }
            else
            {
                m_BackendMemoryMangers.emplace_back(backend->CreateMemoryManager());
                workloadFactory = backend->CreateWorkloadFactory(
                    m_BackendMemoryMangers.back(), m_OptimizedNetwork->pOptimizedNetworkImpl->GetModelOptions());
            }
            m_WorkloadFactories[backendId] = std::move(workloadFactory);
        }
    }

    if (!networkProperties.m_AsyncEnabled)
    {
        for (auto&& layer : order)
        {
            auto& workloadFactory = GetWorkloadFactory(*layer);
            bool supportsExternalManager = m_SupportsExternallyManagedMemory[layer->GetBackendId()];

            switch (layer->GetType())
            {
                case LayerType::Input:
                case LayerType::MemImport:
                {
                    // If IsImportEnabled is true then we need to set IsMemoryManaged
                    // to false when creating TensorHandles
                    layer->CreateTensorHandles(m_TensorHandleFactoryRegistry,
                                               workloadFactory,
                                               !supportsExternalManager && !m_NetworkProperties.m_ImportEnabled);
                    break;
                }
                case LayerType::Constant:
                {
                    layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory, true);
                    break;
                }
                default:
                {
                    // Look for a layer with 1 OutputSlot which has 1 connection and that connection is an Output Layer.
                    // If Export is enabled disable memory management, so we can export; otherwise we do a copy.
                    if ((layer->GetNumOutputSlots() == 1) &&
                        (layer->GetOutputSlots()[0].GetNumConnections() == 1) &&
                        (layer->GetOutputSlots()[0].GetConnection(0)->GetOwningLayer().GetType() == LayerType::Output))
                    {
                        layer->CreateTensorHandles(m_TensorHandleFactoryRegistry,
                                                   workloadFactory,
                                                   !supportsExternalManager && !m_NetworkProperties.m_ExportEnabled);
                    }
                    else
                    {
                        layer->CreateTensorHandles(m_TensorHandleFactoryRegistry,
                                                   workloadFactory,
                                                   !supportsExternalManager);
                    }
                }
            }
        }
    }

    ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
    std::unique_ptr<TimelineUtilityMethods> timelineUtils =
        TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService);
    if (timelineUtils)
    {
        timelineUtils->CreateTypedEntity(networkGuid, LabelsAndEventClasses::NETWORK_GUID);
        // Mark the network with a start of life event
        timelineUtils->RecordEvent(networkGuid, LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
        // and with the process ID
        int processID = armnnUtils::Processes::GetCurrentId();
        std::stringstream ss;
        ss << processID;
        timelineUtils->MarkEntityWithLabel(networkGuid, ss.str(), LabelsAndEventClasses::PROCESS_ID_GUID);
    }

    // Then create workloads.
    {
        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "LoadNetwork_CreateWorkloads");
        for (auto&& layer : order)
        {
            if (timelineUtils)
            {
                // Add layer to the post-optimisation network structure
                AddLayerStructure(timelineUtils, *layer, networkGuid);
            }

            const IWorkloadFactory& workloadFactory = GetWorkloadFactory(*layer);

            switch (layer->GetType())
            {
                case LayerType::Input:
                case LayerType::Output:
                {
                    // Inputs and outputs are treated in a special way - see EnqueueInput() and EnqueueOutput().
                    break;
                }
                default:
                {
                    auto workload = layer->CreateWorkload(workloadFactory);

                    if (!workload)
                    {
                        const char* const layerName =
                            layer->GetNameStr().length() != 0 ? layer->GetName() : "<Unnamed>";
                        throw InvalidArgumentException(
                            fmt::format("No workload created for layer (name: '{0}' type: '{1}') (compute '{2}')",
                                        layerName, static_cast<int>(layer->GetType()), layer->GetBackendId().Get()));
                    }

                    if (timelineUtils)
                    {
                        // Add workload to the post-optimisation network structure
                        AddWorkloadStructure(timelineUtils, workload, *layer);
                    }

                    // For async networks ConstantWorkloads are managed exclusively by LoadedNetwork
                    // and are separated out from the other workloads
                    if ((networkProperties.m_AsyncEnabled || useExternalMemoryManager) &&
                        layer->GetType() == LayerType::Constant)
                    {
                        m_ConstantTensorHandles[layer->GetGuid()] =
                            layer->GetOutputSlot(0).GetOutputHandler().GetData();
                        m_ConstantWorkloads[layer->GetGuid()] = std::move(workload);
                    }
                    else
                    {
                        m_WorkloadQueue.push_back(std::move(workload));
                    }

                    // Release the constant data in the layer.
                    layer->ReleaseConstantData();
                    break;
                }
            }
        }
    }

    // Gather information about workloads for inputs & outputs
    if (!networkProperties.m_AsyncEnabled && m_WorkloadQueue.size() != 0)
    {
        const int noOfInputs = armnn::numeric_cast<int>(order.GetNumInputs());

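        // Input layers come first in the topologically sorted graph but have no entries
        // in m_WorkloadQueue, so graph positions are offset by the number of inputs to
        // obtain workload-queue indices.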
        // Get indices of all workloads connected to each input and
        // check if they support tensor handle replacement
        for (const BindableLayer* layer : order.GetInputLayers())
        {
            const auto bindingId = layer->GetBindingId();

            bool supportsReplacement = true;

            for (const auto inputSlot : layer->GetOutputSlot(0).GetConnections())
            {
                auto workloadIndex = std::distance(order.begin(), order.GetPosInGraph(inputSlot->GetOwningLayer()));
                workloadIndex -= noOfInputs;

                m_InputWorkloadSlotPairs[bindingId].emplace_back(
                    WorkloadIndices{armnn::numeric_cast<unsigned int>(workloadIndex), inputSlot->GetSlotIndex()});

                auto workload = m_WorkloadQueue[m_InputWorkloadSlotPairs[bindingId].back().m_WorkloadIndex].get();
                supportsReplacement &= workload->SupportsTensorHandleReplacement();
            }

            ITensorHandleFactory::FactoryId factoryId = layer->GetOutputSlot(0).GetTensorHandleFactoryId();
            // Get matching import factory Id
            ITensorHandleFactory::FactoryId importFactoryId =
                m_TensorHandleFactoryRegistry.GetMatchingImportFactoryId(factoryId);

            ITensorHandleFactory* importFactory = m_TensorHandleFactoryRegistry.GetFactory(importFactoryId);

            if (supportsReplacement && importFactory)
            {
                m_PreImportedInputHandles.emplace_back(
                    bindingId, importFactory->CreateTensorHandle(layer->GetOutputSlot(0).GetTensorInfo(), false));
            }
            else
            {
                m_PreImportedInputHandles.emplace_back(bindingId, nullptr);
            }
        }

        // Get indices of all workloads connected to each output and
        // check if they support tensor handle replacement
        for (const BindableLayer* layer : order.GetOutputLayers())
        {
            const auto bindingId = layer->GetBindingId();

            const auto outputSlot = layer->GetInputSlot(0).GetConnectedOutputSlot();
            auto& indices = m_OutputWorkloadSlotPairs[bindingId];

            auto workloadIndex = std::distance(order.begin(), order.GetPosInGraph(outputSlot->GetOwningLayer()));
            workloadIndex -= noOfInputs;

            indices.m_OutputSlotIndices = WorkloadIndices{numeric_cast<unsigned int>(workloadIndex),
                                                          outputSlot->CalculateIndexOnOwner()};

            bool supportsReplacement = true;
            auto outputWorkload = m_WorkloadQueue[indices.m_OutputSlotIndices.m_WorkloadIndex].get();
            supportsReplacement &= outputWorkload->SupportsTensorHandleReplacement();

            for (auto& inputSlot : outputSlot->GetConnections())
            {
                if (inputSlot->GetOwningLayer().GetType() != LayerType::Output)
                {
                    auto inWorkloadIndex = std::distance(order.begin(),
                                                         order.GetPosInGraph(inputSlot->GetOwningLayer()));
                    inWorkloadIndex -= noOfInputs;
                    indices.m_InputSlotIndices.emplace_back(
                        WorkloadIndices{numeric_cast<unsigned int>(inWorkloadIndex), inputSlot->GetSlotIndex()});
                    auto inputWorkload = m_WorkloadQueue[indices.m_InputSlotIndices.back().m_WorkloadIndex].get();
                    supportsReplacement &= inputWorkload->SupportsTensorHandleReplacement();
                }
            }

            ITensorHandleFactory::FactoryId factoryId = outputSlot->GetTensorHandleFactoryId();
            // Get matching import factory Id
            ITensorHandleFactory::FactoryId importFactoryId =
                m_TensorHandleFactoryRegistry.GetMatchingImportFactoryId(factoryId);
            ITensorHandleFactory* importFactory = m_TensorHandleFactoryRegistry.GetFactory(importFactoryId);

            if (supportsReplacement && importFactory)
            {
                m_PreImportedOutputHandles.emplace_back(
                    bindingId, importFactory->CreateTensorHandle(outputSlot->GetTensorInfo(), false));
            }
            else
            {
                m_PreImportedOutputHandles.emplace_back(bindingId, nullptr);
            }
        }
    }

    for (auto&& workloadFactory : m_WorkloadFactories)
    {
        workloadFactory.second->AfterWorkloadsCreated();
    }

    if (timelineUtils)
    {
        // Commit to send the post-optimisation network structure
        timelineUtils->Commit();
    }

    if (useExternalMemoryManager)
    {
        if (networkProperties.m_AsyncEnabled)
        {
            CreateMemoryProfileAsync();
        }
        else
        {
            CreateMemoryProfile();
        }

        auto backendStrategyMap = BackendRegistryInstance().GetMemoryOptimizerStrategies();
        for (auto& backendMemoryProfile : m_MemBlockMap)
        {
            const BackendId& backendId = backendMemoryProfile.first;
            if (backendStrategyMap.find(backendId) != backendStrategyMap.end())
            {
                m_MemBinMap[backendId] = backendStrategyMap[backendId]->Optimize(backendMemoryProfile.second);
            }
            else
            {
                m_MemBinMap[backendId] = m_ConstantStrategy->Optimize(backendMemoryProfile.second);
            }
        }

        if (!networkProperties.m_AsyncEnabled)
        {
            m_ExternalMemoryManager = CreateExternalMemoryManger(m_TensorMemory);

            // Sort m_TensorMemory so that its order matches m_Tensorhandles
            std::sort(m_TensorMemory.begin(), m_TensorMemory.end(),
                      [](const std::pair<std::shared_ptr<TensorMemory>, MemorySource>& lhs,
                         const std::pair<std::shared_ptr<TensorMemory>, MemorySource>& rhs)
                      {
                          return lhs.first->m_OutputSlotId < rhs.first->m_OutputSlotId;
                      });
        }
    }

    // Now that the intermediate tensor memory has been set up,
    // do any post allocation configuration for each workload.
    if (!networkProperties.m_AsyncEnabled)
    {
        if (useInternalMemoryManager)
        {
            // Set up memory.
            m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().AllocateDynamicBuffers();
        }

        for (auto& workload : m_WorkloadQueue)
        {
            workload->PostAllocationConfigure();
        }
    }

    if (useExternalMemoryManager)
    {
        if (!networkProperties.m_AsyncEnabled)
        {
            AllocateAndExecuteConstantWorkloads();
        }
        else
        {
            AllocateAndExecuteConstantWorkloadsAsync();
        }
    }
}

void LoadedNetwork::AllocateAndExecuteConstantWorkloads()
{
    ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "LoadNetwork_AllocateAndExecuteConstants");
    for (auto& pair : m_ConstantWorkloads)
    {
        auto tensorHandle = m_ConstantTensorHandles[pair.first];
        tensorHandle->Allocate();
        pair.second->Execute();
    }
}

void LoadedNetwork::AllocateAndExecuteConstantWorkloadsAsync()
{
    ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "LoadNetwork_AllocateAndExecuteConstants");
    Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
    for (auto&& layer : order)
    {
        if (layer->GetType() == LayerType::Constant)
        {
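            // Constant layers own their output data: create and allocate a tensor handle
            // for each one and run its workload once now, so the constant data is in
            // place before any inference runs.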
            const auto& outSlot = layer->GetOutputSlots()[0];
            const auto factoryId = outSlot.GetTensorHandleFactoryId();
            ARMNN_ASSERT(factoryId != ITensorHandleFactory::LegacyFactoryId);
            auto& workloadFactory = GetWorkloadFactory(*layer);

            layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory);
            ITensorHandle* tensorHandle = outSlot.GetOutputHandler().GetData();

            m_ConstantTensorHandles[layer->GetGuid()] = tensorHandle;
            tensorHandle->Allocate();

            WorkingMemDescriptor memDesc;
            memDesc.m_Outputs.push_back(tensorHandle);
            m_ConstantWorkloads[layer->GetGuid()]->ExecuteAsync(memDesc);
        }
    }
}

void LoadedNetwork::SendNetworkStructure()
{
    ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "LoadNetwork_SendNetworkStructure");
    Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
    ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();

    std::unique_ptr<TimelineUtilityMethods> timelineUtils =
        TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService);

    timelineUtils->CreateTypedEntity(networkGuid, LabelsAndEventClasses::NETWORK_GUID);

    for (auto&& layer : order)
    {
        // Add layer to the post-optimisation network structure
        AddLayerStructure(timelineUtils, *layer, networkGuid);
        switch (layer->GetType())
        {
            case LayerType::Input:
            case LayerType::Output:
            {
                // Inputs and outputs are treated in a special way - see EnqueueInput() and EnqueueOutput().
                break;
            }
            default:
            {
                for (auto& workload : m_WorkloadQueue)
                {
                    // Add workload to the post-optimisation network structure
                    AddWorkloadStructure(timelineUtils, workload, *layer);
                }
                break;
            }
        }
    }
    // Commit to send the post-optimisation network structure
    timelineUtils->Commit();
}

profiling::ProfilingGuid LoadedNetwork::GetNetworkGuid()
{
    return m_OptimizedNetwork->GetGuid();
}

TensorInfo LoadedNetwork::GetInputTensorInfo(LayerBindingId layerId) const
{
    for (auto&& inputLayer : m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetInputLayers())
    {
        ARMNN_ASSERT_MSG(inputLayer->GetNumOutputSlots() == 1, "Input layer should have exactly 1 output slot");
        if (inputLayer->GetBindingId() == layerId)
        {
            return inputLayer->GetOutputSlot(0).GetTensorInfo();
        }
    }

    throw InvalidArgumentException(fmt::format("No input layer is associated with id {}", layerId));
}

TensorInfo LoadedNetwork::GetOutputTensorInfo(LayerBindingId layerId) const
{
    for (auto&& outputLayer : m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetOutputLayers())
    {
        ARMNN_ASSERT_MSG(outputLayer->GetNumInputSlots() == 1, "Output layer should have exactly 1 input slot");
        ARMNN_ASSERT_MSG(outputLayer->GetInputSlot(0).GetConnection(), "Input slot on Output layer must be connected");
        if (outputLayer->GetBindingId() == layerId)
        {
            return outputLayer->GetInputSlot(0).GetConnection()->GetTensorInfo();
        }
    }

    throw InvalidArgumentException(fmt::format("No output layer is associated with id {}", layerId));
}

const IWorkloadFactory& LoadedNetwork::GetWorkloadFactory(const Layer& layer) const
{
    const IWorkloadFactory* workloadFactory = nullptr;

    auto it = m_WorkloadFactories.find(layer.GetBackendId());
    if (it == m_WorkloadFactories.end())
    {
        throw RuntimeException(fmt::format("No workload factory for {0} to be used for layer: {1}",
                                           layer.GetBackendId().Get(),
                                           layer.GetNameStr()),
                               CHECK_LOCATION());
    }

    workloadFactory = it->second.get();

    ARMNN_ASSERT_MSG(workloadFactory, "No workload factory");

    std::string reasonIfUnsupported;
    ARMNN_ASSERT_MSG(IWorkloadFactory::IsLayerSupported(layer,
                                                        {},
                                                        reasonIfUnsupported,
                                                        m_OptimizedNetwork->pOptimizedNetworkImpl->GetModelOptions()),
                     "Factory does not support layer");
    IgnoreUnused(reasonIfUnsupported);
    return *workloadFactory;
}

namespace {

// Non-copyable class owning accelerator-specific tensor data.
class TensorPin
{
public:
    TensorPin(std::unique_ptr<ITensorHandle> handle, const TensorInfo& info, LayerBindingId id)
        : m_TensorHandle(std::move(handle))
        , m_TensorInfo(info)
        , m_Id(id)
    {
    }

    ITensorHandle* GetTensorHandle() const { return m_TensorHandle.get(); }
    const TensorInfo& GetTensorInfo() const { return m_TensorInfo; }
    LayerBindingId GetBindingId() const { return m_Id; }

private:
    std::unique_ptr<ITensorHandle> m_TensorHandle;
    TensorInfo m_TensorInfo;
    LayerBindingId m_Id;
};

static const TensorPin& GetTensorPin(LayerBindingId id,
                                     const std::vector<TensorPin>& pins,
                                     char const* bindingPointDesc)
{
    auto it = std::find_if(pins.begin(), pins.end(),
                           [id](const TensorPin& pin)
                           {
                               return pin.GetBindingId() == id;
                           });

    if (it != pins.end())
    {
        return *it;
    }
    else
    {
        throw InvalidArgumentException(fmt::format("No tensor supplied for {0} {1}", bindingPointDesc, id));
    }
}

// Stores data that needs to be kept accessible for the entire execution of a workload.
class WorkloadData
{
public:
    WorkloadData(const InputTensors& inputTensors, const OutputTensors& outputTensors)
    {
        m_InputTensorPins.reserve(inputTensors.size());
        m_OutputTensorPins.reserve(outputTensors.size());

        for (auto inputTensorPair : inputTensors)
        {
            auto inputTensor = inputTensorPair.second;

            std::unique_ptr<ITensorHandle> tensorHandle =
                std::make_unique<ConstPassthroughTensorHandle>(inputTensor.GetInfo(), inputTensor.GetMemoryArea());
            LayerBindingId layerId = inputTensorPair.first;

            m_InputTensorPins.emplace_back(std::move(tensorHandle), inputTensor.GetInfo(), layerId);
        }

        for (auto outputTensorPair : outputTensors)
        {
            auto outputTensor = outputTensorPair.second;

            std::unique_ptr<ITensorHandle> tensorHandle =
                std::make_unique<PassthroughTensorHandle>(outputTensor.GetInfo(), outputTensor.GetMemoryArea());
            LayerBindingId layerId = outputTensorPair.first;

            m_OutputTensorPins.emplace_back(std::move(tensorHandle), outputTensor.GetInfo(), layerId);
        }
    }

    const TensorPin& GetInputTensorPin(LayerBindingId id) const
    {
        return GetTensorPin(id, m_InputTensorPins, "input");
    }

    const TensorPin& GetOutputTensorPin(LayerBindingId id) const
    {
        return GetTensorPin(id, m_OutputTensorPins, "output");
    }

private:

    std::vector<TensorPin> m_InputTensorPins;
    std::vector<TensorPin> m_OutputTensorPins;
};

} // anonymous

Status LoadedNetwork::EnqueueWorkload(const InputTensors& inputTensors,
                                      const OutputTensors& outputTensors,
                                      std::vector<ImportedInputId> preImportedInputIds,
                                      std::vector<ImportedOutputId> preImportedOutputIds)
{
    const Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();

    // Walk graph to determine the order of execution.
    if (graph.GetNumLayers() < 2)
    {
        ARMNN_LOG(warning) << "IRuntime::EnqueueWorkload()::Less than two nodes in graph";
        return Status::Failure;
    }

    // Data that must be kept alive for the entire execution of the workload.
    WorkloadData workloadData(inputTensors, outputTensors);

    if (graph.GetNumInputs() != inputTensors.size())
    {
        throw InvalidArgumentException("Number of inputs provided does not match network.");
    }

    // For each input to the network, call EnqueueInput with the data passed by the user.
    {
        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "PrepareInputs");
        m_InputQueue.clear();
        m_InputQueue.reserve(graph.GetNumInputs());

        if (preImportedInputIds.size() > graph.GetNumInputs())
        {
            throw InvalidArgumentException("Invalid number of preImportedInputIds");
        }

        unsigned int inputIndex = 0;
        unsigned int importedInputIdIndex = 0;
        std::sort(preImportedInputIds.begin(), preImportedInputIds.end());
        for (const BindableLayer* inputLayer : graph.GetInputLayers())
        {
            if (importedInputIdIndex < preImportedInputIds.size() &&
                inputIndex == preImportedInputIds[importedInputIdIndex])
            {
                // Only replace tensorhandles if they have not already been replaced
                if (!m_IsInputImported[inputIndex])
                {
                    auto outputTensorHandle = m_PreImportedInputHandles[inputIndex].m_TensorHandle.get();

                    for (const auto& workloadInfo : m_InputWorkloadSlotPairs[inputLayer->GetBindingId()])
                    {
                        auto workload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
                        workload->ReplaceInputTensorHandle(outputTensorHandle, workloadInfo.m_SlotIndex);
                    }
                    m_IsInputImported[inputIndex] = true;
                }
                importedInputIdIndex++;
            }
            else
            {
                if (m_IsInputImported[inputIndex])
                {
                    OutputHandler& handler = const_cast<OutputHandler&>(inputLayer->GetOutputHandler(0));

                    for (const auto& workloadInfo : m_InputWorkloadSlotPairs[inputLayer->GetBindingId()])
                    {
                        auto workload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
                        workload->ReplaceInputTensorHandle(handler.GetData(), workloadInfo.m_SlotIndex);
                    }

                    m_IsInputImported[inputIndex] = false;
                }

                // InputTensorHandle is not imported yet, so proceed to enqueue the input.
                const TensorPin& pin = workloadData.GetInputTensorPin(inputLayer->GetBindingId());
                EnqueueInput(*inputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
            }
            inputIndex++;
        }
    }
    // For each output to the network, call EnqueueOutput with the data passed by the user.
    {
        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "PrepareOutputs");
        m_OutputQueue.clear();
        m_OutputQueue.reserve(graph.GetNumOutputs());

        if (preImportedOutputIds.size() > graph.GetNumOutputs())
        {
            throw InvalidArgumentException("Invalid number of preImportedOutputIds");
        }

        unsigned int outputIndex = 0;
        unsigned int importedOutputIdIndex = 0;
        std::sort(preImportedOutputIds.begin(), preImportedOutputIds.end());
        for (const BindableLayer* outputLayer : graph.GetOutputLayers())
        {
            if (importedOutputIdIndex < preImportedOutputIds.size() &&
                outputIndex == preImportedOutputIds[importedOutputIdIndex])
            {
                // Only replace tensorhandles if they have not already been replaced
                ITensorHandle* inputTensorHandle = m_PreImportedOutputHandles[outputIndex].m_TensorHandle.get();

                if (!m_IsOutputImported[outputIndex])
                {
                    const auto bindingId = outputLayer->GetBindingId();
                    const auto& indices = m_OutputWorkloadSlotPairs[bindingId];

                    auto outputWorkload = m_WorkloadQueue[indices.m_OutputSlotIndices.m_WorkloadIndex].get();

                    outputWorkload->ReplaceOutputTensorHandle(inputTensorHandle,
                                                              indices.m_OutputSlotIndices.m_SlotIndex);

                    for (const auto& workloadInfo : indices.m_InputSlotIndices)
                    {
                        auto inputWorkload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
                        inputWorkload->ReplaceInputTensorHandle(inputTensorHandle, workloadInfo.m_SlotIndex);
                    }
                    m_IsOutputImported[outputIndex] = true;
                }

                ARMNN_ASSERT_MSG(inputTensorHandle != nullptr, "Data should have been allocated.");
                MemSyncQueueDescriptor syncDesc;
                syncDesc.m_Inputs.push_back(inputTensorHandle);
                WorkloadInfo info;
                info.m_InputTensorInfos.push_back(
                    outputLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo());
                auto syncWorkload = std::make_unique<SyncMemGenericWorkload>(syncDesc, info);
                ARMNN_ASSERT_MSG(syncWorkload, "No sync workload created");
                m_OutputQueue.push_back(move(syncWorkload));
                importedOutputIdIndex++;
            }
            else
            {
                if (m_IsOutputImported[outputIndex])
                {
                    const auto bindingId = outputLayer->GetBindingId();
                    const auto& indices = m_OutputWorkloadSlotPairs[bindingId];

                    auto outputWorkload = m_WorkloadQueue[indices.m_OutputSlotIndices.m_WorkloadIndex].get();
                    const OutputHandler& outputHandler =
                        outputLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetOutputHandler();

                    outputWorkload->ReplaceOutputTensorHandle(
                        outputHandler.GetData(), indices.m_OutputSlotIndices.m_SlotIndex);

                    for (const auto& workloadInfo : indices.m_InputSlotIndices)
                    {
                        auto inputWorkload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get();
                        inputWorkload->ReplaceInputTensorHandle(outputHandler.GetData(), workloadInfo.m_SlotIndex);
                    }
                    m_IsOutputImported[outputIndex] = false;
                }

                // OutputTensorHandle is not imported yet, so proceed to enqueue the output.
                const TensorPin& pin = workloadData.GetOutputTensorPin(outputLayer->GetBindingId());
                EnqueueOutput(*outputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
            }
            outputIndex++;
        }
    }

    std::unique_ptr<TimelineUtilityMethods> timelineUtils =
        TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService);
    ProfilingGuid inferenceGuid = m_ProfilingService.GetNextGuid();
    if (timelineUtils)
    {
        // Add inference timeline trace if profiling is enabled.
        ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
        timelineUtils->CreateTypedEntity(inferenceGuid, LabelsAndEventClasses::INFERENCE_GUID);
        timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
                                          networkGuid,
                                          inferenceGuid,
                                          LabelsAndEventClasses::EXECUTION_OF_GUID);
        timelineUtils->RecordEvent(inferenceGuid, LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
    }

    bool executionSucceeded = true;

    {
        if (m_ProfilingService.IsProfilingEnabled())
        {
            m_ProfilingService.IncrementCounterValue(armnn::profiling::INFERENCES_RUN);
        }
        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "EnqueueWorkload_Execute");
        ARMNN_SCOPED_HEAP_PROFILING("Executing");
        executionSucceeded = Execute(timelineUtils, inferenceGuid);
    }

    if (timelineUtils)
    {
        // Add end of life of the inference timeline if profiling is enabled.
        timelineUtils->RecordEvent(inferenceGuid, LabelsAndEventClasses::ARMNN_PROFILING_EOL_EVENT_CLASS);
        timelineUtils->Commit();
    }

    return executionSucceeded ? Status::Success : Status::Failure;
}

void LoadedNetwork::EnqueueInput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo)
{
    if (layer.GetType() != LayerType::Input)
    {
        throw InvalidArgumentException("EnqueueInput: given layer not an InputLayer");
    }

    if (tensorHandle == nullptr)
    {
        throw InvalidArgumentException("EnqueueInput: tensorHandle must not be NULL");
    }

    InputQueueDescriptor inputQueueDescriptor;
    WorkloadInfo info;

    inputQueueDescriptor.m_Inputs.push_back(tensorHandle);
    info.m_InputTensorInfos.push_back(tensorInfo);

    ARMNN_ASSERT_MSG(layer.GetNumOutputSlots() == 1, "Can only handle Input Layer with one output");
    const OutputHandler& handler = layer.GetOutputHandler();
    const TensorInfo& outputTensorInfo = handler.GetTensorInfo();
    ITensorHandle* outputTensorHandle = handler.GetData();
    ARMNN_ASSERT_MSG(outputTensorHandle != nullptr,
                     "Data should have been allocated.");
    inputQueueDescriptor.m_Outputs.push_back(outputTensorHandle);
    info.m_OutputTensorInfos.push_back(outputTensorInfo);

    MemorySourceFlags importFlags = outputTensorHandle->GetImportFlags();
    bool needMemCopy = true;
    if (m_NetworkProperties.m_ImportEnabled) // Try to import the input tensor
    {
        if (CheckFlag(importFlags, m_NetworkProperties.m_InputSource))
        {
            needMemCopy = false;
            // This assumes a CPU Tensor handle
            void* mem = tensorHandle->Map(false);
            if (outputTensorHandle->Import(mem, m_NetworkProperties.m_InputSource))
            {
                tensorHandle->Unmap();
                return; // No need for a workload since the import has been done.
            }
            tensorHandle->Unmap();
            throw MemoryImportException("EnqueueInput: Memory Import failed");
        }
    }
    if (needMemCopy)
    {
        // Create a mem copy workload for input since we did not import
        std::unique_ptr<IWorkload> inputWorkload = std::make_unique<CopyMemGenericWorkload>(inputQueueDescriptor, info);

        ARMNN_ASSERT_MSG(inputWorkload, "No input workload created");

        std::unique_ptr<TimelineUtilityMethods> timelineUtils =
            TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService);
        if (timelineUtils)
        {
            // Add Input Workload to the post-optimisation network structure
            AddWorkloadStructure(timelineUtils, inputWorkload, layer);
            timelineUtils->Commit();
        }

        m_InputQueue.push_back(move(inputWorkload));
    }
}

void LoadedNetwork::EnqueueOutput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo)
{
    if (layer.GetType() != LayerType::Output)
    {
        throw InvalidArgumentException("EnqueueOutput: given layer not an OutputLayer");
    }

    if (tensorHandle == nullptr)
    {
        throw InvalidArgumentException("EnqueueOutput: tensorHandle must not be NULL");
    }

    OutputQueueDescriptor outputQueueDescriptor;
    WorkloadInfo info;

    outputQueueDescriptor.m_Outputs.push_back(tensorHandle);
    info.m_OutputTensorInfos.push_back(tensorInfo);

    ARMNN_ASSERT_MSG(layer.GetNumInputSlots() == 1, "Output Layer should have exactly one input.");

    // Gets the output handler from the previous node.
    const OutputHandler& outputHandler = layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOutputHandler();

    const TensorInfo& inputTensorInfo = outputHandler.GetTensorInfo();
    ITensorHandle* inputTensorHandle = outputHandler.GetData();
    ARMNN_ASSERT_MSG(inputTensorHandle != nullptr, "Data should have been allocated.");

    // Try to import the output tensor.
    // Note: We can only import the output pointer if all of the following hold true:
    // a) The imported pointer is aligned sufficiently
    // b) The tensor has zero padding
    // c) There is only one connection to the OutputSlot and it is to an OutputLayer.
    // d) The output pointer is allocated via malloc. (Other types will be supported in a later release)
    // e) m_IsExportEnabled must be set to true
    bool needMemCopy = true;
    if (m_NetworkProperties.m_ExportEnabled &&
        (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetNumConnections() == 1))
    {
        if (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOwningLayer().GetType() != LayerType::Input)
        {
            MemorySourceFlags importFlags = inputTensorHandle->GetImportFlags();
            if (CheckFlag(importFlags, m_NetworkProperties.m_OutputSource))
            {
                needMemCopy = false;
                void* mem = tensorHandle->Map(false);
                bool importOk = inputTensorHandle->Import(mem, m_NetworkProperties.m_OutputSource);
                tensorHandle->Unmap();

                if (importOk)
                {
                    // Insert synchronization workload
                    MemSyncQueueDescriptor syncDesc;
                    syncDesc.m_Inputs.push_back(inputTensorHandle);
                    info.m_InputTensorInfos.push_back(inputTensorInfo);
                    auto syncWorkload = std::make_unique<SyncMemGenericWorkload>(syncDesc, info);
                    ARMNN_ASSERT_MSG(syncWorkload, "No sync workload created");
                    m_OutputQueue.push_back(move(syncWorkload));
                }
                else
                {
                    throw MemoryExportException("EnqueueOutput: Memory Export failed");
                }
            }
        }
    }
    if (needMemCopy)
    {
        // If we got here then we didn't export the memory, so add an output workload which performs a memcopy.
        outputQueueDescriptor.m_Inputs.push_back(inputTensorHandle);
        info.m_InputTensorInfos.push_back(inputTensorInfo);

        std::unique_ptr<IWorkload> outputWorkload =
            std::make_unique<CopyMemGenericWorkload>(outputQueueDescriptor, info);
        ARMNN_ASSERT_MSG(outputWorkload, "No output workload created");

        std::unique_ptr<TimelineUtilityMethods> timelineUtils =
            TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService);
        if (timelineUtils)
        {
            // Add Output Workload to the post-optimisation network structure
            AddWorkloadStructure(timelineUtils, outputWorkload, layer);
            timelineUtils->Commit();
        }

        m_OutputQueue.push_back(move(outputWorkload));
    }
}

void LoadedNetwork::AllocateWorkingMemory(std::lock_guard<std::mutex>& lock)
{
    ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Working Memory Allocation");

    // This unused parameter makes sure we can only call this function with a valid lock.
    IgnoreUnused(lock);

    if (m_IsWorkingMemAllocated)
    {
        return;
    }

    if (m_ExternalMemoryManager)
    {
        m_ExternalMemoryManager->Allocate();

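        // Hand each block of externally managed memory to its matching tensor handle;
        // m_TensorMemory was sorted at load time so its order matches m_Tensorhandles.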
        for (unsigned int i = 0; i < m_TensorMemory.size(); ++i)
        {
            m_Tensorhandles[i]->Import(m_TensorMemory[i].first->m_Data, m_TensorMemory[i].second);
        }
    }

    for (auto&& memoryManager : m_BackendMemoryMangers)
    {
        if (memoryManager)
        {
            memoryManager->Acquire();
        }
    }
    m_TensorHandleFactoryRegistry.AquireMemory();
    m_IsWorkingMemAllocated = true;
}

void LoadedNetwork::FreeWorkingMemory()
{
    std::lock_guard<std::mutex> lockGuard(m_WorkingMemMutex);

    if (!m_IsWorkingMemAllocated)
    {
        return;
    }

    if (m_ExternalMemoryManager)
    {
        m_ExternalMemoryManager->Deallocate();
    }

    // Informs the memory managers to release memory in their respective memory groups.
    for (auto&& memoryManager : m_BackendMemoryMangers)
    {
        if (memoryManager)
        {
            memoryManager->Release();
        }
    }
    m_TensorHandleFactoryRegistry.ReleaseMemory();
    m_IsWorkingMemAllocated = false;
}

bool LoadedNetwork::Execute(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
                            profiling::ProfilingGuid inferenceGuid)
{
    bool success = true;

    auto Fail = [&](const std::exception& error)
    {
        ARMNN_LOG(error) << "An error occurred attempting to execute a workload: " << error.what();
        success = false;
    };

    try
    {
        std::lock_guard<std::mutex> lockGuard(m_WorkingMemMutex);
        AllocateWorkingMemory(lockGuard);

        ProfilingDynamicGuid workloadInferenceID(0);
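        // Runs every workload in a queue in order, bracketing each execution with
        // start-of-life/end-of-life timeline events when profiling is active.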
        auto ExecuteQueue = [&timelineUtils, &workloadInferenceID, &inferenceGuid](WorkloadQueue& queue)
        {
            for (auto& workload : queue)
            {
                if (timelineUtils)
                {
                    workloadInferenceID =
                        timelineUtils->RecordWorkloadInferenceAndStartOfLifeEvent(workload->GetGuid(), inferenceGuid);
                }
                workload->Execute();
                if (timelineUtils)
                {
                    timelineUtils->RecordEndOfLifeEvent(workloadInferenceID);
                }
            }
        };

        ExecuteQueue(m_InputQueue);
        ExecuteQueue(m_WorkloadQueue);
        ExecuteQueue(m_OutputQueue);
    }
    catch (const RuntimeException& error)
    {
        Fail(error);
    }
    catch (const std::runtime_error& error)
    {
        Fail(error);
    }

    return success;
}

void LoadedNetwork::EnqueueInput(const ConstTensor& inputTensor, ITensorHandle* inputTensorHandle)
{
    if (m_NetworkProperties.m_ImportEnabled) // Try to import the input tensor
    {
        MemorySourceFlags importFlags = inputTensorHandle->GetImportFlags();
        if (CheckFlag(importFlags, m_NetworkProperties.m_InputSource))
        {
            std::unique_ptr<ITensorHandle> tensorHandle =
                std::make_unique<ConstPassthroughTensorHandle>(inputTensor.GetInfo(),
                                                               inputTensor.GetMemoryArea());
            void* mem = tensorHandle->Map(false);

            if (inputTensorHandle->Import(mem, m_NetworkProperties.m_InputSource))
            {
                tensorHandle->Unmap();
                return;
            }
            tensorHandle->Unmap();
            throw MemoryImportException("EnqueueInput: Memory Import failed");
        }
        else
        {
            throw MemoryImportException("EnqueueInput: Memory Import failed, backend does not support Import");
        }
    }
    else
    {
        std::unique_ptr<ITensorHandle> tensorHandle =
            std::make_unique<ConstPassthroughTensorHandle>(inputTensor.GetInfo(), inputTensor.GetMemoryArea());

        auto copyFunc = [](void* dst, const void* src, size_t size)
        {
            memcpy(dst, src, size);
        };

        CopyTensorContentsGeneric(tensorHandle.get(), inputTensorHandle, copyFunc);
    }
}

// Note: We can only import the output pointer if all of the following hold true:
// a) The imported pointer is aligned sufficiently
// b) The tensor has zero padding
// c) There is only one connection to the OutputSlot and it is to an OutputLayer.
// d) The output pointer is allocated via malloc. (Other types will be supported in a later release)
// e) m_IsExportEnabled must be set to true
void LoadedNetwork::ImportOutputTensor(const Tensor& outputTensor, ITensorHandle* outputTensorHandle)
{
    ARMNN_ASSERT_MSG(outputTensorHandle != nullptr, "Data should have been allocated.");
    MemorySourceFlags importFlags = outputTensorHandle->GetImportFlags();
    if (CheckFlag(importFlags, m_NetworkProperties.m_OutputSource))
    {
        std::unique_ptr<ITensorHandle> tensorHandle =
            std::make_unique<PassthroughTensorHandle>(outputTensor.GetInfo(),
                                                      outputTensor.GetMemoryArea());

        void* mem = tensorHandle->Map(false);
        bool importOk = outputTensorHandle->Import(mem, m_NetworkProperties.m_OutputSource);
        tensorHandle->Unmap();

        if (!importOk)
        {
            throw MemoryExportException("ImportOutputTensor: Memory Export failed");
        }
    }
    else
    {
        throw MemoryExportException("ImportOutputTensor: Memory Export failed, attempting to export Input Layer");
    }
}

void CopyToOutputTensor(const Tensor& outputTensor, ITensorHandle* outputTensorHandle)
{
    auto copyFunc = [](void* dst, const void* src, size_t size)
    {
        memcpy(dst, src, size);
    };

    std::unique_ptr<ITensorHandle> tensorHandle =
        std::make_unique<PassthroughTensorHandle>(outputTensor.GetInfo(),
                                                  outputTensor.GetMemoryArea());

    CopyTensorContentsGeneric(outputTensorHandle, tensorHandle.get(), copyFunc);
}

const armnn::ConstTensor GetInputTensor(const LayerBindingId layerId, const InputTensors& inputTensors)
{
    for (auto inputTensorPair : inputTensors)
    {
        LayerBindingId id = inputTensorPair.first;
        if (id == layerId)
        {
            return inputTensorPair.second;
        }
    }
    throw InvalidArgumentException("Input does not exist.");
}

const armnn::Tensor GetOutputTensor(const LayerBindingId layerId, const OutputTensors& outputTensors)
{
    for (auto outputTensorPair : outputTensors)
    {
        LayerBindingId id = outputTensorPair.first;
        if (id == layerId)
        {
            return outputTensorPair.second;
        }
    }
    throw InvalidArgumentException("Output does not exist.");
}

std::vector<ImportedInputId> LoadedNetwork::ImportInputs(const InputTensors& inputTensors,
                                                         MemorySource forceImportMemorySource)
{
    if (!m_NetworkProperties.m_AsyncEnabled)
    {
        // Cannot import if import is not enabled and forceImportMemorySource is undefined
        if (forceImportMemorySource == MemorySource::Undefined)
        {
            throw MemoryImportException("ImportInputs: Memory Import failed, NetworkProperties.m_ImportEnabled");
        }
        if (inputTensors.size() != m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetNumInputs())
        {
            throw MemoryImportException("ImportInputs: Force Import failed, incorrect number of tensors");
        }

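        // Force import: try to import each supplied tensor into the matching pre-imported
        // handle created at load time; tensors whose memory cannot be imported are skipped,
        // so only the indices of successfully imported inputs are returned.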
        std::vector<ImportedInputId> importedInputs;
        Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
        unsigned int inputIndex = 0;
        for (const BindableLayer* inputLayer : graph.GetInputLayers())
        {
            auto outputTensorHandle = m_PreImportedInputHandles[inputIndex].m_TensorHandle.get();

            if (!outputTensorHandle)
            {
                inputIndex++;
                continue;
            }

            auto layerBindingId = inputLayer->GetBindingId();
            auto it = std::find_if(inputTensors.begin(), inputTensors.end(), [=](const auto& inputTensor)
            {
                return inputTensor.first == layerBindingId;
            });

            if (it == inputTensors.end())
            {
                inputIndex++;
                continue;
            }

            const auto& inputTensor = *it;
            std::unique_ptr<ITensorHandle> passThroughTensorHandle =
                std::make_unique<ConstPassthroughTensorHandle>(inputTensor.second.GetInfo(),
                                                               inputTensor.second.GetMemoryArea());

            if (outputTensorHandle->CanBeImported(passThroughTensorHandle->Map(), forceImportMemorySource)
                && (outputTensorHandle->Import(passThroughTensorHandle->Map(), forceImportMemorySource)))
            {
                importedInputs.push_back(inputIndex);
            }
            passThroughTensorHandle->Unmap();

            inputIndex++;
        }

        return importedInputs;
    }
    else
    {
        // Import is enabled through the network properties.
        std::vector<ImportedInputId> importedInputs;
        Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();

        for (auto inputTensor : inputTensors)
        {
            auto layerBindingId = inputTensor.first;
            auto it = std::find_if(graph.GetInputLayers().begin(), graph.GetInputLayers().end(), [=](auto* layer)
            {
                return layer->GetBindingId() == layerBindingId;
            });

            if (it == graph.GetInputLayers().end())
            {
                throw MemoryImportException(fmt::format(
                    "ImportInputs: Memory Import failed, unknown LayerBindingId: {}", layerBindingId));
            }

            const Layer* layer = *it;
            if (layer->GetType() != LayerType::Input)
            {
                throw InvalidArgumentException("ImportInputs: given layer not an InputLayer");
            }

            auto& backend = m_Backends.at(layer->GetBackendId());
            if (!HasCapability(BackendOptions::BackendOption{"PreImportIOTensors", true}, backend->GetCapabilities()))
            {
                std::string er = backend->GetId();
                er += " does not have PreImportIOTensors capability";
                throw BackendCapabilityException(er);
            }

            const OutputSlot& outputSlot = layer->GetOutputSlots()[0];

            ITensorHandleFactory::FactoryId factoryId = outputSlot.GetTensorHandleFactoryId();
            const TensorInfo& tensorInfo = outputSlot.GetTensorInfo();

            ITensorHandleFactory* handleFactory = m_TensorHandleFactoryRegistry.GetFactory(factoryId);
            ARMNN_ASSERT(handleFactory);

            ImportedTensorHandlePin importedTensorHandlePin{layerBindingId,
                                                            handleFactory->CreateTensorHandle(tensorInfo, false)};

            ITensorHandle* tensorHandle = importedTensorHandlePin.m_TensorHandle.get();

            if (!CheckFlag(tensorHandle->GetImportFlags(), m_NetworkProperties.m_InputSource))
            {
                throw MemoryImportException(
                    fmt::format("ImportInputs: Memory Import failed, backend: "
                                "{} does not support importing from source {}",
                                factoryId, m_NetworkProperties.m_InputSource));
            }

            std::unique_ptr<ITensorHandle> passThroughTensorHandle =
                std::make_unique<ConstPassthroughTensorHandle>(inputTensor.second.GetInfo(),
                                                               inputTensor.second.GetMemoryArea());

            if (tensorHandle->Import(passThroughTensorHandle->Map(), m_NetworkProperties.m_InputSource))
            {
                importedInputs.push_back(m_CurImportedInputId++);
                passThroughTensorHandle->Unmap();
            }
            else
            {
                passThroughTensorHandle->Unmap();
                throw MemoryImportException("ImportInputs: Memory Import failed");
            }

            m_PreImportedInputHandles.push_back(std::move(importedTensorHandlePin));
        }
        return importedInputs;
    }
}

std::vector<ImportedOutputId> LoadedNetwork::ImportOutputs(const OutputTensors& outputTensors,
                                                           MemorySource forceImportMemorySource)
{
    if (!m_NetworkProperties.m_AsyncEnabled)
    {
        // Cannot import if import is not enabled and forceImportMemorySource is undefined
        if (forceImportMemorySource == MemorySource::Undefined)
        {
            throw MemoryImportException("ImportOutputs: Memory Import failed, NetworkProperties.m_ImportEnabled");
        }
        // If forceImportMemorySource is defined, try to import if the memory is aligned
        if (outputTensors.size() != m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetNumOutputs())
        {
            throw MemoryImportException("ImportOutputs: Force Import failed, incorrect number of tensors");
        }
        std::vector<ImportedOutputId> importedOutputs;
        Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();

        unsigned int outputIndex = 0;
        for (const BindableLayer* const outputLayer : graph.GetOutputLayers())
        {
            auto inputTensorHandle = m_PreImportedOutputHandles[outputIndex].m_TensorHandle.get();

            if (!inputTensorHandle)
            {
                outputIndex++;
                continue;
            }

            auto layerBindingId = outputLayer->GetBindingId();
            auto it = std::find_if(outputTensors.begin(), outputTensors.end(), [=](const auto& outputTensor)
            {
                return outputTensor.first == layerBindingId;
            });

            if (it == outputTensors.end())
            {
                outputIndex++;
                continue;
            }

            const auto outputTensor = *it;
            // Check if the output memory can be imported
            if (inputTensorHandle->CanBeImported(outputTensor.second.GetMemoryArea(), forceImportMemorySource)
                && inputTensorHandle->Import(outputTensor.second.GetMemoryArea(), forceImportMemorySource))
            {
                importedOutputs.push_back(outputIndex);
            }
            outputIndex++;
        }
        return importedOutputs;
    }

    std::vector<ImportedOutputId> importedOutputs;
    Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();

    for (const auto& outputTensor : outputTensors)
    {
        auto layerBindingId = outputTensor.first;
        auto it = std::find_if(graph.GetOutputLayers().begin(), graph.GetOutputLayers().end(), [=](auto* layer)
        {
            return layer->GetBindingId() == layerBindingId;
        });

        if (it == graph.GetOutputLayers().end())
        {
            throw MemoryImportException(fmt::format("ImportOutputs: Memory Import failed, unknown LayerBindingId: {}",
                                                    layerBindingId));
        }

        const Layer* layer = *it;
        if (layer->GetType() != LayerType::Output)
        {
            throw InvalidArgumentException("ImportOutputs: given layer not an OutputLayer");
        }

        auto& backend = m_Backends.at(layer->GetBackendId());
        if (!HasCapability(BackendOptions::BackendOption{"PreImportIOTensors", true}, backend->GetCapabilities()))
        {
            std::string er = backend->GetId();
            er += " does not have PreImportIOTensors capability";
            throw BackendCapabilityException(er);
        }

        const InputSlot& inputSlot = layer->GetInputSlots()[0];
        ITensorHandleFactory::FactoryId factoryId = inputSlot.GetConnectedOutputSlot()->GetTensorHandleFactoryId();
        const TensorInfo& tensorInfo = inputSlot.GetConnectedOutputSlot()->GetTensorInfo();

        ITensorHandleFactory* handleFactory = m_TensorHandleFactoryRegistry.GetFactory(factoryId);
        ARMNN_ASSERT(handleFactory);

        ImportedTensorHandlePin importedTensorHandlePin{layerBindingId,
                                                        handleFactory->CreateTensorHandle(tensorInfo, false)};

        ITensorHandle* tensorHandle = importedTensorHandlePin.m_TensorHandle.get();

        if (!CheckFlag(tensorHandle->GetImportFlags(), m_NetworkProperties.m_OutputSource))
        {
            throw MemoryImportException(fmt::format("ImportOutputs: Memory Import failed, backend: "
                                                    "{} does not support importing from source {}",
                                                    factoryId, m_NetworkProperties.m_OutputSource));
        }

        if (tensorHandle->Import(outputTensor.second.GetMemoryArea(), m_NetworkProperties.m_OutputSource))
        {
            importedOutputs.push_back(m_CurImportedOutputId++);
        }
        else
        {
            throw MemoryImportException("ImportOutputs: Memory Import failed");
        }

        m_PreImportedOutputHandles.push_back(std::move(importedTensorHandlePin));
    }

    return importedOutputs;
}

void LoadedNetwork::ClearImportedInputs(const std::vector<ImportedInputId> inputIds)
{
    for (auto id : inputIds)
    {
        if (id >= m_PreImportedInputHandles.size())
        {
            throw InvalidArgumentException(fmt::format("ClearImportedInputs::Unknown ImportedInputId: {}", id));
        }

        auto& importedTensorHandle = m_PreImportedInputHandles[id].m_TensorHandle;
        if (!importedTensorHandle)
        {
            throw InvalidArgumentException(
                fmt::format("ClearImportedInputs::ImportedInput with id: {} has already been deleted", id));
        }
        // Call Unimport then destroy the tensorHandle
        importedTensorHandle->Unimport();
        importedTensorHandle = {};
    }
}

void LoadedNetwork::ClearImportedOutputs(const std::vector<ImportedOutputId> outputIds)
{
    for (auto id : outputIds)
    {
        if (id >= m_PreImportedOutputHandles.size())
        {
            throw InvalidArgumentException(fmt::format("ClearImportedOutputs::Unknown ImportedOutputId: {}", id));
        }

        auto& importedTensorHandle = m_PreImportedOutputHandles[id].m_TensorHandle;
        if (!importedTensorHandle)
        {
            throw InvalidArgumentException(
                fmt::format("ClearImportedOutputs::ImportedOutput with id: {} has already been deleted", id));
        }
        // Call Unimport then destroy the tensorHandle
        importedTensorHandle->Unimport();
        importedTensorHandle = {};
    }
}

Status LoadedNetwork::Execute(const InputTensors& inputTensors,
                              const OutputTensors& outputTensors,
                              IWorkingMemHandle& iWorkingMemHandle,
                              std::vector<ImportedInputId> preImportedInputs,
                              std::vector<ImportedOutputId> preImportedOutputs)
{
    const Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();

    if (inputTensors.size() + preImportedInputs.size() != graph.GetNumInputs())
    {
        if (preImportedInputs.empty())
        {
            throw InvalidArgumentException("LoadedNetwork::Execute: Number of inputs provided does not match network.");
        }
        else
        {
            throw InvalidArgumentException("LoadedNetwork::Execute: "
                                           "Number of inputs + preImportedInputs provided does not match network.");
        }
    }

    if (outputTensors.size() + preImportedOutputs.size() != graph.GetNumOutputs())
    {
        if (preImportedOutputs.empty())
        {
            throw InvalidArgumentException("LoadedNetwork::Execute: "
                                           "Number of outputs provided does not match network.");
        }
        else
        {
            throw InvalidArgumentException("LoadedNetwork::Execute: "
                                           "Number of outputs + preImportedOutputs provided does not match network.");
        }
    }

    WorkingMemHandle& workingMemHandle = dynamic_cast<WorkingMemHandle&>(iWorkingMemHandle);
    // Collect all the given LayerBindingIds and check them for duplicates and unknowns.
    std::vector<LayerBindingId>& bindingIds = workingMemHandle.GetBindingIdVector();
    unsigned int index = 0;
    for (auto pair : inputTensors)
    {
        bindingIds[index++] = pair.first;
    }
    for (ImportedInputId id : preImportedInputs)
    {
        bindingIds[index++] = ValidateImportedInputID(id);
    }
    for (auto pair : outputTensors)
    {
        bindingIds[index++] = pair.first;
    }
    for (ImportedOutputId id : preImportedOutputs)
    {
        bindingIds[index++] = ValidateImportedOutputID(id);
    }

    workingMemHandle.ValidateBindingIds();

1657  auto resetMemHandle = [&]()
1658  {
1659  for (ImportedInputId id: preImportedInputs)
1660  {
1661  const LayerBindingId layerBindingId = m_PreImportedInputHandles[id].m_LayerBindingId;
1662 
1663  auto inputHandle = workingMemHandle.GetInputHandle(layerBindingId);
1664  auto inputConnections = workingMemHandle.GetInputConnections(layerBindingId);
1665  for (auto it : inputConnections)
1666  {
1667  *it = inputHandle;
1668  }
1669  }
1670 
1671  for (ImportedOutputId id: preImportedOutputs)
1672  {
1673  const LayerBindingId layerBindingId = m_PreImportedOutputHandles[id].m_LayerBindingId;
1674 
1675  auto outputHandle = workingMemHandle.GetOutputHandle(layerBindingId);
1676  auto outputConnections = workingMemHandle.GetOutputConnection(layerBindingId);
1677 
1678  for (auto it : outputConnections)
1679  {
1680  *it = outputHandle;
1681  }
1682  }
1683  };
1684 
1685  std::unique_ptr<profiling::TimelineUtilityMethods> timelineUtils =
1686  profiling::TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService);
1687  profiling::ProfilingGuid inferenceGuid = m_ProfilingService.GetNextGuid();
1688  if (timelineUtils)
1689  {
1690  // Add inference timeline trace if profiling is enabled.
1691  profiling::ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
1692  timelineUtils->CreateTypedEntity(inferenceGuid, profiling::LabelsAndEventClasses::INFERENCE_GUID);
1693  timelineUtils->CreateRelationship(profiling::ProfilingRelationshipType::RetentionLink,
1694  networkGuid,
1695  inferenceGuid,
1696  profiling::LabelsAndEventClasses::EXECUTION_OF_GUID);
1697  timelineUtils->RecordEvent(inferenceGuid, profiling::LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
1698  }
1699 
1700  bool executionSucceeded = true;
1701 
1702  if (timelineUtils)
1703  {
1704  // Add end of life of the inference timeline if profiling is enabled.
1705  timelineUtils->RecordEvent(inferenceGuid, profiling::LabelsAndEventClasses::ARMNN_PROFILING_EOL_EVENT_CLASS);
1706  timelineUtils->Commit();
1707  }
1708 
1709  if (!workingMemHandle.IsAllocated())
1710  {
1711  workingMemHandle.Allocate();
1712  }
1713 
1714  {
1715  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "PrepareInputs");
1716  for (auto pair : inputTensors)
1717  {
1718  EnqueueInput(pair.second, workingMemHandle.GetInputHandle(pair.first));
1719  }
1720 
1721  // Swap in the pre-imported inputs if any
1722  for (ImportedInputId id : preImportedInputs)
1723  {
1724  const ImportedTensorHandlePin& importedInputPin = m_PreImportedInputHandles[id];
1725  const LayerBindingId layerBindingId = m_PreImportedInputHandles[id].m_LayerBindingId;
1726  const auto& preimportedHandle = importedInputPin.m_TensorHandle;
1727 
1728  auto inputConnections = workingMemHandle.GetInputConnections(layerBindingId);
1729  for (auto it : inputConnections)
1730  {
1731  *it = preimportedHandle.get();
1732  }
1733  }
1734  }
1735  {
1736  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "PrepareOutputs");
1737  if (m_NetworkProperties.m_ExportEnabled)
1738  {
1739  for (auto pair: outputTensors)
1740  {
1741  ImportOutputTensor(pair.second, workingMemHandle.GetOutputHandle(pair.first));
1742  }
1743  }
1744 
1745  for (ImportedOutputId id : preImportedOutputs)
1746  {
1747  const ImportedTensorHandlePin& importedOutputPin = m_PreImportedOutputHandles[id];
1748  const LayerBindingId layerBindingId = m_PreImportedOutputHandles[id].m_LayerBindingId;
1749  const auto& preimportedHandle = importedOutputPin.m_TensorHandle;
1750 
1751  auto outputConnections = workingMemHandle.GetOutputConnection(layerBindingId);
1752 
1753  for (auto it : outputConnections)
1754  {
1755  *it = preimportedHandle.get();
1756  }
1757  }
1758  }
1759 
1760  auto Fail = [&](const std::exception& error)
1761  {
1762  ARMNN_LOG(error) << "An error occurred attempting to execute a workload: " << error.what();
1763  executionSucceeded = false;
1764  };
1765  profiling::ProfilingDynamicGuid workloadInferenceID(0);
1766 
1767  try
1768  {
1769  for (unsigned int i = 0; i < m_WorkloadQueue.size(); ++i)
1770  {
1771  auto& workload = m_WorkloadQueue[i];
1772  if (timelineUtils)
1773  {
1774  workloadInferenceID = timelineUtils->RecordWorkloadInferenceAndStartOfLifeEvent(workload->GetGuid(),
1775  inferenceGuid);
1776  }
1777  workload->ExecuteAsync(workingMemHandle.GetWorkingMemDescriptorAt(i));
1778 
1779  if (timelineUtils)
1780  {
1781  timelineUtils->RecordEndOfLifeEvent(workloadInferenceID);
1782  }
1783  }
1784  }
1785  catch (const RuntimeException& error)
1786  {
1787  resetMemHandle();
1788  Fail(error);
1789  }
1790  catch (const std::runtime_error& error)
1791  {
1792  resetMemHandle();
1793  Fail(error);
1794  }
1795  catch (...)
1796  {
1797  resetMemHandle();
1798  throw;
1799  }
1800 
1801  if (!m_NetworkProperties.m_ExportEnabled)
1802  {
1803  for (auto pair: outputTensors)
1804  {
1805  CopyToOutputTensor(pair.second, workingMemHandle.GetOutputHandle(pair.first));
1806  }
1807  }
1808  else
1809  {
1810  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "SyncMemGeneric_Execute");
1811  workingMemHandle.MemSyncOutputs();
1812  }
1813 
1814  resetMemHandle();
1815 
1816  return executionSucceeded ? Status::Success : Status::Failure;
1817 }
1818 
1819 /// Create a new unique WorkingMemHandle object. Create multiple handles if you wish to have
1820 /// overlapped Execution by calling this function from different threads.
1821 std::unique_ptr<IWorkingMemHandle> LoadedNetwork::CreateWorkingMemHandle(NetworkId networkId)
1822 {
1823  Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
1824 
1825  // Tensors that will need to be allocated internally within armnn
1826  std::vector<std::unique_ptr<ITensorHandle>> managedTensorHandles;
1827  // Tensors that will be allocated externally by the user
1828  std::vector<std::unique_ptr<ITensorHandle>> unmanagedTensorHandles;
1829 
1830  std::vector<WorkingMemDescriptor> workingMemDescriptors;
1831  std::unordered_map<LayerGuid, WorkingMemDescriptor> workingMemDescriptorMap;
1832 
1833  auto GetTensorHandle = [&](Layer* layer, const OutputSlot& outputSlot)
1834  {
1835  ITensorHandleFactory::FactoryId factoryId = outputSlot.GetTensorHandleFactoryId();
1836  const TensorInfo& tensorInfo = outputSlot.GetTensorInfo();
1837 
1838  if (factoryId == ITensorHandleFactory::LegacyFactoryId)
1839  {
1840  BackendId id = layer->GetBackendId();
1841  ARMNN_NO_DEPRECATE_WARN_BEGIN
1842  return m_WorkloadFactories.at(id)->CreateTensorHandle(tensorInfo, false);
1843  ARMNN_NO_DEPRECATE_WARN_END
1844  }
1845  else
1846  {
1847  ITensorHandleFactory* handleFactory = m_TensorHandleFactoryRegistry.GetFactory(factoryId);
1848  ARMNN_ASSERT(handleFactory);
1849  return handleFactory->CreateTensorHandle(tensorInfo, false);
1850  }
1851  };
1852 
1853  struct HandleInfo
1854  {
1855  ITensorHandle* m_TensorHandle;
1856 
1857  bool m_IsInputLayerHandle = false;
1858  bool m_IsOutputLayerHandle = false;
1859 
1860  WorkingMemHandle::InputMemDescriptorCoords m_InputMemDescriptorCoords;
1861  WorkingMemHandle::OutputMemDescriptorCoords m_OutputMemDescriptorCoords;
1862  };
1863 
1864  std::unordered_map<const OutputSlot*, HandleInfo> outputToHandleInfoMap;
1865 
1866  unsigned int layerIndex = 0;
1867  for (auto&& layer : order)
1868  {
1869  // Constant layers' execution and management are handled during loaded network construction
1870  if (layer->GetType() == LayerType::Constant)
1871  {
1872  continue;
1873  }
1874 
1875  WorkingMemDescriptor workingMemDescriptor;
1876 
1877  bool isMemoryManaged = true;
1878  bool isInputLayer = false;
1879  bool isOutputLayer = false;
1880  bool isConnectedToOutputLayer = false;
1881 
1882  if (layer->GetType() == LayerType::Input || layer->GetType() == LayerType::MemImport)
1883  {
1884  // Input layers/workloads will not be executed, so the descriptor is not added to workingMemDescriptors
1885  // However, we will still need to manage the tensorHandle
1886  isInputLayer = true;
1887  isMemoryManaged = !m_NetworkProperties.m_ImportEnabled;
1888  }
1889  else if (layer->GetType() == LayerType::Output)
1890  {
1891  isOutputLayer = true;
1892  }
1893 
1894  unsigned int slotIndex = 0;
1895  // Create a tensor handle for each output slot of a layer
1896  // Once we create it, we start managing its lifetime
1897  for (auto& slot : layer->GetOutputSlots())
1898  {
1899  for (unsigned int i = 0; i < slot.GetNumConnections(); ++i)
1900  {
1901  if ((slot.GetConnection(i)->GetOwningLayer().GetType() == LayerType::Output))
1902  {
1903  if (!isConnectedToOutputLayer)
1904  {
1905  isConnectedToOutputLayer = true;
1906  // If Export is enabled, disable memory management so we can export; otherwise we do a copy
1907  isMemoryManaged = !m_NetworkProperties.m_ExportEnabled;
1908  }
1909  else
1910  {
1911  // Importing in this case would likely cause unexpected behaviour, so we disallow it.
1912  ARMNN_LOG(warning) <<
1913  fmt::format("Layer name: '{0}' guid: '{1}' has two or more OutputLayers connected to it. "
1914  "This will prevent importing on the connected OutputLayers.",
1915  layer->GetName(), layer->GetGuid());
1916  isMemoryManaged = true;
1917  }
1918  }
1919  }
1920 
1921  ITensorHandle* tensorHandle;
1922  if (isMemoryManaged)
1923  {
1924  managedTensorHandles.emplace_back(GetTensorHandle(layer, slot));
1925  tensorHandle = managedTensorHandles.back().get();
1926  }
1927  else
1928  {
1929  unmanagedTensorHandles.emplace_back(GetTensorHandle(layer, slot));
1930  tensorHandle = unmanagedTensorHandles.back().get();
1931  }
1932 
1933  workingMemDescriptor.m_Outputs.push_back(tensorHandle);
1934 
1935  HandleInfo& handleInfo = outputToHandleInfoMap[&slot];
1936  handleInfo.m_TensorHandle = tensorHandle;
1937 
1938  // Store the coordinates of the current layer's OutputSlot that is connected to the OutputLayer
1939  if (isConnectedToOutputLayer)
1940  {
1941  handleInfo.m_IsOutputLayerHandle = true;
1942  handleInfo.m_OutputMemDescriptorCoords.m_OutputSlotCoords = {layerIndex, slotIndex};
1943  }
1944  // Store the LayerBindingId of the InputLayer
1945  if (isInputLayer)
1946  {
1947  handleInfo.m_IsInputLayerHandle = true;
1948  LayerBindingId bindingId = static_cast<BindableLayer*>(layer)->GetBindingId();
1949  handleInfo.m_InputMemDescriptorCoords.m_LayerBindingId = bindingId;
1950  }
1951  slotIndex++;
1952  }
1953  // Loop through the input slots in the same layer and decrement the reference counter associated
1954  // with each tensor handle we encounter.
1955  // Once it reaches zero, the lifetime of the tensor handle has ended, and we mark its memory as available
1956  // so that the next tensor handle with a non-overlapping lifetime can share its memory.
1957  for (auto& slot : layer->GetInputSlots())
1958  {
1959  ARMNN_ASSERT(slot.GetConnection());
1960  auto outputSlot = slot.GetConnectedOutputSlot();
1961  auto key = outputSlot->GetOwningLayer().GetGuid();
1962 
1963  // Constant layers' execution and management are handled during loaded network construction
1964  auto found = m_ConstantTensorHandles.find(key);
1965  if (found != m_ConstantTensorHandles.end())
1966  {
1967  ITensorHandle* tensorHandle = found->second;
1968  workingMemDescriptor.m_Inputs.push_back(tensorHandle);
1969 
1970  // Odd case where a constant layer is connected to an output layer
1971  // We will need to create a HandleInfo to track it
1972  if (isOutputLayer)
1973  {
1974  LayerBindingId bindingId = static_cast<BindableLayer*>(layer)->GetBindingId();
1975 
1976  HandleInfo& handleInfo = outputToHandleInfoMap[outputSlot];
1977  handleInfo.m_TensorHandle = tensorHandle;
1978  handleInfo.m_IsOutputLayerHandle = true;
1979  handleInfo.m_OutputMemDescriptorCoords.m_LayerBindingIds.push_back(bindingId);
1980  handleInfo.m_OutputMemDescriptorCoords.m_InputSlotCoords.push_back({layerIndex, 0});
1981  }
1982  continue;
1983  }
1984 
1985  HandleInfo& handleInfo = outputToHandleInfoMap.at(outputSlot);
1986 
1987  ITensorHandle* inputTensorHandle = handleInfo.m_TensorHandle;
1988  workingMemDescriptor.m_Inputs.push_back(inputTensorHandle);
1989 
1990  // Store the LayerBindingId of the OutputLayer
1991  if (isOutputLayer)
1992  {
1993  LayerBindingId bindingId = static_cast<BindableLayer*>(layer)->GetBindingId();
1994  handleInfo.m_OutputMemDescriptorCoords.m_LayerBindingIds.push_back(bindingId);
1995  handleInfo.m_OutputMemDescriptorCoords.m_InputSlotCoords.push_back({layerIndex, 0});
1996  }
1997  // In this case the layer is not an Output Layer but shares its input tensorHandle with an OutputLayer
1998  // It will need to be updated as well, if we swap out the tensorHandle
1999  else if (handleInfo.m_IsOutputLayerHandle)
2000  {
2001  handleInfo.m_OutputMemDescriptorCoords.m_InputSlotCoords.push_back({layerIndex, slot.GetSlotIndex()});
2002  }
2003 
2004  // Store the coordinates of the InputSlots connected to the InputLayer
2005  // There can be more than one InputSlot connected to an InputLayer, so we use a vector
2006  if (handleInfo.m_IsInputLayerHandle)
2007  {
2008  std::pair<LayerGuid, unsigned int> connectionLocation{layerIndex, slot.GetSlotIndex()};
2009  handleInfo.m_InputMemDescriptorCoords.m_InputSlotCoords.emplace_back(connectionLocation);
2010  }
2011  }
2012  workingMemDescriptorMap.insert({layer->GetGuid(), workingMemDescriptor});
2013 
2014  // Input/Output layers/workloads will not be executed, so the descriptor is not added to workingMemDescriptors
2015  // However, we will still need to manage the tensorHandle
2016  if (!isInputLayer)
2017  {
2018  workingMemDescriptors.push_back(workingMemDescriptor);
2019  layerIndex++;
2020  }
2021  }
2022 
2023  std::vector<std::pair<std::shared_ptr<TensorMemory>, MemorySource>> tensorMemory;
2024 
2025  auto externalMemoryManager = CreateExternalMemoryManger(tensorMemory);
2026 
2027  // Sort m_TensorMemory so that its order matches the outputSlot order
2028  std::sort(tensorMemory.begin(), tensorMemory.end(),
2029  [](const std::pair<std::shared_ptr<TensorMemory>, MemorySource>& lhs,
2030  const std::pair<std::shared_ptr<TensorMemory>, MemorySource>& rhs)
2031  {
2032  return lhs.first->m_OutputSlotId < rhs.first->m_OutputSlotId;
2033  });
2034 
2035  std::vector<WorkingMemHandle::InputMemDescriptorCoords> inputConnectionsInfo;
2036  std::vector<WorkingMemHandle::OutputMemDescriptorCoords> outputConnectionsInfo;
2037 
2038  for (const auto& handleInfo: outputToHandleInfoMap)
2039  {
2040  if (handleInfo.second.m_IsOutputLayerHandle)
2041  {
2042  outputConnectionsInfo.emplace_back(handleInfo.second.m_OutputMemDescriptorCoords);
2043  }
2044 
2045  if (handleInfo.second.m_IsInputLayerHandle)
2046  {
2047  inputConnectionsInfo.emplace_back(handleInfo.second.m_InputMemDescriptorCoords);
2048  }
2049  }
2050 
2051  return std::make_unique<WorkingMemHandle>(networkId,
2052  inputConnectionsInfo,
2053  outputConnectionsInfo,
2054  workingMemDescriptors,
2055  workingMemDescriptorMap,
2056  std::move(externalMemoryManager),
2057  std::move(tensorMemory),
2058  std::move(managedTensorHandles),
2059  std::move(unmanagedTensorHandles));
2060 }
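
// Illustrative sketch, assuming loadedNetwork, networkId and per-thread input/
// output tensors already exist (and <thread> is included): overlapped execution
// as the comment above describes, with one WorkingMemHandle per thread.
//
//     auto handle0 = loadedNetwork->CreateWorkingMemHandle(networkId);
//     auto handle1 = loadedNetwork->CreateWorkingMemHandle(networkId);
//     std::thread t0([&]{ loadedNetwork->Execute(inputs0, outputs0, *handle0); });
//     std::thread t1([&]{ loadedNetwork->Execute(inputs1, outputs1, *handle1); });
//     t0.join();
//     t1.join();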
2061 
2062 void LoadedNetwork::RegisterDebugCallback(const DebugCallbackFunction& func)
2063 {
2064  for (auto&& workloadPtr: m_WorkloadQueue)
2065  {
2066  workloadPtr.get()->RegisterDebugCallback(func);
2067  }
2068 }
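
// Illustrative sketch, assuming loadedNetwork exists: register a callback that
// each Debug layer workload invokes with the owning layer's guid, the output
// slot index and a handle to the output tensor.
//
//     loadedNetwork->RegisterDebugCallback(
//         [](LayerGuid /*guid*/, unsigned int slotIndex, ITensorHandle* /*handle*/)
//         {
//             ARMNN_LOG(info) << "Debug layer output ready, slot " << slotIndex;
//         });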
2069 
2070 
2071 void LoadedNetwork::CreateMemoryProfileAsync()
2072 {
2073  struct PartialBlock
2074  {
2075  unsigned int m_StartOfLife;
2076  unsigned int m_Lifetime;
2077 
2078  size_t m_MemSize;
2079  unsigned int m_Index;
2080 
2081  BackendId m_BackendId;
2082  };
2083 
2084  auto align = [](size_t numToAlign)
2085  {
2086  const size_t alignment = sizeof(float);
2087  return ((numToAlign + alignment - 1) / alignment) * alignment;
2088  };
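 // e.g. with alignment == sizeof(float) == 4 on typical platforms:
 // align(10) -> ((10 + 3) / 4) * 4 = 12, while align(12) returns 12 unchanged.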
2089 
2090  std::unordered_map<const OutputSlot*, PartialBlock> memBlockTrackerMap;
2091 
2092  const bool inputImportingEnabled = m_NetworkProperties.m_InputSource != MemorySource::Undefined;
2093  const bool outputImportingEnabled = m_NetworkProperties.m_OutputSource != MemorySource::Undefined;
2094 
2095  unsigned int timestep = 0;
2096  unsigned int outputIndex = 0;
2097  Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
2098 
2099  for (auto&& layer : order)
2100  {
2101  const LayerType& layerType = layer->GetType();
2102  // Don't manage memory if importing.
2103  if (layerType == LayerType::Input && inputImportingEnabled)
2104  {
2105  continue;
2106  }
2107  // Don't manage memory if importing.
2108  if (layerType == LayerType::Output && outputImportingEnabled
2109  && layer->GetInputSlot(0).GetConnectedOutputSlot()->GetNumConnections() == 1)
2110  {
2111  continue;
2112  }
2113  // Because Constant Layer memory cannot be shared, it must persist for the lifetime of execution;
2114  // its management is handled separately.
2115  if (layerType == LayerType::Constant)
2116  {
2117  continue;
2118  }
2119 
2120  BackendId backendId = layer->GetBackendId();
2121  for (auto& outputSlot : layer->GetOutputSlots())
2122  {
2123  if (!m_SupportsExternallyManagedMemory[backendId])
2124  {
2125  continue;
2126  }
2127 
2128  PartialBlock partialBlock;
2129 
2130  partialBlock.m_StartOfLife = timestep;
2131 
2132  size_t alignedSize = align(outputSlot.GetOutputHandler().GetTensorInfo().GetNumBytes());
2133  partialBlock.m_MemSize = alignedSize;
2134  partialBlock.m_Index = outputIndex++;
2135  partialBlock.m_Lifetime = outputSlot.GetNumConnections();
2136  partialBlock.m_BackendId = backendId;
2137 
2138  if (partialBlock.m_Lifetime == 0)
2139  {
2140  m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
2141  partialBlock.m_StartOfLife,
2142  partialBlock.m_MemSize,
2143  0,
2144  partialBlock.m_Index);
2145  }
2146  else
2147  {
2148  memBlockTrackerMap[&outputSlot] = partialBlock;
2149  }
2150  }
2151 
2152  for (auto& inputSlot : layer->GetInputSlots())
2153  {
2154  const Layer& connectedInputLayer = inputSlot.GetConnectedOutputSlot()->GetOwningLayer();
2155  const LayerType& owningLayerType = connectedInputLayer.GetType();
2156 
2157  if (owningLayerType == LayerType::Constant)
2158  {
2159  continue;
2160  }
2161  if (inputImportingEnabled && owningLayerType == LayerType::Input)
2162  {
2163  continue;
2164  }
  // Slots produced by a backend without externally managed memory were never
  // added to memBlockTrackerMap above, so skip them here as well, matching the
  // equivalent guard in CreateMemoryProfile below.
  if (!m_SupportsExternallyManagedMemory[connectedInputLayer.GetBackendId()])
  {
  continue;
  }
2165 
2166  auto outputSlot = inputSlot.GetConnectedOutputSlot();
2167 
2168  PartialBlock& partialBlock = memBlockTrackerMap.at(outputSlot);
2169 
2170  auto& lifetime = partialBlock.m_Lifetime;
2171  --lifetime;
2172 
2173  if (lifetime == 0)
2174  {
2175  m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
2176  timestep,
2177  partialBlock.m_MemSize,
2178  0,
2179  partialBlock.m_Index);
2180  }
2181  }
2182  ++timestep;
2183  }
2184 }
2185 
2186 void LoadedNetwork::CreateMemoryProfile()
2187 {
2188  // Finds the first TensorHandle ancestor of a SubTensorHandle. If the ITensorHandle provided
2189  // is a TensorHandle, the function just returns it
2190  auto TraceSubTensorHandleAncestry = [](ITensorHandle* const subTensorHandle)
2191  {
2192  ITensorHandle* ancestor = subTensorHandle;
2193  while (ancestor && ancestor->GetParent())
2194  {
2195  ancestor = ancestor->GetParent();
2196  }
2197  return ancestor;
2198  };
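 // e.g. for a parent chain subTensorB -> subTensorA -> rootTensor, passing
 // subTensorB returns rootTensor; passing rootTensor (GetParent() == nullptr)
 // returns rootTensor itself.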
2199 
2200  struct PartialBlock
2201  {
2202  unsigned int m_StartOfLife;
2203  unsigned int m_Lifetime;
2204 
2205  size_t m_MemSize;
2206  unsigned int m_Index;
2207 
2208  BackendId m_BackendId;
2209  };
2210 
2211  auto align = [](size_t numToAlign)
2212  {
2213  const size_t alignment = sizeof(float);
2214  return ((numToAlign + alignment - 1) / alignment) * alignment;
2215  };
2216 
2217  std::unordered_map<ITensorHandle*, PartialBlock> memBlockTrackerMap;
2218 
2219  const bool inputImportingEnabled = m_NetworkProperties.m_InputSource != MemorySource::Undefined;
2220  const bool outputImportingEnabled = m_NetworkProperties.m_OutputSource != MemorySource::Undefined;
2221 
2222  unsigned int timestep = 0;
2223  unsigned int outputIndex = 0;
2224  Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
2225 
2226  for (auto&& layer : order)
2227  {
2228  const LayerType& layerType = layer->GetType();
2229  // Don't manage memory if importing.
2230  if (layerType == LayerType::Input && inputImportingEnabled)
2231  {
2232  continue;
2233  }
2234  // Don't manage memory if importing.
2235  if (layerType == LayerType::Output && outputImportingEnabled
2236  && layer->GetInputSlot(0).GetConnectedOutputSlot()->GetNumConnections() == 1)
2237  {
2238  continue;
2239  }
2240  // Because Constant Layer memory cannot be shared, it must persist for the lifetime of execution;
2241  // its management is handled separately.
2242  if (layerType == LayerType::Constant)
2243  {
2244  continue;
2245  }
2246 
2247  BackendId backendId = layer->GetBackendId();
2248  for (auto& outputSlot : layer->GetOutputSlots())
2249  {
2250  if (!m_SupportsExternallyManagedMemory[backendId])
2251  {
2252  continue;
2253  }
2254 
2255  ITensorHandle* tensorHandle = outputSlot.GetOutputHandler().GetData();
2256  tensorHandle = TraceSubTensorHandleAncestry(tensorHandle);
2257 
2258  if (memBlockTrackerMap.find(tensorHandle) == memBlockTrackerMap.end())
2259  {
2260  PartialBlock partialBlock;
2261 
2262  partialBlock.m_StartOfLife = timestep;
2263 
2264  size_t alignedSize = align(outputSlot.GetOutputHandler().GetTensorInfo().GetNumBytes());
2265  partialBlock.m_MemSize = alignedSize;
2266  partialBlock.m_Index = outputIndex++;
2267  partialBlock.m_Lifetime = outputSlot.GetNumConnections();
2268  partialBlock.m_BackendId = backendId;
2269 
2270  if (partialBlock.m_Lifetime == 0)
2271  {
2272  m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
2273  partialBlock.m_StartOfLife,
2274  partialBlock.m_MemSize,
2275  0,
2276  partialBlock.m_Index);
2277  }
2278  else
2279  {
2280  memBlockTrackerMap[tensorHandle] = partialBlock;
2281  }
2282  m_Tensorhandles.push_back(tensorHandle);
2283 
2284  }
2285  else
2286  {
2287  memBlockTrackerMap.at(tensorHandle).m_Lifetime += outputSlot.GetNumConnections();
2288  }
2289  }
2290 
2291  for (auto& inputSlot : layer->GetInputSlots())
2292  {
2293  const Layer& connectedInputLayer = inputSlot.GetConnectedOutputSlot()->GetOwningLayer();
2294  const LayerType& owningLayerType = connectedInputLayer.GetType();
2295 
2296  if (owningLayerType == LayerType::Constant)
2297  {
2298  continue;
2299  }
2300  if (inputImportingEnabled && owningLayerType == LayerType::Input)
2301  {
2302  continue;
2303  }
2304  if (!m_SupportsExternallyManagedMemory[connectedInputLayer.GetBackendId()])
2305  {
2306  continue;
2307  }
2308 
2309  auto outputSlot = inputSlot.GetConnectedOutputSlot();
2310 
2311  ITensorHandle* tensorHandle = outputSlot->GetOutputHandler().GetData();
2312  tensorHandle = TraceSubTensorHandleAncestry(tensorHandle);
2313 
2314  PartialBlock& partialBlock = memBlockTrackerMap.at(tensorHandle);
2315 
2316  auto& lifetime = partialBlock.m_Lifetime;
2317  --lifetime;
2318 
2319  if (lifetime == 0)
2320  {
2321  m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
2322  timestep,
2323  partialBlock.m_MemSize,
2324  0,
2325  partialBlock.m_Index);
2326  }
2327  }
2328  ++timestep;
2329  }
2330 
2331 }
2332 
2333 std::unique_ptr<MemoryManager> LoadedNetwork::CreateExternalMemoryManger(
2334  std::vector<std::pair<std::shared_ptr<TensorMemory>, MemorySource>>& tensorMemoryVec)
2335 {
2336  std::unique_ptr<MemoryManager> memoryManager = std::make_unique<MemoryManager>();
2337  auto allocatorMap = BackendRegistryInstance().GetAllocators();
2338 
2339  for (auto& backend : m_MemBinMap)
2340  {
2341  std::vector<BufferStorage> bufferStorageVec;
2342 
2343  std::shared_ptr<ICustomAllocator> backendAllocator;
2344  if (allocatorMap.find(backend.first) != allocatorMap.end())
2345  {
2346  backendAllocator = allocatorMap[backend.first];
2347  }
2348  else
2349  {
2350  backendAllocator = m_Backends[backend.first]->GetDefaultAllocator();
2351  }
2352 
2353  for (auto& memBin : backend.second)
2354  {
2355  BufferStorage bufferStorage;
2356  bufferStorage.m_BufferSize = memBin.m_MemSize;
2357  bufferStorage.m_TensorMemoryVector.reserve(memBin.m_MemBlocks.size());
2358 
2359  for (auto& memBlock : memBin.m_MemBlocks)
2360  {
2361  auto tensorMemory = std::make_shared<TensorMemory>(TensorMemory{memBlock.m_Offset, memBlock.m_Index});
2362 
2363  tensorMemoryVec.emplace_back(tensorMemory, backendAllocator->GetMemorySourceType());
2364  bufferStorage.m_TensorMemoryVector.emplace_back(tensorMemory);
2365  }
2366 
2367  bufferStorageVec.emplace_back(std::move(bufferStorage));
2368  }
2369 
2370  memoryManager->StoreMemToAllocate(bufferStorageVec, backendAllocator, 4);
2371  }
2372 
2373  return memoryManager;
2374 }
2375 
2376 LayerBindingId LoadedNetwork::ValidateImportedInputID(ImportedInputId id)
2377 {
2378  try
2379  {
2380  const auto& importedTensorHandlePin = m_PreImportedInputHandles.at(id);
2381  if (!importedTensorHandlePin.m_TensorHandle)
2382  {
2383  throw InvalidArgumentException(fmt::format("LoadedNetwork::Execute:"
2384  "PreImportedInput: {} has been deleted", id));
2385  }
2386  return importedTensorHandlePin.m_LayerBindingId;
2387  }
2388  catch (const std::out_of_range&)
2389  {
2390  throw InvalidArgumentException(fmt::format("LoadedNetwork::Execute: Unknown ImportedInputId: {}", id));
2391  }
2392 }
2393 
2394 LayerBindingId LoadedNetwork::ValidateImportedOutputID(ImportedOutputId id)
2395 {
2396  try
2397  {
2398  const auto& importedTensorHandlePin = m_PreImportedOutputHandles.at(id);
2399  if (!importedTensorHandlePin.m_TensorHandle)
2400  {
2401  throw InvalidArgumentException(fmt::format("LoadedNetwork::Execute: "
2402  "PreImportedOutput: {} has been deleted", id));
2403  }
2404  return importedTensorHandlePin.m_LayerBindingId;
2405  }
2406  catch (const std::out_of_range&)
2407  {
2408  throw InvalidArgumentException(fmt::format("LoadedNetwork::Execute: Unknown ImportedOutputId: {}", id));
2409  }
2410 }
2411 
2412 }