ArmNN 21.11 - LoadedNetwork.cpp
//
// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "LoadedNetwork.hpp"
#include "Layer.hpp"
#include "Graph.hpp"
#include <Processes.hpp>
#include "Profiling.hpp"
#include "HeapProfiling.hpp"
#include "WorkingMemHandle.hpp"

#include <armnn/Logging.hpp>
#include <armnn/utility/Assert.hpp>

#include <armnn/BackendHelper.hpp>

#include <fmt/format.h>
namespace armnn
{

using namespace std;
using namespace armnn::profiling;

namespace
{

template <typename ExceptionType>
std::string ToErrorMessage(const char* prefix, const ExceptionType& error)
{
    std::stringstream ss;
    ss << prefix << " " << error.what();
    return ss.str();
}

void AddLayerStructure(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
                       const Layer& layer,
                       ProfilingGuid networkGuid)
{
    // Add layer to the post-optimisation network structure
    std::string layerName = layer.GetNameStr().empty() ? "<Unnamed>" : layer.GetNameStr();
    timelineUtils->CreateNamedTypedChildEntity(layer.GetGuid(),
                                               networkGuid,
                                               layerName,
                                               LabelsAndEventClasses::LAYER_GUID);
    for (auto&& input : layer.GetInputSlots())
    {
        const IOutputSlot* source = input.GetConnectedOutputSlot();
        ARMNN_ASSERT(source != NULL);
        timelineUtils->CreateConnectionRelationship(ProfilingRelationshipType::RetentionLink,
                                                    source->GetOwningLayerGuid(),
                                                    layer.GetGuid());
    }
}

void AddWorkloadStructure(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
                          std::unique_ptr<IWorkload>& workload,
                          const Layer& layer)
{
    // Add workload to the post-optimisation network structure
    timelineUtils->CreateTypedEntity(workload->GetGuid(), LabelsAndEventClasses::WORKLOAD_GUID);
    timelineUtils->MarkEntityWithLabel(workload->GetGuid(),
                                       layer.GetBackendId().Get(),
                                       LabelsAndEventClasses::BACKENDID_GUID);

    // Link the workload to the layer
    timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
                                      layer.GetGuid(),
                                      workload->GetGuid(),
                                      LabelsAndEventClasses::CHILD_GUID);
}

} // anonymous

std::unique_ptr<LoadedNetwork> LoadedNetwork::MakeLoadedNetwork(std::unique_ptr<IOptimizedNetwork> net,
                                                                std::string& errorMessage,
                                                                const INetworkProperties& networkProperties,
                                                                profiling::ProfilingService& profilingService)
{
    std::unique_ptr<LoadedNetwork> loadedNetwork;

    auto Fail = [&](const std::exception& error) -> std::unique_ptr<LoadedNetwork>
    {
        errorMessage = ToErrorMessage("An error occurred when preparing the network workloads: ", error);
        ARMNN_LOG(error) << errorMessage;

        return std::unique_ptr<LoadedNetwork>();
    };

    try
    {
        loadedNetwork.reset(new LoadedNetwork(std::move(net), networkProperties, profilingService));
    }
    catch (const armnn::RuntimeException& error)
    {
        return Fail(error);
    }
    catch (const armnn::Exception& error)
    {
        return Fail(error);
    }
    catch (const std::runtime_error& error)
    {
        return Fail(error);
    }

    return loadedNetwork;
}
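
// Illustrative usage sketch (not part of the original source): a caller such as the
// ArmNN runtime typically drives this factory function roughly as follows. The
// variable names and the exact INetworkProperties arguments are hypothetical.
//
//     std::string errorMessage;
//     std::unique_ptr<LoadedNetwork> loadedNetwork =
//         LoadedNetwork::MakeLoadedNetwork(std::move(optimizedNet), errorMessage,
//                                          networkProperties, profilingService);
//     if (!loadedNetwork)
//     {
//         ARMNN_LOG(error) << errorMessage; // construction failed; details already captured
//     }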

LoadedNetwork::LoadedNetwork(std::unique_ptr<IOptimizedNetwork> net,
                             const INetworkProperties& networkProperties,
                             profiling::ProfilingService& profilingService) :
                             m_OptimizedNetwork(std::move(net)),
                             m_NetworkProperties(networkProperties),
                             m_TensorHandleFactoryRegistry(),
                             m_ProfilingService(profilingService)
{
    // Get the profiler and register it for the current thread.
    const std::shared_ptr<IProfiler>& profiler = m_OptimizedNetwork->GetProfiler();
    ProfilerManager::GetInstance().RegisterProfiler(profiler.get());

    profiler->EnableProfiling(networkProperties.m_ProfilingEnabled);

    profiler->EnableNetworkDetailsToStdOut(networkProperties.m_OutputNetworkDetailsMethod);

    // First create tensor handlers, backends and workload factories.
    // Handlers are created before workloads because workload creation can modify
    // some of the handlers (for example the splitter and concat layers).

    bool useExternalMemoryManager = false;
    bool useInternalMemoryManager = false;
    Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
    for (auto&& layer : order)
    {
        auto const& backendId = layer->GetBackendId();
        if (m_Backends.count(backendId) == 0)
        {
            auto createBackend = BackendRegistryInstance().GetFactory(backendId);
            auto it = m_Backends.emplace(std::make_pair(backendId, createBackend()));

            IBackendInternal* backend = it.first->second.get();

            if (networkProperties.m_AsyncEnabled &&
                !HasCapability(BackendOptions::BackendOption{"AsyncExecution", true}, backend->GetCapabilities()))
            {
                std::string er = backend->GetId();
                er += " does not support AsyncExecution";
                throw BackendCapabilityException(er);
            }

            if (networkProperties.m_AsyncEnabled &&
                !HasCapability(BackendOptions::BackendOption{"ExternallyManagedMemory", true},
                               backend->GetCapabilities()))
            {
                std::string er = backend->GetId();
                er += " does not support ExternallyManagedMemory\n";
                er += "AsyncEnabled networks require all backends to support ExternallyManagedMemory";
                throw BackendCapabilityException(er);
            }

            if (HasCapability(BackendOptions::BackendOption{"ExternallyManagedMemory", true},
                              backend->GetCapabilities()) &&
                (m_NetworkProperties.m_ExternalMemoryManagementEnabled || m_NetworkProperties.m_AsyncEnabled))
            {
                m_SupportsExternallyManagedMemory[backend->GetId()] = true;
                useExternalMemoryManager = true;
            }
            else
            {
                m_SupportsExternallyManagedMemory[backend->GetId()] = false;
                useInternalMemoryManager = true;
            }

            IBackendInternal::IWorkloadFactoryPtr workloadFactory;
            if (backend->SupportsTensorAllocatorAPI())
            {
                workloadFactory = backend->CreateWorkloadFactory(
                    m_TensorHandleFactoryRegistry,
                    m_OptimizedNetwork->pOptimizedNetworkImpl->GetModelOptions(),
                    static_cast<MemorySourceFlags>(m_NetworkProperties.m_InputSource),
                    static_cast<MemorySourceFlags>(m_NetworkProperties.m_OutputSource));
            }
            else
            {
                m_BackendMemoryMangers.emplace_back(backend->CreateMemoryManager());
                workloadFactory = backend->CreateWorkloadFactory(
                    m_BackendMemoryMangers.back(), m_OptimizedNetwork->pOptimizedNetworkImpl->GetModelOptions());
            }
            m_WorkloadFactories[backendId] = std::move(workloadFactory);
        }
    }

    if (!networkProperties.m_AsyncEnabled)
    {
        for (auto&& layer : order)
        {
            auto& workloadFactory = GetWorkloadFactory(*layer);
            bool supportsExternalManager = m_SupportsExternallyManagedMemory[layer->GetBackendId()];

            switch (layer->GetType())
            {
                case LayerType::Input:
                case LayerType::MemImport:
                {
                    // If IsImportEnabled is true then we need to set IsMemoryManaged
                    // to false when creating TensorHandles
                    layer->CreateTensorHandles(m_TensorHandleFactoryRegistry,
                                               workloadFactory,
                                               !supportsExternalManager && !m_NetworkProperties.m_ImportEnabled);
                    break;
                }
                case LayerType::Constant:
                {
                    layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory, true);
                    break;
                }
                default:
                {
                    // Look for a layer with 1 OutputSlot which has 1 connection, and that connection is an Output Layer.
                    // If Export is enabled, disable memory management so we can export; otherwise we do a copy.
                    if ((layer->GetNumOutputSlots() == 1) &&
                        (layer->GetOutputSlots()[0].GetNumConnections() == 1) &&
                        (layer->GetOutputSlots()[0].GetConnection(0)->GetOwningLayer().GetType() == LayerType::Output))
                    {
                        layer->CreateTensorHandles(m_TensorHandleFactoryRegistry,
                                                   workloadFactory,
                                                   !supportsExternalManager && !m_NetworkProperties.m_ExportEnabled);
                    }
                    else
                    {
                        layer->CreateTensorHandles(m_TensorHandleFactoryRegistry,
                                                   workloadFactory,
                                                   !supportsExternalManager);
                    }
                }
            }
        }
    }

    ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
    std::unique_ptr<TimelineUtilityMethods> timelineUtils =
        TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService);
    if (timelineUtils)
    {
        timelineUtils->CreateTypedEntity(networkGuid, LabelsAndEventClasses::NETWORK_GUID);
        // Mark the network with a start of life event
        timelineUtils->RecordEvent(networkGuid, LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
        // and with the process ID
        int processID = armnnUtils::Processes::GetCurrentId();
        std::stringstream ss;
        ss << processID;
        timelineUtils->MarkEntityWithLabel(networkGuid, ss.str(), LabelsAndEventClasses::PROCESS_ID_GUID);
    }

    // Then create workloads.
    {
        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "LoadNetwork_CreateWorkloads");
        for (auto&& layer : order)
        {
            if (timelineUtils)
            {
                // Add layer to the post-optimisation network structure
                AddLayerStructure(timelineUtils, *layer, networkGuid);
            }

            const IWorkloadFactory& workloadFactory = GetWorkloadFactory(*layer);

            switch (layer->GetType())
            {
                case LayerType::Input:
                case LayerType::Output:
                {
                    // Inputs and outputs are treated in a special way - see EnqueueInput() and EnqueueOutput().
                    break;
                }
                default:
                {
                    auto workload = layer->CreateWorkload(workloadFactory);

                    if (!workload)
                    {
                        const char* const layerName =
                            layer->GetNameStr().length() != 0 ? layer->GetName() : "<Unnamed>";
                        throw InvalidArgumentException(
                            fmt::format("No workload created for layer (name: '{0}' type: '{1}') (compute '{2}')",
                                        layerName, static_cast<int>(layer->GetType()), layer->GetBackendId().Get()
                            ));
                    }

                    if (timelineUtils)
                    {
                        // Add workload to the post-optimisation network structure
                        AddWorkloadStructure(timelineUtils, workload, *layer);
                    }

                    // For async networks ConstantWorkloads are managed exclusively by LoadedNetwork
                    // and are separated out from the other workloads
                    if ((networkProperties.m_AsyncEnabled || useExternalMemoryManager) &&
                        layer->GetType() == LayerType::Constant)
                    {
                        m_ConstantTensorHandles[layer->GetGuid()] =
                            layer->GetOutputSlot(0).GetOutputHandler().GetData();
                        m_ConstantWorkloads[layer->GetGuid()] = std::move(workload);
                    }
                    else
                    {
                        m_WorkloadQueue.push_back(std::move(workload));
                    }

                    // Release the constant data in the layer.
                    layer->ReleaseConstantData();
                    break;
                }
            }
        }
    }

    for (auto&& workloadFactory : m_WorkloadFactories)
    {
        workloadFactory.second->AfterWorkloadsCreated();
    }

    if (timelineUtils)
    {
        // Commit to send the post-optimisation network structure
        timelineUtils->Commit();
    }

    if (useExternalMemoryManager)
    {
        if (networkProperties.m_AsyncEnabled)
        {
            CreateMemoryProfileAsync();
        }
        else
        {
            CreateMemoryProfile();
        }

        auto backendStrategyMap = BackendRegistryInstance().GetMemoryOptimizerStrategies();
        for (auto& backendMemoryProfile : m_MemBlockMap)
        {
            const BackendId& backendId = backendMemoryProfile.first;
            if (backendStrategyMap.find(backendId) != backendStrategyMap.end())
            {
                m_MemBinMap[backendId] = backendStrategyMap[backendId]->Optimize(backendMemoryProfile.second);
            }
            else
            {
                m_MemBinMap[backendId] = m_ConstantStrategy->Optimize(backendMemoryProfile.second);
            }
        }

        if (!networkProperties.m_AsyncEnabled)
        {
            m_ExternalMemoryManager = CreateExternalMemoryManger(m_TensorMemory);

            // Sort m_TensorMemory so its order matches m_Tensorhandles
            std::sort(m_TensorMemory.begin(), m_TensorMemory.end(),
                      [](const std::pair<std::shared_ptr<TensorMemory>, MemorySource>& lhs,
                         const std::pair<std::shared_ptr<TensorMemory>, MemorySource>& rhs)
                      {
                          return lhs.first->m_OutputSlotId < rhs.first->m_OutputSlotId;
                      });
        }
    }

    // Now that the intermediate tensor memory has been set up,
    // do any post allocation configuration for each workload.
    if (!networkProperties.m_AsyncEnabled)
    {
        if (useInternalMemoryManager)
        {
            // Set up memory.
            m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().AllocateDynamicBuffers();
        }

        for (auto& workload : m_WorkloadQueue)
        {
            workload->PostAllocationConfigure();
        }
    }

    if (useExternalMemoryManager)
    {
        if (!networkProperties.m_AsyncEnabled)
        {
            AllocateAndExecuteConstantWorkloads();
        }
        else
        {
            AllocateAndExecuteConstantWorkloadsAsync();
        }
    }
}

void LoadedNetwork::AllocateAndExecuteConstantWorkloads()
{
    ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "LoadNetwork_AllocateAndExecuteConstants");
    for (auto& pair : m_ConstantWorkloads)
    {
        auto tensorHandle = m_ConstantTensorHandles[pair.first];
        tensorHandle->Allocate();
        pair.second->Execute();
    }
}
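
// Note: constant workloads run exactly once, here at load time. Their output
// tensor handles are kept in m_ConstantTensorHandles so every subsequent
// inference can read the constant data without re-executing these workloads.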


void LoadedNetwork::AllocateAndExecuteConstantWorkloadsAsync()
{
    ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "LoadNetwork_AllocateAndExecuteConstants");
    Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
    for (auto&& layer : order)
    {
        if (layer->GetType() == LayerType::Constant)
        {
            const auto& outSlot = layer->GetOutputSlots()[0];
            const auto factoryId = outSlot.GetTensorHandleFactoryId();
            ARMNN_ASSERT(factoryId != ITensorHandleFactory::LegacyFactoryId);
            auto& workloadFactory = GetWorkloadFactory(*layer);

            layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory);
            ITensorHandle* tensorHandle = outSlot.GetOutputHandler().GetData();

            m_ConstantTensorHandles[layer->GetGuid()] = tensorHandle;
            tensorHandle->Allocate();

            WorkingMemDescriptor memDesc;
            memDesc.m_Outputs.push_back(tensorHandle);
            m_ConstantWorkloads[layer->GetGuid()]->ExecuteAsync(memDesc);
        }
    }
}

void LoadedNetwork::SendNetworkStructure()
{
    ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "LoadNetwork_SendNetworkStructure");
    Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
    ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();

    std::unique_ptr<TimelineUtilityMethods> timelineUtils =
        TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService);

    timelineUtils->CreateTypedEntity(networkGuid, LabelsAndEventClasses::NETWORK_GUID);

    for (auto&& layer : order)
    {
        // Add layer to the post-optimisation network structure
        AddLayerStructure(timelineUtils, *layer, networkGuid);
        switch (layer->GetType())
        {
            case LayerType::Input:
            case LayerType::Output:
            {
                // Inputs and outputs are treated in a special way - see EnqueueInput() and EnqueueOutput().
                break;
            }
            default:
            {
                for (auto& workload : m_WorkloadQueue)
                {
                    // Add workload to the post-optimisation network structure
                    AddWorkloadStructure(timelineUtils, workload, *layer);
                }
                break;
            }
        }
    }
    // Commit to send the post-optimisation network structure
    timelineUtils->Commit();
}

profiling::ProfilingGuid LoadedNetwork::GetNetworkGuid()
{
    return m_OptimizedNetwork->GetGuid();
}

TensorInfo LoadedNetwork::GetInputTensorInfo(LayerBindingId layerId) const
{
    for (auto&& inputLayer : m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetInputLayers())
    {
        ARMNN_ASSERT_MSG(inputLayer->GetNumOutputSlots() == 1, "Input layer should have exactly 1 output slot");
        if (inputLayer->GetBindingId() == layerId)
        {
            return inputLayer->GetOutputSlot(0).GetTensorInfo();
        }
    }

    throw InvalidArgumentException(fmt::format("No input layer is associated with id {}", layerId));
}

TensorInfo LoadedNetwork::GetOutputTensorInfo(LayerBindingId layerId) const
{
    for (auto&& outputLayer : m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetOutputLayers())
    {
        ARMNN_ASSERT_MSG(outputLayer->GetNumInputSlots() == 1, "Output layer should have exactly 1 input slot");
        ARMNN_ASSERT_MSG(outputLayer->GetInputSlot(0).GetConnection(), "Input slot on Output layer must be connected");
        if (outputLayer->GetBindingId() == layerId)
        {
            return outputLayer->GetInputSlot(0).GetConnection()->GetTensorInfo();
        }
    }

    throw InvalidArgumentException(fmt::format("No output layer is associated with id {}", layerId));
}

const IWorkloadFactory& LoadedNetwork::GetWorkloadFactory(const Layer& layer) const
{
    const IWorkloadFactory* workloadFactory = nullptr;

    auto it = m_WorkloadFactories.find(layer.GetBackendId());
    if (it == m_WorkloadFactories.end())
    {
        throw RuntimeException(fmt::format("No workload factory for {0} to be used for layer: {1}",
                                           layer.GetBackendId().Get(),
                                           layer.GetNameStr()),
                               CHECK_LOCATION());
    }

    workloadFactory = it->second.get();

    ARMNN_ASSERT_MSG(workloadFactory, "No workload factory");

    std::string reasonIfUnsupported;
    ARMNN_ASSERT_MSG(IWorkloadFactory::IsLayerSupported(layer,
                                                        {},
                                                        reasonIfUnsupported,
                                                        m_OptimizedNetwork->pOptimizedNetworkImpl->GetModelOptions()),
                     "Factory does not support layer");
    IgnoreUnused(reasonIfUnsupported);
    return *workloadFactory;
}

namespace {

// Non-copyable class owning accelerator-specific tensor data.
class TensorPin
{
public:
    TensorPin(std::unique_ptr<ITensorHandle> handle, const TensorInfo& info, LayerBindingId id)
        : m_TensorHandle(std::move(handle))
        , m_TensorInfo(info)
        , m_Id(id)
    {
    }

    ITensorHandle* GetTensorHandle() const { return m_TensorHandle.get(); }
    const TensorInfo& GetTensorInfo() const { return m_TensorInfo; }
    LayerBindingId GetBindingId() const { return m_Id; }

private:
    std::unique_ptr<ITensorHandle> m_TensorHandle;
    TensorInfo m_TensorInfo;
    LayerBindingId m_Id;
};

static const TensorPin& GetTensorPin(LayerBindingId id,
                                     const std::vector<TensorPin>& pins,
                                     char const* bindingPointDesc)
{
    auto it = std::find_if(pins.begin(), pins.end(),
                           [id](const TensorPin& pin)
                           {
                               return pin.GetBindingId() == id;
                           });

    if (it != pins.end())
    {
        return *it;
    }
    else
    {
        throw InvalidArgumentException(fmt::format("No tensor supplied for {0} {1}", bindingPointDesc, id));
    }
}

// Stores data that needs to be kept accessible for the entire execution of a workload.
class WorkloadData
{
public:
    WorkloadData(const InputTensors& inputTensors, const OutputTensors& outputTensors)
    {
        m_InputTensorPins.reserve(inputTensors.size());
        m_OutputTensorPins.reserve(outputTensors.size());

        for (auto inputTensorPair : inputTensors)
        {
            auto inputTensor = inputTensorPair.second;

            std::unique_ptr<ITensorHandle> tensorHandle =
                std::make_unique<ConstPassthroughTensorHandle>(inputTensor.GetInfo(), inputTensor.GetMemoryArea());
            LayerBindingId layerId = inputTensorPair.first;

            m_InputTensorPins.emplace_back(std::move(tensorHandle), inputTensor.GetInfo(), layerId);
        }

        for (auto outputTensorPair : outputTensors)
        {
            auto outputTensor = outputTensorPair.second;

            std::unique_ptr<ITensorHandle> tensorHandle =
                std::make_unique<PassthroughTensorHandle>(outputTensor.GetInfo(), outputTensor.GetMemoryArea());
            LayerBindingId layerId = outputTensorPair.first;

            m_OutputTensorPins.emplace_back(std::move(tensorHandle), outputTensor.GetInfo(), layerId);
        }
    }

    const TensorPin& GetInputTensorPin(LayerBindingId id) const
    {
        return GetTensorPin(id, m_InputTensorPins, "input");
    }

    const TensorPin& GetOutputTensorPin(LayerBindingId id) const
    {
        return GetTensorPin(id, m_OutputTensorPins, "output");
    }

private:

    std::vector<TensorPin> m_InputTensorPins;
    std::vector<TensorPin> m_OutputTensorPins;
};

} // anonymous

Status LoadedNetwork::EnqueueWorkload(const InputTensors& inputTensors,
                                      const OutputTensors& outputTensors)
{
    const Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();

    // Walk graph to determine the order of execution.
    if (graph.GetNumLayers() < 2)
    {
        ARMNN_LOG(warning) << "IRuntime::EnqueueWorkload()::Less than two nodes in graph";
        return Status::Failure;
    }

    // Data that must be kept alive for the entire execution of the workload.
    WorkloadData workloadData(inputTensors, outputTensors);

    if (graph.GetNumInputs() != inputTensors.size())
    {
        throw InvalidArgumentException("Number of inputs provided does not match network.");
    }

    // For each input to the network, call EnqueueInput with the data passed by the user.
    {
        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "PrepareInputs");
        m_InputQueue.clear();
        m_InputQueue.reserve(graph.GetNumInputs());
        for (const BindableLayer* inputLayer : graph.GetInputLayers())
        {
            const TensorPin& pin = workloadData.GetInputTensorPin(inputLayer->GetBindingId());
            EnqueueInput(*inputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
        }
    }

    // For each output to the network, call EnqueueOutput with the data passed by the user.
    {
        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "PrepareOutputs");
        m_OutputQueue.clear();
        m_OutputQueue.reserve(graph.GetNumOutputs());
        for (const BindableLayer* outputLayer : graph.GetOutputLayers())
        {
            const TensorPin& pin = workloadData.GetOutputTensorPin(outputLayer->GetBindingId());
            EnqueueOutput(*outputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
        }
    }

    std::unique_ptr<TimelineUtilityMethods> timelineUtils =
        TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService);
    ProfilingGuid inferenceGuid = m_ProfilingService.GetNextGuid();
    if (timelineUtils)
    {
        // Add inference timeline trace if profiling is enabled.
        ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
        timelineUtils->CreateTypedEntity(inferenceGuid, LabelsAndEventClasses::INFERENCE_GUID);
        timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
                                          networkGuid,
                                          inferenceGuid,
                                          LabelsAndEventClasses::EXECUTION_OF_GUID);
        timelineUtils->RecordEvent(inferenceGuid, LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
    }

    bool executionSucceeded = true;

    {
        if (m_ProfilingService.IsProfilingEnabled())
        {
            m_ProfilingService.IncrementCounterValue(armnn::profiling::INFERENCES_RUN);
        }
        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Execute");
        ARMNN_SCOPED_HEAP_PROFILING("Executing");
        executionSucceeded = Execute(timelineUtils, inferenceGuid);
    }

    if (timelineUtils)
    {
        // Add end of life of the inference timeline if profiling is enabled.
        timelineUtils->RecordEvent(inferenceGuid, LabelsAndEventClasses::ARMNN_PROFILING_EOL_EVENT_CLASS);
        timelineUtils->Commit();
    }
    return executionSucceeded ? Status::Success : Status::Failure;
}
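
// Illustrative usage sketch (not part of the original source): a synchronous
// inference through EnqueueWorkload, assuming a network with one input bound to
// id 0 and one output bound to id 1, and caller-owned buffers of the right size.
// 'inputInfo', 'outputInfo' and the data vectors are hypothetical.
//
//     InputTensors inputs
//     {
//         {0, ConstTensor(inputInfo, inputData.data())}
//     };
//     OutputTensors outputs
//     {
//         {1, Tensor(outputInfo, outputData.data())}
//     };
//     Status status = loadedNetwork->EnqueueWorkload(inputs, outputs);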

void LoadedNetwork::EnqueueInput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo)
{
    if (layer.GetType() != LayerType::Input)
    {
        throw InvalidArgumentException("EnqueueInput: given layer not an InputLayer");
    }

    if (tensorHandle == nullptr)
    {
        throw InvalidArgumentException("EnqueueInput: tensorHandle must not be NULL");
    }

    InputQueueDescriptor inputQueueDescriptor;
    WorkloadInfo info;

    inputQueueDescriptor.m_Inputs.push_back(tensorHandle);
    info.m_InputTensorInfos.push_back(tensorInfo);

    ARMNN_ASSERT_MSG(layer.GetNumOutputSlots() == 1, "Can only handle Input Layer with one output");
    const OutputHandler& handler = layer.GetOutputHandler();
    const TensorInfo& outputTensorInfo = handler.GetTensorInfo();
    ITensorHandle* outputTensorHandle = handler.GetData();
    ARMNN_ASSERT_MSG(outputTensorHandle != nullptr,
                     "Data should have been allocated.");
    inputQueueDescriptor.m_Outputs.push_back(outputTensorHandle);
    info.m_OutputTensorInfos.push_back(outputTensorInfo);

    MemorySourceFlags importFlags = outputTensorHandle->GetImportFlags();
    bool needMemCopy = true;
    if (m_NetworkProperties.m_ImportEnabled)  // Try to import the input tensor
    {
        if (CheckFlag(importFlags, m_NetworkProperties.m_InputSource))
        {
            needMemCopy = false;
            // This assumes a CPU Tensor handle
            void* mem = tensorHandle->Map(false);
            if (outputTensorHandle->Import(mem, m_NetworkProperties.m_InputSource))
            {
                tensorHandle->Unmap();
                return; // No need for a workload since the import has been done.
            }
            tensorHandle->Unmap();
            throw MemoryImportException("EnqueueInput: Memory Import failed");
        }
    }
    if (needMemCopy)
    {
        // Create a mem copy workload for input since we did not import
        std::unique_ptr<IWorkload> inputWorkload = std::make_unique<CopyMemGenericWorkload>(inputQueueDescriptor, info);

        ARMNN_ASSERT_MSG(inputWorkload, "No input workload created");

        std::unique_ptr<TimelineUtilityMethods> timelineUtils =
            TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService);
        if (timelineUtils)
        {
            // Add Input Workload to the post-optimisation network structure
            AddWorkloadStructure(timelineUtils, inputWorkload, layer);
            timelineUtils->Commit();
        }

        m_InputQueue.push_back(move(inputWorkload));
    }
}

void LoadedNetwork::EnqueueOutput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo)
{
    if (layer.GetType() != LayerType::Output)
    {
        throw InvalidArgumentException("EnqueueOutput: given layer not an OutputLayer");
    }

    if (tensorHandle == nullptr)
    {
        throw InvalidArgumentException("EnqueueOutput: tensorHandle must not be NULL");
    }

    OutputQueueDescriptor outputQueueDescriptor;
    WorkloadInfo info;

    outputQueueDescriptor.m_Outputs.push_back(tensorHandle);
    info.m_OutputTensorInfos.push_back(tensorInfo);

    ARMNN_ASSERT_MSG(layer.GetNumInputSlots() == 1, "Output Layer should have exactly one input.");

    // Gets the output handler from the previous node.
    const OutputHandler& outputHandler = layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOutputHandler();

    const TensorInfo& inputTensorInfo = outputHandler.GetTensorInfo();
    ITensorHandle* inputTensorHandle = outputHandler.GetData();
    ARMNN_ASSERT_MSG(inputTensorHandle != nullptr, "Data should have been allocated.");

    // Try to import the output tensor.
    // Note: We can only import the output pointer if all of the following hold true:
    // a) The imported pointer is aligned sufficiently
    // b) The tensor has zero padding
    // c) There is only one connection to the OutputSlot and it is to an OutputLayer.
    // d) The output pointer is allocated via malloc. (Other types will be supported in a later release)
    // e) m_IsExportEnabled must be set to true
    bool needMemCopy = true;
    if (m_NetworkProperties.m_ExportEnabled &&
        (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetNumConnections() == 1))
    {
        if (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOwningLayer().GetType() != LayerType::Input)
        {
            MemorySourceFlags importFlags = inputTensorHandle->GetImportFlags();
            if (CheckFlag(importFlags, m_NetworkProperties.m_OutputSource))
            {
                needMemCopy = false;
                void* mem = tensorHandle->Map(false);
                bool importOk = inputTensorHandle->Import(mem, m_NetworkProperties.m_OutputSource);
                tensorHandle->Unmap();

                if (importOk)
                {
                    // Insert synchronization workload
                    MemSyncQueueDescriptor syncDesc;
                    syncDesc.m_Inputs.push_back(inputTensorHandle);
                    info.m_InputTensorInfos.push_back(inputTensorInfo);
                    auto syncWorkload = std::make_unique<SyncMemGenericWorkload>(syncDesc, info);
                    ARMNN_ASSERT_MSG(syncWorkload, "No sync workload created");
                    m_OutputQueue.push_back(move(syncWorkload));
                }
                else
                {
                    throw MemoryExportException("EnqueueOutput: Memory Export failed");
                }
            }
        }
    }
    if (needMemCopy)
    {
        // If we got here then we didn't export the memory, so add an output workload which performs a memcopy.
        outputQueueDescriptor.m_Inputs.push_back(inputTensorHandle);
        info.m_InputTensorInfos.push_back(inputTensorInfo);

        std::unique_ptr<IWorkload> outputWorkload =
            std::make_unique<CopyMemGenericWorkload>(outputQueueDescriptor, info);
        ARMNN_ASSERT_MSG(outputWorkload, "No output workload created");

        std::unique_ptr<TimelineUtilityMethods> timelineUtils =
            TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService);
        if (timelineUtils)
        {
            // Add Output Workload to the post-optimisation network structure
            AddWorkloadStructure(timelineUtils, outputWorkload, layer);
            timelineUtils->Commit();
        }

        m_OutputQueue.push_back(move(outputWorkload));
    }
}

void LoadedNetwork::AllocateWorkingMemory(std::lock_guard<std::mutex>& lock)
{
    ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Working Memory Allocation");

    // this unused parameter makes sure we can only call this function with a valid lock
    IgnoreUnused(lock);

    if (m_IsWorkingMemAllocated)
    {
        return;
    }

    if (m_ExternalMemoryManager)
    {
        m_ExternalMemoryManager->Allocate();

        for (unsigned int i = 0; i < m_TensorMemory.size(); ++i)
        {
            m_Tensorhandles[i]->Import(m_TensorMemory[i].first->m_Data, m_TensorMemory[i].second);
        }
    }

    for (auto&& memoryManager : m_BackendMemoryMangers)
    {
        if (memoryManager)
        {
            memoryManager->Acquire();
        }
    }
    m_TensorHandleFactoryRegistry.AquireMemory();
    m_IsWorkingMemAllocated = true;
}

void LoadedNetwork::FreeWorkingMemory()
{
    std::lock_guard<std::mutex> lockGuard(m_WorkingMemMutex);

    if (!m_IsWorkingMemAllocated)
    {
        return;
    }

    if (m_ExternalMemoryManager)
    {
        m_ExternalMemoryManager->Deallocate();
    }

    // Informs the memory managers to release memory in their respective memory groups
    for (auto&& memoryManager : m_BackendMemoryMangers)
    {
        if (memoryManager)
        {
            memoryManager->Release();
        }
    }
    m_TensorHandleFactoryRegistry.ReleaseMemory();
    m_IsWorkingMemAllocated = false;
}

bool LoadedNetwork::Execute(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
                            profiling::ProfilingGuid inferenceGuid)
{
    bool success = true;

    auto Fail = [&](const std::exception& error)
    {
        ARMNN_LOG(error) << "An error occurred attempting to execute a workload: " << error.what();
        success = false;
    };

    try
    {
        std::lock_guard<std::mutex> lockGuard(m_WorkingMemMutex);
        AllocateWorkingMemory(lockGuard);

        ProfilingDynamicGuid workloadInferenceID(0);
        auto ExecuteQueue = [&timelineUtils, &workloadInferenceID, &inferenceGuid](WorkloadQueue& queue)
        {
            for (auto& workload : queue)
            {
                if (timelineUtils)
                {
                    workloadInferenceID = timelineUtils->RecordWorkloadInferenceAndStartOfLifeEvent(workload->GetGuid(),
                                                                                                    inferenceGuid);
                }
                workload->Execute();
                if (timelineUtils)
                {
                    timelineUtils->RecordEndOfLifeEvent(workloadInferenceID);
                }
            }
        };

        ExecuteQueue(m_InputQueue);
        ExecuteQueue(m_WorkloadQueue);
        ExecuteQueue(m_OutputQueue);
    }
    catch (const RuntimeException& error)
    {
        Fail(error);
    }
    catch (const std::runtime_error& error)
    {
        Fail(error);
    }

    return success;
}
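
// Note: the three queues always run in a fixed order (input copies/imports first,
// then the network's workloads, then output copies/syncs), and working memory is
// allocated lazily on the first execution while m_WorkingMemMutex is held.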

void LoadedNetwork::EnqueueInput(const ConstTensor& inputTensor, ITensorHandle* inputTensorHandle)
{
    if (m_NetworkProperties.m_ImportEnabled)  // Try to import the input tensor
    {
        MemorySourceFlags importFlags = inputTensorHandle->GetImportFlags();
        if (CheckFlag(importFlags, m_NetworkProperties.m_InputSource))
        {
            std::unique_ptr<ITensorHandle> tensorHandle =
                std::make_unique<ConstPassthroughTensorHandle>(inputTensor.GetInfo(),
                                                               inputTensor.GetMemoryArea());
            void* mem = tensorHandle->Map(false);

            if (inputTensorHandle->Import(mem, m_NetworkProperties.m_InputSource))
            {
                tensorHandle->Unmap();
                return;
            }
            tensorHandle->Unmap();
            throw MemoryImportException("EnqueueInput: Memory Import failed");
        }
        else
        {
            throw MemoryImportException("EnqueueInput: Memory Import failed, backend does not support Import");
        }
    }
    else
    {
        std::unique_ptr<ITensorHandle> tensorHandle =
            std::make_unique<ConstPassthroughTensorHandle>(inputTensor.GetInfo(), inputTensor.GetMemoryArea());

        auto copyFunc = [](void* dst, const void* src, size_t size)
        {
            memcpy(dst, src, size);
        };

        CopyTensorContentsGeneric(tensorHandle.get(), inputTensorHandle, copyFunc);
    }
}

// Note: We can only import the output pointer if all of the following hold true:
// a) The imported pointer is aligned sufficiently
// b) The tensor has zero padding
// c) There is only one connection to the OutputSlot and it is to an OutputLayer.
// d) The output pointer is allocated via malloc. (Other types will be supported in a later release)
// e) m_IsExportEnabled must be set to true
void LoadedNetwork::ImportOutputTensor(const Tensor& outputTensor, ITensorHandle* outputTensorHandle)
{
    ARMNN_ASSERT_MSG(outputTensorHandle != nullptr, "Data should have been allocated.");
    MemorySourceFlags importFlags = outputTensorHandle->GetImportFlags();
    if (CheckFlag(importFlags, m_NetworkProperties.m_OutputSource))
    {
        std::unique_ptr<ITensorHandle> tensorHandle =
            std::make_unique<PassthroughTensorHandle>(outputTensor.GetInfo(),
                                                      outputTensor.GetMemoryArea());

        void* mem = tensorHandle->Map(false);
        bool importOk = outputTensorHandle->Import(mem, m_NetworkProperties.m_OutputSource);
        tensorHandle->Unmap();

        if (!importOk)
        {
            throw MemoryExportException("ImportOutputTensor: Memory Export failed");
        }
    }
    else
    {
        throw MemoryExportException("ImportOutputTensor: Memory Export failed, attempting to export Input Layer");
    }
}

void CopyToOutputTensor(const Tensor& outputTensor, ITensorHandle* outputTensorHandle)
{
    auto copyFunc = [](void* dst, const void* src, size_t size)
    {
        memcpy(dst, src, size);
    };

    std::unique_ptr<ITensorHandle> tensorHandle =
        std::make_unique<PassthroughTensorHandle>(outputTensor.GetInfo(),
                                                  outputTensor.GetMemoryArea());

    CopyTensorContentsGeneric(outputTensorHandle, tensorHandle.get(), copyFunc);
}

const armnn::ConstTensor GetInputTensor(const LayerBindingId layerId, const InputTensors& inputTensors)
{
    for (auto inputTensorPair : inputTensors)
    {
        LayerBindingId id = inputTensorPair.first;
        if (id == layerId)
        {
            return inputTensorPair.second;
        }
    }
    throw InvalidArgumentException("Input does not exist.");
}

const armnn::Tensor GetOutputTensor(const LayerBindingId layerId, const OutputTensors& outputTensors)
{
    for (auto outputTensorPair : outputTensors)
    {
        LayerBindingId id = outputTensorPair.first;
        if (id == layerId)
        {
            return outputTensorPair.second;
        }
    }
    throw InvalidArgumentException("Output does not exist.");
}

std::vector<ImportedInputId> LoadedNetwork::ImportInputs(const InputTensors& inputTensors)
{
    if (!m_NetworkProperties.m_ImportEnabled)  // Cannot import if import is not enabled
    {
        throw MemoryImportException("ImportInputs: Memory Import failed, NetworkProperties.m_ImportEnabled");
    }

    std::vector<ImportedInputId> importedInputs;
    Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();

    for (auto inputTensor : inputTensors)
    {
        auto layerBindingId = inputTensor.first;
        auto it = std::find_if(graph.GetInputLayers().begin(), graph.GetInputLayers().end(), [=](auto* layer)
        {
            return layer->GetBindingId() == layerBindingId;
        });

        if (it == graph.GetInputLayers().end())
        {
            throw MemoryImportException(fmt::format("ImportInputs: Memory Import failed, unknown LayerBindingId: {}",
                                                    layerBindingId));
        }

        const Layer* layer = *it;
        if (layer->GetType() != LayerType::Input)
        {
            throw InvalidArgumentException("ImportInputs: given layer not an InputLayer");
        }

        auto& backend = m_Backends.at(layer->GetBackendId());
        if (!HasCapability(BackendOptions::BackendOption{"PreImportIOTensors", true}, backend->GetCapabilities()))
        {
            std::string er = backend->GetId();
            er += " does not have PreImportIOTensors capability";
            throw BackendCapabilityException(er);
        }

        const OutputSlot& outputSlot = layer->GetOutputSlots()[0];

        ITensorHandleFactory::FactoryId factoryId = outputSlot.GetTensorHandleFactoryId();
        const TensorInfo& tensorInfo = outputSlot.GetTensorInfo();

        ITensorHandleFactory* handleFactory = m_TensorHandleFactoryRegistry.GetFactory(factoryId);
        ARMNN_ASSERT(handleFactory);

        ImportedTensorHandlePin importedTensorHandlePin{layerBindingId,
                                                        handleFactory->CreateTensorHandle(tensorInfo, false)};

        ITensorHandle* tensorHandle = importedTensorHandlePin.m_TensorHandle.get();

        if (!CheckFlag(tensorHandle->GetImportFlags(), m_NetworkProperties.m_InputSource))
        {
            throw MemoryImportException(
                fmt::format("ImportInputs: Memory Import failed, backend: {} does not support importing from source {}"
                            , factoryId, m_NetworkProperties.m_InputSource));
        }

        std::unique_ptr<ITensorHandle> passThroughTensorHandle =
            std::make_unique<ConstPassthroughTensorHandle>(inputTensor.second.GetInfo(),
                                                           inputTensor.second.GetMemoryArea());

        if (tensorHandle->Import(passThroughTensorHandle->Map(), m_NetworkProperties.m_InputSource))
        {
            importedInputs.push_back(m_CurImportedInputId++);
            passThroughTensorHandle->Unmap();
        }
        else
        {
            passThroughTensorHandle->Unmap();
            throw MemoryImportException("ImportInputs: Memory Import failed");
        }

        m_PreImportedInputHandles.push_back(std::move(importedTensorHandlePin));
    }

    return importedInputs;
}
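
// Illustrative usage sketch (not part of the original source): pre-importing
// avoids a per-inference copy when the same caller-owned buffer is reused across
// many executions. The returned ids are later passed to the asynchronous
// Execute() in place of entries in the InputTensors container. The binding id,
// 'inputInfo' and 'persistentBuffer' names are hypothetical.
//
//     std::vector<ImportedInputId> importedIds =
//         loadedNetwork->ImportInputs({{0, ConstTensor(inputInfo, persistentBuffer)}});
//     // ... run inferences that reference importedIds ...
//     loadedNetwork->ClearImportedInputs(importedIds);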

std::vector<ImportedOutputId> LoadedNetwork::ImportOutputs(const OutputTensors& outputTensors)
{
    if (!m_NetworkProperties.m_ExportEnabled)  // Cannot import if export is not enabled
    {
        throw MemoryImportException("ImportOutputs: Memory Import failed, NetworkProperties.m_ExportEnabled");
    }

    std::vector<ImportedOutputId> importedOutputs;
    Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();

    for (const auto& outputTensor : outputTensors)
    {
        auto layerBindingId = outputTensor.first;
        auto it = std::find_if(graph.GetOutputLayers().begin(), graph.GetOutputLayers().end(), [=](auto* layer)
        {
            return layer->GetBindingId() == layerBindingId;
        });

        if (it == graph.GetOutputLayers().end())
        {
            throw MemoryImportException(fmt::format("ImportOutputs: Memory Import failed, unknown LayerBindingId: {}",
                                                    layerBindingId));
        }

        const Layer* layer = *it;
        if (layer->GetType() != LayerType::Output)
        {
            throw InvalidArgumentException("ImportOutputs: given layer not an OutputLayer");
        }

        auto& backend = m_Backends.at(layer->GetBackendId());
        if (!HasCapability(BackendOptions::BackendOption{"PreImportIOTensors", true}, backend->GetCapabilities()))
        {
            std::string er = backend->GetId();
            er += " does not have PreImportIOTensors capability";
            throw BackendCapabilityException(er);
        }

        const InputSlot& inputSlot = layer->GetInputSlots()[0];
        ITensorHandleFactory::FactoryId factoryId = inputSlot.GetConnectedOutputSlot()->GetTensorHandleFactoryId();
        const TensorInfo& tensorInfo = inputSlot.GetConnectedOutputSlot()->GetTensorInfo();

        ITensorHandleFactory* handleFactory = m_TensorHandleFactoryRegistry.GetFactory(factoryId);
        ARMNN_ASSERT(handleFactory);

        ImportedTensorHandlePin importedTensorHandlePin{layerBindingId,
                                                        handleFactory->CreateTensorHandle(tensorInfo, false)};

        ITensorHandle* tensorHandle = importedTensorHandlePin.m_TensorHandle.get();

        if (!CheckFlag(tensorHandle->GetImportFlags(), m_NetworkProperties.m_OutputSource))
        {
            throw MemoryImportException(fmt::format("ImportOutputs: Memory Import failed, backend: "
                                                    "{} does not support importing from source {}"
                                                    , factoryId, m_NetworkProperties.m_OutputSource));
        }

        if (tensorHandle->Import(outputTensor.second.GetMemoryArea(), m_NetworkProperties.m_OutputSource))
        {
            importedOutputs.push_back(m_CurImportedOutputId++);
        }
        else
        {
            throw MemoryImportException("ImportOutputs: Memory Import failed");
        }

        m_PreImportedOutputHandles.push_back(std::move(importedTensorHandlePin));
    }

    return importedOutputs;
}

void LoadedNetwork::ClearImportedInputs(const std::vector<ImportedInputId> inputIds)
{
    for (auto id : inputIds)
    {
        if (id >= m_PreImportedInputHandles.size())
        {
            throw InvalidArgumentException(fmt::format("ClearImportedInputs::Unknown ImportedInputId: {}", id));
        }

        auto& importedTensorHandle = m_PreImportedInputHandles[id].m_TensorHandle;
        if (!importedTensorHandle)
        {
            throw InvalidArgumentException(
                fmt::format("ClearImportedInputs::ImportedInput with id: {} has already been deleted", id));
        }
        // Call Unimport then destroy the tensorHandle
        importedTensorHandle->Unimport();
        importedTensorHandle = {};
    }
}

void LoadedNetwork::ClearImportedOutputs(const std::vector<ImportedOutputId> outputIds)
{
    for (auto id : outputIds)
    {
        if (id >= m_PreImportedOutputHandles.size())
        {
            throw InvalidArgumentException(fmt::format("ClearImportedOutputs::Unknown ImportedOutputId: {}", id));
        }

        auto& importedTensorHandle = m_PreImportedOutputHandles[id].m_TensorHandle;
        if (!importedTensorHandle)
        {
            throw InvalidArgumentException(
                fmt::format("ClearImportedOutputs::ImportedOutput with id: {} has already been deleted", id));
        }
        // Call Unimport then destroy the tensorHandle
        importedTensorHandle->Unimport();
        importedTensorHandle = {};
    }
}

Status LoadedNetwork::Execute(const InputTensors& inputTensors,
                              const OutputTensors& outputTensors,
                              IWorkingMemHandle& iWorkingMemHandle,
                              std::vector<ImportedInputId> preImportedInputs,
                              std::vector<ImportedOutputId> preImportedOutputs)
{
    const Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();

    if (inputTensors.size() + preImportedInputs.size() != graph.GetNumInputs())
    {
        if (preImportedInputs.empty())
        {
            throw InvalidArgumentException("LoadedNetwork::Execute: Number of inputs provided does not match network.");
        }
        else
        {
            throw InvalidArgumentException("LoadedNetwork::Execute: "
                                           "Number of inputs + preImportedInputs provided does not match network.");
        }
    }

    if (outputTensors.size() + preImportedOutputs.size() != graph.GetNumOutputs())
    {
        if (preImportedOutputs.empty())
        {
            throw InvalidArgumentException("LoadedNetwork::Execute: "
                                           "Number of outputs provided does not match network.");
        }
        else
        {
            throw InvalidArgumentException("LoadedNetwork::Execute: "
                                           "Number of outputs + preImportedOutputs provided does not match network.");
        }
    }

    WorkingMemHandle& workingMemHandle = dynamic_cast<WorkingMemHandle&>(iWorkingMemHandle);
    // Collect all the given LayerBindingIds and check them for duplicates and unknowns.
    std::vector<LayerBindingId>& bindingIds = workingMemHandle.GetBindingIdVector();
    unsigned int index = 0;
    for (auto pair : inputTensors)
    {
        bindingIds[index++] = pair.first;
    }
    for (ImportedInputId id : preImportedInputs)
    {
        bindingIds[index++] = ValidateImportedInputID(id);
    }
    for (auto pair : outputTensors)
    {
        bindingIds[index++] = pair.first;
    }
    for (ImportedOutputId id : preImportedOutputs)
    {
        bindingIds[index++] = ValidateImportedOutputID(id);
    }

    workingMemHandle.ValidateBindingIds();

    auto resetMemHandle = [&]()
    {
        for (ImportedInputId id : preImportedInputs)
        {
            const LayerBindingId layerBindingId = m_PreImportedInputHandles[id].m_LayerBindingId;

            auto inputHandle = workingMemHandle.GetInputHandle(layerBindingId);
            auto inputConnections = workingMemHandle.GetInputConnections(layerBindingId);
            for (auto it : inputConnections)
            {
                *it = inputHandle;
            }
        }

        for (ImportedOutputId id : preImportedOutputs)
        {
            const LayerBindingId layerBindingId = m_PreImportedOutputHandles[id].m_LayerBindingId;

            auto outputHandle = workingMemHandle.GetOutputHandle(layerBindingId);
            auto outputConnections = workingMemHandle.GetOutputConnection(layerBindingId);

            for (auto it : outputConnections)
            {
                *it = outputHandle;
            }
        }
    };

    std::unique_ptr<profiling::TimelineUtilityMethods> timelineUtils =
        profiling::TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService);
    profiling::ProfilingGuid inferenceGuid = m_ProfilingService.GetNextGuid();
    if (timelineUtils)
    {
        // Add inference timeline trace if profiling is enabled.
        profiling::ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
        timelineUtils->CreateTypedEntity(inferenceGuid, profiling::LabelsAndEventClasses::INFERENCE_GUID);
        timelineUtils->CreateRelationship(profiling::ProfilingRelationshipType::RetentionLink,
                                          networkGuid,
                                          inferenceGuid,
                                          profiling::LabelsAndEventClasses::EXECUTION_OF_GUID);
        timelineUtils->RecordEvent(inferenceGuid, profiling::LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
    }

    bool executionSucceeded = true;

    if (timelineUtils)
    {
        // Add end of life of the inference timeline if profiling is enabled.
        timelineUtils->RecordEvent(inferenceGuid, profiling::LabelsAndEventClasses::ARMNN_PROFILING_EOL_EVENT_CLASS);
        timelineUtils->Commit();
    }

    if (!workingMemHandle.IsAllocated())
    {
        workingMemHandle.Allocate();
    }

    {
        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "PrepareInputs");
        for (auto pair : inputTensors)
        {
            EnqueueInput(pair.second, workingMemHandle.GetInputHandle(pair.first));
        }

        // Swap in the pre-imported inputs if any
        for (ImportedInputId id : preImportedInputs)
        {
            const ImportedTensorHandlePin& importedInputPin = m_PreImportedInputHandles[id];
            const LayerBindingId layerBindingId = m_PreImportedInputHandles[id].m_LayerBindingId;
            const auto& preimportedHandle = importedInputPin.m_TensorHandle;

            auto inputConnections = workingMemHandle.GetInputConnections(layerBindingId);
            for (auto it : inputConnections)
            {
                *it = preimportedHandle.get();
            }
        }
    }
    {
        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "PrepareOutputs");
        if (m_NetworkProperties.m_ExportEnabled)
        {
            for (auto pair : outputTensors)
            {
                ImportOutputTensor(pair.second, workingMemHandle.GetOutputHandle(pair.first));
            }
        }

        for (ImportedOutputId id : preImportedOutputs)
        {
            const ImportedTensorHandlePin& importedOutputPin = m_PreImportedOutputHandles[id];
            const LayerBindingId layerBindingId = m_PreImportedOutputHandles[id].m_LayerBindingId;
            const auto& preimportedHandle = importedOutputPin.m_TensorHandle;

            auto outputConnections = workingMemHandle.GetOutputConnection(layerBindingId);

            for (auto it : outputConnections)
            {
                *it = preimportedHandle.get();
            }
        }
    }

    auto Fail = [&](const std::exception& error)
    {
        ARMNN_LOG(error) << "An error occurred attempting to execute a workload: " << error.what();
        executionSucceeded = false;
    };
    profiling::ProfilingDynamicGuid workloadInferenceID(0);

    try
    {
        for (unsigned int i = 0; i < m_WorkloadQueue.size(); ++i)
        {
            auto& workload = m_WorkloadQueue[i];
            if (timelineUtils)
            {
                workloadInferenceID = timelineUtils->RecordWorkloadInferenceAndStartOfLifeEvent(workload->GetGuid(),
                                                                                                inferenceGuid);
            }
            workload->ExecuteAsync(workingMemHandle.GetWorkingMemDescriptorAt(i));

            if (timelineUtils)
            {
                timelineUtils->RecordEndOfLifeEvent(workloadInferenceID);
            }
        }
    }
    catch (const RuntimeException& error)
    {
        resetMemHandle();
        Fail(error);
    }
    catch (const std::runtime_error& error)
    {
        resetMemHandle();
        Fail(error);
    }
    catch (...)
    {
        resetMemHandle();
        throw;
    }

    if (!m_NetworkProperties.m_ExportEnabled)
    {
        for (auto pair : outputTensors)
        {
            CopyToOutputTensor(pair.second, workingMemHandle.GetOutputHandle(pair.first));
        }
    }
    else
    {
        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "SyncMemGeneric_Execute");
        workingMemHandle.MemSyncOutputs();
    }

    resetMemHandle();

    return executionSucceeded ? Status::Success : Status::Failure;
}

/// Create a new unique WorkingMemHandle object. Create multiple handles if you wish to have
/// overlapped Execution by calling this function from different threads.
std::unique_ptr<IWorkingMemHandle> LoadedNetwork::CreateWorkingMemHandle(NetworkId networkId)
{
    Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();

    // Tensors that will need to be allocated internally within armnn
    std::vector<std::unique_ptr<ITensorHandle>> managedTensorHandles;
    // Tensors that will be allocated externally by the user
    std::vector<std::unique_ptr<ITensorHandle>> unmanagedTensorHandles;

    std::vector<WorkingMemDescriptor> workingMemDescriptors;
    std::unordered_map<LayerGuid, WorkingMemDescriptor> workingMemDescriptorMap;

    auto GetTensorHandle = [&](Layer* layer, const OutputSlot& outputSlot)
    {
        ITensorHandleFactory::FactoryId factoryId = outputSlot.GetTensorHandleFactoryId();
        const TensorInfo& tensorInfo = outputSlot.GetTensorInfo();

        if (factoryId == ITensorHandleFactory::LegacyFactoryId)
        {
            BackendId id = layer->GetBackendId();
            ARMNN_NO_DEPRECATE_WARN_BEGIN
            return m_WorkloadFactories.at(id)->CreateTensorHandle(tensorInfo, false);
            ARMNN_NO_DEPRECATE_WARN_END
        }
        else
        {
            ITensorHandleFactory* handleFactory = m_TensorHandleFactoryRegistry.GetFactory(factoryId);
            ARMNN_ASSERT(handleFactory);
            return handleFactory->CreateTensorHandle(tensorInfo, false);
        }
    };
1536 
1537  struct HandleInfo
1538  {
1539  ITensorHandle* m_TensorHandle;
1540 
1541  bool m_IsInputLayerHandle = false;
1542  bool m_IsOutputLayerHandle = false;
1543 
1544  WorkingMemHandle::InputMemDescriptorCoords m_InputMemDescriptorCoords;
1545  WorkingMemHandle::OutputMemDescriptorCoords m_OutputMemDescriptorCoords;
1546  };
1547 
1548  std::unordered_map<const OutputSlot*, HandleInfo> outputToHandleInfoMap;
1549 
1550  unsigned int layerIndex = 0;
1551  for (auto&& layer : order)
1552  {
1553  // Constant layers execution and management is handled during loaded network construction
1554  if (layer->GetType() == LayerType::Constant)
1555  {
1556  continue;
1557  }
1558 
1559  WorkingMemDescriptor workingMemDescriptor;
1560 
1561  bool isMemoryManaged = true;
1562  bool isInputLayer = false;
1563  bool isOutputLayer = false;
1564  bool isConnectedToOutputLayer = false;
1565 
1566  if (layer->GetType() == LayerType::Input || layer->GetType() == LayerType::MemImport)
1567  {
1568  // Input layers/workloads will not be executed so the descriptor is not added to workingMemDescriptors
1569  // However we will still need to manage the tensorHandle
1570  isInputLayer = true;
1571  isMemoryManaged = !m_NetworkProperties.m_ImportEnabled;
1572  }
1573  else if (layer->GetType() == LayerType::Output)
1574  {
1575  isOutputLayer = true;
1576  }
1577 
1578  unsigned int slotIndex = 0;
1579  // Create a tensor handle for each output slot of a layer
1580  // Once we create it, we start managing its lifetime
1581  for (auto& slot : layer->GetOutputSlots())
1582  {
1583  for (unsigned int i = 0; i < slot.GetNumConnections(); ++i)
1584  {
1585  if ((slot.GetConnection(i)->GetOwningLayer().GetType() == LayerType::Output))
1586  {
1587  if (!isConnectedToOutputLayer)
1588  {
1589  isConnectedToOutputLayer = true;
1590  // If Export is enabled disable memory management, so we can export, otherwise we do a copy
1591  isMemoryManaged = !m_NetworkProperties.m_ExportEnabled;
1592  }
1593  else
1594  {
1595  // Importing in this case would likely cause unexpected behaviour, so we disallow it.
1596  ARMNN_LOG(warning) <<
1597  fmt::format("Layer name: '{0}' guid: '{1}' has two or more OutputLayers connected to it. "
1598  "This will prevent importing on the connected OutputLayers.",
1599  layer->GetName(), layer->GetGuid());
1600  isMemoryManaged = true;
1601  }
1602  }
1603  }
1604 
1605  ITensorHandle* tensorHandle;
1606  if (isMemoryManaged)
1607  {
1608  managedTensorHandles.emplace_back(GetTensorHandle(layer, slot));
1609  tensorHandle = managedTensorHandles.back().get();
1610  }
1611  else
1612  {
1613  unmanagedTensorHandles.emplace_back(GetTensorHandle(layer, slot));
1614  tensorHandle = unmanagedTensorHandles.back().get();
1615  }
1616 
1617  workingMemDescriptor.m_Outputs.push_back(tensorHandle);
1618 
1619  HandleInfo& handleInfo = outputToHandleInfoMap[&slot];
1620  handleInfo.m_TensorHandle = tensorHandle;
1621 
1622  // Store the coordinates of the current layer's OutputSlot that is connected to the OutputLayer
1623  if (isConnectedToOutputLayer)
1624  {
1625  handleInfo.m_IsOutputLayerHandle = true;
1626  handleInfo.m_OutputMemDescriptorCoords.m_OutputSlotCoords = {layerIndex, slotIndex};
1627  }
1628  // Store the LayerBindingId of the InputLayer
1629  if (isInputLayer)
1630  {
1631  handleInfo.m_IsInputLayerHandle = true;
1632  LayerBindingId bindingId = static_cast<BindableLayer*>(layer)->GetBindingId();
1633  handleInfo.m_InputMemDescriptorCoords.m_LayerBindingId = bindingId;
1634  }
1635  slotIndex++;
1636  }
1637  // Loop through the input slots in the same layer and decrement the reference counter associated
1638  // to each tensor handle we encounter.
1639  // Once it reaches zero, the lifetime of the tensor handle has ended, and we mark its memory as available
1640  // so that the next tensor handle with a non overlapping lifetime can share its memory.
1641  for (auto& slot : layer->GetInputSlots())
1642  {
1643  ARMNN_ASSERT(slot.GetConnection());
1644  auto outputSlot = slot.GetConnectedOutputSlot();
1645  auto key = outputSlot->GetOwningLayer().GetGuid();
1646 
1647  // Constant layers execution and management is handled during loaded network construction
1648  auto found = m_ConstantTensorHandles.find(key);
1649  if (found != m_ConstantTensorHandles.end())
1650  {
1651  ITensorHandle* tensorHandle = found->second;
1652  workingMemDescriptor.m_Inputs.push_back(tensorHandle);
1653 
1654  // Odd case where a constant layer is connected to an output layer
1655  // We will need to create a HandleInfo to track it
1656  if (isOutputLayer)
1657  {
1658  LayerBindingId bindingId = static_cast<BindableLayer*>(layer)->GetBindingId();
1659 
1660  HandleInfo& handleInfo = outputToHandleInfoMap[outputSlot];
1661  handleInfo.m_TensorHandle = tensorHandle;
1662  handleInfo.m_IsOutputLayerHandle = true;
1663  handleInfo.m_OutputMemDescriptorCoords.m_LayerBindingIds.push_back(bindingId);
1664  handleInfo.m_OutputMemDescriptorCoords.m_InputSlotCoords.push_back({layerIndex, 0});
1665  }
1666  continue;
1667  }
1668 
1669  HandleInfo& handleInfo = outputToHandleInfoMap.at(outputSlot);
1670 
1671  ITensorHandle* inputTensorHandle = handleInfo.m_TensorHandle;
1672  workingMemDescriptor.m_Inputs.push_back(inputTensorHandle);
1673 
1674  // Store the LayerBindingId of the OutputLayer
1675  if (isOutputLayer)
1676  {
1677  LayerBindingId bindingId = static_cast<BindableLayer*>(layer)->GetBindingId();
1678  handleInfo.m_OutputMemDescriptorCoords.m_LayerBindingIds.push_back(bindingId);
1679  handleInfo.m_OutputMemDescriptorCoords.m_InputSlotCoords.push_back({layerIndex, 0});
1680  }
1681  // In this case the layer is not an OutputLayer, but it shares its input tensor handle with one;
1682  // it will also need to be updated if we swap out the tensor handle.
1683  else if (handleInfo.m_IsOutputLayerHandle)
1684  {
1685  handleInfo.m_OutputMemDescriptorCoords.m_InputSlotCoords.push_back({layerIndex, slot.GetSlotIndex()});
1686  }
1687 
1688  // Store the coordinates of the InputSlots connected to the InputLayer
1689  // There can be more than one InputSlot connected to an InputLayer, so we use a vector
1690  if (handleInfo.m_IsInputLayerHandle)
1691  {
1692  std::pair<LayerGuid, unsigned int> connectionLocation{layerIndex, slot.GetSlotIndex()};
1693  handleInfo.m_InputMemDescriptorCoords.m_InputSlotCoords.emplace_back(connectionLocation);
1694  }
1695  }
1696  workingMemDescriptorMap.insert({layer->GetGuid(), workingMemDescriptor});
1697 
1698  // Input/Output layers/workloads will not be executed, so the descriptor is not added to workingMemDescriptors.
1699  // However, we still need to manage the tensor handles.
1700  if (!isInputLayer)
1701  {
1702  workingMemDescriptors.push_back(workingMemDescriptor);
1703  layerIndex++;
1704  }
1705  }
1706 
1707  std::vector<std::pair<std::shared_ptr<TensorMemory>, MemorySource>> tensorMemory;
1708 
1709  auto externalMemoryManager = CreateExternalMemoryManger(tensorMemory);
1710 
1711  // Sort tensorMemory (stored as m_TensorMemory in the handle) so that its order matches the outputSlot order.
1712  std::sort(tensorMemory.begin(), tensorMemory.end(),
1713  [](const std::pair<std::shared_ptr<TensorMemory>, MemorySource>& lhs,
1714  const std::pair<std::shared_ptr<TensorMemory>, MemorySource>& rhs)
1715  {
1716  return lhs.first->m_OutputSlotId < rhs.first->m_OutputSlotId;
1717  });
1718 
1719  std::vector<WorkingMemHandle::InputMemDescriptorCoords> inputConnectionsInfo;
1720  std::vector<WorkingMemHandle::OutputMemDescriptorCoords> outputConnectionsInfo;
1721 
1722  for (const auto& handleInfo: outputToHandleInfoMap)
1723  {
1724  if (handleInfo.second.m_IsOutputLayerHandle)
1725  {
1726  outputConnectionsInfo.emplace_back(handleInfo.second.m_OutputMemDescriptorCoords);
1727  }
1728 
1729  if (handleInfo.second.m_IsInputLayerHandle)
1730  {
1731  inputConnectionsInfo.emplace_back(handleInfo.second.m_InputMemDescriptorCoords);
1732  }
1733  }
1734 
1735  return std::make_unique<WorkingMemHandle>(networkId,
1736  inputConnectionsInfo,
1737  outputConnectionsInfo,
1738  workingMemDescriptors,
1739  workingMemDescriptorMap,
1740  std::move(externalMemoryManager),
1741  std::move(tensorMemory),
1742  std::move(managedTensorHandles),
1743  std::move(unmanagedTensorHandles));
1744 }
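// A minimal usage sketch (illustrative only, based on the Execute overload declared for this
// class): the working memory handle is created once, typically per inference thread, and then
// reused across executions.
//
//     std::unique_ptr<IWorkingMemHandle> handle = loadedNetwork->CreateWorkingMemHandle(networkId);
//     Status status = loadedNetwork->Execute(inputTensors, outputTensors, *handle);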
1745 
1746 void LoadedNetwork::RegisterDebugCallback(const DebugCallbackFunction& func)
1747 {
1748  for (auto&& workloadPtr: m_WorkloadQueue)
1749  {
1750  workloadPtr.get()->RegisterDebugCallback(func);
1751  }
1752 }
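// Illustrative registration: DebugCallbackFunction is
// std::function<void(LayerGuid, unsigned int slotIndex, ITensorHandle*)>, and the Debug layer
// invokes it for each tensor it is asked to inspect.
//
//     loadedNetwork->RegisterDebugCallback(
//         [](LayerGuid guid, unsigned int slotIndex, ITensorHandle* handle)
//         {
//             IgnoreUnused(guid, handle);
//             ARMNN_LOG(info) << "Debug callback hit for slot " << slotIndex;
//         });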
1753 
1754 
1755 void LoadedNetwork::CreateMemoryProfileAsync()
1756 {
1757  struct PartialBlock
1758  {
1759  unsigned int m_StartOfLife;
1760  unsigned int m_Lifetime;
1761 
1762  size_t m_MemSize;
1763  unsigned int m_Index;
1764 
1765  BackendId m_BackendId;
1766  };
1767 
1768  auto align = [](size_t numToAlign)
1769  {
1770  const size_t alignment = sizeof(float);
1771  return ((numToAlign + alignment - 1) / alignment) * alignment;
1772  };
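 // For illustration, with alignment == sizeof(float) (typically 4 bytes):
 // align(1) == 4, align(4) == 4 and align(10) == 12, i.e. sizes are rounded
 // up to the next multiple of the alignment.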
1773 
1774  std::unordered_map<const OutputSlot*, PartialBlock> memBlockTrackerMap;
1775 
1776  const bool inputImportingEnabled = m_NetworkProperties.m_InputSource != MemorySource::Undefined;
1777  const bool outputImportingEnabled = m_NetworkProperties.m_OutputSource != MemorySource::Undefined;
1778 
1779  unsigned int timestep = 0;
1780  unsigned int outputIndex = 0;
1781  Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
1782 
1783  for (auto&& layer : order)
1784  {
1785  const LayerType& layerType = layer->GetType();
1786  // Don't manage memory if importing.
1787  if (layerType == LayerType::Input && inputImportingEnabled)
1788  {
1789  continue;
1790  }
1791  // Don't manage memory if importing.
1792  if (layerType == LayerType::Output && outputImportingEnabled
1793  && layer->GetInputSlot(0).GetConnectedOutputSlot()->GetNumConnections() == 1)
1794  {
1795  continue;
1796  }
1797  // Because constant layer memory cannot be shared, it must persist for the lifetime of execution;
1798  // its management is handled separately.
1799  if (layerType == LayerType::Constant)
1800  {
1801  continue;
1802  }
1803 
1804  BackendId backendId = layer->GetBackendId();
1805  for (auto& outputSlot : layer->GetOutputSlots())
1806  {
1807  if (!m_SupportsExternallyManagedMemory[backendId])
1808  {
1809  continue;
1810  }
1811 
1812  PartialBlock partialBlock;
1813 
1814  partialBlock.m_StartOfLife = timestep;
1815 
1816  size_t alignedSize = align(outputSlot.GetOutputHandler().GetTensorInfo().GetNumBytes());
1817  partialBlock.m_MemSize = alignedSize;
1818  partialBlock.m_Index = outputIndex++;
1819  partialBlock.m_Lifetime = outputSlot.GetNumConnections();
1820  partialBlock.m_BackendId = backendId;
1821 
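 // An output slot with no connections is never consumed, so record a degenerate
 // block whose lifetime begins and ends at its creation timestep.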
1822  if (partialBlock.m_Lifetime == 0)
1823  {
1824  m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
1825  partialBlock.m_StartOfLife,
1826  partialBlock.m_MemSize,
1827  0,
1828  partialBlock.m_Index);
1829  }
1830  else
1831  {
1832  memBlockTrackerMap[&outputSlot] = partialBlock;
1833  }
1834  }
1835 
1836  for (auto& inputSlot : layer->GetInputSlots())
1837  {
1838  const Layer& connectedInputLayer = inputSlot.GetConnectedOutputSlot()->GetOwningLayer();
1839  const LayerType& owningLayerType = connectedInputLayer.GetType();
1840 
1841  if (owningLayerType == LayerType::Constant)
1842  {
1843  continue;
1844  }
1845  if (inputImportingEnabled && owningLayerType == LayerType::Input)
1846  {
1847  continue;
1848  }
1849 
1850  auto outputSlot = inputSlot.GetConnectedOutputSlot();
1851 
1852  PartialBlock& partialBlock = memBlockTrackerMap.at(outputSlot);
1853 
1854  auto& lifetime = partialBlock.m_Lifetime;
1855  --lifetime;
1856 
1857  if (lifetime == 0)
1858  {
1859  m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
1860  timestep,
1861  partialBlock.m_MemSize,
1862  0,
1863  partialBlock.m_Index);
1864  }
1865  }
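 // Countdown example: an output slot with two consumers enters the loop above with
 // m_Lifetime == 2; the first consuming layer decrements it to 1 and the second to 0,
 // at which point a MemBlock covering [m_StartOfLife, timestep] is recorded for that backend.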
1866  ++timestep;
1867  }
1868 }
1869 
1870 void LoadedNetwork::CreateMemoryProfile()
1871 {
1872  // Finds the first TensorHandle ancestor of a SubTensorHandle. If the ITensorHandle provided
1873  // is already a TensorHandle, the function just returns it unchanged.
1874  auto TraceSubTensorHandleAncestry = [](ITensorHandle* const subTensorHandle)
1875  {
1876  ITensorHandle* ancestor = subTensorHandle;
1877  while (ancestor && ancestor->GetParent())
1878  {
1879  ancestor = ancestor->GetParent();
1880  }
1881  return ancestor;
1882  };
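 // Example: for a sub-tensor chain sub -> parent -> root, where root->GetParent() == nullptr,
 // TraceSubTensorHandleAncestry(sub) returns root, so memory is profiled against the root
 // allocation rather than against each of its views.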
1883 
1884  struct PartialBlock
1885  {
1886  unsigned int m_StartOfLife;
1887  unsigned int m_Lifetime;
1888 
1889  size_t m_MemSize;
1890  unsigned int m_Index;
1891 
1892  BackendId m_BackendId;
1893  };
1894 
1895  auto align = [](size_t numToAlign)
1896  {
1897  const size_t alignment = sizeof(float);
1898  return ((numToAlign + alignment - 1) / alignment) * alignment;
1899  };
1900 
1901  std::unordered_map<ITensorHandle*, PartialBlock> memBlockTrackerMap;
1902 
1903  const bool inputImportingEnabled = m_NetworkProperties.m_InputSource != MemorySource::Undefined;
1904  const bool outputImportingEnabled = m_NetworkProperties.m_OutputSource != MemorySource::Undefined;
1905 
1906  unsigned int timestep = 0;
1907  unsigned int outputIndex = 0;
1908  Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
1909 
1910  for (auto&& layer : order)
1911  {
1912  const LayerType& layerType = layer->GetType();
1913  // Don't manage memory if importing.
1914  if (layerType == LayerType::Input && inputImportingEnabled)
1915  {
1916  continue;
1917  }
1918  // Don't manage memory if importing.
1919  if (layerType == LayerType::Output && outputImportingEnabled
1920  && layer->GetInputSlot(0).GetConnectedOutputSlot()->GetNumConnections() == 1)
1921  {
1922  continue;
1923  }
1924  // Because constant layer memory cannot be shared, it must persist for the lifetime of execution;
1925  // its management is handled separately.
1926  if (layerType == LayerType::Constant)
1927  {
1928  continue;
1929  }
1930 
1931  BackendId backendId = layer->GetBackendId();
1932  for (auto& outputSlot : layer->GetOutputSlots())
1933  {
1934  if (!m_SupportsExternallyManagedMemory[backendId])
1935  {
1936  continue;
1937  }
1938 
1939  ITensorHandle* tensorHandle = outputSlot.GetOutputHandler().GetData();
1940  tensorHandle = TraceSubTensorHandleAncestry(tensorHandle);
1941 
1942  if (memBlockTrackerMap.find(tensorHandle) == memBlockTrackerMap.end())
1943  {
1944  PartialBlock partialBlock;
1945 
1946  partialBlock.m_StartOfLife = timestep;
1947 
1948  size_t alignedSize = align(outputSlot.GetOutputHandler().GetTensorInfo().GetNumBytes());
1949  partialBlock.m_MemSize = alignedSize;
1950  partialBlock.m_Index = outputIndex++;
1951  partialBlock.m_Lifetime = outputSlot.GetNumConnections();
1952  partialBlock.m_BackendId = backendId;
1953 
1954  if (partialBlock.m_Lifetime == 0)
1955  {
1956  m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
1957  partialBlock.m_StartOfLife,
1958  partialBlock.m_MemSize,
1959  0,
1960  partialBlock.m_Index);
1961  }
1962  else
1963  {
1964  memBlockTrackerMap[tensorHandle] = partialBlock;
1965  }
1966  m_Tensorhandles.push_back(tensorHandle);
1967 
1968  }
1969  else
1970  {
1971  memBlockTrackerMap.at(tensorHandle).m_Lifetime += outputSlot.GetNumConnections();
1972  }
1973  }
1974 
1975  for (auto& inputSlot : layer->GetInputSlots())
1976  {
1977  const Layer& connectedInputLayer = inputSlot.GetConnectedOutputSlot()->GetOwningLayer();
1978  const LayerType& owningLayerType = connectedInputLayer.GetType();
1979 
1980  if (owningLayerType == LayerType::Constant)
1981  {
1982  continue;
1983  }
1984  if (inputImportingEnabled && owningLayerType == LayerType::Input)
1985  {
1986  continue;
1987  }
1988  if (!m_SupportsExternallyManagedMemory[connectedInputLayer.GetBackendId()])
1989  {
1990  continue;
1991  }
1992 
1993  auto outputSlot = inputSlot.GetConnectedOutputSlot();
1994 
1995  ITensorHandle* tensorHandle = outputSlot->GetOutputHandler().GetData();
1996  tensorHandle = TraceSubTensorHandleAncestry(tensorHandle);
1997 
1998  PartialBlock& partialBlock = memBlockTrackerMap.at(tensorHandle);
1999 
2000  auto& lifetime = partialBlock.m_Lifetime;
2001  --lifetime;
2002 
2003  if (lifetime == 0)
2004  {
2005  m_MemBlockMap[partialBlock.m_BackendId].emplace_back(partialBlock.m_StartOfLife,
2006  timestep,
2007  partialBlock.m_MemSize,
2008  0,
2009  partialBlock.m_Index);
2010  }
2011  }
2012  ++timestep;
2013  }
2014 
2015 }
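// Net effect of the profiling passes above: for each backend that supports externally managed
// memory, m_MemBlockMap holds one MemBlock per root tensor handle, recording its aligned size
// and its [first use, last use] interval in topological timesteps. A memory optimizer strategy
// (presumably the one registered for the backend) can then pack blocks with non-overlapping
// intervals into shared bins in m_MemBinMap, which CreateExternalMemoryManger below turns into
// concrete buffers.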
2016 
2017 std::unique_ptr<MemoryManager> LoadedNetwork::CreateExternalMemoryManger(
2018  std::vector<std::pair<std::shared_ptr<TensorMemory>, MemorySource>>& tensorMemoryVec)
2019 {
2020  std::unique_ptr<MemoryManager> memoryManager = std::make_unique<MemoryManager>();
2021  auto allocatorMap = BackendRegistryInstance().GetAllocators();
2022 
2023  for (auto& backend : m_MemBinMap)
2024  {
2025  std::vector<BufferStorage> bufferStorageVec;
2026 
2027  std::shared_ptr<ICustomAllocator> backendAllocator;
2028  if (allocatorMap.find(backend.first) != allocatorMap.end())
2029  {
2030  backendAllocator = allocatorMap[backend.first];
2031  }
2032  else
2033  {
2034  backendAllocator = m_Backends[backend.first]->GetDefaultAllocator();
2035  }
2036 
2037  for (auto& memBin : backend.second)
2038  {
2039  BufferStorage bufferStorage;
2040  bufferStorage.m_BufferSize = memBin.m_MemSize;
2041  bufferStorage.m_TensorMemoryVector.reserve(memBin.m_MemBlocks.size());
2042 
2043  for (auto& memBlock : memBin.m_MemBlocks)
2044  {
2045  auto tensorMemory = std::make_shared<TensorMemory>(TensorMemory{memBlock.m_Offset, memBlock.m_Index});
2046 
2047  tensorMemoryVec.emplace_back(tensorMemory, backendAllocator->GetMemorySourceType());
2048  bufferStorage.m_TensorMemoryVector.emplace_back(tensorMemory);
2049  }
2050 
2051  bufferStorageVec.emplace_back(std::move(bufferStorage));
2052  }
2053 
2054  memoryManager->StoreMemToAllocate(bufferStorageVec, backendAllocator, 4);
2055  }
2056 
2057  return memoryManager;
2058 }
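// Each TensorMemory pairs an offset into a shared backend buffer with the id of the output slot
// it backs (m_OutputSlotId); CreateWorkingMemHandle above sorts the returned vector by that id so
// it lines up with the slot traversal order. The final argument to StoreMemToAllocate (4 here) is
// assumed to be the type alignment used when laying out the buffers.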
2059 
2060 LayerBindingId LoadedNetwork::ValidateImportedInputID(ImportedInputId id)
2061 {
2062  try
2063  {
2064  const auto& importedTensorHandlePin = m_PreImportedInputHandles.at(id);
2065  if (!importedTensorHandlePin.m_TensorHandle)
2066  {
2067  throw InvalidArgumentException(fmt::format("LoadedNetwork::Execute:"
2068  "PreImportedInput: {} has been deleted", id));
2069  }
2070  return importedTensorHandlePin.m_LayerBindingId;
2071  }
2072  catch (const std::out_of_range&)
2073  {
2074  throw InvalidArgumentException(fmt::format("LoadedNetwork::Execute: Unknown ImportedInputId: {}", id));
2075  }
2076 }
2077 
2078 LayerBindingId LoadedNetwork::ValidateImportedOutputID(ImportedOutputId id)
2079 {
2080  try
2081  {
2082  const auto& importedTensorHandlePin = m_PreImportedOutputHandles.at(id);
2083  if (!importedTensorHandlePin.m_TensorHandle)
2084  {
2085  throw InvalidArgumentException(fmt::format("LoadedNetwork::Execute: "
2086  "PreImportedOutput: {} has been deleted", id));
2087  }
2088  return importedTensorHandlePin.m_LayerBindingId;
2089  }
2090  catch (const std::out_of_range&)
2091  {
2092  throw InvalidArgumentException(fmt::format("LoadedNetwork::Execute: Unknown ImportedOutputId: {}", id));
2093  }
2094 }
2095 
2096 }