ArmNN 21.08
LoadedNetwork.cpp
1 //
2 // Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 
6 #include "LoadedNetwork.hpp"
7 #include "Layer.hpp"
8 #include "Graph.hpp"
9 #include <Processes.hpp>
10 #include "Profiling.hpp"
11 #include "HeapProfiling.hpp"
12 #include "WorkingMemHandle.hpp"
13 
15 #include <armnn/Logging.hpp>
16 #include <armnn/utility/Assert.hpp>
17 
22 
23 #include <fmt/format.h>
24 
25 namespace armnn
26 {
27 
28 using namespace std;
29 using namespace armnn::profiling;
30 
31 namespace
32 {
33 
34 template <typename ExceptionType>
35 std::string ToErrorMessage(const char * prefix, const ExceptionType & error)
36 {
37  std::stringstream ss;
38  ss << prefix << " " << error.what();
39  return ss.str();
40 }
41 
42 void AddLayerStructure(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
43  const Layer& layer,
44  ProfilingGuid networkGuid)
45 {
46  // Add layer to the post-optimisation network structure
47  std::string layerName = layer.GetNameStr().empty() ? "<Unnamed>" : layer.GetNameStr();
48  timelineUtils->CreateNamedTypedChildEntity(layer.GetGuid(),
49  networkGuid,
50  layerName,
51  LabelsAndEventClasses::LAYER_GUID);
52  for (auto&& input : layer.GetInputSlots())
53  {
54  const IOutputSlot* source = input.GetConnectedOutputSlot();
55  ARMNN_ASSERT(source != NULL);
56  timelineUtils->CreateConnectionRelationship(ProfilingRelationshipType::RetentionLink,
57  source->GetOwningLayerGuid(),
58  layer.GetGuid());
59  }
60 }
61 
62 void AddWorkloadStructure(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
63  std::unique_ptr<IWorkload>& workload,
64  const Layer& layer)
65 {
66  // Add workload to the post-optimisation network structure
67  timelineUtils->CreateTypedEntity(workload->GetGuid(), LabelsAndEventClasses::WORKLOAD_GUID);
68  timelineUtils->MarkEntityWithLabel(workload->GetGuid(),
69  layer.GetBackendId().Get(),
70  LabelsAndEventClasses::BACKENDID_GUID);
71 
72  // Link the workload to the layer
73  timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
74  layer.GetGuid(),
75  workload->GetGuid(),
76  LabelsAndEventClasses::CHILD_GUID);
77 }
78 
79 } // anonymous
80 
81 std::unique_ptr<LoadedNetwork> LoadedNetwork::MakeLoadedNetwork(std::unique_ptr<IOptimizedNetwork> net,
82  std::string& errorMessage,
83  const INetworkProperties& networkProperties,
84  profiling::ProfilingService& profilingService)
85 {
86  std::unique_ptr<LoadedNetwork> loadedNetwork;
87 
88  auto Fail = [&](const std::exception& error) -> std::unique_ptr<LoadedNetwork>
89  {
90  errorMessage = ToErrorMessage("An error occurred when preparing the network workloads: ", error);
91  ARMNN_LOG(error) << errorMessage;
92 
93  return std::unique_ptr<LoadedNetwork>();
94  };
95 
96  try
97  {
98  loadedNetwork.reset(new LoadedNetwork(std::move(net), networkProperties, profilingService));
99  }
100  catch (const armnn::RuntimeException& error)
101  {
102  return Fail(error);
103  }
104  catch (const armnn::Exception& error)
105  {
106  return Fail(error);
107  }
108  catch (const std::runtime_error& error)
109  {
110  return Fail(error);
111  }
112 
113  return loadedNetwork;
114 }
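
MakeLoadedNetwork() returns a null pointer on failure and reports the cause through errorMessage. A minimal sketch of a call site (not part of this file; in practice the runtime's LoadNetwork() drives this, and optNet, networkProperties and profilingService are assumed to be in scope):

    std::string errorMessage;
    std::unique_ptr<armnn::LoadedNetwork> loaded = armnn::LoadedNetwork::MakeLoadedNetwork(
        std::move(optNet),         // std::unique_ptr<IOptimizedNetwork>, assumed to hold an already optimised network
        errorMessage,
        networkProperties,         // INetworkProperties selected by the caller
        profilingService);
    if (!loaded)
    {
        ARMNN_LOG(error) << errorMessage;   // a null pointer signals failure; the reason is in errorMessage
    }
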
115 
116 LoadedNetwork::LoadedNetwork(std::unique_ptr<IOptimizedNetwork> net,
117  const INetworkProperties& networkProperties,
118  profiling::ProfilingService& profilingService) :
119  m_OptimizedNetwork(std::move(net)),
120  m_NetworkProperties(networkProperties),
121  m_TensorHandleFactoryRegistry(),
122  m_ProfilingService(profilingService)
123 {
124  // Create a profiler and register it for the current thread.
125  m_Profiler = std::make_shared<IProfiler>();
126  ProfilerManager::GetInstance().RegisterProfiler(m_Profiler.get());
127 
128  m_Profiler->EnableProfiling(networkProperties.m_ProfilingEnabled);
129 
130  if (networkProperties.m_OutputNetworkDetails) m_Profiler->EnableNetworkDetailsToStdOut();
131 
132  Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
133  //First create tensor handles, backends and workload factories.
134  //Handles are created before workloads because workload creation
135  //can modify some of the handles
136  //(the splitter and concat layers, for example).
137  for (auto&& layer : order)
138  {
139  auto const& backendId = layer->GetBackendId();
140  if (m_Backends.count(backendId) == 0)
141  {
142  auto createBackend = BackendRegistryInstance().GetFactory(backendId);
143  auto it = m_Backends.emplace(std::make_pair(backendId, createBackend()));
144 
145  IBackendInternal* backend = it.first->second.get();
146 
147  if (backend->SupportsTensorAllocatorAPI())
148  {
149  auto workloadFactory = backend->CreateWorkloadFactory(
150  m_TensorHandleFactoryRegistry, m_OptimizedNetwork->pOptimizedNetworkImpl->GetModelOptions(),
151  static_cast<MemorySourceFlags>(m_NetworkProperties.m_InputSource),
152  static_cast<MemorySourceFlags>(m_NetworkProperties.m_OutputSource));
153  m_WorkloadFactories.emplace(
154  std::make_pair(backendId, std::make_pair(std::move(workloadFactory), nullptr)));
155  }
156  else
157  {
158  IBackendInternal::IMemoryManagerSharedPtr memoryManager = backend->CreateMemoryManager();
159  auto workloadFactory = backend->CreateWorkloadFactory(
160  memoryManager, m_OptimizedNetwork->pOptimizedNetworkImpl->GetModelOptions());
161 
162  m_WorkloadFactories.emplace(
163  std::make_pair(backendId, std::make_pair(std::move(workloadFactory), memoryManager)));
164  }
165  }
166  }
167 
168  if (!networkProperties.m_AsyncEnabled)
169  {
170  for (auto&& layer : order)
171  {
172  auto& workloadFactory = GetWorkloadFactory(*layer);
173 
174  switch (layer->GetType())
175  {
176  case LayerType::Input:
177  case LayerType::MemImport:
178  {
179  // If IsImportEnabled is true then we need to set IsMemoryManaged
180  // to false when creating TensorHandles
181  layer->CreateTensorHandles(m_TensorHandleFactoryRegistry,
182  workloadFactory,
183  !m_NetworkProperties.m_ImportEnabled);
184  break;
185  }
186  default:
187  {
188  // Look for a layer with 1 OutputSlot which has 1 connection and that connection is an Output Layer
189  // If Export is enabled disable memory management so we can export, otherwise we do a copy
190  if ((layer->GetNumOutputSlots() == 1) &&
191  (layer->GetOutputSlots()[0].GetNumConnections() == 1) &&
192  (layer->GetOutputSlots()[0].GetConnection(0)->GetOwningLayer().GetType() == LayerType::Output))
193  {
194  layer->CreateTensorHandles(m_TensorHandleFactoryRegistry,
195  workloadFactory,
196  !m_NetworkProperties.m_ExportEnabled);
197  }
198  else
199  {
200  layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory);
201  }
202  }
203  }
204  }
205  }
206 
207  ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
208  std::unique_ptr<TimelineUtilityMethods> timelineUtils =
209  TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService);
210  if (timelineUtils)
211  {
212  timelineUtils->CreateTypedEntity(networkGuid, LabelsAndEventClasses::NETWORK_GUID);
213  // Mark the network with a start of life event
214  timelineUtils->RecordEvent(networkGuid, LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
215  // and with the process ID
216  int processID = armnnUtils::Processes::GetCurrentId();
217  std::stringstream ss;
218  ss << processID;
219  timelineUtils->MarkEntityWithLabel(networkGuid, ss.str(), LabelsAndEventClasses::PROCESS_ID_GUID);
220  }
221 
222  //Then create workloads.
223  for (auto&& layer : order)
224  {
225  if (timelineUtils)
226  {
227  // Add layer to the post-optimisation network structure
228  AddLayerStructure(timelineUtils, *layer, networkGuid);
229  }
230 
231  const IWorkloadFactory& workloadFactory = GetWorkloadFactory(*layer);
232 
233  switch (layer->GetType())
234  {
235  case LayerType::Input:
236  case LayerType::Output:
237  {
238  // Inputs and outputs are treated in a special way - see EnqueueInput() and EnqueueOutput().
239  break;
240  }
241  default:
242  {
243  auto workload = layer->CreateWorkload(workloadFactory);
244 
245  if (!workload)
246  {
247  const char* const layerName =
248  layer->GetNameStr().length() != 0 ? layer->GetName() : "<Unnamed>";
249  throw InvalidArgumentException(
250  fmt::format("No workload created for layer (name: '{0}' type: '{1}') (compute '{2}')",
251  layerName, static_cast<int>(layer->GetType()), layer->GetBackendId().Get()
252  ));
253  }
254 
255  if (timelineUtils)
256  {
257  // Add workload to the post-optimisation network structure
258  AddWorkloadStructure(timelineUtils, workload, *layer);
259  }
260 
261  // For async networks ConstantWorkloads are managed exclusively by LoadedNetwork
262  // and are separated out from the other workloads
263  if (networkProperties.m_AsyncEnabled && layer->GetType() == LayerType::Constant)
264  {
265  m_ConstantWorkloads[layer->GetGuid()] = std::move(workload);
266  }
267  else
268  {
269  m_WorkloadQueue.push_back(move(workload));
270  }
271 
272  // Release the constant data in the layer.
273  layer->ReleaseConstantData();
274  break;
275  }
276  }
277  }
278 
279  for (auto&& workloadFactory : m_WorkloadFactories)
280  {
281  workloadFactory.second.first->AfterWorkloadsCreated();
282  }
283 
284  if (timelineUtils)
285  {
286  // Commit to send the post-optimisation network structure
287  timelineUtils->Commit();
288  }
289 
290  if (!networkProperties.m_AsyncEnabled)
291  {
292  // Set up memory.
293  m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().AllocateDynamicBuffers();
294 
295  // Now that the intermediate tensor memory has been set-up,
296  // do any post allocation configuration for each workload.
297  for (auto &workload : m_WorkloadQueue)
298  {
299  workload->PostAllocationConfigure();
300  }
301  }
302  else
303  {
304  AllocateAndExecuteConstantWorkloads();
305  }
306 }
307 
308 void LoadedNetwork::AllocateAndExecuteConstantWorkloads()
309 {
310  Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
311  for (auto&& layer : order)
312  {
313  if (layer->GetType() == LayerType::Constant)
314  {
315  const auto& outSlot = layer->GetOutputSlots()[0];
316  const auto factoryId = outSlot.GetTensorHandleFactoryId();
317  ARMNN_ASSERT(factoryId != ITensorHandleFactory::LegacyFactoryId);
318  auto& workloadFactory = GetWorkloadFactory(*layer);
319 
320  layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory);
321  ITensorHandle* tensorHandle = outSlot.GetOutputHandler().GetData();
322 
323  m_ConstantTensorHandles[layer->GetGuid()] = tensorHandle;
324  tensorHandle->Allocate();
325 
326  WorkingMemDescriptor memDesc;
327  memDesc.m_Outputs.push_back(tensorHandle);
328  m_ConstantWorkloads[layer->GetGuid()]->ExecuteAsync(memDesc);
329  }
330  }
331 }
332 
333 
334 void LoadedNetwork::SendNetworkStructure()
335 {
336  Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
337  ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
338 
339  std::unique_ptr<TimelineUtilityMethods> timelineUtils =
340  TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService);
341 
342  timelineUtils->CreateTypedEntity(networkGuid, LabelsAndEventClasses::NETWORK_GUID);
343 
344  for (auto&& layer : order)
345  {
346  // Add layer to the post-optimisation network structure
347  AddLayerStructure(timelineUtils, *layer, networkGuid);
348  switch (layer->GetType())
349  {
350  case LayerType::Input:
351  case LayerType::Output:
352  {
353  // Inputs and outputs are treated in a special way - see EnqueueInput() and EnqueueOutput().
354  break;
355  }
356  default:
357  {
358  for (auto& workload : m_WorkloadQueue)
359  {
360  // Add workload to the post-optimisation network structure
361  AddWorkloadStructure(timelineUtils, workload, *layer);
362  }
363  break;
364  }
365  }
366  }
367  // Commit to send the post-optimisation network structure
368  timelineUtils->Commit();
369 }
370 
371 profiling::ProfilingGuid LoadedNetwork::GetNetworkGuid()
372 {
373  return m_OptimizedNetwork->GetGuid();
374 }
375 
376 TensorInfo LoadedNetwork::GetInputTensorInfo(LayerBindingId layerId) const
377 {
378  for (auto&& inputLayer : m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetInputLayers())
379  {
380  ARMNN_ASSERT_MSG(inputLayer->GetNumOutputSlots() == 1, "Input layer should have exactly 1 output slot");
381  if (inputLayer->GetBindingId() == layerId)
382  {
383  return inputLayer->GetOutputSlot(0).GetTensorInfo();
384  }
385  }
386 
387  throw InvalidArgumentException(fmt::format("No input layer is associated with id {}", layerId));
388 }
389 
390 TensorInfo LoadedNetwork::GetOutputTensorInfo(LayerBindingId layerId) const
391 {
392  for (auto&& outputLayer : m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetOutputLayers())
393  {
394  ARMNN_ASSERT_MSG(outputLayer->GetNumInputSlots() == 1, "Output layer should have exactly 1 input slot");
395  ARMNN_ASSERT_MSG(outputLayer->GetInputSlot(0).GetConnection(), "Input slot on Output layer must be connected");
396  if (outputLayer->GetBindingId() == layerId)
397  {
398  return outputLayer->GetInputSlot(0).GetConnection()->GetTensorInfo();
399  }
400  }
401 
402  throw InvalidArgumentException(fmt::format("No output layer is associated with id {}", layerId));
403 }
404 
405 const IWorkloadFactory& LoadedNetwork::GetWorkloadFactory(const Layer& layer) const
406 {
407  const IWorkloadFactory* workloadFactory = nullptr;
408 
409  auto it = m_WorkloadFactories.find(layer.GetBackendId());
410  if (it == m_WorkloadFactories.end())
411  {
412  throw RuntimeException(fmt::format("No workload factory for {0} to be used for layer: {1}",
413  layer.GetBackendId().Get(),
414  layer.GetNameStr()),
415  CHECK_LOCATION());
416  }
417 
418  workloadFactory = it->second.first.get();
419 
420  ARMNN_ASSERT_MSG(workloadFactory, "No workload factory");
421 
422  std::string reasonIfUnsupported;
423  ARMNN_ASSERT_MSG(IWorkloadFactory::IsLayerSupported(layer,
424  {},
425  reasonIfUnsupported,
426  m_OptimizedNetwork->pOptimizedNetworkImpl->GetModelOptions()),
427  "Factory does not support layer");
428  IgnoreUnused(reasonIfUnsupported);
429  return *workloadFactory;
430 }
431 
432 namespace {
433 
434 // Non-copyable class owning accelerator-specific tensor data.
435 class TensorPin
436 {
437 public:
438  TensorPin(std::unique_ptr<ITensorHandle> handle, const TensorInfo& info, LayerBindingId id)
439  : m_TensorHandle(std::move(handle))
440  , m_TensorInfo(info)
441  , m_Id(id)
442  {
443  }
444 
445  ITensorHandle* GetTensorHandle() const { return m_TensorHandle.get(); }
446  const TensorInfo& GetTensorInfo() const { return m_TensorInfo; }
447  LayerBindingId GetBindingId() const { return m_Id; }
448 
449 private:
450  std::unique_ptr<ITensorHandle> m_TensorHandle;
451  TensorInfo m_TensorInfo;
452  LayerBindingId m_Id;
453 };
454 
455 static const TensorPin& GetTensorPin(LayerBindingId id,
456  const std::vector<TensorPin>& pins,
457  char const* bindingPointDesc)
458 {
459  auto it = std::find_if(pins.begin(), pins.end(),
460  [id](const TensorPin& pin)
461  {
462  return pin.GetBindingId() == id;
463  });
464 
465  if (it != pins.end())
466  {
467  return *it;
468  }
469  else
470  {
471  throw InvalidArgumentException(fmt::format("No tensor supplied for {0} {1}", bindingPointDesc, id));
472  }
473 }
474 
475 // Stores data that needs to be kept accessible for the entire execution of a workload.
476 class WorkloadData
477 {
478 public:
479  WorkloadData(const InputTensors& inputTensors, const OutputTensors& outputTensors)
480  {
481  m_InputTensorPins.reserve(inputTensors.size());
482  m_OutputTensorPins.reserve(outputTensors.size());
483 
484  for (auto inputTensorPair : inputTensors)
485  {
486  auto inputTensor = inputTensorPair.second;
487 
488  std::unique_ptr<ITensorHandle> tensorHandle =
489  std::make_unique<ConstPassthroughTensorHandle>(inputTensor.GetInfo(),inputTensor.GetMemoryArea());
490  LayerBindingId layerId = inputTensorPair.first;
491 
492  m_InputTensorPins.emplace_back(std::move(tensorHandle), inputTensor.GetInfo(), layerId);
493  }
494 
495  for (auto outputTensorPair : outputTensors)
496  {
497  auto outputTensor = outputTensorPair.second;
498 
499  std::unique_ptr<ITensorHandle> tensorHandle =
500  std::make_unique<PassthroughTensorHandle>(outputTensor.GetInfo(), outputTensor.GetMemoryArea());
501  LayerBindingId layerId = outputTensorPair.first;
502 
503  m_OutputTensorPins.emplace_back(std::move(tensorHandle), outputTensor.GetInfo(), layerId);
504  }
505  }
506 
507  const TensorPin& GetInputTensorPin(LayerBindingId id) const
508  {
509  return GetTensorPin(id, m_InputTensorPins, "input");
510  }
511 
512  const TensorPin& GetOutputTensorPin(LayerBindingId id) const
513  {
514  return GetTensorPin(id, m_OutputTensorPins, "output");
515  }
516 
517 private:
518 
519  std::vector<TensorPin> m_InputTensorPins;
520  std::vector<TensorPin> m_OutputTensorPins;
521 };
522 
523 }
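
The TensorPin and WorkloadData helpers above pin the caller-supplied buffers for the duration of EnqueueWorkload(). A sketch of the InputTensors/OutputTensors containers they consume (not part of this file; binding ids and shapes are purely illustrative):

    std::vector<float> inputData(1 * 3 * 224 * 224);
    std::vector<float> outputData(1000);

    armnn::InputTensors inputTensors
    {
        { 0, armnn::ConstTensor(armnn::TensorInfo({ 1, 3, 224, 224 }, armnn::DataType::Float32),
                                inputData.data()) }
    };
    armnn::OutputTensors outputTensors
    {
        { 0, armnn::Tensor(armnn::TensorInfo({ 1, 1000 }, armnn::DataType::Float32), outputData.data()) }
    };

Each pair maps a LayerBindingId to a user buffer; GetTensorPin() looks the id up again when the corresponding Input or Output layer is enqueued.
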
524 
525 Status LoadedNetwork::EnqueueWorkload(const InputTensors& inputTensors,
526  const OutputTensors& outputTensors)
527 {
528  const Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
529 
530  // Walk graph to determine the order of execution.
531  if (graph.GetNumLayers() < 2)
532  {
533  ARMNN_LOG(warning) << "IRuntime::EnqueueWorkload()::Less than two nodes in graph";
534  return Status::Failure;
535  }
536 
537  // Data that must be kept alive for the entire execution of the workload.
538  WorkloadData workloadData(inputTensors, outputTensors);
539 
540  if (graph.GetNumInputs() != inputTensors.size())
541  {
542  throw InvalidArgumentException("Number of inputs provided does not match network.");
543  }
544 
545  // For each input to the network, call EnqueueInput with the data passed by the user.
546  {
547  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "PrepareInputs");
548  m_InputQueue.clear();
549  m_InputQueue.reserve(graph.GetNumInputs());
550  for (const BindableLayer* inputLayer : graph.GetInputLayers())
551  {
552  const TensorPin& pin = workloadData.GetInputTensorPin(inputLayer->GetBindingId());
553  EnqueueInput(*inputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
554  }
555  }
556 
557  // For each output to the network, call EnqueueOutput with the data passed by the user.
558  {
559  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "PrepareOutputs");
560  m_OutputQueue.clear();
561  m_OutputQueue.reserve(graph.GetNumOutputs());
562  for (const BindableLayer* outputLayer : graph.GetOutputLayers())
563  {
564  const TensorPin& pin = workloadData.GetOutputTensorPin(outputLayer->GetBindingId());
565  EnqueueOutput(*outputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
566  }
567  }
568 
569  std::unique_ptr<TimelineUtilityMethods> timelineUtils =
570  TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService);
571  ProfilingGuid inferenceGuid = m_ProfilingService.GetNextGuid();
572  if (timelineUtils)
573  {
574  // Add inference timeline trace if profiling is enabled.
575  ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
576  timelineUtils->CreateTypedEntity(inferenceGuid, LabelsAndEventClasses::INFERENCE_GUID);
577  timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
578  networkGuid,
579  inferenceGuid,
580  LabelsAndEventClasses::EXECUTION_OF_GUID);
581  timelineUtils->RecordEvent(inferenceGuid, LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
582  }
583 
584  bool executionSucceeded = true;
585 
586  {
587  if (m_ProfilingService.IsProfilingEnabled())
588  {
589  m_ProfilingService.IncrementCounterValue(armnn::profiling::INFERENCES_RUN);
590  }
591  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Execute");
592  ARMNN_SCOPED_HEAP_PROFILING("Executing");
593  executionSucceeded = Execute(timelineUtils, inferenceGuid);
594  }
595 
596  if (timelineUtils)
597  {
598  // Add end of life of the inference timeline if profiling is enabled.
599  timelineUtils->RecordEvent(inferenceGuid, LabelsAndEventClasses::ARMNN_PROFILING_EOL_EVENT_CLASS);
600  timelineUtils->Commit();
601  }
602  return executionSucceeded ? Status::Success : Status::Failure;
603 }
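
EnqueueWorkload() is the synchronous path: it pins the user buffers, builds the input and output copy/import workloads, then runs the three queues via Execute(). Application code normally reaches it through the public runtime interface; a hedged sketch (runtime and networkId assumed to come from IRuntime::Create() and IRuntime::LoadNetwork()):

    armnn::Status status = runtime->EnqueueWorkload(networkId, inputTensors, outputTensors);
    if (status != armnn::Status::Success)
    {
        ARMNN_LOG(error) << "Inference failed";
    }
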
604 
605 void LoadedNetwork::EnqueueInput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo)
606 {
607  if (layer.GetType() != LayerType::Input)
608  {
609  throw InvalidArgumentException("EnqueueInput: given layer not an InputLayer");
610  }
611 
612  if (tensorHandle == nullptr)
613  {
614  throw InvalidArgumentException("EnqueueInput: tensorHandle must not be NULL");
615  }
616 
617  InputQueueDescriptor inputQueueDescriptor;
618  WorkloadInfo info;
619 
620  inputQueueDescriptor.m_Inputs.push_back(tensorHandle);
621  info.m_InputTensorInfos.push_back(tensorInfo);
622 
623  ARMNN_ASSERT_MSG(layer.GetNumOutputSlots() == 1, "Can only handle Input Layer with one output");
624  const OutputHandler& handler = layer.GetOutputHandler();
625  const TensorInfo& outputTensorInfo = handler.GetTensorInfo();
626  ITensorHandle* outputTensorHandle = handler.GetData();
627  ARMNN_ASSERT_MSG(outputTensorHandle != nullptr,
628  "Data should have been allocated.");
629  inputQueueDescriptor.m_Outputs.push_back(outputTensorHandle);
630  info.m_OutputTensorInfos.push_back(outputTensorInfo);
631 
632  MemorySourceFlags importFlags = outputTensorHandle->GetImportFlags();
633  bool needMemCopy = true;
634  if (m_NetworkProperties.m_ImportEnabled) // Try to import the input tensor
635  {
636  if(CheckFlag(importFlags, m_NetworkProperties.m_InputSource))
637  {
638  needMemCopy = false;
639  // This assumes a CPU Tensor handle
640  void* mem = tensorHandle->Map(false);
641  if (outputTensorHandle->Import(mem, m_NetworkProperties.m_InputSource))
642  {
643  tensorHandle->Unmap();
644  return; // No need for a workload since the import has been done.
645  }
646  tensorHandle->Unmap();
647  throw MemoryImportException("EnqueueInput: Memory Import failed");
648  }
649  }
650  if (needMemCopy)
651  {
652  // Create a mem copy workload for input since we did not import
653  std::unique_ptr<IWorkload> inputWorkload = std::make_unique<CopyMemGenericWorkload>(inputQueueDescriptor, info);
654 
655  ARMNN_ASSERT_MSG(inputWorkload, "No input workload created");
656 
657  std::unique_ptr<TimelineUtilityMethods> timelineUtils =
658  TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService);
659  if (timelineUtils)
660  {
661  // Add Input Workload to the post-optimisation network structure
662  AddWorkloadStructure(timelineUtils, inputWorkload, layer);
663  timelineUtils->Commit();
664  }
665 
666  m_InputQueue.push_back(move(inputWorkload));
667  }
668 }
669 
670 void LoadedNetwork::EnqueueOutput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo)
671 {
672  if (layer.GetType() != LayerType::Output)
673  {
674  throw InvalidArgumentException("EnqueueOutput: given layer not an OutputLayer");
675  }
676 
677  if (tensorHandle == nullptr)
678  {
679  throw InvalidArgumentException("EnqueueOutput: tensorHandle must not be NULL");
680  }
681 
682  OutputQueueDescriptor outputQueueDescriptor;
683  WorkloadInfo info;
684 
685  outputQueueDescriptor.m_Outputs.push_back(tensorHandle);
686  info.m_OutputTensorInfos.push_back(tensorInfo);
687 
688  ARMNN_ASSERT_MSG(layer.GetNumInputSlots() == 1, "Output Layer should have exactly one input.");
689 
690  // Gets the output handler from the previous node.
691  const OutputHandler& outputHandler = layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOutputHandler();
692 
693  const TensorInfo& inputTensorInfo = outputHandler.GetTensorInfo();
694  ITensorHandle* inputTensorHandle = outputHandler.GetData();
695  ARMNN_ASSERT_MSG(inputTensorHandle != nullptr, "Data should have been allocated.");
696 
697  // Try to import the output tensor.
698  // Note: We can only import the output pointer if all of the following hold true:
699  // a) The imported pointer is aligned sufficiently
700  // b) The tensor has zero padding
701  // c) There is only one connection to the OutputSlot and it is to an OutputLayer.
702  // d) The output pointer is allocated via malloc. (Other types will be supported in a later release)
703  // e) m_IsExportEnabled must be set to true
704  bool needMemCopy = true;
705  if (m_NetworkProperties.m_ExportEnabled &&
706  (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetNumConnections() == 1))
707  {
708  if(layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOwningLayer().GetType() != LayerType::Input)
709  {
710  MemorySourceFlags importFlags = inputTensorHandle->GetImportFlags();
711  if (CheckFlag(importFlags, m_NetworkProperties.m_OutputSource))
712  {
713  needMemCopy = false;
714  void *mem = tensorHandle->Map(false);
715  bool importOk = inputTensorHandle->Import(mem, m_NetworkProperties.m_OutputSource);
716  tensorHandle->Unmap();
717 
718  if (importOk)
719  {
720  // Insert synchronization workload
721  MemSyncQueueDescriptor syncDesc;
722  syncDesc.m_Inputs.push_back(inputTensorHandle);
723  info.m_InputTensorInfos.push_back(inputTensorInfo);
724  auto syncWorkload = std::make_unique<SyncMemGenericWorkload>(syncDesc, info);
725  ARMNN_ASSERT_MSG(syncWorkload, "No sync workload created");
726  m_OutputQueue.push_back(move(syncWorkload));
727  }
728  else
729  {
730  throw MemoryExportException("EnqueueOutput: Memory Export failed");
731  }
732  }
733  }
734  }
735  if (needMemCopy)
736  {
737  // If we got here then we didn't export the memory, so add an output workload which performs a memcopy.
738  outputQueueDescriptor.m_Inputs.push_back(inputTensorHandle);
739  info.m_InputTensorInfos.push_back(inputTensorInfo);
740 
741  std::unique_ptr<IWorkload> outputWorkload =
742  std::make_unique<CopyMemGenericWorkload>(outputQueueDescriptor, info);
743  ARMNN_ASSERT_MSG(outputWorkload, "No output workload created");
744 
745  std::unique_ptr<TimelineUtilityMethods> timelineUtils =
746  TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService);
747  if (timelineUtils)
748  {
749  // Add Output Workload to the post-optimisation network structure
750  AddWorkloadStructure(timelineUtils, outputWorkload, layer);
751  timelineUtils->Commit();
752  }
753 
754  m_OutputQueue.push_back(move(outputWorkload));
755  }
756 }
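
The export branch above is only taken when m_ExportEnabled is set and conditions (a) to (e) hold; otherwise the function falls back to a CopyMemGenericWorkload. A sketch of an export-friendly output binding (not part of this file; outputBindingId and loadedNetwork are illustrative, and it assumes the backend advertises the Malloc memory source and that a tightly packed float tensor satisfies the padding and alignment requirements):

    const armnn::TensorInfo outputInfo = loadedNetwork->GetOutputTensorInfo(outputBindingId);
    std::vector<float> outputStorage(outputInfo.GetNumElements());   // malloc-backed, no padding
    armnn::OutputTensors outputTensors
    {
        { outputBindingId, armnn::Tensor(outputInfo, outputStorage.data()) }
    };
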
757 
758 void LoadedNetwork::AllocateWorkingMemory(std::lock_guard<std::mutex>& lock)
759 {
760  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Working Memory Allocation");
761 
762  // this unused parameter makes sure we can only call this function with a valid lock
763  IgnoreUnused(lock);
764 
765  if (m_IsWorkingMemAllocated)
766  {
767  return;
768  }
769  for (auto&& workloadFactory : m_WorkloadFactories)
770  {
771  IBackendInternal::IMemoryManagerSharedPtr memoryManager = workloadFactory.second.second;
772  if (memoryManager)
773  {
774  memoryManager->Acquire();
775  }
776  }
777  m_TensorHandleFactoryRegistry.AquireMemory();
778  m_IsWorkingMemAllocated = true;
779 }
780 
781 void LoadedNetwork::FreeWorkingMemory()
782 {
783  std::lock_guard<std::mutex> lockGuard(m_WorkingMemMutex);
784  if (!m_IsWorkingMemAllocated)
785  {
786  return;
787  }
788  // Inform the memory managers to release memory in their respective memory groups
789  for (auto&& workloadFactory : m_WorkloadFactories)
790  {
791  IBackendInternal::IMemoryManagerSharedPtr memoryManager = workloadFactory.second.second;
792  if (memoryManager)
793  {
794  memoryManager->Release();
795  }
796  }
797  m_TensorHandleFactoryRegistry.ReleaseMemory();
798  m_IsWorkingMemAllocated = false;
799 }
800 
801 bool LoadedNetwork::Execute(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
802  profiling::ProfilingGuid inferenceGuid)
803 {
804  bool success = true;
805 
806  auto Fail = [&](const std::exception& error)
807  {
808  ARMNN_LOG(error) << "An error occurred attempting to execute a workload: " << error.what();
809  success = false;
810  };
811 
812  try
813  {
814  std::lock_guard<std::mutex> lockGuard(m_WorkingMemMutex);
815  AllocateWorkingMemory(lockGuard);
816 
817  ProfilingDynamicGuid workloadInferenceID(0);
818  auto ExecuteQueue = [&timelineUtils, &workloadInferenceID, &inferenceGuid](WorkloadQueue& queue)
819  {
820  for (auto& workload : queue)
821  {
822  if(timelineUtils)
823  {
824  workloadInferenceID = timelineUtils->RecordWorkloadInferenceAndStartOfLifeEvent(workload->GetGuid(),
825  inferenceGuid);
826  }
827  workload->Execute();
828  if(timelineUtils)
829  {
830  timelineUtils->RecordEndOfLifeEvent(workloadInferenceID);
831  }
832  }
833  };
834 
835  ExecuteQueue(m_InputQueue);
836  ExecuteQueue(m_WorkloadQueue);
837  ExecuteQueue(m_OutputQueue);
838  }
839  catch (const RuntimeException& error)
840  {
841  Fail(error);
842  }
843  catch (const std::runtime_error& error)
844  {
845  Fail(error);
846  }
847 
848  return success;
849 }
850 
851 void LoadedNetwork::EnqueueInput(const BindableLayer& layer,
852  const ConstTensor& inputTensor,
853  WorkingMemHandle& context)
854 {
855  if (layer.GetType() != LayerType::Input)
856  {
857  throw InvalidArgumentException("EnqueueInput: given layer not an InputLayer");
858  }
859  LayerGuid id = layer.GetGuid();
860  WorkingMemDescriptor descriptor = context.GetWorkingMemDescriptor(id);
861 
862  MemorySourceFlags importFlags = descriptor.m_Outputs[0]->GetImportFlags();
863  if (m_NetworkProperties.m_ImportEnabled) // Try to import the input tensor
864  {
865  if (CheckFlag(importFlags, m_NetworkProperties.m_InputSource) )
866  {
867  // This assumes a CPU Tensor handle
868  std::unique_ptr<ITensorHandle> tensorHandle =
869  std::make_unique<ConstPassthroughTensorHandle>(inputTensor.GetInfo(),
870  inputTensor.GetMemoryArea());
871 
872  void* mem = tensorHandle->Map(false);
873  if (descriptor.m_Outputs[0]->Import(mem, m_NetworkProperties.m_InputSource))
874  {
875  tensorHandle->Unmap();
876  return;
877  }
878  tensorHandle->Unmap();
879  throw MemoryImportException("EnqueueInput: Memory Import failed");
880  }
881  else
882  {
883  throw MemoryImportException("EnqueueInput: Memory Import failed, backend does not support Import");
884  }
885  }
886  else
887  {
888  std::unique_ptr<ITensorHandle> tensorHandle =
889  std::make_unique<ConstPassthroughTensorHandle>(inputTensor.GetInfo(), inputTensor.GetMemoryArea());
890 
891  auto copyFunc = [](void* dst, const void* src, size_t size)
892  {
893  memcpy(dst, src, size);
894  };
895 
896  for (const auto& input : descriptor.m_Outputs)
897  {
898  CopyTensorContentsGeneric(tensorHandle.get(), input, copyFunc);
899  }
900  }
901 }
902 
903 void LoadedNetwork::EnqueueOutput(const BindableLayer& layer, const Tensor& outputTensor, WorkingMemHandle& handle)
904 {
905  if (layer.GetType() != LayerType::Output)
906  {
907  throw InvalidArgumentException("EnqueueOutput: given layer not an OutputLayer");
908  }
909  ARMNN_ASSERT_MSG(layer.GetNumInputSlots() == 1, "Output Layer should have exactly one input.");
910 
911  LayerGuid id = layer.GetGuid();
912  WorkingMemDescriptor descriptor = handle.GetWorkingMemDescriptor(id);
913 
914  ITensorHandle* inputTensorHandle = descriptor.m_Inputs[0];
915  ARMNN_ASSERT_MSG(inputTensorHandle != nullptr, "Data should have been allocated.");
916 
917  // Try to import the output tensor.
918  // Note: We can only import the output pointer if all of the following hold true:
919  // a) The imported pointer is aligned sufficiently
920  // b) The tensor has zero padding
921  // c) There is only one connection to the OutputSlot and it is to an OutputLayer.
922  // d) The output pointer is allocated via malloc. (Other types will be supported in a later release)
923  // e) m_IsExportEnabled must be set to true
924  if (m_NetworkProperties.m_ExportEnabled &&
925  (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetNumConnections() == 1))
926  {
927  if (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOwningLayer().GetType() != LayerType::Input)
928  {
929  MemorySourceFlags importFlags = inputTensorHandle->GetImportFlags();
930  if (CheckFlag(importFlags, m_NetworkProperties.m_OutputSource))
931  {
932  std::unique_ptr<ITensorHandle> tensorHandle =
933  std::make_unique<PassthroughTensorHandle>(outputTensor.GetInfo(),
934  outputTensor.GetMemoryArea());
935 
936  void* mem = tensorHandle->Map(false);
937  bool importOk = inputTensorHandle->Import(mem, m_NetworkProperties.m_OutputSource);
938  tensorHandle->Unmap();
939 
940  if (importOk)
941  {
942  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "SyncMemGeneric_Execute");
943  inputTensorHandle->Map(true);
944  inputTensorHandle->Unmap();
945  }
946  else
947  {
948  throw MemoryExportException("EnqueueOutput: Memory Export failed");
949  }
950  }
951  else
952  {
953  throw MemoryExportException("EnqueueOutput: Memory Export failed, backend does not support Export");
954  }
955  }
956  else
957  {
958  throw MemoryExportException("EnqueueOutput: Memory Export failed, attempting to export Input Layer");
959  }
960  }
961  else
962  {
963  auto copyFunc = [](void* dst, const void* src, size_t size)
964  {
965  memcpy(dst, src, size);
966  };
967 
968  std::unique_ptr<ITensorHandle> tensorHandle =
969  std::make_unique<PassthroughTensorHandle>(outputTensor.GetInfo(),
970  outputTensor.GetMemoryArea());
971 
972  CopyTensorContentsGeneric(inputTensorHandle, tensorHandle.get(), copyFunc);
973  }
974 }
975 
976 
977 const armnn::ConstTensor GetInputTensor(const LayerBindingId layerId, const InputTensors& inputTensors)
978 {
979  for (auto inputTensorPair : inputTensors)
980  {
981  LayerBindingId id = inputTensorPair.first;
982  if (id == layerId)
983  {
984  return inputTensorPair.second;
985  }
986  }
987  throw InvalidArgumentException("Input does not exist.");
988 }
989 
990 const armnn::Tensor GetOutputTensor(const LayerBindingId layerId, const OutputTensors& outputTensors)
991 {
992  for (auto outputTensorPair : outputTensors)
993  {
994  LayerBindingId id = outputTensorPair.first;
995  if (id == layerId)
996  {
997  return outputTensorPair.second;
998  }
999  }
1000  throw InvalidArgumentException("Output does not exist.");
1001 }
1002 
1003 Status LoadedNetwork::Execute(const InputTensors& inputTensors,
1004  const OutputTensors& outputTensors,
1005  IWorkingMemHandle& iWorkingMemHandle)
1006 {
1007  const Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
1008 
1009  // Walk graph to determine the order of execution.
1010  if (graph.GetNumLayers() < 2)
1011  {
1012  ARMNN_LOG(warning) << "IRuntime::EnqueueWorkload()::Less than two nodes in graph";
1013  return Status::Failure;
1014  }
1015 
1016  if (graph.GetNumInputs() != inputTensors.size())
1017  {
1018  throw InvalidArgumentException("Number of inputs provided does not match network.");
1019  }
1020 
1021  std::unique_ptr<profiling::TimelineUtilityMethods> timelineUtils =
1022  profiling::TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService);
1023  profiling::ProfilingGuid inferenceGuid = m_ProfilingService.GetNextGuid();
1024  if (timelineUtils)
1025  {
1026  // Add inference timeline trace if profiling is enabled.
1027  profiling::ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
1028  timelineUtils->CreateTypedEntity(inferenceGuid, profiling::LabelsAndEventClasses::INFERENCE_GUID);
1029  timelineUtils->CreateRelationship(profiling::ProfilingRelationshipType::RetentionLink,
1030  networkGuid,
1031  inferenceGuid,
1032  profiling::LabelsAndEventClasses::EXECUTION_OF_GUID);
1033  timelineUtils->RecordEvent(inferenceGuid, profiling::LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
1034  }
1035 
1036  bool executionSucceeded = true;
1037 
1038  if (timelineUtils)
1039  {
1040  // Add end of life of the inference timeline if profiling is enabled.
1041  timelineUtils->RecordEvent(inferenceGuid, profiling::LabelsAndEventClasses::ARMNN_PROFILING_EOL_EVENT_CLASS);
1042  timelineUtils->Commit();
1043  }
1044  WorkingMemHandle& workingMemHandle = dynamic_cast<WorkingMemHandle&>(iWorkingMemHandle);
1045  std::lock_guard<std::mutex> lockGuard(workingMemHandle.GetMutex());
1046 
1047  if (!workingMemHandle.IsAllocated())
1048  {
1049  workingMemHandle.Allocate();
1050  }
1051 
1052  {
1053  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "PrepareInputs");
1054  for (const BindableLayer* inputLayer : graph.GetInputLayers())
1055  {
1056  EnqueueInput(*inputLayer, GetInputTensor(inputLayer->GetBindingId(), inputTensors), workingMemHandle);
1057  }
1058  }
1059 
1060  auto Fail = [&](const std::exception& error)
1061  {
1062  ARMNN_LOG(error) << "An error occurred attempting to execute a workload: " << error.what();
1063  executionSucceeded = false;
1064  };
1065  profiling::ProfilingDynamicGuid workloadInferenceID(0);
1066 
1067  try
1068  {
1069  for (unsigned int i = 0; i < m_WorkloadQueue.size(); ++i)
1070  {
1071  auto& workload = m_WorkloadQueue[i];
1072  if (timelineUtils)
1073  {
1074  workloadInferenceID = timelineUtils->RecordWorkloadInferenceAndStartOfLifeEvent(workload->GetGuid(),
1075  inferenceGuid);
1076  }
1077  workload->ExecuteAsync(workingMemHandle.GetWorkingMemDescriptorAt(i));
1078 
1079  if (timelineUtils)
1080  {
1081  timelineUtils->RecordEndOfLifeEvent(workloadInferenceID);
1082  }
1083  }
1084  }
1085  catch (const RuntimeException& error)
1086  {
1087  Fail(error);
1088  }
1089  catch (const std::runtime_error& error)
1090  {
1091  Fail(error);
1092  }
1093  // For each output to the network, call EnqueueOutput with the data passed by the user.
1094  {
1095  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "PrepareOutputs");
1096  for (const BindableLayer *outputLayer : graph.GetOutputLayers())
1097  {
1098  EnqueueOutput(*outputLayer, GetOutputTensor(outputLayer->GetBindingId(), outputTensors), workingMemHandle);
1099  }
1100  }
1101 
1102  return executionSucceeded ? Status::Success : Status::Failure;
1103 }
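
This Execute() overload is the thread-safe path: all intermediate tensors live in the caller-provided working memory handle rather than in member queues, so concurrent calls do not interfere. A sketch of driving it directly (not part of this file; loadedNetwork, networkId and the tensor containers are assumed to exist):

    auto workingMemHandle = loadedNetwork->CreateWorkingMemHandle(networkId);
    armnn::Status status = loadedNetwork->Execute(inputTensors, outputTensors, *workingMemHandle);
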
1104 
1105 /// Create a new unique WorkingMemHandle object. Create multiple handles if you wish to have
1106 /// overlapped Execution by calling this function from different threads.
1107 std::unique_ptr<IWorkingMemHandle> LoadedNetwork::CreateWorkingMemHandle(NetworkId networkId)
1108 {
1109  Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
1110  std::unordered_map<LayerGuid, std::vector<std::unique_ptr<ITensorHandle> > > tensorHandleMap;
1111  std::vector<WorkingMemDescriptor> workingMemDescriptors;
1112  std::unordered_map<LayerGuid, WorkingMemDescriptor> workingMemDescriptorMap;
1113  TensorHandleFactoryRegistry tensorHandleFactoryRegistry;
1114  WorkloadFactoryMap workloadFactoryMap;
1115 
1116  std::vector<std::shared_ptr<IMemoryManager>> memoryManagers;
1117 
1118  for (auto const& backend : m_Backends)
1119  {
1120  if (backend.second->SupportsTensorAllocatorAPI())
1121  {
1122  backend.second->RegisterTensorHandleFactories(
1123  tensorHandleFactoryRegistry,
1124  static_cast<MemorySourceFlags>(m_NetworkProperties.m_InputSource),
1125  static_cast<MemorySourceFlags>(m_NetworkProperties.m_OutputSource));
1126  memoryManagers.emplace_back(tensorHandleFactoryRegistry.GetMemoryManagers().back());
1127  }
1128  else
1129  {
1130  std::shared_ptr<IMemoryManager> memoryManager = backend.second->CreateMemoryManager();
1131  auto workloadFactory = backend.second->CreateWorkloadFactory(
1132  memoryManager, m_OptimizedNetwork->pOptimizedNetworkImpl->GetModelOptions());
1133 
1134  workloadFactoryMap.emplace(
1135  std::make_pair(backend.first, std::make_pair(std::move(workloadFactory), memoryManager)));
1136  memoryManagers.emplace_back(memoryManager);
1137  }
1138  }
1139 
1140  auto GetTensorHandle = [&](Layer* layer, const OutputSlot& outputSlot, bool isMemoryManaged)
1141  {
1142  ITensorHandleFactory::FactoryId factoryId = outputSlot.GetTensorHandleFactoryId();
1143  const TensorInfo& tensorInfo = outputSlot.GetTensorInfo();
1144 
1145  if (factoryId == ITensorHandleFactory::LegacyFactoryId)
1146  {
1147  BackendId id = layer->GetBackendId();
1148  ARMNN_NO_DEPRECATE_WARN_BEGIN
1149  return workloadFactoryMap.at(id).first->CreateTensorHandle(tensorInfo, isMemoryManaged);
1150  ARMNN_NO_DEPRECATE_WARN_END
1151  }
1152  else
1153  {
1154  ITensorHandleFactory* handleFactory = tensorHandleFactoryRegistry.GetFactory(factoryId);
1155  ARMNN_ASSERT(handleFactory);
1156  return handleFactory->CreateTensorHandle(tensorInfo, isMemoryManaged);
1157  }
1158  };
1159 
1160  std::unordered_map<const ITensorHandle*, unsigned int> handleReferenceCounts;
1161  for (auto&& layer : order)
1162  {
1163  WorkingMemDescriptor workingMemDescriptor;
1164 
1165  // Execution and management of Constant layers is handled during loaded network construction
1166  if (layer->GetType() == LayerType::Constant)
1167  {
1168  continue;
1169  }
1170  bool isMemoryManaged = true;
1171  bool isInputLayer = true;
1172  // Look for the layer with 1 OutputSlot which has 1 connection and that connection is an Output Layer
1173  // If Export is enabled disable memory management so we can export, otherwise we do a copy
1174  if ((layer->GetNumOutputSlots() == 1) &&
1175  (layer->GetOutputSlots()[0].GetNumConnections() == 1) &&
1176  (layer->GetOutputSlots()[0].GetConnection(0)->GetOwningLayer().GetType() == LayerType::Output))
1177  {
1178  isMemoryManaged = !m_NetworkProperties.m_ExportEnabled;
1179  }
1180  else if (layer->GetType() == LayerType::Input || layer->GetType() == LayerType::MemImport)
1181  {
1182  // Input layers/workloads will not be executed so the descriptor is not added to workingMemDescriptors
1183  // However we will still need to manage the tensorHandle
1184  isInputLayer = false;
1185  isMemoryManaged = !m_NetworkProperties.m_ExportEnabled;
1186  }
1187 
1188  // Create a tensor handle for each output slot of a layer
1189  // Once we create it, we start managing its lifetime
1190  for (auto& slot : layer->GetOutputSlots())
1191  {
1192  tensorHandleMap[layer->GetGuid()].emplace_back(GetTensorHandle(layer, slot, isMemoryManaged));
1193  ITensorHandle* tensorHandle = tensorHandleMap[layer->GetGuid()].back().get();
1194 
1195  workingMemDescriptor.m_Outputs.push_back(tensorHandle);
1196  tensorHandle->Manage();
1197  unsigned int numConnections = slot.GetNumConnections();
1198  ARMNN_ASSERT(numConnections != 0);
1199 
1200  handleReferenceCounts[tensorHandle] = numConnections;
1201  }
1202  // Loop through the input slots in the same layer and decrement the reference counter associated
1203  // with each tensor handle we encounter.
1204  // Once it reaches zero, the lifetime of the tensor handle has ended, and we mark its memory as available
1205  // so that the next tensor handle with a non-overlapping lifetime can share its memory.
1206  for (auto& slot : layer->GetInputSlots())
1207  {
1208  ARMNN_ASSERT(slot.GetConnection());
1209  auto outputSlot = slot.GetConnectedOutputSlot();
1210  auto key = outputSlot->GetOwningLayer().GetGuid();
1211 
1212  // Execution and management of Constant layers is handled during loaded network construction
1213  auto found = m_ConstantTensorHandles.find(key);
1214  if (found != m_ConstantTensorHandles.end())
1215  {
1216  workingMemDescriptor.m_Inputs.push_back(found->second);
1217  continue;
1218  }
1219 
1220  auto search = tensorHandleMap.find(key);
1221  unsigned int index = outputSlot->CalculateIndexOnOwner();
1222  ITensorHandle* inputTensorHandle = search->second[index].get();
1223  workingMemDescriptor.m_Inputs.push_back(inputTensorHandle);
1224  --handleReferenceCounts.at(inputTensorHandle);
1225  if (handleReferenceCounts.at(inputTensorHandle) == 0u)
1226  {
1227  // Stop managing lifetime of tensor handle
1228  inputTensorHandle->Allocate();
1229  handleReferenceCounts.erase(inputTensorHandle);
1230  }
1231  }
1232  workingMemDescriptorMap.insert({layer->GetGuid(), workingMemDescriptor});
1233 
1234  // Input layers/workloads will not be executed, so the descriptor is not added to workingMemDescriptors
1235  // However we will still need to manage the tensorHandle
1236  if (isInputLayer)
1237  {
1238  workingMemDescriptors.push_back(workingMemDescriptor);
1239  }
1240  }
1241 
1242  return std::make_unique<WorkingMemHandle>(networkId,
1243  workingMemDescriptors,
1244  workingMemDescriptorMap,
1245  memoryManagers,
1246  std::move(tensorHandleMap));
1247 }
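
Because each handle carries its own working memory descriptors and memory managers, overlapped execution simply means one handle per thread, as the comment above the function states. A sketch (not part of this file; assumes <thread> is included and that each thread has its own input/output containers):

    auto handle0 = loadedNetwork->CreateWorkingMemHandle(networkId);
    auto handle1 = loadedNetwork->CreateWorkingMemHandle(networkId);

    std::thread t0([&] { loadedNetwork->Execute(inputs0, outputs0, *handle0); });
    std::thread t1([&] { loadedNetwork->Execute(inputs1, outputs1, *handle1); });
    t0.join();
    t1.join();
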
1248 
1249 void LoadedNetwork::RegisterDebugCallback(const DebugCallbackFunction& func)
1250 {
1251  for (auto&& workloadPtr: m_WorkloadQueue)
1252  {
1253  workloadPtr.get()->RegisterDebugCallback(func);
1254  }
1255 }
1256 
1257 }