ArmNN
 21.02
LoadedNetwork.cpp
Go to the documentation of this file.
1 //
2 // Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 
6 #include "LoadedNetwork.hpp"
7 #include "Layer.hpp"
8 #include "Graph.hpp"
9 #include "Network.hpp"
10 #include <Processes.hpp>
11 #include "Profiling.hpp"
12 #include "HeapProfiling.hpp"
13 
15 #include <armnn/Logging.hpp>
16 #include <armnn/utility/Assert.hpp>
17 
22 
24 
25 #include <fmt/format.h>
26 
27 namespace armnn
28 {
29 
30 using namespace std;
31 using namespace armnn::profiling;
32 
33 namespace
34 {
35 
// Builds a human-readable error string of the form "<prefix> <error.what()>".
// Works for any exception-like type exposing what().
template <typename ExceptionType>
std::string ToErrorMessage(const char * prefix, const ExceptionType & error)
{
    std::string message(prefix);
    message += " ";
    message += error.what();
    return message;
}
43 
// Registers 'layer' in the profiling timeline as a named child entity of the
// network identified by 'networkGuid', then records a RetentionLink connection
// relationship from the producing layer of every connected input slot.
// NOTE(review): original line 53 (the final argument of
// CreateNamedTypedChildEntity and its closing parenthesis/semicolon) is missing
// from this extraction - confirm against the upstream file.
44 void AddLayerStructure(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
45  const Layer& layer,
46  ProfilingGuid networkGuid)
47 {
48  // Add layer to the post-optimisation network structure
49  std::string layerName = layer.GetNameStr().empty() ? "<Unnamed>" : layer.GetNameStr();
50  timelineUtils->CreateNamedTypedChildEntity(layer.GetGuid(),
51  networkGuid,
52  layerName,
54  for (auto&& input : layer.GetInputSlots())
55  {
56  const IOutputSlot* source = input.GetConnectedOutputSlot();
57  ARMNN_ASSERT(source != NULL); // every input slot is expected to be connected here
58  timelineUtils->CreateConnectionRelationship(ProfilingRelationshipType::RetentionLink,
59  source->GetOwningLayerGuid(),
60  layer.GetGuid());
61  }
62 }
63 
// Registers 'workload' in the profiling timeline, labels it with the backend id
// of the layer it was created for, and links it to that layer with a
// RetentionLink relationship.
// NOTE(review): original lines 72 and 78 (the final GUID argument of
// MarkEntityWithLabel and of CreateRelationship, with their closing
// parentheses) are missing from this extraction - confirm against the
// upstream file.
64 void AddWorkloadStructure(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
65  std::unique_ptr<IWorkload>& workload,
66  const Layer& layer)
67 {
68  // Add workload to the post-optimisation network structure
69  timelineUtils->CreateTypedEntity(workload->GetGuid(), LabelsAndEventClasses::WORKLOAD_GUID);
70  timelineUtils->MarkEntityWithLabel(workload->GetGuid(),
71  layer.GetBackendId().Get(),
73 
74  // Link the workload to the layer
75  timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
76  layer.GetGuid(),
77  workload->GetGuid(),
79 }
80 
81 } // anonymous
82 
83 std::unique_ptr<LoadedNetwork> LoadedNetwork::MakeLoadedNetwork(std::unique_ptr<IOptimizedNetwork> net,
84  std::string& errorMessage,
85  const INetworkProperties& networkProperties,
86  profiling::ProfilingService& profilingService)
87 {
88  std::unique_ptr<LoadedNetwork> loadedNetwork;
89 
90  auto Fail = [&](const std::exception& error) -> std::unique_ptr<LoadedNetwork>
91  {
92  errorMessage = ToErrorMessage("An error occurred when preparing the network workloads: ", error);
93  ARMNN_LOG(error) << errorMessage;
94 
95  return std::unique_ptr<LoadedNetwork>();
96  };
97 
98  try
99  {
100  loadedNetwork.reset(new LoadedNetwork(std::move(net), networkProperties, profilingService));
101  }
102  catch (const armnn::RuntimeException& error)
103  {
104  return Fail(error);
105  }
106  catch (const armnn::Exception& error)
107  {
108  return Fail(error);
109  }
110  catch (const std::runtime_error& error)
111  {
112  return Fail(error);
113  }
114 
115  return loadedNetwork;
116 }
117 
// Constructor: takes ownership of the optimized network and prepares it for
// execution in three passes over the topologically-sorted graph:
//   1) create one backend + workload factory per distinct backend id,
//   2) create tensor handles for every layer,
//   3) create the workloads themselves,
// then allocates dynamic buffers and runs post-allocation configuration.
118 LoadedNetwork::LoadedNetwork(std::unique_ptr<IOptimizedNetwork> net,
119  const INetworkProperties& networkProperties,
120  profiling::ProfilingService& profilingService) :
121  m_OptimizedNetwork(std::move(net)),
122  m_IsImportEnabled(networkProperties.m_ImportEnabled),
123  m_IsExportEnabled(networkProperties.m_ExportEnabled),
124  m_TensorHandleFactoryRegistry(),
125  m_ProfilingService(profilingService)
126 {
127  // Create a profiler and register it for the current thread.
128  m_Profiler = std::make_shared<IProfiler>();
// NOTE(review): original line 129 is missing from this extraction - presumably
// the ProfilerManager registration of m_Profiler; confirm against the
// upstream file.
130 
131  Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
132  //First create tensor handlers, backends and workload factories.
133  //Handlers are created before workloads are.
134  //Because workload creation can modify some of the handlers,
135  //(for example the splitter and concat layers).
136  for (auto&& layer : order)
137  {
138  auto const& backendId = layer->GetBackendId();
139  if (m_Backends.count(backendId) == 0)
140  {
// Instantiate the backend lazily, the first time a layer assigned to it
// is encountered.
141  auto createBackend = BackendRegistryInstance().GetFactory(backendId);
142  auto it = m_Backends.emplace(std::make_pair(backendId, createBackend()));
143 
144  IBackendInternal* backend = it.first->second.get();
145 
146  if (backend->SupportsTensorAllocatorAPI())
147  {
// Tensor-allocator-API backends get a factory wired to the handle factory
// registry and no separate memory manager (nullptr in the pair below).
148  auto workloadFactory = backend->CreateWorkloadFactory(
149  m_TensorHandleFactoryRegistry, m_OptimizedNetwork->pOptimizedNetworkImpl->GetModelOptions());
150  m_WorkloadFactories.emplace(
151  std::make_pair(backendId, std::make_pair(std::move(workloadFactory), nullptr)));
152  }
153  else
154  {
// NOTE(review): original line 155 is missing from this extraction - the
// declaration of 'memoryManager' used below (presumably created via
// backend->CreateMemoryManager()); confirm against the upstream file.
156  auto workloadFactory = backend->CreateWorkloadFactory(
157  memoryManager, m_OptimizedNetwork->pOptimizedNetworkImpl->GetModelOptions());
158 
159  m_WorkloadFactories.emplace(
160  std::make_pair(backendId, std::make_pair(std::move(workloadFactory), memoryManager)));
161  }
162  }
163  }
164 
// Second pass: create tensor handles for every layer's outputs.
165  for (auto&& layer : order)
166  {
167  auto& workloadFactory = GetWorkloadFactory(*layer);
168 
169  switch (layer->GetType())
170  {
171  case LayerType::Input:
// NOTE(review): original line 172 is missing from this extraction -
// presumably a second fall-through case label; confirm against the
// upstream file.
173  {
174  // If IsImportEnabled is true then we need to set IsMemoryManaged to false when creating TensorHandles
175  layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory, !m_IsImportEnabled);
176  break;
177  }
178  default:
179  {
180  // Look for the layer with 1 OutputSlot which has 1 connection and that connection is an Output Layer
181  // If Export is enabled disable memory management so we can export, otherwise we do a copy
182  if((layer->GetNumOutputSlots() == 1) &&
183  (layer->GetOutputSlots()[0].GetNumConnections() == 1) &&
184  (layer->GetOutputSlots()[0].GetConnection(0)->GetOwningLayer().GetType() == LayerType::Output))
185  {
186  layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory, !m_IsExportEnabled);
187  }
188  else
189  {
190  layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory);
191  }
192  }
193  }
194  }
195 
// If the profiling timeline is active, describe the network entity itself:
// type, start-of-life event, and owning process id.
196  ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
197  std::unique_ptr<TimelineUtilityMethods> timelineUtils =
198  TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService);
199  if (timelineUtils)
200  {
201  timelineUtils->CreateTypedEntity(networkGuid, LabelsAndEventClasses::NETWORK_GUID);
202  // Mark the network with a start of life event
203  timelineUtils->RecordEvent(networkGuid, LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
204  // and with the process ID
205  int processID = armnnUtils::Processes::GetCurrentId();
206  std::stringstream ss;
207  ss << processID;
208  timelineUtils->MarkEntityWithLabel(networkGuid, ss.str(), LabelsAndEventClasses::PROCESS_ID_GUID);
209  }
210 
211  //Then create workloads.
212  for (auto&& layer : order)
213  {
214  if (timelineUtils)
215  {
216  // Add layer to the post-optimisation network structure
217  AddLayerStructure(timelineUtils, *layer, networkGuid);
218  }
219 
220  const IWorkloadFactory& workloadFactory = GetWorkloadFactory(*layer);
221 
222  switch (layer->GetType())
223  {
224  case LayerType::Input:
225  case LayerType::Output:
226  {
227  // Inputs and outputs are treated in a special way - see EnqueueInput() and EnqueueOutput().
228  break;
229  }
230  default:
231  {
232  auto workload = layer->CreateWorkload(workloadFactory);
233 
234  if (!workload)
235  {
236  const char* const layerName =
237  layer->GetNameStr().length() != 0 ? layer->GetName() : "<Unnamed>";
// NOTE(review): original line 238 is missing from this extraction - the
// statement that consumes the fmt::format result below (presumably a throw
// of InvalidArgumentException, given the trailing '));' on line 241);
// confirm against the upstream file.
239  fmt::format("No workload created for layer (name: '{0}' type: '{1}') (compute '{2}')",
240  layerName, static_cast<int>(layer->GetType()), layer->GetBackendId().Get()
241  ));
242  }
243 
244  if (timelineUtils)
245  {
246  // Add workload to the post-optimisation network structure
247  AddWorkloadStructure(timelineUtils, workload, *layer);
248  }
249 
250  m_WorkloadQueue.push_back(move(workload));
251  // release the constant data in the layer..
252  layer->ReleaseConstantData();
253  break;
254  }
255  }
256  }
257 
// Let every factory finalize any state that depends on all workloads existing.
258  for (auto&& workloadFactory : m_WorkloadFactories)
259  {
260  workloadFactory.second.first->AfterWorkloadsCreated();
261  }
262 
263  if (timelineUtils)
264  {
265  // Commit to send the post-optimisation network structure
266  timelineUtils->Commit();
267  }
268 
269  // Set up memory.
270  m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().AllocateDynamicBuffers();
271 
272  // Now that the intermediate tensor memory has been set-up, do any post allocation configuration for each workload.
273  for (auto& workload : m_WorkloadQueue)
274  {
275  workload->PostAllocationConfigure();
276  }
277 }
278 
// Replays the post-optimisation network structure (network entity, each layer,
// and each workload) into the profiling timeline and commits it.
// NOTE(review): the signature line (original line 279) is missing from this
// extraction - presumably void LoadedNetwork::SendNetworkStructure(); confirm
// against the upstream file.
280 {
281  Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
282  ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
283 
284  std::unique_ptr<TimelineUtilityMethods> timelineUtils =
285  TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService);
286 
287  timelineUtils->CreateTypedEntity(networkGuid, LabelsAndEventClasses::NETWORK_GUID);
288 
289  for (auto&& layer : order)
290  {
291  // Add layer to the post-optimisation network structure
292  AddLayerStructure(timelineUtils, *layer, networkGuid);
293  switch (layer->GetType())
294  {
295  case LayerType::Input:
296  case LayerType::Output:
297  {
298  // Inputs and outputs are treated in a special way - see EnqueueInput() and EnqueueOutput().
299  break;
300  }
301  default:
302  {
// Every queued workload is re-registered against this layer here.
303  for (auto& workload : m_WorkloadQueue)
304  {
305  // Add workload to the post-optimisation network structure
306  AddWorkloadStructure(timelineUtils, workload, *layer);
307  }
308  break;
309  }
310  }
311  }
312  // Commit to send the post-optimisation network structure
313  timelineUtils->Commit();
314 }
315 
// Returns the profiling GUID of the owned optimized network.
// NOTE(review): the signature line (original line 316) is missing from this
// extraction - presumably profiling::ProfilingGuid LoadedNetwork::GetNetworkGuid();
// confirm against the upstream file.
317 {
318  return m_OptimizedNetwork->GetGuid();
319 }
320 
// Scans the graph's input layers for the one bound to 'layerId' and returns
// the TensorInfo of its single output slot.
// Throws InvalidArgumentException if no input layer carries that binding id.
// NOTE(review): the signature line (original line 321) is missing from this
// extraction - presumably TensorInfo LoadedNetwork::GetInputTensorInfo(LayerBindingId layerId) const;
// confirm against the upstream file.
322 {
323  for (auto&& inputLayer : m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetInputLayers())
324  {
325  ARMNN_ASSERT_MSG(inputLayer->GetNumOutputSlots() == 1, "Input layer should have exactly 1 output slot");
326  if (inputLayer->GetBindingId() == layerId)
327  {
328  return inputLayer->GetOutputSlot(0).GetTensorInfo();
329  }
330  }
331 
332  throw InvalidArgumentException(fmt::format("No input layer is associated with id {}", layerId));
333 }
334 
// Scans the graph's output layers for the one bound to 'layerId' and returns
// the TensorInfo of the connection feeding its single input slot.
// Throws InvalidArgumentException if no output layer carries that binding id.
// NOTE(review): the signature line (original line 335) is missing from this
// extraction - presumably TensorInfo LoadedNetwork::GetOutputTensorInfo(LayerBindingId layerId) const;
// confirm against the upstream file.
336 {
337  for (auto&& outputLayer : m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetOutputLayers())
338  {
339  ARMNN_ASSERT_MSG(outputLayer->GetNumInputSlots() == 1, "Output layer should have exactly 1 input slot");
340  ARMNN_ASSERT_MSG(outputLayer->GetInputSlot(0).GetConnection(), "Input slot on Output layer must be connected");
341  if (outputLayer->GetBindingId() == layerId)
342  {
343  return outputLayer->GetInputSlot(0).GetConnection()->GetTensorInfo();
344  }
345  }
346 
347  throw InvalidArgumentException(fmt::format("No output layer is associated with id {}", layerId));
348 }
349 
// Looks up the workload factory registered for the layer's backend.
// Throws RuntimeException when the backend has no registered factory.
350 const IWorkloadFactory& LoadedNetwork::GetWorkloadFactory(const Layer& layer) const
351 {
352  const IWorkloadFactory* workloadFactory = nullptr;
353 
354  auto it = m_WorkloadFactories.find(layer.GetBackendId());
355  if (it == m_WorkloadFactories.end())
356  {
357  throw RuntimeException(fmt::format("No workload factory for {0} to be used for layer: {1}",
358  layer.GetBackendId().Get(),
359  layer.GetNameStr()),
360  CHECK_LOCATION());
361  }
362 
363  workloadFactory = it->second.first.get();
364 
365  ARMNN_ASSERT_MSG(workloadFactory, "No workload factory");
366 
367  std::string reasonIfUnsupported;
// NOTE(review): original line 368 is missing from this extraction - the
// opening of the ARMNN_ASSERT_MSG / IsLayerSupported call whose remaining
// arguments appear on the following lines; confirm against the upstream file.
369  {},
370  reasonIfUnsupported,
371  m_OptimizedNetwork->pOptimizedNetworkImpl->GetModelOptions()),
372  "Factory does not support layer");
373  IgnoreUnused(reasonIfUnsupported);
374  return *workloadFactory;
375 }
376 
377 namespace {
378 
379 // Non-copyable class owning accelerator-specific tensor data.
380 class TensorPin
381 {
382 public:
383  TensorPin(std::unique_ptr<ITensorHandle> handle, const TensorInfo& info, LayerBindingId id)
384  : m_TensorHandle(std::move(handle))
385  , m_TensorInfo(info)
386  , m_Id(id)
387  {
388  }
389 
390  ITensorHandle* GetTensorHandle() const { return m_TensorHandle.get(); }
391  const TensorInfo& GetTensorInfo() const { return m_TensorInfo; }
392  LayerBindingId GetBindingId() const { return m_Id; }
393 
394 private:
395  std::unique_ptr<ITensorHandle> m_TensorHandle;
396  TensorInfo m_TensorInfo;
397  LayerBindingId m_Id;
398 };
399 
400 static const TensorPin& GetTensorPin(LayerBindingId id,
401  const std::vector<TensorPin>& pins,
402  char const* bindingPointDesc)
403 {
404  auto it = std::find_if(pins.begin(), pins.end(),
405  [id](const TensorPin& pin)
406  {
407  return pin.GetBindingId() == id;
408  });
409 
410  if (it != pins.end())
411  {
412  return *it;
413  }
414  else
415  {
416  throw InvalidArgumentException(fmt::format("No tensor supplied for {0} {1}", bindingPointDesc, id));
417  }
418 }
419 
420 // Stores data that needs to be kept accessible for the entire execution of a workload.
421 class WorkloadData
422 {
423 public:
424  WorkloadData(const InputTensors& inputTensors, const OutputTensors& outputTensors)
425  {
426  m_InputTensorPins.reserve(inputTensors.size());
427  m_OutputTensorPins.reserve(outputTensors.size());
428 
429  for (auto inputTensorPair : inputTensors)
430  {
431  auto inputTensor = inputTensorPair.second;
432 
433  std::unique_ptr<ITensorHandle> tensorHandle =
434  std::make_unique<ConstPassthroughCpuTensorHandle>(inputTensor.GetInfo(),inputTensor.GetMemoryArea());
435  LayerBindingId layerId = inputTensorPair.first;
436 
437  m_InputTensorPins.emplace_back(std::move(tensorHandle), inputTensor.GetInfo(), layerId);
438  }
439 
440  for (auto outputTensorPair : outputTensors)
441  {
442  auto outputTensor = outputTensorPair.second;
443 
444  std::unique_ptr<ITensorHandle> tensorHandle =
445  std::make_unique<PassthroughCpuTensorHandle>(outputTensor.GetInfo(), outputTensor.GetMemoryArea());
446  LayerBindingId layerId = outputTensorPair.first;
447 
448  m_OutputTensorPins.emplace_back(std::move(tensorHandle), outputTensor.GetInfo(), layerId);
449  }
450  }
451 
452  const TensorPin& GetInputTensorPin(LayerBindingId id) const
453  {
454  return GetTensorPin(id, m_InputTensorPins, "input");
455  }
456 
457  const TensorPin& GetOutputTensorPin(LayerBindingId id) const
458  {
459  return GetTensorPin(id, m_OutputTensorPins, "output");
460  }
461 
462 private:
463 
464  std::vector<TensorPin> m_InputTensorPins;
465  std::vector<TensorPin> m_OutputTensorPins;
466 };
467 
468 }
469 
// Runs one inference: validates the graph and inputs, builds the input/output
// copy-or-import queues, records the inference in the profiling timeline, and
// executes all queues. Returns Status::Success/Failure.
// NOTE(review): original line 470 (the start of the signature, presumably
// Status LoadedNetwork::EnqueueWorkload(const InputTensors& inputTensors,) is
// missing from this extraction - confirm against the upstream file.
471  const OutputTensors& outputTensors)
472 {
473  const Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
474 
475  // Walk graph to determine the order of execution.
476  if (graph.GetNumLayers() < 2)
477  {
478  ARMNN_LOG(warning) << "IRuntime::EnqueueWorkload()::Less than two nodes in graph";
479  return Status::Failure;
480  }
481 
482  // Data that must be kept alive for the entire execution of the workload.
483  WorkloadData workloadData(inputTensors, outputTensors);
484 
485  if (graph.GetNumInputs() != inputTensors.size())
486  {
487  throw InvalidArgumentException("Number of inputs provided does not match network.");
488  }
489 
490  // For each input to the network, call EnqueueInput with the data passed by the user.
491  {
// NOTE(review): original line 492 is missing from this extraction -
// presumably a scoped profiling event for preparing the inputs; confirm
// against the upstream file.
493  m_InputQueue.clear();
494  m_InputQueue.reserve(graph.GetNumInputs());
495  for (const BindableLayer* inputLayer : graph.GetInputLayers())
496  {
497  const TensorPin& pin = workloadData.GetInputTensorPin(inputLayer->GetBindingId());
498  EnqueueInput(*inputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
499  }
500  }
501 
502  // For each output to the network, call EnqueueOutput with the data passed by the user.
503  {
// NOTE(review): original line 504 is missing from this extraction -
// presumably a scoped profiling event for preparing the outputs; confirm
// against the upstream file.
505  m_OutputQueue.clear();
506  m_OutputQueue.reserve(graph.GetNumOutputs());
507  for (const BindableLayer* outputLayer : graph.GetOutputLayers())
508  {
509  const TensorPin& pin = workloadData.GetOutputTensorPin(outputLayer->GetBindingId());
510  EnqueueOutput(*outputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
511  }
512  }
513 
514  std::unique_ptr<TimelineUtilityMethods> timelineUtils =
515  TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService);
516  ProfilingGuid inferenceGuid = m_ProfilingService.GetNextGuid();
517  if (timelineUtils)
518  {
519  // Add inference timeline trace if profiling is enabled.
520  ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
521  timelineUtils->CreateTypedEntity(inferenceGuid, LabelsAndEventClasses::INFERENCE_GUID);
522  timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
523  networkGuid,
524  inferenceGuid,
// NOTE(review): original line 525 is missing from this extraction - the final
// GUID argument of CreateRelationship (presumably
// LabelsAndEventClasses::EXECUTION_OF_GUID); confirm against the upstream file.
526  timelineUtils->RecordEvent(inferenceGuid, LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
527  }
528 
529  bool executionSucceeded = true;
530 
531  {
532  if (m_ProfilingService.IsProfilingEnabled())
533  {
534  m_ProfilingService.IncrementCounterValue(armnn::profiling::INFERENCES_RUN);
535  }
// NOTE(review): original line 536 is missing from this extraction -
// presumably a scoped profiling event wrapping the execution; confirm against
// the upstream file.
537  ARMNN_SCOPED_HEAP_PROFILING("Executing");
538  executionSucceeded = Execute(timelineUtils, inferenceGuid);
539  }
540 
541  if (timelineUtils)
542  {
543  // Add end of life of the inference timeline if profiling is enabled.
544  timelineUtils->RecordEvent(inferenceGuid, LabelsAndEventClasses::ARMNN_PROFILING_EOL_EVENT_CLASS);
545  timelineUtils->Commit();
546  }
547  return executionSucceeded ? Status::Success : Status::Failure;
548 }
549 
// Feeds one user input tensor into the network. When import is enabled and the
// layer's output handle accepts Malloc-sourced memory, the user buffer is
// imported directly (no copy workload); otherwise a CopyMemGenericWorkload is
// queued to copy the data in.
// Throws InvalidArgumentException on a non-Input layer or null handle, and
// MemoryImportException when an attempted import fails.
550 void LoadedNetwork::EnqueueInput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo)
551 {
552  if (layer.GetType() != LayerType::Input)
553  {
554  throw InvalidArgumentException("EnqueueInput: given layer not an InputLayer");
555  }
556 
557  if (tensorHandle == nullptr)
558  {
559  throw InvalidArgumentException("EnqueueInput: tensorHandle must not be NULL");
560  }
561 
562  InputQueueDescriptor inputQueueDescriptor;
// NOTE(review): original line 563 is missing from this extraction - the
// declaration of 'info' used below (presumably WorkloadInfo info;); confirm
// against the upstream file.
564 
565  inputQueueDescriptor.m_Inputs.push_back(tensorHandle);
566  info.m_InputTensorInfos.push_back(tensorInfo);
567 
568  ARMNN_ASSERT_MSG(layer.GetNumOutputSlots() == 1, "Can only handle Input Layer with one output");
569  const OutputHandler& handler = layer.GetOutputHandler();
570  const TensorInfo& outputTensorInfo = handler.GetTensorInfo();
571  ITensorHandle* outputTensorHandle = handler.GetData();
572  ARMNN_ASSERT_MSG(outputTensorHandle != nullptr,
573  "Data should have been allocated.");
574  inputQueueDescriptor.m_Outputs.push_back(outputTensorHandle);
575  info.m_OutputTensorInfos.push_back(outputTensorInfo);
576 
577  MemorySourceFlags importFlags = outputTensorHandle->GetImportFlags();
578  bool needMemCopy = true;
579  if (m_IsImportEnabled) // Try import the input tensor
580  {
581  if(CheckFlag(importFlags, MemorySource::Malloc) )
582  {
583  needMemCopy = false;
584  // This assumes a CPU Tensor handle
585  void* mem = tensorHandle->Map(false);
586  if (outputTensorHandle->Import(mem, MemorySource::Malloc))
587  {
588  tensorHandle->Unmap();
589  return; // No need for a workload since the import has been done.
590  }
591  tensorHandle->Unmap();
592  throw MemoryImportException("EnqueueInput: Memory Import failed");
593  }
594  }
595  if (needMemCopy)
596  {
597  // Create a mem copy workload for input since we did not import
598  std::unique_ptr<IWorkload> inputWorkload = std::make_unique<CopyMemGenericWorkload>(inputQueueDescriptor, info);
599 
600  ARMNN_ASSERT_MSG(inputWorkload, "No input workload created");
601 
602  std::unique_ptr<TimelineUtilityMethods> timelineUtils =
603  TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService);
604  if (timelineUtils)
605  {
606  // Add Input Workload to the post-optimisation network structure
607  AddWorkloadStructure(timelineUtils, inputWorkload, layer);
608  timelineUtils->Commit();
609  }
610 
611  m_InputQueue.push_back(move(inputWorkload));
612  }
613 }
614 
// Routes one network output into the user's tensor. When export is enabled and
// the import preconditions below hold, the user buffer is imported into the
// producing layer's handle and only a sync workload is queued; otherwise a
// CopyMemGenericWorkload copies the result out.
// Throws InvalidArgumentException on a non-Output layer or null handle, and
// MemoryExportException when an attempted export fails.
615 void LoadedNetwork::EnqueueOutput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo)
616 {
617  if (layer.GetType() != LayerType::Output)
618  {
619  throw InvalidArgumentException("EnqueueOutput: given layer not an OutputLayer");
620  }
621 
622  if (tensorHandle == nullptr)
623  {
624  throw InvalidArgumentException("EnqueueOutput: tensorHandle must not be NULL");
625  }
626 
627  OutputQueueDescriptor outputQueueDescriptor;
// NOTE(review): original line 628 is missing from this extraction - the
// declaration of 'info' used below (presumably WorkloadInfo info;); confirm
// against the upstream file.
629 
630  outputQueueDescriptor.m_Outputs.push_back(tensorHandle);
631  info.m_OutputTensorInfos.push_back(tensorInfo);
632 
633  ARMNN_ASSERT_MSG(layer.GetNumInputSlots() == 1, "Output Layer should have exactly one input.");
634 
635  // Gets the output handler from the previous node.
636  const OutputHandler& outputHandler = layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOutputHandler();
637 
638  const TensorInfo& inputTensorInfo = outputHandler.GetTensorInfo();
639  ITensorHandle* inputTensorHandle = outputHandler.GetData();
640  ARMNN_ASSERT_MSG(inputTensorHandle != nullptr, "Data should have been allocated.");
641 
642  // Try import the output tensor.
643  // Note: We can only import the output pointer if all of the following hold true:
644  // a) The imported pointer is aligned sufficiently
645  // b) The tensor has zero padding
646  // c) There is only one connection to the OutputSlot and it is to an OutputLayer.
647  // d) The output pointer is allocated via malloc. (Other types will be supported in a later release)
648  // e) m_IsExportEnabled must be set to true
649  bool needMemCopy = true;
650  if (m_IsExportEnabled && (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetNumConnections() == 1))
651  {
652  if(layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOwningLayer().GetType() != LayerType::Input)
653  {
654  MemorySourceFlags importFlags = inputTensorHandle->GetImportFlags();
655  if (CheckFlag(importFlags, MemorySource::Malloc))
656  {
657  needMemCopy = false;
658  void *mem = tensorHandle->Map(false);
659  bool importOk = inputTensorHandle->Import(mem, MemorySource::Malloc);
660  tensorHandle->Unmap();
661 
662  if (importOk)
663  {
664  // Insert synchronization workload
665  MemSyncQueueDescriptor syncDesc;
666  syncDesc.m_Inputs.push_back(inputTensorHandle);
667  info.m_InputTensorInfos.push_back(inputTensorInfo);
668  auto syncWorkload = std::make_unique<SyncMemGenericWorkload>(syncDesc, info);
669  ARMNN_ASSERT_MSG(syncWorkload, "No sync workload created");
670  m_OutputQueue.push_back(move(syncWorkload));
671  }
672  else
673  {
674  throw MemoryExportException("EnqueueOutput: Memory Export failed");
675  }
676  }
677  }
678  }
679  if (needMemCopy)
680  {
681  // If we got here then we didn't export the memory, so add an output workload which performs a memcopy.
682  outputQueueDescriptor.m_Inputs.push_back(inputTensorHandle);
683  info.m_InputTensorInfos.push_back(inputTensorInfo);
684 
685  std::unique_ptr<IWorkload> outputWorkload =
686  std::make_unique<CopyMemGenericWorkload>(outputQueueDescriptor, info);
687  ARMNN_ASSERT_MSG(outputWorkload, "No output workload created");
688 
689  std::unique_ptr<TimelineUtilityMethods> timelineUtils =
690  TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService);
691  if (timelineUtils)
692  {
693  // Add Output Workload to the post-optimisation network structure
694  AddWorkloadStructure(timelineUtils, outputWorkload, layer);
695  timelineUtils->Commit();
696  }
697 
698  m_OutputQueue.push_back(move(outputWorkload));
699  }
700 }
701 
702 void LoadedNetwork::AllocateWorkingMemory(std::lock_guard<std::mutex>& lock)
703 {
704  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Working Memory Allocation");
705 
706  // this unused parameter makes sure we can only call this function with a valid lock
707  IgnoreUnused(lock);
708 
709  if (m_IsWorkingMemAllocated)
710  {
711  return;
712  }
713  for (auto&& workloadFactory : m_WorkloadFactories)
714  {
715  IBackendInternal::IMemoryManagerSharedPtr memoryManager = workloadFactory.second.second;
716  if (memoryManager)
717  {
718  memoryManager->Acquire();
719  }
720  }
721  m_TensorHandleFactoryRegistry.AquireMemory();
722  m_IsWorkingMemAllocated = true;
723 }
724 
// Releases working memory previously acquired by AllocateWorkingMemory().
// Takes m_WorkingMemMutex itself; no-op when memory is not currently allocated.
// NOTE(review): the signature line (original line 725) is missing from this
// extraction - presumably void LoadedNetwork::FreeWorkingMemory(); confirm
// against the upstream file.
726 {
727  std::lock_guard<std::mutex> lockGuard(m_WorkingMemMutex);
728  if (!m_IsWorkingMemAllocated)
729  {
730  return;
731  }
732  // Informs the memory managers to release memory in it's respective memory group
733  for (auto&& workloadFactory : m_WorkloadFactories)
734  {
735  IBackendInternal::IMemoryManagerSharedPtr memoryManager = workloadFactory.second.second;
736  if (memoryManager)
737  {
738  memoryManager->Release();
739  }
740  }
741  m_TensorHandleFactoryRegistry.ReleaseMemory();
742  m_IsWorkingMemAllocated = false;
743 }
744 
745 bool LoadedNetwork::Execute(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
746  profiling::ProfilingGuid inferenceGuid)
747 {
748  bool success = true;
749 
750  auto Fail = [&](const std::exception& error)
751  {
752  ARMNN_LOG(error) << "An error occurred attempting to execute a workload: " << error.what();
753  success = false;
754  };
755 
756  try
757  {
758  std::lock_guard<std::mutex> lockGuard(m_WorkingMemMutex);
759  AllocateWorkingMemory(lockGuard);
760 
761  ProfilingDynamicGuid workloadInferenceID(0);
762  auto ExecuteQueue = [&timelineUtils, &workloadInferenceID, &inferenceGuid](WorkloadQueue& queue)
763  {
764  for (auto& workload : queue)
765  {
766  if(timelineUtils)
767  {
768  workloadInferenceID = timelineUtils->RecordWorkloadInferenceAndStartOfLifeEvent(workload->GetGuid(),
769  inferenceGuid);
770  }
771  workload->Execute();
772  if(timelineUtils)
773  {
774  timelineUtils->RecordEndOfLifeEvent(workloadInferenceID);
775  }
776  }
777  };
778 
779  ExecuteQueue(m_InputQueue);
780  ExecuteQueue(m_WorkloadQueue);
781  ExecuteQueue(m_OutputQueue);
782  }
783  catch (const RuntimeException& error)
784  {
785  Fail(error);
786  }
787  catch (const std::runtime_error& error)
788  {
789  Fail(error);
790  }
791 
792  return success;
793 }
794 
// Forwards the debug callback to every workload in the main queue.
// NOTE(review): the signature line (original line 795) is missing from this
// extraction - presumably
// void LoadedNetwork::RegisterDebugCallback(const DebugCallbackFunction& func);
// confirm against the upstream file.
796 {
797  for (auto&& workloadPtr: m_WorkloadQueue)
798  {
799  workloadPtr.get()->RegisterDebugCallback(func);
800  }
801 }
802 
803 }
static ARMNN_DLLEXPORT ProfilingStaticGuid INFERENCE_GUID
virtual bool Import(void *memory, MemorySource source)
Import externally allocated memory.
FactoryFunction GetFactory(const BackendId &id) const
unsigned int GetNumInputSlots() const override
Returns the number of connectable input slots.
Definition: Layer.hpp:313
std::vector< std::unique_ptr< IWorkload > > WorkloadQueue
const bool m_ImportEnabled
Definition: IRuntime.hpp:34
static std::unique_ptr< TimelineUtilityMethods > GetTimelineUtils(ProfilingService &profilingService)
static ProfilerManager & GetInstance()
Definition: Profiling.cpp:489
virtual unsigned int GetImportFlags() const
Get flags describing supported import sources.
Strongly typed guids to distinguish between those generated at runtime, and those that are statically...
Definition: Types.hpp:335
TensorInfo GetInputTensorInfo(LayerBindingId layerId) const
#define ARMNN_LOG(severity)
Definition: Logging.hpp:202
BackendRegistry & BackendRegistryInstance()
std::vector< std::pair< LayerBindingId, class ConstTensor > > InputTensors
Definition: Tensor.hpp:340
unsigned int MemorySourceFlags
size_t GetNumOutputs() const
Definition: Graph.hpp:181
TensorInfo GetOutputTensorInfo(LayerBindingId layerId) const
Copyright (c) 2021 ARM Limited and Contributors.
void IgnoreUnused(Ts &&...)
const std::vector< InputSlot > & GetInputSlots() const
Definition: Layer.hpp:237
std::function< void(LayerGuid guid, unsigned int slotIndex, ITensorHandle *tensorHandle)> DebugCallbackFunction
Define the type of callback for the Debug layer to call.
Definition: Types.hpp:283
unsigned int GetNumOutputSlots() const override
Returns the number of connectable output slots.
Definition: Layer.hpp:314
static ARMNN_DLLEXPORT ProfilingStaticGuid WORKLOAD_GUID
static ARMNN_DLLEXPORT ProfilingStaticGuid ARMNN_PROFILING_EOL_EVENT_CLASS
#define ARMNN_SCOPED_PROFILING_EVENT(backendId, name)
Definition: Profiling.hpp:173
int LayerBindingId
Type of identifiers for bindable layers (inputs, outputs).
Definition: Types.hpp:210
static ARMNN_DLLEXPORT ProfilingStaticGuid ARMNN_PROFILING_SOL_EVENT_CLASS
virtual IWorkloadFactoryPtr CreateWorkloadFactory(const IMemoryManagerSharedPtr &memoryManager=nullptr) const =0
std::vector< TensorInfo > m_InputTensorInfos
static ARMNN_DLLEXPORT ProfilingStaticGuid LAYER_GUID
#define ARMNN_ASSERT_MSG(COND, MSG)
Definition: Assert.hpp:15
bool SupportsTensorAllocatorAPI() const
std::shared_ptr< IMemoryManager > IMemoryManagerSharedPtr
#define ARMNN_SCOPED_HEAP_PROFILING(TAG)
static ARMNN_DLLEXPORT ProfilingStaticGuid EXECUTION_OF_GUID
std::vector< std::pair< LayerBindingId, class Tensor > > OutputTensors
Definition: Tensor.hpp:341
const std::string & GetNameStr() const
Definition: Layer.hpp:220
LayerType GetType() const override
Returns the armnn::LayerType of this layer.
Definition: Layer.hpp:265
Status
enumeration
Definition: Types.hpp:26
const bool m_ExportEnabled
Definition: IRuntime.hpp:35
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14
std::vector< TensorInfo > m_OutputTensorInfos
static bool IsLayerSupported(const BackendId &backendId, const IConnectableLayer &layer, Optional< DataType > dataType, std::string &outReasonIfUnsupported)
#define CHECK_LOCATION()
Definition: Exceptions.hpp:197
const BackendId & GetBackendId() const
Definition: Layer.hpp:269
OutputLayersAccessor GetOutputLayers() const
Returns a wrapper object with begin(), end() methods to iterate over the output layers in a range-bas...
Definition: Graph.hpp:189
static ARMNN_DLLEXPORT ProfilingStaticGuid NETWORK_GUID
Status EnqueueWorkload(const InputTensors &inputTensors, const OutputTensors &outputTensors)
void RegisterProfiler(IProfiler *profiler)
Definition: Profiling.cpp:496
virtual const void * Map(bool blocking=true) const =0
Map the tensor data for access.
profiling::ProfilingGuid GetNetworkGuid()
virtual void Unmap() const =0
Unmap the tensor data.
std::vector< ITensorHandle * > m_Outputs
Base class for all ArmNN exceptions so that users can filter to just those.
Definition: Exceptions.hpp:46
const OutputHandler & GetOutputHandler(unsigned int i=0) const
Definition: Layer.hpp:225
const std::string & Get() const
Definition: BackendId.hpp:136
void RegisterDebugCallback(const DebugCallbackFunction &func)
Contains information about inputs and outputs to a layer.
bool CheckFlag(MemorySourceFlags flags, MemorySource source)
Graph & TopologicalSort()
Sorts layers in topological order and return this.
Definition: Graph.hpp:177
InputLayersAccessor GetInputLayers() const
Returns a wrapper object with begin(), end() methods to iterate over the input layers in a range-base...
Definition: Graph.hpp:185
std::vector< ITensorHandle * > m_Inputs
static ARMNN_DLLEXPORT ProfilingStaticGuid PROCESS_ID_GUID
size_t GetNumLayers() const
Definition: Graph.hpp:191
virtual ARMNN_NO_DEPRECATE_WARN_END IMemoryManagerUniquePtr CreateMemoryManager() const
const TensorInfo & GetTensorInfo(const ITensorHandle *tensorHandle)
float32 helpers
static std::unique_ptr< LoadedNetwork > MakeLoadedNetwork(std::unique_ptr< IOptimizedNetwork > net, std::string &errorMessage, const INetworkProperties &networkProperties, profiling::ProfilingService &profilingService)
size_t GetNumInputs() const
Definition: Graph.hpp:180
static ARMNN_DLLEXPORT ProfilingStaticGuid BACKENDID_GUID
static ARMNN_DLLEXPORT ProfilingStaticGuid CHILD_GUID