ArmNN
 20.08
LoadedNetwork.cpp
Go to the documentation of this file.
1 //
2 // Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 
6 #include "LoadedNetwork.hpp"
7 #include "Layer.hpp"
8 #include "Graph.hpp"
9 #include "Network.hpp"
10 #include <Processes.hpp>
11 #include "Runtime.hpp"
12 #include "Profiling.hpp"
13 #include "HeapProfiling.hpp"
14 
16 #include <armnn/Logging.hpp>
17 #include <armnn/utility/Assert.hpp>
18 
23 
25 
26 #include <boost/format.hpp>
27 
28 namespace armnn
29 {
30 
31 using namespace std;
32 using namespace armnn::profiling;
33 
34 namespace
35 {
36 
/// Builds a human-readable error message by joining the given prefix and the
/// exception's what() text with a single space separator.
template <typename ExceptionType>
std::string ToErrorMessage(const char * prefix, const ExceptionType & error)
{
    std::string message(prefix);
    message += " ";
    message += error.what();
    return message;
}
44 
// Registers 'layer' in the profiling timeline as a named child entity of the
// network, then records a retention-link connection from each layer feeding
// this layer's input slots.
void AddLayerStructure(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
                       const Layer& layer,
                       ProfilingGuid networkGuid)
{
    // Add layer to the post-optimisation network structure
    std::string layerName = layer.GetNameStr().empty() ? "<Unnamed>" : layer.GetNameStr();
    timelineUtils->CreateNamedTypedChildEntity(layer.GetGuid(),
                                               networkGuid,
                                               layerName,
    // NOTE(review): a source line appears truncated here in this view —
    // presumably the entity-type GUID argument and the closing ");" of the
    // CreateNamedTypedChildEntity call. Confirm against upstream source.
    for (auto&& input : layer.GetInputSlots())
    {
        const IOutputSlot* source = input.GetConnectedOutputSlot();
        ARMNN_ASSERT(source != NULL);
        // Link the producing layer to this layer in the timeline.
        timelineUtils->CreateConnectionRelationship(ProfilingRelationshipType::RetentionLink,
                                                    source->GetOwningLayerGuid(),
                                                    layer.GetGuid());
    }
}
64 
// Registers 'workload' in the profiling timeline: creates a typed entity for
// it, labels it with the owning layer's backend id, and links it to the layer.
void AddWorkloadStructure(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
                          std::unique_ptr<IWorkload>& workload,
                          const Layer& layer)
{
    // Add workload to the post-optimisation network structure
    timelineUtils->CreateTypedEntity(workload->GetGuid(), LabelsAndEventClasses::WORKLOAD_GUID);
    timelineUtils->MarkEntityWithLabel(workload->GetGuid(),
                                       layer.GetBackendId().Get(),
    // NOTE(review): a source line appears truncated here in this view —
    // presumably the label-type GUID argument and closing ");" of the
    // MarkEntityWithLabel call. Confirm against upstream source.

    // Link the workload to the layer
    timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
                                      layer.GetGuid(),
                                      workload->GetGuid(),
    // NOTE(review): a source line appears truncated here too — presumably the
    // relationship-type GUID argument and closing ");". Confirm upstream.
}
81 
82 } // anonymous
83 
84 std::unique_ptr<LoadedNetwork> LoadedNetwork::MakeLoadedNetwork(std::unique_ptr<OptimizedNetwork> net,
85  std::string& errorMessage,
86  const INetworkProperties& networkProperties,
87  profiling::ProfilingService& profilingService)
88 {
89  std::unique_ptr<LoadedNetwork> loadedNetwork;
90 
91  auto Fail = [&](const std::exception& error) -> std::unique_ptr<LoadedNetwork>
92  {
93  errorMessage = ToErrorMessage("An error occurred when preparing the network workloads: ", error);
94  ARMNN_LOG(error) << errorMessage;
95 
96  return std::unique_ptr<LoadedNetwork>();
97  };
98 
99  try
100  {
101  loadedNetwork.reset(new LoadedNetwork(std::move(net), networkProperties, profilingService));
102  }
103  catch (const armnn::RuntimeException& error)
104  {
105  return Fail(error);
106  }
107  catch (const armnn::Exception& error)
108  {
109  return Fail(error);
110  }
111  catch (const std::runtime_error& error)
112  {
113  return Fail(error);
114  }
115 
116  return loadedNetwork;
117 }
118 
// Constructs a LoadedNetwork from an optimized graph: creates the backends
// and workload factories, creates tensor handles, records the network in the
// profiling timeline, creates the workloads, and allocates dynamic buffers.
// Throws on any failure (callers go through MakeLoadedNetwork to convert
// exceptions into error messages).
LoadedNetwork::LoadedNetwork(std::unique_ptr<OptimizedNetwork> net,
                             const INetworkProperties& networkProperties,
                             profiling::ProfilingService& profilingService) :
                             m_OptimizedNetwork(std::move(net)),
                             m_IsImportEnabled(networkProperties.m_ImportEnabled),
                             m_IsExportEnabled(networkProperties.m_ExportEnabled),
                             m_TensorHandleFactoryRegistry(),
                             m_ProfilingService(profilingService)
{
    // Create a profiler and register it for the current thread.
    m_Profiler = std::make_shared<Profiler>();
    // NOTE(review): a source line appears missing here in this view —
    // presumably the ProfilerManager registration of m_Profiler for the
    // current thread. Confirm against upstream source.

    Graph& order = m_OptimizedNetwork->GetGraph().TopologicalSort();
    //First create tensor handlers, backends and workload factories.
    //Handlers are created before workloads are.
    //Because workload creation can modify some of the handlers,
    //(for example the splitter and concat layers).
    for (auto&& layer : order)
    {
        auto const& backendId = layer->GetBackendId();
        if (m_Backends.count(backendId) == 0)
        {
            // First time this backend is seen: instantiate it via the registry.
            auto createBackend = BackendRegistryInstance().GetFactory(backendId);
            auto it = m_Backends.emplace(std::make_pair(backendId, createBackend()));

            IBackendInternal* backend = it.first->second.get();

            if (backend->SupportsTensorAllocatorAPI())
            {
                // Tensor-allocator-aware backends get a factory driven by the
                // shared handle factory registry; no per-backend memory manager.
                auto workloadFactory = backend->CreateWorkloadFactory(m_TensorHandleFactoryRegistry);
                m_WorkloadFactories.emplace(
                    std::make_pair(backendId, std::make_pair(std::move(workloadFactory), nullptr)));
            }
            else
            {
                // NOTE(review): a source line appears missing here — the
                // declaration of 'memoryManager' (presumably created by the
                // backend) used below. Confirm against upstream source.
                auto workloadFactory = backend->CreateWorkloadFactory(memoryManager);

                // Keep the memory manager next to its factory so working
                // memory can be acquired/released per backend later.
                m_WorkloadFactories.emplace(
                    std::make_pair(backendId, std::make_pair(std::move(workloadFactory), memoryManager)));
            }
        }
    }

    for (auto&& layer : order)
    {
        auto& workloadFactory = GetWorkloadFactory(*layer);

        switch (layer->GetType())
        {
            case LayerType::Input:
            // NOTE(review): a case label line appears missing here in this
            // view (between Input and the following brace). Confirm upstream.
            {
                // If IsImportEnabled is true then we need to set IsMemoryManaged to false when creating TensorHandles
                layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory, !m_IsImportEnabled);
                break;
            }
            default:
            {
                // Look for the layer with 1 OutputSlot which has 1 connection and that connection is an Output Layer
                // If Export is enabled disable memory management so we can export, otherwise we do a copy
                if((layer->GetNumOutputSlots() == 1) &&
                   (layer->GetOutputSlots()[0].GetNumConnections() == 1) &&
                   (layer->GetOutputSlots()[0].GetConnection(0)->GetOwningLayer().GetType() == LayerType::Output))
                {
                    layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory, !m_IsExportEnabled);
                }
                else
                {
                    layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory);
                }
            }
        }
    }

    // Record the network itself in the profiling timeline (if enabled).
    ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
    std::unique_ptr<TimelineUtilityMethods> timelineUtils =
        TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService);
    if (timelineUtils)
    {
        timelineUtils->CreateTypedEntity(networkGuid, LabelsAndEventClasses::NETWORK_GUID);
        // Mark the network with a start of life event
        timelineUtils->RecordEvent(networkGuid, LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
        // and with the process ID
        int processID = armnnUtils::Processes::GetCurrentId();
        std::stringstream ss;
        ss << processID;
        timelineUtils->MarkEntityWithLabel(networkGuid, ss.str(), LabelsAndEventClasses::PROCESS_ID_GUID);
    }

    //Then create workloads.
    for (auto&& layer : order)
    {
        if (timelineUtils)
        {
            // Add layer to the post-optimisation network structure
            AddLayerStructure(timelineUtils, *layer, networkGuid);
        }

        const IWorkloadFactory& workloadFactory = GetWorkloadFactory(*layer);

        switch (layer->GetType())
        {
            case LayerType::Input:
            case LayerType::Output:
            {
                // Inputs and outputs are treated in a special way - see EnqueueInput() and EnqueueOutput().
                break;
            }
            default:
            {
                auto workload = layer->CreateWorkload(workloadFactory);

                if (!workload)
                {
                    const char* const layerName =
                        layer->GetNameStr().length() != 0 ? layer->GetName() : "<Unnamed>";
                    throw InvalidArgumentException(boost::str(
                        boost::format("No workload created for layer (name: '%1%' type: '%2%') (compute '%3%')")
                        % layerName % static_cast<int>(layer->GetType()) % layer->GetBackendId().Get()
                    ));
                }

                if (timelineUtils)
                {
                    // Add workload to the post-optimisation network structure
                    AddWorkloadStructure(timelineUtils, workload, *layer);
                }

                m_WorkloadQueue.push_back(move(workload));
                // release the constant data in the layer..
                layer->ReleaseConstantData();
                break;
            }
        }
    }

    if (timelineUtils)
    {
        // Commit to send the post-optimisation network structure
        timelineUtils->Commit();
    }

    // Set up memory.
    m_OptimizedNetwork->GetGraph().AllocateDynamicBuffers();

    // Now that the intermediate tensor memory has been set-up, do any post allocation configuration for each workload.
    for (auto& workload : m_WorkloadQueue)
    {
        workload->PostAllocationConfigure();
    }
}
272 
// NOTE(review): the function signature line is missing from this view —
// presumably "void LoadedNetwork::SendNetworkStructure()". Confirm upstream.
// Re-sends the whole post-optimisation network structure (network entity,
// layers, layer connections and workloads) to the profiling timeline.
{
    Graph& order = m_OptimizedNetwork->GetGraph().TopologicalSort();
    ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();

    std::unique_ptr<TimelineUtilityMethods> timelineUtils =
        TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService);

    timelineUtils->CreateTypedEntity(networkGuid, LabelsAndEventClasses::NETWORK_GUID);

    for (auto&& layer : order)
    {
        // Add layer to the post-optimisation network structure
        AddLayerStructure(timelineUtils, *layer, networkGuid);
        switch (layer->GetType())
        {
            case LayerType::Input:
            case LayerType::Output:
            {
                // Inputs and outputs are treated in a special way - see EnqueueInput() and EnqueueOutput().
                break;
            }
            default:
            {
                // NOTE(review): this links every workload in the queue to every
                // non-input/output layer — looks over-broad; verify intent upstream.
                for (auto& workload : m_WorkloadQueue)
                {
                    // Add workload to the post-optimisation network structure
                    AddWorkloadStructure(timelineUtils, workload, *layer);
                }
                break;
            }
        }
    }
    // Commit to send the post-optimisation network structure
    timelineUtils->Commit();
}
309 
// NOTE(review): the signature line is missing from this view — presumably
// "profiling::ProfilingGuid LoadedNetwork::GetNetworkGuid()". Confirm upstream.
// Returns the profiling GUID of the underlying optimized network.
{
    return m_OptimizedNetwork->GetGuid();
}
314 
// NOTE(review): the signature line is missing from this view — presumably
// "TensorInfo LoadedNetwork::GetInputTensorInfo(LayerBindingId layerId) const".
// Returns the TensorInfo of the input layer bound to 'layerId'; throws
// InvalidArgumentException if no input layer has that binding id.
{
    for (auto&& inputLayer : m_OptimizedNetwork->GetGraph().GetInputLayers())
    {
        ARMNN_ASSERT_MSG(inputLayer->GetNumOutputSlots() == 1, "Input layer should have exactly 1 output slot");
        if (inputLayer->GetBindingId() == layerId)
        {
            return inputLayer->GetOutputSlot(0).GetTensorInfo();
        }
    }

    throw InvalidArgumentException(boost::str(boost::format("No input layer is associated with id %1%") % layerId));
}
328 
// NOTE(review): the signature line is missing from this view — presumably
// "TensorInfo LoadedNetwork::GetOutputTensorInfo(LayerBindingId layerId) const".
// Returns the TensorInfo of the output layer bound to 'layerId' (taken from
// its connected producer); throws InvalidArgumentException if not found.
{
    for (auto&& outputLayer : m_OptimizedNetwork->GetGraph().GetOutputLayers())
    {
        ARMNN_ASSERT_MSG(outputLayer->GetNumInputSlots() == 1, "Output layer should have exactly 1 input slot");
        ARMNN_ASSERT_MSG(outputLayer->GetInputSlot(0).GetConnection(), "Input slot on Output layer must be connected");
        if (outputLayer->GetBindingId() == layerId)
        {
            return outputLayer->GetInputSlot(0).GetConnection()->GetTensorInfo();
        }
    }

    throw InvalidArgumentException(boost::str(boost::format("No output layer is associated with id %1%") % layerId));
}
343 
344 const IWorkloadFactory& LoadedNetwork::GetWorkloadFactory(const Layer& layer) const
345 {
346  const IWorkloadFactory* workloadFactory = nullptr;
347 
348  auto it = m_WorkloadFactories.find(layer.GetBackendId());
349  if (it == m_WorkloadFactories.end())
350  {
351  throw RuntimeException(
352  boost::str(
353  boost::format("No workload factory for %1% to be used for layer: %2%")
354  % layer.GetBackendId().Get()
355  % layer.GetNameStr()),
356  CHECK_LOCATION());
357  }
358 
359  workloadFactory = it->second.first.get();
360 
361  ARMNN_ASSERT_MSG(workloadFactory, "No workload factory");
362 
363  std::string reasonIfUnsupported;
364  ARMNN_ASSERT_MSG(IWorkloadFactory::IsLayerSupported(layer, {}, reasonIfUnsupported),
365  "Factory does not support layer");
366  IgnoreUnused(reasonIfUnsupported);
367  return *workloadFactory;
368 }
369 
370 namespace {
371 
372 // Non-copyable class owning accelerator-specific tensor data.
373 class TensorPin
374 {
375 public:
376  TensorPin(std::unique_ptr<ITensorHandle> handle, const TensorInfo& info, LayerBindingId id)
377  : m_TensorHandle(std::move(handle))
378  , m_TensorInfo(info)
379  , m_Id(id)
380  {
381  }
382 
383  ITensorHandle* GetTensorHandle() const { return m_TensorHandle.get(); }
384  const TensorInfo& GetTensorInfo() const { return m_TensorInfo; }
385  LayerBindingId GetBindingId() const { return m_Id; }
386 
387 private:
388  std::unique_ptr<ITensorHandle> m_TensorHandle;
389  TensorInfo m_TensorInfo;
390  LayerBindingId m_Id;
391 };
392 
393 static const TensorPin& GetTensorPin(LayerBindingId id,
394  const std::vector<TensorPin>& pins,
395  char const* bindingPointDesc)
396 {
397  auto it = std::find_if(pins.begin(), pins.end(),
398  [id](const TensorPin& pin)
399  {
400  return pin.GetBindingId() == id;
401  });
402 
403  if (it != pins.end())
404  {
405  return *it;
406  }
407  else
408  {
409  throw InvalidArgumentException(boost::str(
410  boost::format("No tensor supplied for %1% %2%") % bindingPointDesc % id));
411  }
412 }
413 
414 // Stores data that needs to be kept accessible for the entire execution of a workload.
415 class WorkloadData
416 {
417 public:
418  WorkloadData(const InputTensors& inputTensors, const OutputTensors& outputTensors)
419  {
420  m_InputTensorPins.reserve(inputTensors.size());
421  m_OutputTensorPins.reserve(outputTensors.size());
422 
423  for (auto inputTensorPair : inputTensors)
424  {
425  auto inputTensor = inputTensorPair.second;
426 
427  std::unique_ptr<ITensorHandle> tensorHandle =
428  std::make_unique<ConstPassthroughCpuTensorHandle>(inputTensor.GetInfo(),inputTensor.GetMemoryArea());
429  LayerBindingId layerId = inputTensorPair.first;
430 
431  m_InputTensorPins.emplace_back(std::move(tensorHandle), inputTensor.GetInfo(), layerId);
432  }
433 
434  for (auto outputTensorPair : outputTensors)
435  {
436  auto outputTensor = outputTensorPair.second;
437 
438  std::unique_ptr<ITensorHandle> tensorHandle =
439  std::make_unique<PassthroughCpuTensorHandle>(outputTensor.GetInfo(), outputTensor.GetMemoryArea());
440  LayerBindingId layerId = outputTensorPair.first;
441 
442  m_OutputTensorPins.emplace_back(std::move(tensorHandle), outputTensor.GetInfo(), layerId);
443  }
444  }
445 
446  const TensorPin& GetInputTensorPin(LayerBindingId id) const
447  {
448  return GetTensorPin(id, m_InputTensorPins, "input");
449  }
450 
451  const TensorPin& GetOutputTensorPin(LayerBindingId id) const
452  {
453  return GetTensorPin(id, m_OutputTensorPins, "output");
454  }
455 
456 private:
457 
458  std::vector<TensorPin> m_InputTensorPins;
459  std::vector<TensorPin> m_OutputTensorPins;
460 };
461 
462 }
463 
// NOTE(review): the first line of the signature is missing from this view —
// presumably "Status LoadedNetwork::EnqueueWorkload(const InputTensors& inputTensors,".
// Binds the user-supplied tensors to the network's input/output layers,
// records the inference in the profiling timeline, runs all workload queues,
// and returns Success/Failure.
                                 const OutputTensors& outputTensors)
{
    const Graph& graph = m_OptimizedNetwork->GetGraph();

    // Walk graph to determine the order of execution.
    if (graph.GetNumLayers() < 2)
    {
        ARMNN_LOG(warning) << "IRuntime::EnqueueWorkload()::Less than two nodes in graph";
        return Status::Failure;
    }

    // Data that must be kept alive for the entire execution of the workload.
    WorkloadData workloadData(inputTensors, outputTensors);

    if (graph.GetNumInputs() != inputTensors.size())
    {
        throw InvalidArgumentException("Number of inputs provided does not match network.");
    }

    // For each input to the network, call EnqueueInput with the data passed by the user.
    {
        // NOTE(review): a scoped-profiling-event line appears missing here.
        m_InputQueue.clear();
        m_InputQueue.reserve(graph.GetNumInputs());
        for (const BindableLayer* inputLayer : graph.GetInputLayers())
        {
            const TensorPin& pin = workloadData.GetInputTensorPin(inputLayer->GetBindingId());
            EnqueueInput(*inputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
        }
    }

    // For each output to the network, call EnqueueOutput with the data passed by the user.
    {
        // NOTE(review): a scoped-profiling-event line appears missing here.
        m_OutputQueue.clear();
        m_OutputQueue.reserve(graph.GetNumOutputs());
        for (const BindableLayer* outputLayer : graph.GetOutputLayers())
        {
            const TensorPin& pin = workloadData.GetOutputTensorPin(outputLayer->GetBindingId());
            EnqueueOutput(*outputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
        }
    }

    std::unique_ptr<TimelineUtilityMethods> timelineUtils =
        TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService);
    ProfilingGuid inferenceGuid = m_ProfilingService.GetNextGuid();
    if (timelineUtils)
    {
        // Add inference timeline trace if profiling is enabled.
        ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
        timelineUtils->CreateTypedEntity(inferenceGuid, LabelsAndEventClasses::INFERENCE_GUID);
        timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
                                          networkGuid,
                                          inferenceGuid,
        // NOTE(review): a source line appears truncated here — presumably the
        // relationship-type GUID argument and closing ");". Confirm upstream.
        timelineUtils->RecordEvent(inferenceGuid, LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
    }

    bool executionSucceeded = true;

    {
        if (m_ProfilingService.IsProfilingEnabled())
        {
            m_ProfilingService.IncrementCounterValue(armnn::profiling::INFERENCES_RUN);
        }
        // NOTE(review): a scoped-profiling-event line appears missing here.
        ARMNN_SCOPED_HEAP_PROFILING("Executing");
        executionSucceeded = Execute(timelineUtils, inferenceGuid);
    }

    if (timelineUtils)
    {
        // Add end of life of the inference timeline if profiling is enabled.
        timelineUtils->RecordEvent(inferenceGuid, LabelsAndEventClasses::ARMNN_PROFILING_EOL_EVENT_CLASS);
        timelineUtils->Commit();
    }
    return executionSucceeded ? Status::Success : Status::Failure;
}
543 
// Connects one user-supplied input tensor to the network: either imports the
// user's memory directly into the layer's output handle (zero-copy, when
// import is enabled and supported) or queues a memcopy workload.
// Throws InvalidArgumentException on bad arguments, MemoryImportException if
// import was requested but failed.
void LoadedNetwork::EnqueueInput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo)
{
    if (layer.GetType() != LayerType::Input)
    {
        throw InvalidArgumentException("EnqueueInput: given layer not an InputLayer");
    }

    if (tensorHandle == nullptr)
    {
        throw InvalidArgumentException("EnqueueInput: tensorHandle must not be NULL");
    }

    InputQueueDescriptor inputQueueDescriptor;
    // NOTE(review): a source line appears missing here — presumably the
    // declaration of 'info' (WorkloadInfo) used below. Confirm upstream.

    inputQueueDescriptor.m_Inputs.push_back(tensorHandle);
    info.m_InputTensorInfos.push_back(tensorInfo);

    ARMNN_ASSERT_MSG(layer.GetNumOutputSlots() == 1, "Can only handle Input Layer with one output");
    const OutputHandler& handler = layer.GetOutputHandler();
    const TensorInfo& outputTensorInfo = handler.GetTensorInfo();
    ITensorHandle* outputTensorHandle = handler.GetData();
    ARMNN_ASSERT_MSG(outputTensorHandle != nullptr,
                     "Data should have been allocated.");
    inputQueueDescriptor.m_Outputs.push_back(outputTensorHandle);
    info.m_OutputTensorInfos.push_back(outputTensorInfo);

    MemorySourceFlags importFlags = outputTensorHandle->GetImportFlags();
    if (m_IsImportEnabled) // Try import the input tensor
    {
        if(CheckFlag(importFlags, MemorySource::Malloc) )
        {
            // This assumes a CPU Tensor handle
            void* mem = tensorHandle->Map(false);
            if (outputTensorHandle->Import(mem, MemorySource::Malloc))
            {
                tensorHandle->Unmap();
                return; // No need for a workload since the import has been done.
            }
            // Unmap before throwing so the user's buffer is not left mapped.
            tensorHandle->Unmap();
            throw MemoryImportException("EnqueueInput: Memory Import failed");
        }
        else
        {
            throw MemoryImportException("EnqueueInput: Memory Import failed, backend does not support Import");
        }
    }
    else
    {
        // Create a mem copy workload for input since we did not import
        std::unique_ptr<IWorkload> inputWorkload = std::make_unique<CopyMemGenericWorkload>(inputQueueDescriptor, info);

        ARMNN_ASSERT_MSG(inputWorkload, "No input workload created");

        std::unique_ptr<TimelineUtilityMethods> timelineUtils =
            TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService);
        if (timelineUtils)
        {
            // Add Input Workload to the post-optimisation network structure
            AddWorkloadStructure(timelineUtils, inputWorkload, layer);
            timelineUtils->Commit();
        }

        m_InputQueue.push_back(move(inputWorkload));
    }
}
610 
// Connects one user-supplied output tensor to the network: either exports the
// user's memory into the producing layer's output handle (zero-copy, guarded
// by the conditions listed below) or queues a memcopy workload.
// Throws InvalidArgumentException on bad arguments, MemoryExportException if
// export was requested but failed.
void LoadedNetwork::EnqueueOutput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo)
{
    if (layer.GetType() != LayerType::Output)
    {
        throw InvalidArgumentException("EnqueueOutput: given layer not an OutputLayer");
    }

    if (tensorHandle == nullptr)
    {
        throw InvalidArgumentException("EnqueueOutput: tensorHandle must not be NULL");
    }

    OutputQueueDescriptor outputQueueDescriptor;
    // NOTE(review): a source line appears missing here — presumably the
    // declaration of 'info' (WorkloadInfo) used below. Confirm upstream.

    outputQueueDescriptor.m_Outputs.push_back(tensorHandle);
    info.m_OutputTensorInfos.push_back(tensorInfo);

    ARMNN_ASSERT_MSG(layer.GetNumInputSlots() == 1, "Output Layer should have exactly one input.");

    // Gets the output handler from the previous node.
    const OutputHandler& outputHandler = layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOutputHandler();

    const TensorInfo& inputTensorInfo = outputHandler.GetTensorInfo();
    ITensorHandle* inputTensorHandle = outputHandler.GetData();
    ARMNN_ASSERT_MSG(inputTensorHandle != nullptr, "Data should have been allocated.");

    // Try import the output tensor.
    // Note: We can only import the output pointer if all of the following hold true:
    // a) The imported pointer is aligned sufficiently
    // b) The tensor has zero padding
    // c) There is only one connection to the OutputSlot and it is to an OutputLayer.
    // d) The output pointer is allocated via malloc. (Other types will be supported in a later release)
    // e) m_IsExportEnabled must be set to true
    if (m_IsExportEnabled && (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetNumConnections() == 1))
    {
        if(layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOwningLayer().GetType() != LayerType::Input)
        {
            MemorySourceFlags importFlags = inputTensorHandle->GetImportFlags();
            if (CheckFlag(importFlags, MemorySource::Malloc))
            {
                void *mem = tensorHandle->Map(false);
                bool importOk = inputTensorHandle->Import(mem, MemorySource::Malloc);
                tensorHandle->Unmap();

                if (importOk)
                {
                    // Insert synchronization workload
                    MemSyncQueueDescriptor syncDesc;
                    syncDesc.m_Inputs.push_back(inputTensorHandle);
                    info.m_InputTensorInfos.push_back(inputTensorInfo);
                    auto syncWorkload = std::make_unique<SyncMemGenericWorkload>(syncDesc, info);
                    ARMNN_ASSERT_MSG(syncWorkload, "No sync workload created");
                    m_OutputQueue.push_back(move(syncWorkload));
                }
                else
                {
                    throw MemoryExportException("EnqueueOutput: Memory Export failed");
                }
            }
            else
            {
                throw MemoryExportException("EnqueueOutput: Memory Export failed, backend does not support Export");
            }
        }
        else
        {
            throw MemoryExportException("EnqueueOutput: Memory Export failed, attempting to export Input Layer");
        }
    }
    else
    {
        // If we got here then we didn't export the memory, so add an output workload which performs a memcopy.
        outputQueueDescriptor.m_Inputs.push_back(inputTensorHandle);
        info.m_InputTensorInfos.push_back(inputTensorInfo);

        std::unique_ptr<IWorkload> outputWorkload =
            std::make_unique<CopyMemGenericWorkload>(outputQueueDescriptor, info);
        ARMNN_ASSERT_MSG(outputWorkload, "No output workload created");

        std::unique_ptr<TimelineUtilityMethods> timelineUtils =
            TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService);
        if (timelineUtils)
        {
            // Add Output Workload to the post-optimisation network structure
            AddWorkloadStructure(timelineUtils, outputWorkload, layer);
            timelineUtils->Commit();
        }

        m_OutputQueue.push_back(move(outputWorkload));
    }
}
703 
704 void LoadedNetwork::AllocateWorkingMemory(std::lock_guard<std::mutex>& lock)
705 {
706  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Working Memory Allocation");
707 
708  // this unused parameter makes sure we can only call this function with a valid lock
709  IgnoreUnused(lock);
710 
711  if (m_IsWorkingMemAllocated)
712  {
713  return;
714  }
715  for (auto&& workloadFactory : m_WorkloadFactories)
716  {
717  IBackendInternal::IMemoryManagerSharedPtr memoryManager = workloadFactory.second.second;
718  if (memoryManager)
719  {
720  memoryManager->Acquire();
721  }
722  }
723  m_TensorHandleFactoryRegistry.AquireMemory();
724  m_IsWorkingMemAllocated = true;
725 }
726 
// NOTE(review): the signature line is missing from this view — presumably
// "void LoadedNetwork::FreeWorkingMemory()". Confirm against upstream source.
// Releases working memory back to every backend memory manager and the tensor
// handle factory registry. Takes the working-memory mutex itself (unlike
// AllocateWorkingMemory, which requires the caller to hold it).
{
    std::lock_guard<std::mutex> lockGuard(m_WorkingMemMutex);
    if (!m_IsWorkingMemAllocated)
    {
        return;
    }
    // Informs the memory managers to release memory in it's respective memory group
    for (auto&& workloadFactory : m_WorkloadFactories)
    {
        IBackendInternal::IMemoryManagerSharedPtr memoryManager = workloadFactory.second.second;
        if (memoryManager)
        {
            memoryManager->Release();
        }
    }
    m_TensorHandleFactoryRegistry.ReleaseMemory();
    m_IsWorkingMemAllocated = false;
}
746 
747 bool LoadedNetwork::Execute(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
748  profiling::ProfilingGuid inferenceGuid)
749 {
750  bool success = true;
751 
752  auto Fail = [&](const std::exception& error)
753  {
754  ARMNN_LOG(error) << "An error occurred attempting to execute a workload: " << error.what();
755  success = false;
756  };
757 
758  try
759  {
760  std::lock_guard<std::mutex> lockGuard(m_WorkingMemMutex);
761  AllocateWorkingMemory(lockGuard);
762 
763  ProfilingDynamicGuid workloadInferenceID(0);
764  auto ExecuteQueue = [&timelineUtils, &workloadInferenceID, &inferenceGuid](WorkloadQueue& queue)
765  {
766  for (auto& workload : queue)
767  {
768  if(timelineUtils)
769  {
770  workloadInferenceID = timelineUtils->RecordWorkloadInferenceAndStartOfLifeEvent(workload->GetGuid(),
771  inferenceGuid);
772  }
773  workload->Execute();
774  if(timelineUtils)
775  {
776  timelineUtils->RecordEndOfLifeEvent(workloadInferenceID);
777  }
778  }
779  };
780 
781  ExecuteQueue(m_InputQueue);
782  ExecuteQueue(m_WorkloadQueue);
783  ExecuteQueue(m_OutputQueue);
784  }
785  catch (const RuntimeException& error)
786  {
787  Fail(error);
788  }
789  catch (const std::runtime_error& error)
790  {
791  Fail(error);
792  }
793 
794  return success;
795 }
796 
// NOTE(review): the signature line is missing from this view — presumably
// "void LoadedNetwork::RegisterDebugCallback(const DebugCallbackFunction& func)".
// Forwards the debug callback to every workload in the execution queue.
{
    for (auto&& workloadPtr: m_WorkloadQueue)
    {
        workloadPtr.get()->RegisterDebugCallback(func);
    }
}
804 
805 }
static ARMNN_DLLEXPORT ProfilingStaticGuid INFERENCE_GUID
virtual bool Import(void *memory, MemorySource source)
Import externally allocated memory.
FactoryFunction GetFactory(const BackendId &id) const
unsigned int GetNumInputSlots() const override
Returns the number of connectable input slots.
Definition: Layer.hpp:309
std::vector< std::unique_ptr< IWorkload > > WorkloadQueue
const bool m_ImportEnabled
Definition: IRuntime.hpp:33
void RegisterProfiler(Profiler *profiler)
Definition: Profiling.cpp:493
static std::unique_ptr< TimelineUtilityMethods > GetTimelineUtils(ProfilingService &profilingService)
static ProfilerManager & GetInstance()
Definition: Profiling.cpp:486
virtual unsigned int GetImportFlags() const
Get flags describing supported import sources.
Strongly typed guids to distinguish between those generated at runtime, and those that are statically...
Definition: Types.hpp:319
TensorInfo GetInputTensorInfo(LayerBindingId layerId) const
#define ARMNN_LOG(severity)
Definition: Logging.hpp:163
BackendRegistry & BackendRegistryInstance()
std::vector< std::pair< LayerBindingId, class ConstTensor > > InputTensors
Definition: Tensor.hpp:324
unsigned int MemorySourceFlags
size_t GetNumOutputs() const
Definition: Graph.hpp:182
TensorInfo GetOutputTensorInfo(LayerBindingId layerId) const
Copyright (c) 2020 ARM Limited.
void IgnoreUnused(Ts &&...)
const std::vector< InputSlot > & GetInputSlots() const
Definition: Layer.hpp:233
std::function< void(LayerGuid guid, unsigned int slotIndex, ITensorHandle *tensorHandle)> DebugCallbackFunction
Define the type of callback for the Debug layer to call.
Definition: Types.hpp:267
unsigned int GetNumOutputSlots() const override
Returns the number of connectable output slots.
Definition: Layer.hpp:310
static ARMNN_DLLEXPORT ProfilingStaticGuid WORKLOAD_GUID
static ARMNN_DLLEXPORT ProfilingStaticGuid ARMNN_PROFILING_EOL_EVENT_CLASS
#define ARMNN_SCOPED_PROFILING_EVENT(backendId, name)
Definition: Profiling.hpp:169
int LayerBindingId
Type of identifiers for bindable layers (inputs, outputs).
Definition: Types.hpp:194
static ARMNN_DLLEXPORT ProfilingStaticGuid ARMNN_PROFILING_SOL_EVENT_CLASS
virtual IWorkloadFactoryPtr CreateWorkloadFactory(const IMemoryManagerSharedPtr &memoryManager=nullptr) const =0
std::vector< TensorInfo > m_InputTensorInfos
static ARMNN_DLLEXPORT ProfilingStaticGuid LAYER_GUID
#define ARMNN_ASSERT_MSG(COND, MSG)
Definition: Assert.hpp:15
bool SupportsTensorAllocatorAPI() const
std::shared_ptr< IMemoryManager > IMemoryManagerSharedPtr
#define ARMNN_SCOPED_HEAP_PROFILING(TAG)
static ARMNN_DLLEXPORT ProfilingStaticGuid EXECUTION_OF_GUID
std::vector< std::pair< LayerBindingId, class Tensor > > OutputTensors
Definition: Tensor.hpp:325
const std::string & GetNameStr() const
Definition: Layer.hpp:216
Status
enumeration
Definition: Types.hpp:26
const bool m_ExportEnabled
Definition: IRuntime.hpp:34
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14
std::vector< TensorInfo > m_OutputTensorInfos
static bool IsLayerSupported(const BackendId &backendId, const IConnectableLayer &layer, Optional< DataType > dataType, std::string &outReasonIfUnsupported)
#define CHECK_LOCATION()
Definition: Exceptions.hpp:197
const BackendId & GetBackendId() const
Definition: Layer.hpp:265
OutputLayersAccessor GetOutputLayers() const
Returns a wrapper object with begin(), end() methods to iterate over the output layers in a range-bas...
Definition: Graph.hpp:190
static ARMNN_DLLEXPORT ProfilingStaticGuid NETWORK_GUID
Status EnqueueWorkload(const InputTensors &inputTensors, const OutputTensors &outputTensors)
virtual const void * Map(bool blocking=true) const =0
Map the tensor data for access.
profiling::ProfilingGuid GetNetworkGuid()
virtual void Unmap() const =0
Unmap the tensor data.
std::vector< ITensorHandle * > m_Outputs
Base class for all ArmNN exceptions so that users can filter to just those.
Definition: Exceptions.hpp:46
const OutputHandler & GetOutputHandler(unsigned int i=0) const
Definition: Layer.hpp:221
const std::string & Get() const
Definition: BackendId.hpp:136
void RegisterDebugCallback(const DebugCallbackFunction &func)
LayerType GetType() const
Definition: Layer.hpp:261
Contains information about inputs and outputs to a layer.
bool CheckFlag(MemorySourceFlags flags, MemorySource source)
Graph & TopologicalSort()
Sorts layers in topological order and return this.
Definition: Graph.hpp:178
InputLayersAccessor GetInputLayers() const
Returns a wrapper object with begin(), end() methods to iterate over the input layers in a range-base...
Definition: Graph.hpp:186
std::vector< ITensorHandle * > m_Inputs
static ARMNN_DLLEXPORT ProfilingStaticGuid PROCESS_ID_GUID
size_t GetNumLayers() const
Definition: Graph.hpp:192
virtual ARMNN_NO_DEPRECATE_WARN_END IMemoryManagerUniquePtr CreateMemoryManager() const
const TensorInfo & GetTensorInfo(const ITensorHandle *tensorHandle)
float32 helpers
size_t GetNumInputs() const
Definition: Graph.hpp:181
static ARMNN_DLLEXPORT ProfilingStaticGuid BACKENDID_GUID
static std::unique_ptr< LoadedNetwork > MakeLoadedNetwork(std::unique_ptr< OptimizedNetwork > net, std::string &errorMessage, const INetworkProperties &networkProperties, profiling::ProfilingService &profilingService)
static ARMNN_DLLEXPORT ProfilingStaticGuid CHILD_GUID