ArmNN
 20.05
LoadedNetwork.cpp
Go to the documentation of this file.
1 //
2 // Copyright © 2017 Arm Ltd. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 
6 #include "LoadedNetwork.hpp"
7 #include "Layer.hpp"
8 #include "Graph.hpp"
9 #include "Network.hpp"
10 #include "Runtime.hpp"
11 #include "Profiling.hpp"
12 #include "HeapProfiling.hpp"
13 
15 #include <armnn/Logging.hpp>
16 #include <armnn/utility/Assert.hpp>
17 
22 
24 
25 #include <boost/format.hpp>
26 
27 namespace armnn
28 {
29 
30 using namespace std;
31 using namespace armnn::profiling;
32 
33 namespace
34 {
35 
// Builds a single diagnostic string of the form "<prefix> <error.what()>".
// ExceptionType only needs to expose a what() member returning a C string.
template <typename ExceptionType>
std::string ToErrorMessage(const char * prefix, const ExceptionType & error)
{
    return std::string(prefix) + " " + error.what();
}
43 
44 void AddLayerStructure(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
45  const Layer& layer,
46  ProfilingGuid networkGuid)
47 {
48  // Add layer to the post-optimisation network structure
49  std::string layerName = layer.GetNameStr().empty() ? "<Unnamed>" : layer.GetNameStr();
50  timelineUtils->CreateNamedTypedChildEntity(layer.GetGuid(),
51  networkGuid,
52  layerName,
54  for (auto&& input : layer.GetInputSlots())
55  {
56  const IOutputSlot* source = input.GetConnectedOutputSlot();
57  ARMNN_ASSERT(source != NULL);
58  timelineUtils->CreateConnectionRelationship(ProfilingRelationshipType::RetentionLink,
59  source->GetOwningLayerGuid(),
60  layer.GetGuid());
61  }
62 }
63 
64 void AddWorkloadStructure(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
65  std::unique_ptr<IWorkload>& workload,
66  const Layer& layer)
67 {
68  // Add workload to the post-optimisation network structure
69  timelineUtils->CreateTypedEntity(workload->GetGuid(), LabelsAndEventClasses::WORKLOAD_GUID);
70  timelineUtils->MarkEntityWithLabel(workload->GetGuid(),
71  layer.GetBackendId().Get(),
73 
74  // Link the workload to the layer
75  timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
76  layer.GetGuid(),
77  workload->GetGuid());
78 
79 }
80 
81 } // anonymous
82 
83 std::unique_ptr<LoadedNetwork> LoadedNetwork::MakeLoadedNetwork(std::unique_ptr<OptimizedNetwork> net,
84  std::string& errorMessage,
85  const INetworkProperties& networkProperties,
87 {
88  std::unique_ptr<LoadedNetwork> loadedNetwork;
89 
90  auto Fail = [&](const std::exception& error) -> std::unique_ptr<LoadedNetwork>
91  {
92  errorMessage = ToErrorMessage("An error occurred when preparing the network workloads: ", error);
93  ARMNN_LOG(error) << errorMessage;
94 
95  return std::unique_ptr<LoadedNetwork>();
96  };
97 
98  try
99  {
100  loadedNetwork.reset(new LoadedNetwork(std::move(net), networkProperties, profilingService));
101  }
102  catch (const armnn::RuntimeException& error)
103  {
104  return Fail(error);
105  }
106  catch (const armnn::Exception& error)
107  {
108  return Fail(error);
109  }
110  catch (const std::runtime_error& error)
111  {
112  return Fail(error);
113  }
114 
115  return loadedNetwork;
116 }
117 
118 LoadedNetwork::LoadedNetwork(std::unique_ptr<OptimizedNetwork> net,
119  const INetworkProperties& networkProperties,
121  m_OptimizedNetwork(std::move(net)),
122  m_IsImportEnabled(networkProperties.m_ImportEnabled),
123  m_IsExportEnabled(networkProperties.m_ExportEnabled),
124  m_ProfilingService(profilingService)
125 {
126  // Create a profiler and register it for the current thread.
127  m_Profiler = std::make_shared<Profiler>();
129 
130  Graph& order = m_OptimizedNetwork->GetGraph().TopologicalSort();
131  //First create tensor handlers, backends and workload factories.
132  //Handlers are created before workloads are.
133  //Because workload creation can modify some of the handlers,
134  //(for example the splitter and concat layers).
135  for (auto&& layer : order)
136  {
137  auto const& backendId = layer->GetBackendId();
138  if (m_Backends.count(backendId) == 0)
139  {
140  auto createBackend = BackendRegistryInstance().GetFactory(backendId);
141  auto it = m_Backends.emplace(std::make_pair(backendId, createBackend()));
142 
143  IBackendInternal* backend = it.first->second.get();
144 
145  if (backend->SupportsTensorAllocatorAPI())
146  {
147  backend->RegisterTensorHandleFactories(m_TensorHandleFactoryRegistry);
148 
149  auto workloadFactory = backend->CreateWorkloadFactory(m_TensorHandleFactoryRegistry);
150  m_WorkloadFactories.emplace(
151  std::make_pair(backendId, std::make_pair(std::move(workloadFactory), nullptr)));
152  }
153  else
154  {
156  auto workloadFactory = backend->CreateWorkloadFactory(memoryManager);
157 
158  m_WorkloadFactories.emplace(
159  std::make_pair(backendId, std::make_pair(std::move(workloadFactory), memoryManager)));
160  }
161  }
162  }
163 
164  for (auto&& layer : order)
165  {
166  auto& workloadFactory = GetWorkloadFactory(*layer);
167 
168  switch (layer->GetType())
169  {
170  case LayerType::Input:
171  {
172  // If IsImportEnabled is true then we need to set IsMemoryManaged to false when creating TensorHandles
173  layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory, !m_IsImportEnabled);
174  break;
175  }
176  default:
177  {
178  // Look for the layer with 1 OutputSlot which has 1 connection and that connection is an Output Layer
179  // If Export is enabled disable memory management so we can export, otherwise we do a copy
180  if((layer->GetNumOutputSlots() == 1) &&
181  (layer->GetOutputSlots()[0].GetNumConnections() == 1) &&
182  (layer->GetOutputSlots()[0].GetConnection(0)->GetOwningLayer().GetType() == LayerType::Output))
183  {
184  layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory, !m_IsExportEnabled);
185  }
186  else
187  {
188  layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory);
189  }
190  }
191  }
192  }
193 
194  ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
195  std::unique_ptr<TimelineUtilityMethods> timelineUtils =
196  TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService);
197  if (timelineUtils)
198  {
199  timelineUtils->CreateTypedEntity(networkGuid, LabelsAndEventClasses::NETWORK_GUID);
200  }
201 
202  //Then create workloads.
203  for (auto&& layer : order)
204  {
205  if (timelineUtils)
206  {
207  // Add layer to the post-optimisation network structure
208  AddLayerStructure(timelineUtils, *layer, networkGuid);
209  }
210 
211  const IWorkloadFactory& workloadFactory = GetWorkloadFactory(*layer);
212 
213  switch (layer->GetType())
214  {
215  case LayerType::Input:
216  case LayerType::Output:
217  {
218  // Inputs and outputs are treated in a special way - see EnqueueInput() and EnqueueOutput().
219  break;
220  }
221  default:
222  {
223  auto workload = layer->CreateWorkload(workloadFactory);
224 
225  if (!workload)
226  {
227  const char* const layerName =
228  layer->GetNameStr().length() != 0 ? layer->GetName() : "<Unnamed>";
229  throw InvalidArgumentException(boost::str(
230  boost::format("No workload created for layer (name: '%1%' type: '%2%') (compute '%3%')")
231  % layerName % static_cast<int>(layer->GetType()) % layer->GetBackendId().Get()
232  ));
233  }
234 
235  if (timelineUtils)
236  {
237  // Add workload to the post-optimisation network structure
238  AddWorkloadStructure(timelineUtils, workload, *layer);
239  }
240 
241  m_WorkloadQueue.push_back(move(workload));
242  // release the constant data in the layer..
243  layer->ReleaseConstantData();
244  break;
245  }
246  }
247  }
248 
249  if (timelineUtils)
250  {
251  // Commit to send the post-optimisation network structure
252  timelineUtils->Commit();
253  }
254 
255  // Set up memory.
256  m_OptimizedNetwork->GetGraph().AllocateDynamicBuffers();
257 
258  // Now that the intermediate tensor memory has been set-up, do any post allocation configuration for each workload.
259  for (auto& workload : m_WorkloadQueue)
260  {
261  workload->PostAllocationConfigure();
262  }
263 }
264 
266 {
267  Graph& order = m_OptimizedNetwork->GetGraph().TopologicalSort();
268  ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
269 
270  std::unique_ptr<TimelineUtilityMethods> timelineUtils =
271  TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService);
272 
273  timelineUtils->CreateTypedEntity(networkGuid, LabelsAndEventClasses::NETWORK_GUID);
274 
275  for (auto&& layer : order)
276  {
277  // Add layer to the post-optimisation network structure
278  AddLayerStructure(timelineUtils, *layer, networkGuid);
279  switch (layer->GetType())
280  {
281  case LayerType::Input:
282  case LayerType::Output:
283  {
284  // Inputs and outputs are treated in a special way - see EnqueueInput() and EnqueueOutput().
285  break;
286  }
287  default:
288  {
289  for (auto& workload : m_WorkloadQueue)
290  {
291  // Add workload to the post-optimisation network structure
292  AddWorkloadStructure(timelineUtils, workload, *layer);
293  }
294  break;
295  }
296  }
297  }
298  // Commit to send the post-optimisation network structure
299  timelineUtils->Commit();
300 }
301 
303 {
304  for (auto&& inputLayer : m_OptimizedNetwork->GetGraph().GetInputLayers())
305  {
306  ARMNN_ASSERT_MSG(inputLayer->GetNumOutputSlots() == 1, "Input layer should have exactly 1 output slot");
307  if (inputLayer->GetBindingId() == layerId)
308  {
309  return inputLayer->GetOutputSlot(0).GetTensorInfo();
310  }
311  }
312 
313  throw InvalidArgumentException(boost::str(boost::format("No input layer is associated with id %1%") % layerId));
314 }
315 
317 {
318  for (auto&& outputLayer : m_OptimizedNetwork->GetGraph().GetOutputLayers())
319  {
320  ARMNN_ASSERT_MSG(outputLayer->GetNumInputSlots() == 1, "Output layer should have exactly 1 input slot");
321  ARMNN_ASSERT_MSG(outputLayer->GetInputSlot(0).GetConnection(), "Input slot on Output layer must be connected");
322  if (outputLayer->GetBindingId() == layerId)
323  {
324  return outputLayer->GetInputSlot(0).GetConnection()->GetTensorInfo();
325  }
326  }
327 
328  throw InvalidArgumentException(boost::str(boost::format("No output layer is associated with id %1%") % layerId));
329 }
330 
331 const IWorkloadFactory& LoadedNetwork::GetWorkloadFactory(const Layer& layer) const
332 {
333  const IWorkloadFactory* workloadFactory = nullptr;
334 
335  auto it = m_WorkloadFactories.find(layer.GetBackendId());
336  if (it == m_WorkloadFactories.end())
337  {
338  throw RuntimeException(
339  boost::str(
340  boost::format("No workload factory for %1% to be used for layer: %2%")
341  % layer.GetBackendId().Get()
342  % layer.GetNameStr()),
343  CHECK_LOCATION());
344  }
345 
346  workloadFactory = it->second.first.get();
347 
348  ARMNN_ASSERT_MSG(workloadFactory, "No workload factory");
349 
350  std::string reasonIfUnsupported;
351  ARMNN_ASSERT_MSG(IWorkloadFactory::IsLayerSupported(layer, {}, reasonIfUnsupported),
352  "Factory does not support layer");
353  IgnoreUnused(reasonIfUnsupported);
354  return *workloadFactory;
355 }
356 
357 namespace {
358 
359 // Non-copyable class owning accelerator-specific tensor data.
360 class TensorPin
361 {
362 public:
363  TensorPin(std::unique_ptr<ITensorHandle> handle, const TensorInfo& info, LayerBindingId id)
364  : m_TensorHandle(std::move(handle))
365  , m_TensorInfo(info)
366  , m_Id(id)
367  {
368  }
369 
370  ITensorHandle* GetTensorHandle() const { return m_TensorHandle.get(); }
371  const TensorInfo& GetTensorInfo() const { return m_TensorInfo; }
372  LayerBindingId GetBindingId() const { return m_Id; }
373 
374 private:
375  std::unique_ptr<ITensorHandle> m_TensorHandle;
376  TensorInfo m_TensorInfo;
377  LayerBindingId m_Id;
378 };
379 
380 static const TensorPin& GetTensorPin(LayerBindingId id,
381  const std::vector<TensorPin>& pins,
382  char const* bindingPointDesc)
383 {
384  auto it = std::find_if(pins.begin(), pins.end(),
385  [id](const TensorPin& pin)
386  {
387  return pin.GetBindingId() == id;
388  });
389 
390  if (it != pins.end())
391  {
392  return *it;
393  }
394  else
395  {
396  throw InvalidArgumentException(boost::str(
397  boost::format("No tensor supplied for %1% %2%") % bindingPointDesc % id));
398  }
399 }
400 
401 // Stores data that needs to be kept accessible for the entire execution of a workload.
402 class WorkloadData
403 {
404 public:
405  WorkloadData(const InputTensors& inputTensors, const OutputTensors& outputTensors)
406  {
407  m_InputTensorPins.reserve(inputTensors.size());
408  m_OutputTensorPins.reserve(outputTensors.size());
409 
410  for (auto inputTensorPair : inputTensors)
411  {
412  auto inputTensor = inputTensorPair.second;
413 
414  std::unique_ptr<ITensorHandle> tensorHandle =
415  std::make_unique<ConstPassthroughCpuTensorHandle>(inputTensor.GetInfo(),inputTensor.GetMemoryArea());
416  LayerBindingId layerId = inputTensorPair.first;
417 
418  m_InputTensorPins.emplace_back(std::move(tensorHandle), inputTensor.GetInfo(), layerId);
419  }
420 
421  for (auto outputTensorPair : outputTensors)
422  {
423  auto outputTensor = outputTensorPair.second;
424 
425  std::unique_ptr<ITensorHandle> tensorHandle =
426  std::make_unique<PassthroughCpuTensorHandle>(outputTensor.GetInfo(), outputTensor.GetMemoryArea());
427  LayerBindingId layerId = outputTensorPair.first;
428 
429  m_OutputTensorPins.emplace_back(std::move(tensorHandle), outputTensor.GetInfo(), layerId);
430  }
431  }
432 
433  const TensorPin& GetInputTensorPin(LayerBindingId id) const
434  {
435  return GetTensorPin(id, m_InputTensorPins, "input");
436  }
437 
438  const TensorPin& GetOutputTensorPin(LayerBindingId id) const
439  {
440  return GetTensorPin(id, m_OutputTensorPins, "output");
441  }
442 
443 private:
444 
445  std::vector<TensorPin> m_InputTensorPins;
446  std::vector<TensorPin> m_OutputTensorPins;
447 };
448 
449 }
450 
452  const OutputTensors& outputTensors)
453 {
455 
456  const Graph& graph = m_OptimizedNetwork->GetGraph();
457 
458  // Walk graph to determine the order of execution.
459  if (graph.GetNumLayers() < 2)
460  {
461  ARMNN_LOG(warning) << "IRuntime::EnqueueWorkload()::Less than two nodes in graph";
462  return Status::Failure;
463  }
464 
465  // Data that must be kept alive for the entire execution of the workload.
466  WorkloadData workloadData(inputTensors, outputTensors);
467 
468  if (graph.GetNumInputs() != inputTensors.size())
469  {
470  throw InvalidArgumentException("Number of inputs provided does not match network.");
471  }
472 
473  // For each input to the network, call EnqueueInput with the data passed by the user.
474  m_InputQueue.clear();
475  m_InputQueue.reserve(graph.GetNumInputs());
476  for (const BindableLayer* inputLayer : graph.GetInputLayers())
477  {
478  const TensorPin& pin = workloadData.GetInputTensorPin(inputLayer->GetBindingId());
479  EnqueueInput(*inputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
480  }
481 
482  // For each output to the network, call EnqueueOutput with the data passed by the user.
483  m_OutputQueue.clear();
484  m_OutputQueue.reserve(graph.GetNumOutputs());
485  for (const BindableLayer* outputLayer : graph.GetOutputLayers())
486  {
487  const TensorPin& pin = workloadData.GetOutputTensorPin(outputLayer->GetBindingId());
488  EnqueueOutput(*outputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
489  }
490 
491  std::unique_ptr<TimelineUtilityMethods> timelineUtils =
492  TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService);
493  ProfilingGuid inferenceGuid = m_ProfilingService.GetNextGuid();
494  if (timelineUtils)
495  {
496  // Add inference timeline trace if profiling is enabled.
497  ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
498  timelineUtils->CreateTypedEntity(inferenceGuid, LabelsAndEventClasses::INFERENCE_GUID);
499  timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink, networkGuid, inferenceGuid);
500  timelineUtils->RecordEvent(inferenceGuid, LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
501  }
502 
503  bool executionSucceeded = true;
504 
505  {
506  if (m_ProfilingService.IsProfilingEnabled())
507  {
508  m_ProfilingService.IncrementCounterValue(armnn::profiling::INFERENCES_RUN);
509  }
511  ARMNN_SCOPED_HEAP_PROFILING("Executing");
512  executionSucceeded = Execute(timelineUtils, inferenceGuid);
513  }
514 
515  if (timelineUtils)
516  {
517  // Add end of life of the inference timeline if profiling is enabled.
518  timelineUtils->RecordEvent(inferenceGuid, LabelsAndEventClasses::ARMNN_PROFILING_EOL_EVENT_CLASS);
519  timelineUtils->Commit();
520  }
521  return executionSucceeded ? Status::Success : Status::Failure;
522 }
523 
524 void LoadedNetwork::EnqueueInput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo)
525 {
526  if (layer.GetType() != LayerType::Input)
527  {
528  throw InvalidArgumentException("EnqueueInput: given layer not an InputLayer");
529  }
530 
531  if (tensorHandle == nullptr)
532  {
533  throw InvalidArgumentException("EnqueueInput: tensorHandle must not be NULL");
534  }
535 
536  InputQueueDescriptor inputQueueDescriptor;
538 
539  inputQueueDescriptor.m_Inputs.push_back(tensorHandle);
540  info.m_InputTensorInfos.push_back(tensorInfo);
541 
542  ARMNN_ASSERT_MSG(layer.GetNumOutputSlots() == 1, "Can only handle Input Layer with one output");
543  const OutputHandler& handler = layer.GetOutputHandler();
544  const TensorInfo& outputTensorInfo = handler.GetTensorInfo();
545  ITensorHandle* outputTensorHandle = handler.GetData();
546  ARMNN_ASSERT_MSG(outputTensorHandle != nullptr,
547  "Data should have been allocated.");
548  inputQueueDescriptor.m_Outputs.push_back(outputTensorHandle);
549  info.m_OutputTensorInfos.push_back(outputTensorInfo);
550 
551  MemorySourceFlags importFlags = outputTensorHandle->GetImportFlags();
552  if (m_IsImportEnabled) // Try import the input tensor
553  {
554  if(CheckFlag(importFlags, MemorySource::Malloc) )
555  {
556  // This assumes a CPU Tensor handle
557  void* mem = tensorHandle->Map(false);
558  if (outputTensorHandle->Import(mem, MemorySource::Malloc))
559  {
560  tensorHandle->Unmap();
561  return; // No need for a workload since the import has been done.
562  }
563  tensorHandle->Unmap();
564  throw MemoryImportException("EnqueueInput: Memory Import failed");
565  }
566  else
567  {
568  throw MemoryImportException("EnqueueInput: Memory Import failed, backend does not support Import");
569  }
570  }
571  else
572  {
573  // Create a mem copy workload for input since we did not import
574  std::unique_ptr<IWorkload> inputWorkload = std::make_unique<CopyMemGenericWorkload>(inputQueueDescriptor, info);
575 
576  ARMNN_ASSERT_MSG(inputWorkload, "No input workload created");
577 
578  std::unique_ptr<TimelineUtilityMethods> timelineUtils =
579  TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService);
580  if (timelineUtils)
581  {
582  // Add Input Workload to the post-optimisation network structure
583  AddWorkloadStructure(timelineUtils, inputWorkload, layer);
584  timelineUtils->Commit();
585  }
586 
587  m_InputQueue.push_back(move(inputWorkload));
588  }
589 }
590 
591 void LoadedNetwork::EnqueueOutput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo)
592 {
593  if (layer.GetType() != LayerType::Output)
594  {
595  throw InvalidArgumentException("EnqueueOutput: given layer not an OutputLayer");
596  }
597 
598  if (tensorHandle == nullptr)
599  {
600  throw InvalidArgumentException("EnqueueOutput: tensorHandle must not be NULL");
601  }
602 
603  OutputQueueDescriptor outputQueueDescriptor;
605 
606  outputQueueDescriptor.m_Outputs.push_back(tensorHandle);
607  info.m_OutputTensorInfos.push_back(tensorInfo);
608 
609  ARMNN_ASSERT_MSG(layer.GetNumInputSlots() == 1, "Output Layer should have exactly one input.");
610 
611  // Gets the output handler from the previous node.
612  const OutputHandler& outputHandler = layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOutputHandler();
613 
614  const TensorInfo& inputTensorInfo = outputHandler.GetTensorInfo();
615  ITensorHandle* inputTensorHandle = outputHandler.GetData();
616  ARMNN_ASSERT_MSG(inputTensorHandle != nullptr, "Data should have been allocated.");
617 
618  // Try import the output tensor.
619  // Note: We can only import the output pointer if all of the following hold true:
620  // a) The imported pointer is aligned sufficiently
621  // b) The tensor has zero padding
622  // c) There is only one connection to the OutputSlot and it is to an OutputLayer.
623  // d) The output pointer is allocated via malloc. (Other types will be supported in a later release)
624  // e) m_IsExportEnabled must be set to true
625  if (m_IsExportEnabled && (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetNumConnections() == 1))
626  {
627  if(layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOwningLayer().GetType() != LayerType::Input)
628  {
629  MemorySourceFlags importFlags = inputTensorHandle->GetImportFlags();
630  if (CheckFlag(importFlags, MemorySource::Malloc))
631  {
632  void *mem = tensorHandle->Map(false);
633  bool importOk = inputTensorHandle->Import(mem, MemorySource::Malloc);
634  tensorHandle->Unmap();
635 
636  if (importOk)
637  {
638  // Insert synchronization workload
639  MemSyncQueueDescriptor syncDesc;
640  syncDesc.m_Inputs.push_back(inputTensorHandle);
641  info.m_InputTensorInfos.push_back(inputTensorInfo);
642  auto syncWorkload = std::make_unique<SyncMemGenericWorkload>(syncDesc, info);
643  ARMNN_ASSERT_MSG(syncWorkload, "No sync workload created");
644  m_OutputQueue.push_back(move(syncWorkload));
645  }
646  else
647  {
648  throw MemoryExportException("EnqueueOutput: Memory Export failed");
649  }
650  }
651  else
652  {
653  throw MemoryExportException("EnqueueOutput: Memory Export failed, backend does not support Export");
654  }
655  }
656  else
657  {
658  throw MemoryExportException("EnqueueOutput: Memory Export failed, attempting to export Input Layer");
659  }
660  }
661  else
662  {
663  // If we got here then we didn't export the memory, so add an output workload which performs a memcopy.
664  outputQueueDescriptor.m_Inputs.push_back(inputTensorHandle);
665  info.m_InputTensorInfos.push_back(inputTensorInfo);
666 
667  std::unique_ptr<IWorkload> outputWorkload =
668  std::make_unique<CopyMemGenericWorkload>(outputQueueDescriptor, info);
669  ARMNN_ASSERT_MSG(outputWorkload, "No output workload created");
670 
671  std::unique_ptr<TimelineUtilityMethods> timelineUtils =
672  TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService);
673  if (timelineUtils)
674  {
675  // Add Output Workload to the post-optimisation network structure
676  AddWorkloadStructure(timelineUtils, outputWorkload, layer);
677  timelineUtils->Commit();
678  }
679 
680  m_OutputQueue.push_back(move(outputWorkload));
681  }
682 }
683 
684 void LoadedNetwork::AllocateWorkingMemory()
685 {
686  if (m_IsWorkingMemAllocated)
687  {
688  return;
689  }
690  for (auto&& workloadFactory : m_WorkloadFactories)
691  {
692  IBackendInternal::IMemoryManagerSharedPtr memoryManager = workloadFactory.second.second;
693  if (memoryManager)
694  {
695  memoryManager->Acquire();
696  }
697  }
698  m_TensorHandleFactoryRegistry.AquireMemory();
699  m_IsWorkingMemAllocated = true;
700 }
701 
703 {
704  std::lock_guard<std::mutex> lockGuard(m_WorkingMemMutex);
705  if (!m_IsWorkingMemAllocated)
706  {
707  return;
708  }
709  // Informs the memory managers to release memory in it's respective memory group
710  for (auto&& workloadFactory : m_WorkloadFactories)
711  {
712  IBackendInternal::IMemoryManagerSharedPtr memoryManager = workloadFactory.second.second;
713  if (memoryManager)
714  {
715  memoryManager->Release();
716  }
717  }
718  m_TensorHandleFactoryRegistry.ReleaseMemory();
719  m_IsWorkingMemAllocated = false;
720 }
721 
722 bool LoadedNetwork::Execute(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
723  profiling::ProfilingGuid inferenceGuid)
724 {
725  bool success = true;
726 
727  auto Fail = [&](const std::exception& error)
728  {
729  ARMNN_LOG(error) << "An error occurred attempting to execute a workload: " << error.what();
730  success = false;
731  };
732 
733  try
734  {
735  std::lock_guard<std::mutex> lockGuard(m_WorkingMemMutex);
736  AllocateWorkingMemory();
737 
738  ProfilingDynamicGuid workloadInferenceID(0);
739  for (auto& input : m_InputQueue)
740  {
741  if(timelineUtils)
742  {
743  workloadInferenceID = timelineUtils->RecordWorkloadInferenceAndStartOfLifeEvent(input->GetGuid(),
744  inferenceGuid);
745  }
746  input->Execute();
747  if(timelineUtils)
748  {
749  timelineUtils->RecordEndOfLifeEvent(workloadInferenceID);
750  }
751  }
752 
753  for (auto& workload : m_WorkloadQueue)
754  {
755  if(timelineUtils)
756  {
757  workloadInferenceID = timelineUtils->RecordWorkloadInferenceAndStartOfLifeEvent(workload->GetGuid(),
758  inferenceGuid);
759  }
760  workload->Execute();
761  if(timelineUtils)
762  {
763  timelineUtils->RecordEndOfLifeEvent(workloadInferenceID);
764  }
765  }
766  for (auto& output: m_OutputQueue)
767  {
768  if(timelineUtils)
769  {
770  workloadInferenceID = timelineUtils->RecordWorkloadInferenceAndStartOfLifeEvent(output->GetGuid(),
771  inferenceGuid);
772  }
773  output->Execute();
774  if(timelineUtils)
775  {
776  timelineUtils->RecordEndOfLifeEvent(workloadInferenceID);
777  }
778  }
779  }
780  catch (const RuntimeException& error)
781  {
782  Fail(error);
783  }
784  catch (const std::runtime_error& error)
785  {
786  Fail(error);
787  }
788 
789  return success;
790 }
791 
793 {
794  for (auto&& workloadPtr: m_WorkloadQueue)
795  {
796  workloadPtr.get()->RegisterDebugCallback(func);
797  }
798 }
799 
800 }
static ARMNN_DLLEXPORT ProfilingStaticGuid INFERENCE_GUID
virtual void RegisterTensorHandleFactories(class TensorHandleFactoryRegistry &)
(Optional) Register TensorHandleFactories Either this method or CreateMemoryManager() and IWorkloadFa...
virtual bool Import(void *memory, MemorySource source)
Import externally allocated memory.
FactoryFunction GetFactory(const BackendId &id) const
unsigned int GetNumInputSlots() const override
Returns the number of connectable input slots.
Definition: Layer.hpp:307
const bool m_ImportEnabled
Definition: IRuntime.hpp:33
void RegisterProfiler(Profiler *profiler)
Definition: Profiling.cpp:493
static std::unique_ptr< TimelineUtilityMethods > GetTimelineUtils(ProfilingService &profilingService)
static ProfilerManager & GetInstance()
Definition: Profiling.cpp:486
virtual unsigned int GetImportFlags() const
Get flags describing supported import sources.
Strongly typed guids to distinguish between those generated at runtime, and those that are statically...
Definition: Types.hpp:296
const TensorInfo & GetTensorInfo(const ITensorHandle *tensorHandle)
float32 helpers
TensorInfo GetInputTensorInfo(LayerBindingId layerId) const
#define ARMNN_LOG(severity)
Definition: Logging.hpp:163
BackendRegistry & BackendRegistryInstance()
std::vector< std::pair< LayerBindingId, class ConstTensor > > InputTensors
Definition: Tensor.hpp:225
unsigned int MemorySourceFlags
size_t GetNumOutputs() const
Definition: Graph.hpp:178
TensorInfo GetOutputTensorInfo(LayerBindingId layerId) const
Copyright (c) 2020 ARM Limited.
void IgnoreUnused(Ts &&...)
const std::vector< InputSlot > & GetInputSlots() const
Definition: Layer.hpp:231
std::function< void(LayerGuid guid, unsigned int slotIndex, ITensorHandle *tensorHandle)> DebugCallbackFunction
Define the type of callback for the Debug layer to call.
Definition: Types.hpp:244
unsigned int GetNumOutputSlots() const override
Returns the number of connectable output slots.
Definition: Layer.hpp:308
static ARMNN_DLLEXPORT ProfilingStaticGuid WORKLOAD_GUID
static ARMNN_DLLEXPORT ProfilingStaticGuid ARMNN_PROFILING_EOL_EVENT_CLASS
#define ARMNN_SCOPED_PROFILING_EVENT(backendId, name)
Definition: Profiling.hpp:169
int LayerBindingId
Type of identifiers for bindable layers (inputs, outputs).
Definition: Types.hpp:171
static ARMNN_DLLEXPORT ProfilingStaticGuid ARMNN_PROFILING_SOL_EVENT_CLASS
virtual IWorkloadFactoryPtr CreateWorkloadFactory(const IMemoryManagerSharedPtr &memoryManager=nullptr) const =0
std::vector< TensorInfo > m_InputTensorInfos
static ARMNN_DLLEXPORT ProfilingStaticGuid LAYER_GUID
#define ARMNN_ASSERT_MSG(COND, MSG)
Definition: Assert.hpp:15
bool SupportsTensorAllocatorAPI() const
std::shared_ptr< IMemoryManager > IMemoryManagerSharedPtr
#define ARMNN_SCOPED_HEAP_PROFILING(TAG)
std::vector< std::pair< LayerBindingId, class Tensor > > OutputTensors
Definition: Tensor.hpp:226
const std::string & GetNameStr() const
Definition: Layer.hpp:216
Status
enumeration
Definition: Types.hpp:26
const bool m_ExportEnabled
Definition: IRuntime.hpp:34
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14
std::vector< TensorInfo > m_OutputTensorInfos
static bool IsLayerSupported(const BackendId &backendId, const IConnectableLayer &layer, Optional< DataType > dataType, std::string &outReasonIfUnsupported)
#define CHECK_LOCATION()
Definition: Exceptions.hpp:192
const BackendId & GetBackendId() const
Definition: Layer.hpp:263
armnn::profiling::ProfilingService profilingService
OutputLayersAccessor GetOutputLayers() const
Returns a wrapper object with begin(), end() methods to iterate over the output layers in a range-bas...
Definition: Graph.hpp:186
static ARMNN_DLLEXPORT ProfilingStaticGuid NETWORK_GUID
Status EnqueueWorkload(const InputTensors &inputTensors, const OutputTensors &outputTensors)
virtual const void * Map(bool blocking=true) const =0
Map the tensor data for access.
virtual void Unmap() const =0
Unmap the tensor data.
std::vector< ITensorHandle * > m_Outputs
Base class for all ArmNN exceptions so that users can filter to just those.
Definition: Exceptions.hpp:46
const OutputHandler & GetOutputHandler(unsigned int i=0) const
Definition: Layer.hpp:221
const std::string & Get() const
Definition: BackendId.hpp:136
void RegisterDebugCallback(const DebugCallbackFunction &func)
LayerType GetType() const
Definition: Layer.hpp:259
Contains information about inputs and outputs to a layer.
bool CheckFlag(MemorySourceFlags flags, MemorySource source)
Graph & TopologicalSort()
Sorts layers in topological order and return this.
Definition: Graph.hpp:174
InputLayersAccessor GetInputLayers() const
Returns a wrapper object with begin(), end() methods to iterate over the input layers in a range-base...
Definition: Graph.hpp:182
std::vector< ITensorHandle * > m_Inputs
size_t GetNumLayers() const
Definition: Graph.hpp:188
virtual ARMNN_NO_DEPRECATE_WARN_END IMemoryManagerUniquePtr CreateMemoryManager() const
size_t GetNumInputs() const
Definition: Graph.hpp:177
static ARMNN_DLLEXPORT ProfilingStaticGuid BACKENDID_GUID
static std::unique_ptr< LoadedNetwork > MakeLoadedNetwork(std::unique_ptr< OptimizedNetwork > net, std::string &errorMessage, const INetworkProperties &networkProperties, profiling::ProfilingService &profilingService)