#include <fmt/format.h>

// ...

template <typename ExceptionType>
std::string ToErrorMessage(const char* prefix, const ExceptionType& error)
{
    std::stringstream ss;
    ss << prefix << " " << error.what();
    return ss.str();
}
void AddLayerStructure(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
                       const Layer& layer,
                       ProfilingGuid networkGuid)
{
    // Add the layer to the post-optimisation network structure.
    std::string layerName = layer.GetNameStr().empty() ? "<Unnamed>" : layer.GetNameStr();
    timelineUtils->CreateNamedTypedChildEntity(layer.GetGuid(),
                                               networkGuid,
                                               layerName,
                                               LabelsAndEventClasses::LAYER_GUID);
    // Link each input slot back to the layer that feeds it.
    for (auto&& input : layer.GetInputSlots())
    {
        const IOutputSlot* source = input.GetConnectedOutputSlot();
        timelineUtils->CreateConnectionRelationship(ProfilingRelationshipType::RetentionLink,
                                                    source->GetOwningLayerGuid(),
                                                    layer.GetGuid());
    }
}
void AddWorkloadStructure(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
                          std::unique_ptr<IWorkload>& workload,
                          const Layer& layer)
{
    // Label the workload with the backend it runs on, then link it to its layer.
    timelineUtils->MarkEntityWithLabel(workload->GetGuid(),
                                       layer.GetBackendId().Get(),
                                       LabelsAndEventClasses::BACKENDID_GUID);
    timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
                                      layer.GetGuid(),
                                      workload->GetGuid(),
                                      LabelsAndEventClasses::CHILD_GUID);
}
std::unique_ptr<LoadedNetwork> LoadedNetwork::MakeLoadedNetwork(std::unique_ptr<OptimizedNetwork> net,
                                                                std::string& errorMessage,
                                                                const INetworkProperties& networkProperties,
                                                                profiling::ProfilingService& profilingService)
{
    std::unique_ptr<LoadedNetwork> loadedNetwork;

    auto Fail = [&](const std::exception& error) -> std::unique_ptr<LoadedNetwork>
    {
        errorMessage = ToErrorMessage("An error occurred when preparing the network workloads: ", error);
        ARMNN_LOG(error) << errorMessage;
        return std::unique_ptr<LoadedNetwork>();
    };

    try
    {
        loadedNetwork.reset(new LoadedNetwork(std::move(net), networkProperties, profilingService));
    }
    // ...
    catch (const std::runtime_error& error)
    {
        return Fail(error);
    }

    return loadedNetwork;
}
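// Usage sketch (hypothetical caller, not part of this file): the runtime hands an
// optimised network to the factory and inspects errorMessage on failure, since
// MakeLoadedNetwork reports errors through the out-parameter instead of throwing.
//
//     std::string errorMessage;
//     std::unique_ptr<LoadedNetwork> loaded = LoadedNetwork::MakeLoadedNetwork(
//         std::move(optimizedNet), errorMessage, networkProperties, profilingService);
//     if (!loaded)
//     {
//         ARMNN_LOG(error) << errorMessage;
//     }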
LoadedNetwork::LoadedNetwork(std::unique_ptr<OptimizedNetwork> net,
                             const INetworkProperties& networkProperties,
                             profiling::ProfilingService& profilingService) :
    m_OptimizedNetwork(std::move(net)),
    m_IsImportEnabled(networkProperties.m_ImportEnabled),
    m_IsExportEnabled(networkProperties.m_ExportEnabled),
    m_TensorHandleFactoryRegistry(),
    m_ProfilingService(profilingService)
{
    // Create a profiler and register it for the current thread.
    m_Profiler = std::make_shared<Profiler>();
    ProfilerManager::GetInstance().RegisterProfiler(m_Profiler.get());

    Graph& order = m_OptimizedNetwork->GetGraph().TopologicalSort();

    // First create the backends and workload factories.
    for (auto&& layer : order)
    {
        auto const& backendId = layer->GetBackendId();
        if (m_Backends.count(backendId) == 0)
        {
            auto createBackend = BackendRegistryInstance().GetFactory(backendId);
            auto it = m_Backends.emplace(std::make_pair(backendId, createBackend()));
            IBackendInternal* backend = it.first->second.get();

            if (backend->SupportsTensorAllocatorAPI())
            {
                auto workloadFactory = backend->CreateWorkloadFactory(
                    m_TensorHandleFactoryRegistry, m_OptimizedNetwork->GetModelOptions());
                m_WorkloadFactories.emplace(
                    std::make_pair(backendId, std::make_pair(std::move(workloadFactory), nullptr)));
            }
            else
            {
                IBackendInternal::IMemoryManagerSharedPtr memoryManager = backend->CreateMemoryManager();
                auto workloadFactory = backend->CreateWorkloadFactory(
                    memoryManager, m_OptimizedNetwork->GetModelOptions());
                m_WorkloadFactories.emplace(
                    std::make_pair(backendId, std::make_pair(std::move(workloadFactory), memoryManager)));
            }
        }
    }
    // Create the tensor handles before the workloads, because workload creation
    // can modify some of the handlers (for example for splitter and concat layers).
    for (auto&& layer : order)
    {
        auto& workloadFactory = GetWorkloadFactory(*layer);

        switch (layer->GetType())
        {
        case LayerType::Input:
            {
                // If import is enabled, the input tensor handles must not be memory managed.
                layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory, !m_IsImportEnabled);
                break;
            }
        default:
            {
                // If this layer's single output feeds an Output layer and export is enabled,
                // disable memory management so the tensor can be exported; otherwise copy.
                if ((layer->GetNumOutputSlots() == 1) &&
                    (layer->GetOutputSlots()[0].GetNumConnections() == 1) &&
                    (layer->GetOutputSlots()[0].GetConnection(0)->GetOwningLayer().GetType() == LayerType::Output))
                {
                    layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory, !m_IsExportEnabled);
                }
                else
                {
                    layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory);
                }
            }
        }
    }
    ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
    std::unique_ptr<TimelineUtilityMethods> timelineUtils =
        TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService);
    if (timelineUtils)
    {
        timelineUtils->CreateTypedEntity(networkGuid, LabelsAndEventClasses::NETWORK_GUID);
        // Label the network entity with the ID of the process that loaded it.
        std::stringstream ss;
        ss << armnnUtils::Processes::GetCurrentId();
        timelineUtils->MarkEntityWithLabel(networkGuid, ss.str(), LabelsAndEventClasses::PROCESS_ID_GUID);
    }

    // Then create the workloads.
    for (auto&& layer : order)
    {
        if (timelineUtils)
        {
            // Add the layer to the post-optimisation network structure.
            AddLayerStructure(timelineUtils, *layer, networkGuid);
        }

        const IWorkloadFactory& workloadFactory = GetWorkloadFactory(*layer);

        switch (layer->GetType())
        {
        case LayerType::Input:
        case LayerType::Output:
            {
                // Inputs and outputs are handled separately - see EnqueueInput() and EnqueueOutput().
                break;
            }
        default:
            {
                auto workload = layer->CreateWorkload(workloadFactory);
                if (!workload)
                {
                    const char* const layerName =
                        layer->GetNameStr().length() != 0 ? layer->GetName() : "<Unnamed>";
                    throw InvalidArgumentException(fmt::format(
                        "No workload created for layer (name: '{0}' type: '{1}') (compute '{2}')",
                        layerName, static_cast<int>(layer->GetType()), layer->GetBackendId().Get()
                    ));
                }

                if (timelineUtils)
                {
                    // Link the workload to its layer in the timeline.
                    AddWorkloadStructure(timelineUtils, workload, *layer);
                }

                m_WorkloadQueue.push_back(std::move(workload));
                // The layer's constant data can be released now that the workload holds its own copy.
                layer->ReleaseConstantData();
                break;
            }
        }
    }

    if (timelineUtils)
    {
        // Commit to send the post-optimisation network structure.
        timelineUtils->Commit();
    }
    // Set up the intermediate buffers.
    m_OptimizedNetwork->GetGraph().AllocateDynamicBuffers();

    // Now that the intermediate tensor memory has been set up,
    // do any post-allocation configuration for each workload.
    for (auto& workload : m_WorkloadQueue)
    {
        workload->PostAllocationConfigure();
    }
}
void LoadedNetwork::SendNetworkStructure()
{
    Graph& order = m_OptimizedNetwork->GetGraph().TopologicalSort();
    ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();

    std::unique_ptr<TimelineUtilityMethods> timelineUtils =
        TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService);

    timelineUtils->CreateTypedEntity(networkGuid, LabelsAndEventClasses::NETWORK_GUID);

    for (auto&& layer : order)
    {
        // Add the layer to the post-optimisation network structure.
        AddLayerStructure(timelineUtils, *layer, networkGuid);

        switch (layer->GetType())
        {
        case LayerType::Input:
        case LayerType::Output:
            {
                // Inputs and outputs are handled separately - see EnqueueInput() and EnqueueOutput().
                break;
            }
        default:
            {
                for (auto& workload : m_WorkloadQueue)
                {
                    // Add the workload to the post-optimisation network structure.
                    AddWorkloadStructure(timelineUtils, workload, *layer);
                }
                break;
            }
        }
    }
    // Commit to send the post-optimisation network structure.
    timelineUtils->Commit();
}
profiling::ProfilingGuid LoadedNetwork::GetNetworkGuid()
{
    return m_OptimizedNetwork->GetGuid();
}
TensorInfo LoadedNetwork::GetInputTensorInfo(LayerBindingId layerId) const
{
    for (auto&& inputLayer : m_OptimizedNetwork->GetGraph().GetInputLayers())
    {
        ARMNN_ASSERT_MSG(inputLayer->GetNumOutputSlots() == 1, "Input layer should have exactly 1 output slot");
        if (inputLayer->GetBindingId() == layerId)
        {
            return inputLayer->GetOutputSlot(0).GetTensorInfo();
        }
    }

    throw InvalidArgumentException(fmt::format("No input layer is associated with id {}", layerId));
}
TensorInfo LoadedNetwork::GetOutputTensorInfo(LayerBindingId layerId) const
{
    for (auto&& outputLayer : m_OptimizedNetwork->GetGraph().GetOutputLayers())
    {
        ARMNN_ASSERT_MSG(outputLayer->GetNumInputSlots() == 1, "Output layer should have exactly 1 input slot");
        ARMNN_ASSERT_MSG(outputLayer->GetInputSlot(0).GetConnection(), "Input slot on Output layer must be connected");
        if (outputLayer->GetBindingId() == layerId)
        {
            return outputLayer->GetInputSlot(0).GetConnection()->GetTensorInfo();
        }
    }

    throw InvalidArgumentException(fmt::format("No output layer is associated with id {}", layerId));
}
const IWorkloadFactory& LoadedNetwork::GetWorkloadFactory(const Layer& layer) const
{
    const IWorkloadFactory* workloadFactory = nullptr;

    auto it = m_WorkloadFactories.find(layer.GetBackendId());
    if (it == m_WorkloadFactories.end())
    {
        throw RuntimeException(fmt::format("No workload factory for {0} to be used for layer: {1}",
                                           layer.GetBackendId().Get(),
                                           layer.GetNameStr()));
    }

    workloadFactory = it->second.first.get();

    ARMNN_ASSERT_MSG(workloadFactory, "No workload factory");

    std::string reasonIfUnsupported;
    ARMNN_ASSERT_MSG(IWorkloadFactory::IsLayerSupported(layer, {}, reasonIfUnsupported,
                                                        m_OptimizedNetwork->GetModelOptions()),
                     "Factory does not support layer");

    return *workloadFactory;
}
namespace
{

// Holds the tensor handle and metadata for one user-supplied input or output tensor.
class TensorPin
{
public:
    TensorPin(std::unique_ptr<ITensorHandle> handle, const TensorInfo& info, LayerBindingId id)
        : m_TensorHandle(std::move(handle))
        , m_TensorInfo(info)
        , m_Id(id)
    {
    }

    ITensorHandle* GetTensorHandle() const { return m_TensorHandle.get(); }
    const TensorInfo& GetTensorInfo() const { return m_TensorInfo; }
    LayerBindingId GetBindingId() const { return m_Id; }

private:
    std::unique_ptr<ITensorHandle> m_TensorHandle;
    TensorInfo m_TensorInfo;
    LayerBindingId m_Id;
};
static const TensorPin& GetTensorPin(LayerBindingId id,
                                     const std::vector<TensorPin>& pins,
                                     char const* bindingPointDesc)
{
    auto it = std::find_if(pins.begin(), pins.end(),
                           [id](const TensorPin& pin)
                           {
                               return pin.GetBindingId() == id;
                           });

    if (it != pins.end())
    {
        return *it;
    }
    else
    {
        throw InvalidArgumentException(fmt::format("No tensor supplied for {0} {1}", bindingPointDesc, id));
    }
}
// Stores data that needs to be kept accessible for the entire execution of a workload.
class WorkloadData
{
public:
    WorkloadData(const InputTensors& inputTensors, const OutputTensors& outputTensors)
    {
        m_InputTensorPins.reserve(inputTensors.size());
        m_OutputTensorPins.reserve(outputTensors.size());

        for (auto inputTensorPair : inputTensors)
        {
            auto inputTensor = inputTensorPair.second;

            std::unique_ptr<ITensorHandle> tensorHandle =
                std::make_unique<ConstPassthroughCpuTensorHandle>(inputTensor.GetInfo(), inputTensor.GetMemoryArea());
            LayerBindingId layerId = inputTensorPair.first;

            m_InputTensorPins.emplace_back(std::move(tensorHandle), inputTensor.GetInfo(), layerId);
        }

        for (auto outputTensorPair : outputTensors)
        {
            auto outputTensor = outputTensorPair.second;

            std::unique_ptr<ITensorHandle> tensorHandle =
                std::make_unique<PassthroughCpuTensorHandle>(outputTensor.GetInfo(), outputTensor.GetMemoryArea());
            LayerBindingId layerId = outputTensorPair.first;

            m_OutputTensorPins.emplace_back(std::move(tensorHandle), outputTensor.GetInfo(), layerId);
        }
    }

    const TensorPin& GetInputTensorPin(LayerBindingId id) const
    {
        return GetTensorPin(id, m_InputTensorPins, "input");
    }

    const TensorPin& GetOutputTensorPin(LayerBindingId id) const
    {
        return GetTensorPin(id, m_OutputTensorPins, "output");
    }

private:
    std::vector<TensorPin> m_InputTensorPins;
    std::vector<TensorPin> m_OutputTensorPins;
};

} // anonymous namespace
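// Usage sketch (illustrative, not from the original file): WorkloadData wraps the
// caller's tensors so that EnqueueWorkload() can look them up by binding id.
//
//     WorkloadData data(inputTensors, outputTensors);
//     const TensorPin& pin = data.GetInputTensorPin(someBindingId);
//     ITensorHandle* handle = pin.GetTensorHandle();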
Status LoadedNetwork::EnqueueWorkload(const InputTensors& inputTensors,
                                      const OutputTensors& outputTensors)
{
    const Graph& graph = m_OptimizedNetwork->GetGraph();

    if (graph.GetNumLayers() < 2)
    {
        ARMNN_LOG(warning) << "IRuntime::EnqueueWorkload()::Less than two nodes in graph";
        return Status::Failure;
    }

    // Data that must be kept alive while executing the workload.
    WorkloadData workloadData(inputTensors, outputTensors);

    // For each input to the network, call EnqueueInput with the data passed by the user.
    m_InputQueue.clear();
    for (const BindableLayer* inputLayer : graph.GetInputLayers())
    {
        const TensorPin& pin = workloadData.GetInputTensorPin(inputLayer->GetBindingId());
        EnqueueInput(*inputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
    }

    // Likewise for each output of the network.
    m_OutputQueue.clear();
    for (const BindableLayer* outputLayer : graph.GetOutputLayers())
    {
        const TensorPin& pin = workloadData.GetOutputTensorPin(outputLayer->GetBindingId());
        EnqueueOutput(*outputLayer, pin.GetTensorHandle(), pin.GetTensorInfo());
    }

    std::unique_ptr<TimelineUtilityMethods> timelineUtils =
        TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService);
    ProfilingGuid inferenceGuid = m_ProfilingService.GetNextGuid();
    if (timelineUtils)
    {
        // Add an inference entity to the timeline and mark it as an execution of the network.
        timelineUtils->CreateTypedEntity(inferenceGuid, LabelsAndEventClasses::INFERENCE_GUID);
        timelineUtils->CreateRelationship(ProfilingRelationshipType::RetentionLink,
                                          m_OptimizedNetwork->GetGuid(),
                                          inferenceGuid,
                                          LabelsAndEventClasses::EXECUTION_OF_GUID);
    }

    bool executionSucceeded = true;
    {
        if (m_ProfilingService.IsProfilingEnabled())
        {
            m_ProfilingService.IncrementCounterValue(armnn::profiling::INFERENCES_RUN);
        }
        executionSucceeded = Execute(timelineUtils, inferenceGuid);
    }

    if (timelineUtils)
    {
        // Send the timeline data for this inference.
        timelineUtils->Commit();
    }

    return executionSucceeded ? Status::Success : Status::Failure;
}
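// Caller-side sketch (hypothetical; the binding ids and buffer types are examples only).
// InputTensors and OutputTensors are vectors of {LayerBindingId, (Const)Tensor} pairs:
//
//     std::vector<float> inputData(inputInfo.GetNumElements());
//     std::vector<float> outputData(outputInfo.GetNumElements());
//     InputTensors inputs   = { { 0, ConstTensor(inputInfo, inputData.data()) } };
//     OutputTensors outputs = { { 0, Tensor(outputInfo, outputData.data()) } };
//     Status status = loadedNetwork->EnqueueWorkload(inputs, outputs);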
void LoadedNetwork::EnqueueInput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo)
{
    if (tensorHandle == nullptr)
    {
        throw InvalidArgumentException("EnqueueInput: tensorHandle must not be NULL");
    }

    InputQueueDescriptor inputQueueDescriptor;
    WorkloadInfo info;

    inputQueueDescriptor.m_Inputs.push_back(tensorHandle);
    info.m_InputTensorInfos.push_back(tensorInfo);

    const OutputHandler& handler = layer.GetOutputHandler();
    const TensorInfo& outputTensorInfo = handler.GetTensorInfo();
    ITensorHandle* outputTensorHandle = handler.GetData();
    ARMNN_ASSERT_MSG(outputTensorHandle != nullptr,
                     "Data should have been allocated.");
    inputQueueDescriptor.m_Outputs.push_back(outputTensorHandle);
    info.m_OutputTensorInfos.push_back(outputTensorInfo);

    bool needMemCopy = true;
    if (m_IsImportEnabled)  // Try to import the input tensor instead of copying it.
    {
        MemorySourceFlags importFlags = outputTensorHandle->GetImportFlags();
        if (CheckFlag(importFlags, MemorySource::Malloc))
        {
            needMemCopy = false;
            // This assumes a CPU tensor handle.
            void* mem = tensorHandle->Map(false);
            if (outputTensorHandle->Import(mem, MemorySource::Malloc))
            {
                tensorHandle->Unmap();
                return; // No need for a workload since the import has been done.
            }
            tensorHandle->Unmap();
            throw MemoryImportException("EnqueueInput: Memory Import failed");
        }
    }
    if (needMemCopy)
    {
        // Create a memcopy workload for the input since we did not import.
        std::unique_ptr<IWorkload> inputWorkload = std::make_unique<CopyMemGenericWorkload>(inputQueueDescriptor,
                                                                                            info);

        std::unique_ptr<TimelineUtilityMethods> timelineUtils =
            TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService);
        if (timelineUtils)
        {
            // Add the input workload to the post-optimisation network structure.
            AddWorkloadStructure(timelineUtils, inputWorkload, layer);
            timelineUtils->Commit();
        }

        m_InputQueue.push_back(std::move(inputWorkload));
    }
}
void LoadedNetwork::EnqueueOutput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo)
{
    if (tensorHandle == nullptr)
    {
        throw InvalidArgumentException("EnqueueOutput: tensorHandle must not be NULL");
    }

    OutputQueueDescriptor outputQueueDescriptor;
    WorkloadInfo info;

    outputQueueDescriptor.m_Outputs.push_back(tensorHandle);
    info.m_OutputTensorInfos.push_back(tensorInfo);

    // Gets the output handler from the previous node.
    const OutputHandler& outputHandler = layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOutputHandler();

    const TensorInfo& inputTensorInfo = outputHandler.GetTensorInfo();
    ITensorHandle* inputTensorHandle = outputHandler.GetData();
    ARMNN_ASSERT_MSG(inputTensorHandle != nullptr, "Data should have been allocated.");

    // Try to export the output tensor: only possible if export is enabled and this
    // is the only connection on the slot that feeds the Output layer.
    bool needMemCopy = true;
    if (m_IsExportEnabled && (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetNumConnections() == 1))
    {
        MemorySourceFlags importFlags = inputTensorHandle->GetImportFlags();
        if (CheckFlag(importFlags, MemorySource::Malloc))
        {
            needMemCopy = false;
            void* mem = tensorHandle->Map(false);
            bool importOk = inputTensorHandle->Import(mem, MemorySource::Malloc);
            tensorHandle->Unmap();

            if (importOk)
            {
                // Insert a synchronization workload in place of the memcopy.
                MemSyncQueueDescriptor syncDesc;
                syncDesc.m_Inputs.push_back(inputTensorHandle);
                info.m_InputTensorInfos.push_back(inputTensorInfo);
                auto syncWorkload = std::make_unique<SyncMemGenericWorkload>(syncDesc,
                                                                             info);
                m_OutputQueue.push_back(std::move(syncWorkload));
            }
            else
            {
                throw MemoryExportException("EnqueueOutput: Memory Export failed");
            }
        }
    }
    if (needMemCopy)
    {
        // We could not export, so add an output workload that performs a memcopy.
        outputQueueDescriptor.m_Inputs.push_back(inputTensorHandle);
        info.m_InputTensorInfos.push_back(inputTensorInfo);

        std::unique_ptr<IWorkload> outputWorkload =
            std::make_unique<CopyMemGenericWorkload>(outputQueueDescriptor,
                                                     info);

        std::unique_ptr<TimelineUtilityMethods> timelineUtils =
            TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService);
        if (timelineUtils)
        {
            // Add the output workload to the post-optimisation network structure.
            AddWorkloadStructure(timelineUtils, outputWorkload, layer);
            timelineUtils->Commit();
        }

        m_OutputQueue.push_back(std::move(outputWorkload));
    }
}
void LoadedNetwork::AllocateWorkingMemory(std::lock_guard<std::mutex>& lock)
{
    // The caller must already hold m_WorkingMemMutex; taking the guard as a
    // parameter enforces this at the call site.
    IgnoreUnused(lock);

    if (m_IsWorkingMemAllocated)
    {
        return;
    }
    for (auto&& workloadFactory : m_WorkloadFactories)
    {
        IBackendInternal::IMemoryManagerSharedPtr memoryManager = workloadFactory.second.second;
        if (memoryManager)
        {
            memoryManager->Acquire();
        }
    }
    m_TensorHandleFactoryRegistry.AquireMemory();
    m_IsWorkingMemAllocated = true;
}
void LoadedNetwork::FreeWorkingMemory()
{
    std::lock_guard<std::mutex> lockGuard(m_WorkingMemMutex);
    if (!m_IsWorkingMemAllocated)
    {
        return;
    }
    // Inform the memory managers to release memory in their respective memory groups.
    for (auto&& workloadFactory : m_WorkloadFactories)
    {
        IBackendInternal::IMemoryManagerSharedPtr memoryManager = workloadFactory.second.second;
        if (memoryManager)
        {
            memoryManager->Release();
        }
    }
    m_TensorHandleFactoryRegistry.ReleaseMemory();
    m_IsWorkingMemAllocated = false;
}
bool LoadedNetwork::Execute(std::unique_ptr<TimelineUtilityMethods>& timelineUtils,
                            profiling::ProfilingGuid inferenceGuid)
{
    bool success = true;

    auto Fail = [&](const std::exception& error)
    {
        ARMNN_LOG(error) << "An error occurred attempting to execute a workload: " << error.what();
        success = false;
    };

    try
    {
        std::lock_guard<std::mutex> lockGuard(m_WorkingMemMutex);
        AllocateWorkingMemory(lockGuard);

        ProfilingDynamicGuid workloadInferenceID(0);
        auto ExecuteQueue = [&timelineUtils, &workloadInferenceID, &inferenceGuid](WorkloadQueue& queue)
        {
            for (auto& workload : queue)
            {
                if (timelineUtils)
                {
                    workloadInferenceID = timelineUtils->RecordWorkloadInferenceAndStartOfLifeEvent(workload->GetGuid(),
                                                                                                    inferenceGuid);
                }
                workload->Execute();
                if (timelineUtils)
                {
                    timelineUtils->RecordEndOfLifeEvent(workloadInferenceID);
                }
            }
        };

        ExecuteQueue(m_InputQueue);
        ExecuteQueue(m_WorkloadQueue);
        ExecuteQueue(m_OutputQueue);
    }
    catch (const std::runtime_error& error)
    {
        Fail(error);
    }

    return success;
}
void LoadedNetwork::RegisterDebugCallback(const DebugCallbackFunction& func)
{
    for (auto&& workloadPtr : m_WorkloadQueue)
    {
        workloadPtr.get()->RegisterDebugCallback(func);
    }
}
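// Sketch of a caller-supplied debug callback (illustrative). DebugCallbackFunction is
// std::function<void(LayerGuid guid, unsigned int slotIndex, ITensorHandle* tensorHandle)>,
// so a caller could inspect each layer's intermediate outputs like this:
//
//     loadedNetwork->RegisterDebugCallback(
//         [](LayerGuid guid, unsigned int slotIndex, ITensorHandle* tensorHandle)
//         {
//             const void* data = tensorHandle->Map();  // blocking map
//             // ... inspect or dump the intermediate tensor here ...
//             tensorHandle->Unmap();
//             IgnoreUnused(guid, slotIndex, data);
//         });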