author     Mike Kelly <mike.kelly@arm.com>        2021-04-07 20:10:49 +0100
committer  finn.williams <finn.williams@arm.com>  2021-04-08 11:23:47 +0000
commit     55a8ffda24fff5515803df10fb4863d46a1effdf (patch)
tree       e314dea48f22ae88d452527b2decaca61df108ad
parent     b76eaed55a89330b3b448c4f4522b3fc94a4f38d (diff)
download   armnn-55a8ffda24fff5515803df10fb4863d46a1effdf.tar.gz
IVGCVSW-5823 Refactor Async Network API
* Moved IAsyncNetwork into IRuntime.
* All LoadedNetworks can be executed Asynchronously.

Signed-off-by: Mike Kelly <mike.kelly@arm.com>
Change-Id: Ibbc901ab9110dc2f881425b75489bccf9ad54169
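The headers touched below (IRuntime.hpp, IWorkingMemHandle.hpp) define the replacement API: asynchronous execution is now requested through the new asyncEnabled flag on INetworkProperties, per-thread working memory comes from IRuntime::CreateWorkingMemHandle(), and inference runs via IRuntime::Execute(). A minimal sketch of the resulting call flow, assuming an IOptimizedNetworkPtr produced elsewhere by armnn::Optimize() and input/output tensors already bound to valid memory (the helper name RunAsyncInference is illustrative only):

#include <armnn/ArmNN.hpp>
#include <string>

// Sketch only: error handling trimmed, and the optimized network is assumed
// to come from armnn::Optimize() on a network built elsewhere.
armnn::Status RunAsyncInference(armnn::IRuntime* runtime,
                                armnn::IOptimizedNetworkPtr optNet,
                                const armnn::InputTensors& inputTensors,
                                const armnn::OutputTensors& outputTensors)
{
    // Async execution is opted into via the new third INetworkProperties flag.
    armnn::INetworkProperties networkProperties(false, false, /*asyncEnabled=*/true);

    armnn::NetworkId networkId = 0;
    std::string errorMessage;
    if (runtime->LoadNetwork(networkId, std::move(optNet), errorMessage, networkProperties)
        != armnn::Status::Success)
    {
        return armnn::Status::Failure;
    }

    // One working memory handle per thread that wants to run this network.
    auto workingMemHandle = runtime->CreateWorkingMemHandle(networkId);

    // Blocks until this inference (and any other thread using the same
    // working memory handle) has completed.
    return runtime->Execute(*workingMemHandle, inputTensors, outputTensors);
}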
-rw-r--r--  Android.mk                                                             1
-rw-r--r--  CMakeLists.txt                                                         3
-rw-r--r--  include/armnn/ArmNN.hpp                                                2
-rw-r--r--  include/armnn/IAsyncNetwork.hpp                                       64
-rw-r--r--  include/armnn/IRuntime.hpp                                            34
-rw-r--r--  include/armnn/IWorkingMemHandle.hpp                                    5
-rw-r--r--  include/armnn/NetworkFwd.hpp                                           7
-rw-r--r--  src/armnn/AsyncNetwork.cpp                                           665
-rw-r--r--  src/armnn/AsyncNetwork.hpp                                           106
-rw-r--r--  src/armnn/LoadedNetwork.cpp                                          362
-rw-r--r--  src/armnn/LoadedNetwork.hpp                                           33
-rw-r--r--  src/armnn/Runtime.cpp                                                135
-rw-r--r--  src/armnn/Runtime.hpp                                                 20
-rw-r--r--  src/armnn/WorkingMemHandle.cpp                                         4
-rw-r--r--  src/armnn/WorkingMemHandle.hpp                                        12
-rw-r--r--  src/backends/backendsCommon/test/StridedSliceAsyncEndToEndTest.hpp    12
16 files changed, 528 insertions, 937 deletions
diff --git a/Android.mk b/Android.mk
index 806d81bcd5..416c00238c 100644
--- a/Android.mk
+++ b/Android.mk
@@ -108,7 +108,6 @@ LOCAL_SRC_FILES := \
profiling/server/src/timelineDecoder/TimelineCaptureCommandHandler.cpp \
profiling/server/src/timelineDecoder/TimelineDecoder.cpp \
profiling/server/src/timelineDecoder/TimelineDirectoryCaptureCommandHandler.cpp \
- src/armnn/AsyncNetwork.cpp \
src/armnn/BackendHelper.cpp \
src/armnn/BackendRegistry.cpp \
src/armnn/Descriptors.cpp \
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 62417bebb3..049a4f1e1b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -242,7 +242,6 @@ list(APPEND armnn_sources
include/armnn/Descriptors.hpp
include/armnn/DescriptorsFwd.hpp
include/armnn/Exceptions.hpp
- include/armnn/IAsyncNetwork.hpp
include/armnn/ILayerSupport.hpp
include/armnn/ILayerVisitor.hpp
include/armnn/INetwork.hpp
@@ -408,8 +407,6 @@ list(APPEND armnn_sources
src/armnn/layers/TransposeLayer.cpp
src/armnn/layers/UnmapLayer.cpp
src/armnn/layers/UnmapLayer.hpp
- src/armnn/AsyncNetwork.cpp
- src/armnn/AsyncNetwork.hpp
src/armnn/BackendRegistry.cpp
src/armnn/BackendSettings.hpp
src/armnn/BackendHelper.cpp
diff --git a/include/armnn/ArmNN.hpp b/include/armnn/ArmNN.hpp
index ac4d33f737..e4d5ce1fa1 100644
--- a/include/armnn/ArmNN.hpp
+++ b/include/armnn/ArmNN.hpp
@@ -7,9 +7,9 @@
#include "BackendId.hpp"
#include "Descriptors.hpp"
#include "Exceptions.hpp"
-#include "IAsyncNetwork.hpp"
#include "INetwork.hpp"
#include "IRuntime.hpp"
+#include "IWorkingMemHandle.hpp"
#include "LstmParams.hpp"
#include "Optional.hpp"
#include "QuantizedLstmParams.hpp"
diff --git a/include/armnn/IAsyncNetwork.hpp b/include/armnn/IAsyncNetwork.hpp
deleted file mode 100644
index c234ae55ac..0000000000
--- a/include/armnn/IAsyncNetwork.hpp
+++ /dev/null
@@ -1,64 +0,0 @@
-//
-// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
-// SPDX-License-Identifier: MIT
-//
-
-#pragma once
-
-#include <armnn/NetworkFwd.hpp>
-
-#include "INetwork.hpp"
-#include "IProfiler.hpp"
-#include "IWorkingMemHandle.hpp"
-#include "Tensor.hpp"
-#include "Types.hpp"
-
-#include <mutex>
-
-namespace armnn
-{
-struct INetworkProperties;
-
-namespace profiling
-{
-class ProfilingService;
-}
-
-namespace experimental
-{
-class AsyncNetworkImpl;
-
-class IAsyncNetwork
-{
-public:
- IAsyncNetwork(std::unique_ptr<IOptimizedNetwork> net,
- const INetworkProperties& networkProperties,
- profiling::ProfilingService& profilingService);
- ~IAsyncNetwork();
-
- TensorInfo GetInputTensorInfo(LayerBindingId layerId) const;
- TensorInfo GetOutputTensorInfo(LayerBindingId layerId) const;
-
- /// Thread safe execution of the network. Returns once execution is complete.
- /// Will block until this and any other thread using the same workingMem object completes.
- Status Execute(const InputTensors& inputTensors,
- const OutputTensors& outputTensors,
- IWorkingMemHandle& workingMemHandle);
-
- /// Create a new unique WorkingMemHandle object. Create multiple handles if you wish to have
- /// overlapped Execution by calling this function from different threads.
- std::unique_ptr<IWorkingMemHandle> CreateWorkingMemHandle();
-
- /// Get the profiler used for this network
- std::shared_ptr<IProfiler> GetProfiler() const;
-
- /// Register a debug callback function to be used with this network
- void RegisterDebugCallback(const DebugCallbackFunction& func);
-
-private:
- std::unique_ptr<AsyncNetworkImpl> pAsyncNetworkImpl;
-};
-
-} // end experimental namespace
-
-} // end armnn namespace
diff --git a/include/armnn/IRuntime.hpp b/include/armnn/IRuntime.hpp
index 9f7032914f..fc203e67e4 100644
--- a/include/armnn/IRuntime.hpp
+++ b/include/armnn/IRuntime.hpp
@@ -5,9 +5,9 @@
#pragma once
#include "BackendOptions.hpp"
-#include "IAsyncNetwork.hpp"
#include "INetwork.hpp"
#include "IProfiler.hpp"
+#include "IWorkingMemHandle.hpp"
#include "Tensor.hpp"
#include "Types.hpp"
#include "TypesUtils.hpp"
@@ -28,12 +28,14 @@ using IRuntimePtr = std::unique_ptr<IRuntime, void(*)(IRuntime* runtime)>;
struct INetworkProperties
{
- INetworkProperties(bool importEnabled = false, bool exportEnabled = false)
+ INetworkProperties(bool importEnabled = false, bool exportEnabled = false, bool asyncEnabled = false)
: m_ImportEnabled(importEnabled),
- m_ExportEnabled(exportEnabled) {}
+ m_ExportEnabled(exportEnabled),
+ m_AsyncEnabled(asyncEnabled) {}
const bool m_ImportEnabled;
const bool m_ExportEnabled;
+ const bool m_AsyncEnabled;
virtual ~INetworkProperties() {}
};
@@ -145,20 +147,6 @@ public:
std::string& errorMessage,
const INetworkProperties& networkProperties);
- /// This is an experimental function.
- /// Creates an executable network. This network is thread safe allowing for multiple networks to be
- /// loaded simultaneously via different threads.
- /// Note that the network is never registered with the runtime so does not need to be 'Unloaded'.
- /// @param [out] networkIdOut Unique identifier for the network is returned in this reference.
- /// @param [in] network Complete network to load into the IRuntime.
- /// @param [out] errorMessage Error message if there were any errors.
- /// @param [out] networkProperties the INetworkProperties that govern how the network should operate.
- /// @return The IAsyncNetwork
- std::unique_ptr<IAsyncNetwork> CreateAsyncNetwork(NetworkId& networkIdOut,
- IOptimizedNetworkPtr network,
- std::string& errorMessage,
- const INetworkProperties& networkProperties);
-
TensorInfo GetInputTensorInfo(NetworkId networkId, LayerBindingId layerId) const;
TensorInfo GetOutputTensorInfo(NetworkId networkId, LayerBindingId layerId) const;
@@ -167,6 +155,14 @@ public:
const InputTensors& inputTensors,
const OutputTensors& outputTensors);
+ /// This is an experimental function.
+ /// Evaluates a network using input in inputTensors and outputs filled into outputTensors.
+ /// This function performs a thread safe execution of the network. Returns once execution is complete.
+ /// Will block until this and any other thread using the same workingMem object completes.
+ Status Execute(IWorkingMemHandle& workingMemHandle,
+ const InputTensors& inputTensors,
+ const OutputTensors& outputTensors);
+
/// Unloads a network from the IRuntime.
/// At the moment this only removes the network from the m_Impl->m_Network.
/// This might need more work in the future to be AndroidNN compliant.
@@ -176,6 +172,10 @@ public:
const IDeviceSpec& GetDeviceSpec() const;
+ /// Create a new unique WorkingMemHandle object. Create multiple handles if you wish to have
+ /// overlapped Execution by calling this function from different threads.
+ std::unique_ptr<IWorkingMemHandle> CreateWorkingMemHandle(NetworkId networkId);
+
/// Gets the profiler corresponding to the given network id.
/// @param networkId The id of the network for which to get the profile.
/// @return A pointer to the requested profiler, or nullptr if not found.
diff --git a/include/armnn/IWorkingMemHandle.hpp b/include/armnn/IWorkingMemHandle.hpp
index 921b7e1f40..171fa3d81c 100644
--- a/include/armnn/IWorkingMemHandle.hpp
+++ b/include/armnn/IWorkingMemHandle.hpp
@@ -10,6 +10,8 @@
namespace armnn
{
+using NetworkId = int;
+
namespace experimental
{
@@ -20,6 +22,9 @@ class IWorkingMemHandle
public:
virtual ~IWorkingMemHandle() {};
+ /// Returns the NetworkId of the Network that this IWorkingMemHandle works with.
+ virtual NetworkId GetNetworkId() = 0;
+
/// Allocate the backing memory required for execution. If this is not called, then allocation will be
/// deferred to execution time. The mutex must be locked.
virtual void Allocate() = 0;
diff --git a/include/armnn/NetworkFwd.hpp b/include/armnn/NetworkFwd.hpp
index 6c2970f28b..5db9ec4ebe 100644
--- a/include/armnn/NetworkFwd.hpp
+++ b/include/armnn/NetworkFwd.hpp
@@ -10,13 +10,6 @@ namespace armnn
struct LstmInputParams;
struct QuantizedLstmInputParams;
-namespace experimental
-{
-
-class IAsyncNetwork;
-
-} // end experimental namespace
-
class INetwork;
class IOptimizedNetwork;
class Graph;
diff --git a/src/armnn/AsyncNetwork.cpp b/src/armnn/AsyncNetwork.cpp
deleted file mode 100644
index 230346a0c3..0000000000
--- a/src/armnn/AsyncNetwork.cpp
+++ /dev/null
@@ -1,665 +0,0 @@
-//
-// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
-// SPDX-License-Identifier: MIT
-//
-
-#include "AsyncNetwork.hpp"
-#include "Graph.hpp"
-#include "Layer.hpp"
-#include "Profiling.hpp"
-
-#include <armnn/BackendHelper.hpp>
-#include <armnn/BackendRegistry.hpp>
-#include <armnn/Logging.hpp>
-#include <armnn/utility/Assert.hpp>
-
-#include <armnn/backends/IMemoryManager.hpp>
-#include <backendsCommon/CpuTensorHandle.hpp>
-#include <backendsCommon/WorkloadData.hpp>
-#include <backendsCommon/MemCopyWorkload.hpp>
-#include <LabelsAndEventClasses.hpp>
-
-#include <fmt/format.h>
-
-namespace armnn
-{
-
-namespace experimental
-{
-
-IAsyncNetwork::IAsyncNetwork(std::unique_ptr<IOptimizedNetwork> net,
- const INetworkProperties& networkProperties,
- profiling::ProfilingService& profilingService)
- : pAsyncNetworkImpl( new AsyncNetworkImpl(std::move(net), networkProperties, profilingService)) {};
-
-IAsyncNetwork::~IAsyncNetwork() = default;
-
-TensorInfo IAsyncNetwork::GetInputTensorInfo(LayerBindingId layerId) const
-{
- return pAsyncNetworkImpl->GetInputTensorInfo(layerId);
-}
-
-TensorInfo IAsyncNetwork::GetOutputTensorInfo(LayerBindingId layerId) const
-{
- return pAsyncNetworkImpl->GetOutputTensorInfo(layerId);
-}
-
-Status IAsyncNetwork::Execute(const InputTensors& inputTensors,
- const OutputTensors& outputTensors,
- IWorkingMemHandle& workingMemHandle)
-{
- return pAsyncNetworkImpl->Execute(inputTensors, outputTensors, workingMemHandle);
-}
-
-std::unique_ptr<IWorkingMemHandle> IAsyncNetwork::CreateWorkingMemHandle()
-{
- return pAsyncNetworkImpl->CreateWorkingMemHandle();
-}
-
-std::shared_ptr<IProfiler> IAsyncNetwork::GetProfiler() const
-{
- return pAsyncNetworkImpl->GetProfiler();
-}
-
-void IAsyncNetwork::RegisterDebugCallback(const DebugCallbackFunction& func)
-{
- pAsyncNetworkImpl->RegisterDebugCallback(func);
-}
-
-void AddLayerStructure(std::unique_ptr<profiling::TimelineUtilityMethods>& timelineUtils,
- const Layer& layer,
- profiling::ProfilingGuid networkGuid)
-{
- // Add layer to the post-optimisation network structure
- std::string layerName = layer.GetNameStr().empty() ? "<Unnamed>" : layer.GetNameStr();
- timelineUtils->CreateNamedTypedChildEntity(layer.GetGuid(),
- networkGuid,
- layerName,
- profiling::LabelsAndEventClasses::LAYER_GUID);
- for (auto&& input : layer.GetInputSlots())
- {
- const IOutputSlot* source = input.GetConnectedOutputSlot();
- ARMNN_ASSERT(source != NULL);
- timelineUtils->CreateConnectionRelationship(profiling::ProfilingRelationshipType::RetentionLink,
- source->GetOwningLayerGuid(),
- layer.GetGuid());
- }
-}
-
-void AddWorkloadStructure(std::unique_ptr<profiling::TimelineUtilityMethods>& timelineUtils,
- std::unique_ptr<IWorkload>& workload,
- const Layer& layer)
-{
- // Add workload to the post-optimisation network structure
- timelineUtils->CreateTypedEntity(workload->GetGuid(), profiling::LabelsAndEventClasses::WORKLOAD_GUID);
- timelineUtils->MarkEntityWithLabel(workload->GetGuid(),
- layer.GetBackendId().Get(),
- profiling::LabelsAndEventClasses::BACKENDID_GUID);
-
- // Link the workload to the layer
- timelineUtils->CreateRelationship(profiling::ProfilingRelationshipType::RetentionLink,
- layer.GetGuid(),
- workload->GetGuid(),
- profiling::LabelsAndEventClasses::CHILD_GUID);
-}
-
-TensorInfo AsyncNetworkImpl::GetInputTensorInfo(LayerBindingId layerId) const
-{
- for (auto&& inputLayer : m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetInputLayers())
- {
- ARMNN_ASSERT_MSG(inputLayer->GetNumOutputSlots() == 1, "Input layer should have exactly 1 output slot");
- if (inputLayer->GetBindingId() == layerId)
- {
- return inputLayer->GetOutputSlot(0).GetTensorInfo();
- }
- }
-
- throw InvalidArgumentException(fmt::format("No input layer is associated with id {0}}", layerId));
-}
-
-TensorInfo AsyncNetworkImpl::GetOutputTensorInfo(LayerBindingId layerId) const
-{
- for (auto&& outputLayer : m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetOutputLayers())
- {
- ARMNN_ASSERT_MSG(outputLayer->GetNumInputSlots() == 1, "Output layer should have exactly 1 input slot");
- ARMNN_ASSERT_MSG(outputLayer->GetInputSlot(0).GetConnection(), "Input slot on Output layer must be connected");
- if (outputLayer->GetBindingId() == layerId)
- {
- return outputLayer->GetInputSlot(0).GetConnection()->GetTensorInfo();
- }
- }
-
- throw InvalidArgumentException(fmt::format("No output layer is associated with id {0}}", layerId));
-}
-
-// Need something like the collectors to get the correct tensors for the inputs
-void AsyncNetworkImpl::CollectInputTensorHandles(
- std::unordered_map<LayerGuid, std::vector<ITensorHandle*> >& tensorHandles,
- std::vector<ITensorHandle*>& inputs,
- const armnn::Layer* layer,
- const TensorHandleFactoryRegistry& registry,
- const bool isMemoryManaged)
-{
- for (auto&& inputSlot : layer->GetInputSlots())
- {
- // The graph must be well-formed at this point.
- ARMNN_ASSERT(inputSlot.GetConnection());
- auto outputSlot = inputSlot.GetConnectedOutputSlot();
- auto key = outputSlot->GetOwningLayer().GetGuid();
- auto search = tensorHandles.find(key);
-
- if (search == tensorHandles.end())
- {
- ITensorHandleFactory::FactoryId factoryId = outputSlot->GetTensorHandleFactoryId();
- const TensorInfo& tensorInfo = outputSlot->GetTensorInfo();
-
- ARMNN_ASSERT(factoryId != ITensorHandleFactory::LegacyFactoryId);
- ITensorHandleFactory* handleFactory = registry.GetFactory(factoryId);
- ARMNN_ASSERT(handleFactory);
- std::unique_ptr<ITensorHandle> tensor = handleFactory->CreateTensorHandle(tensorInfo, isMemoryManaged);
- ITensorHandle* tensorPtr = tensor.release();
- inputs.push_back(tensorPtr);
- }
- else
- {
- unsigned int index = outputSlot->CalculateIndexOnOwner();
- inputs.push_back(search->second[index]);
- }
- }
-}
-
-void AsyncNetworkImpl::CreateOutputTensorHandles(
- std::unordered_map<LayerGuid, std::vector<ITensorHandle*> >& tensorHandles,
- std::vector<ITensorHandle*>& outputs,
- const armnn::Layer* layer,
- const TensorHandleFactoryRegistry& registry,
- const bool isMemoryManaged)
-{
- auto guid = layer->GetGuid();
- std::vector<ITensorHandle*> tensorHandleVectors;
- tensorHandleVectors.reserve(layer->GetNumOutputSlots());
-
- for (unsigned int idx=0; idx < layer->GetNumOutputSlots(); idx++)
- {
- const OutputSlot& slot = layer->GetOutputSlot(idx);
- ITensorHandleFactory::FactoryId factoryId = slot.GetTensorHandleFactoryId();
- const TensorInfo& tensorInfo = slot.GetTensorInfo();
-
- ARMNN_ASSERT(factoryId != ITensorHandleFactory::LegacyFactoryId);
- ITensorHandleFactory* handleFactory = registry.GetFactory(factoryId);
- ARMNN_ASSERT(handleFactory);
- std::unique_ptr<ITensorHandle> tensor = handleFactory->CreateTensorHandle(tensorInfo, isMemoryManaged);
- ITensorHandle* tensorPtr = tensor.release();
- outputs.push_back(tensorPtr);
- tensorHandleVectors.push_back(tensorPtr);
- }
- tensorHandles.insert({guid, tensorHandleVectors});
-}
-
-const IWorkloadFactory& AsyncNetworkImpl::GetWorkloadFactory(const Layer& layer) const
-{
- const IWorkloadFactory* workloadFactory = nullptr;
-
- auto it = m_WorkloadFactories.find(layer.GetBackendId());
- if (it == m_WorkloadFactories.end())
- {
- throw RuntimeException(
- fmt::format("No workload factory for {0} to be used for layer: {1}}",
- layer.GetBackendId().Get(),
- layer.GetNameStr()),
- CHECK_LOCATION());
- }
-
- workloadFactory = it->second.first.get();
-
- ARMNN_ASSERT_MSG(workloadFactory, "No workload factory");
-
- std::string reasonIfUnsupported;
- ARMNN_ASSERT_MSG(IWorkloadFactory::IsLayerSupported(layer, {}, reasonIfUnsupported),
- "Factory does not support layer");
- IgnoreUnused(reasonIfUnsupported);
- return *workloadFactory;
-}
-
-void AsyncNetworkImpl::EnqueueInput(const BindableLayer& layer,
- const ConstTensor& inputTensor,
- WorkingMemHandle& context)
-{
- if (layer.GetType() != LayerType::Input)
- {
- throw InvalidArgumentException("EnqueueInput: given layer not an InputLayer");
- }
- LayerGuid id = layer.GetOutputSlot(0).GetConnection(0)->GetOwningLayer().GetGuid();
- WorkingMemDescriptor descriptor = context.GetWorkingMemDescriptor(id);
- ARMNN_ASSERT_MSG(descriptor.m_Outputs.size() == 1, "Can only handle Input Layer with one output");
-
- MemorySourceFlags importFlags = descriptor.m_Outputs[0]->GetImportFlags();
- if (m_NetworkProperties.m_ImportEnabled) // Try import the input tensor
- {
- if (CheckFlag(importFlags, MemorySource::Malloc) )
- {
- // This assumes a CPU Tensor handle
- std::unique_ptr<ITensorHandle> tensorHandle =
- std::make_unique<ConstPassthroughCpuTensorHandle>(inputTensor.GetInfo(),
- inputTensor.GetMemoryArea());
-
- void* mem = tensorHandle->Map(false);
- if (descriptor.m_Outputs[0]->Import(mem, MemorySource::Malloc))
- {
- tensorHandle->Unmap();
- return;
- }
- tensorHandle->Unmap();
- throw MemoryImportException("EnqueueInput: Memory Import failed");
- }
- else
- {
- throw MemoryImportException("EnqueueInput: Memory Import failed, backend does not support Import");
- }
- }
- else
- {
- std::unique_ptr<ITensorHandle> tensorHandle =
- std::make_unique<ConstPassthroughCpuTensorHandle>(inputTensor.GetInfo(), inputTensor.GetMemoryArea());
-
- auto copyFunc = [](void* dst, const void* src, size_t size)
- {
- memcpy(dst, src, size);
- };
-
- for (const auto& input : descriptor.m_Inputs)
- {
- CopyTensorContentsGeneric(tensorHandle.get(), input, copyFunc);
- }
- }
-}
-
-void AsyncNetworkImpl::EnqueueOutput(const BindableLayer& layer, const Tensor& outputTensor, WorkingMemHandle& handle)
-{
- if (layer.GetType() != LayerType::Output)
- {
- throw InvalidArgumentException("EnqueueOutput: given layer not an OutputLayer");
- }
- ARMNN_ASSERT_MSG(layer.GetNumInputSlots() == 1, "Output Layer should have exactly one input.");
-
- LayerGuid id = layer.GetInputSlot(0).GetConnectedOutputSlot()->GetOwningLayerGuid();
- WorkingMemDescriptor descriptor = handle.GetWorkingMemDescriptor(id);
-
- ITensorHandle* inputTensorHandle = descriptor.m_Inputs[0];
- ARMNN_ASSERT_MSG(inputTensorHandle != nullptr, "Data should have been allocated.");
-
- // Try import the output tensor.
- // Note: We can only import the output pointer if all of the following hold true:
- // a) The imported pointer is aligned sufficiently
- // b) The tensor has zero padding
- // c) There is only one connection to the OutputSlot and it is to an OutputLayer.
- // d) The output pointer is allocated via malloc. (Other types will be supported in a later release)
- // e) m_IsExportEnabled must be set to true
- if (m_NetworkProperties.m_ExportEnabled &&
- (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetNumConnections() == 1))
- {
- if (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOwningLayer().GetType() != LayerType::Input)
- {
- MemorySourceFlags importFlags = inputTensorHandle->GetImportFlags();
- if (CheckFlag(importFlags, MemorySource::Malloc))
- {
- std::unique_ptr<ITensorHandle> tensorHandle =
- std::make_unique<PassthroughCpuTensorHandle>(outputTensor.GetInfo(),
- outputTensor.GetMemoryArea());
-
- void* mem = tensorHandle->Map(false);
- bool importOk = inputTensorHandle->Import(mem, MemorySource::Malloc);
- tensorHandle->Unmap();
-
- if (importOk)
- {
- ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "SyncMemGeneric_Execute");
- descriptor.m_Inputs[0]->Map(true);
- descriptor.m_Inputs[0]->Unmap();
- }
- else
- {
- throw MemoryExportException("EnqueueOutput: Memory Export failed");
- }
- }
- else
- {
- throw MemoryExportException("EnqueueOutput: Memory Export failed, backend does not support Export");
- }
- }
- else
- {
- throw MemoryExportException("EnqueueOutput: Memory Export failed, attempting to export Input Layer");
- }
- }
- else
- {
- auto copyFunc = [](void* dst, const void* src, size_t size)
- {
- memcpy(dst, src, size);
- };
-
- std::unique_ptr<ITensorHandle> tensorHandle =
- std::make_unique<PassthroughCpuTensorHandle>(outputTensor.GetInfo(), outputTensor.GetMemoryArea());
-
- CopyTensorContentsGeneric(descriptor.m_Outputs[0], tensorHandle.get(), copyFunc);
- }
-}
-
-AsyncNetworkImpl::AsyncNetworkImpl(std::unique_ptr<IOptimizedNetwork> net,
- const INetworkProperties& networkProperties,
- profiling::ProfilingService& profilingService) :
- m_OptimizedNetwork(std::move(net)),
- m_NetworkProperties(networkProperties),
- m_ProfilingService(profilingService)
-{
- // Create a profiler and register it for the current thread.
- m_Profiler = std::make_shared<IProfiler>();
- ProfilerManager::GetInstance().RegisterProfiler(m_Profiler.get());
-
- Graph &order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
-
- //First create tensor handlers, backends and workload factories.
- //Handlers are created before workloads are.
- //Because workload creation can modify some of the handlers,
- //(for example the splitter and concat layers).
- for (auto &&layer : order)
- {
- auto const &backendId = layer->GetBackendId();
- if (m_Backends.count(backendId) == 0)
- {
- auto createBackend = BackendRegistryInstance().GetFactory(backendId);
- auto it = m_Backends.emplace(std::make_pair(backendId, createBackend()));
-
- IBackendInternal* backend = it.first->second.get();
-
- if (backend->SupportsTensorAllocatorAPI())
- {
- backend->RegisterTensorHandleFactories(m_TensorHandleFactoryRegistry);
-
- auto workloadFactory = backend->CreateWorkloadFactory(m_TensorHandleFactoryRegistry);
- m_WorkloadFactories.emplace(
- std::make_pair(backendId, std::make_pair(std::move(workloadFactory), nullptr)));
- }
- else
- {
- IBackendInternal::IMemoryManagerSharedPtr memoryManager = backend->CreateMemoryManager();
- auto workloadFactory = backend->CreateWorkloadFactory(memoryManager);
-
- m_WorkloadFactories.emplace(
- std::make_pair(backendId, std::make_pair(std::move(workloadFactory), memoryManager)));
- }
- }
- }
-
- // Check backends support BackendCapability::AsyncExecution
- for (auto const& backend : m_Backends)
- {
- if (!IsCapabilitySupported(backend.first, BackendCapability::AsyncExecution))
- {
- ARMNN_LOG(warning) << fmt::format("AsyncNetworkImpl() Backend: '{0}' does not support Async Execution. "
- "Will fall back to default implementation.",
- backend.first.Get());
- }
-
- }
-
- profiling::ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
- std::unique_ptr<profiling::TimelineUtilityMethods> timelineUtils =
- profiling::TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService);
- if (timelineUtils)
- {
- timelineUtils->CreateTypedEntity(networkGuid, profiling::LabelsAndEventClasses::NETWORK_GUID);
- }
-
- //Then create workloads.
- for (auto &&layer : order)
- {
- if (timelineUtils)
- {
- // Add layer to the post-optimisation network structure
- AddLayerStructure(timelineUtils, *layer, networkGuid);
- }
-
- const IWorkloadFactory &workloadFactory = GetWorkloadFactory(*layer);
-
- switch (layer->GetType())
- {
- case LayerType::Input:
- case LayerType::Output:
- {
- // Inputs and outputs are treated in a special way - see EnqueueInput() and EnqueueOutput().
- break;
- }
- default:
- {
- auto workload = layer->CreateWorkload(workloadFactory);
-
- if (!workload)
- {
- const char* const layerName =
- layer->GetNameStr().length() != 0 ? layer->GetName() : "<Unnamed>";
- throw InvalidArgumentException(
- fmt::format("No workload created for layer (name: '{0}' type: '{1}') (compute '{2}')",
- layerName,
- static_cast<int>(layer->GetType()),
- layer->GetBackendId().Get()
- ));
- }
-
- if (timelineUtils)
- {
- // Add workload to the post-optimisation network structure
- AddWorkloadStructure(timelineUtils, workload, *layer);
- }
-
- m_WorkloadQueue.push_back(move(workload));
- // release the constant data in the layer..
- layer->ReleaseConstantData();
- break;
- }
- }
- }
-
- if (timelineUtils)
- {
- // Commit to send the post-optimisation network structure
- timelineUtils->Commit();
- }
-
- // Now that the intermediate tensor memory has been set-up, do any post allocation configuration for each workload.
- // PostAllocationConfiguure will now need to be handled in the ExecuteOn(WorkingMemDescriptor)
- for (auto &workload : m_WorkloadQueue)
- {
- workload->PostAllocationConfigure();
- }
-}
-
-Status AsyncNetworkImpl::Execute(const InputTensors& inputTensors,
- const OutputTensors& outputTensors,
- IWorkingMemHandle& iWorkingMemHandle)
-{
- const Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
-
- // Walk graph to determine the order of execution.
- if (graph.GetNumLayers() < 2)
- {
- ARMNN_LOG(warning) << "IRuntime::EnqueueWorkload()::Less than two nodes in graph";
- return Status::Failure;
- }
-
- if (graph.GetNumInputs() != inputTensors.size())
- {
- throw InvalidArgumentException("Number of inputs provided does not match network.");
- }
-
- std::unique_ptr<profiling::TimelineUtilityMethods> timelineUtils =
- profiling::TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService);
- profiling::ProfilingGuid inferenceGuid = m_ProfilingService.GetNextGuid();
- if (timelineUtils)
- {
- // Add inference timeline trace if profiling is enabled.
- profiling::ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
- timelineUtils->CreateTypedEntity(inferenceGuid, profiling::LabelsAndEventClasses::INFERENCE_GUID);
- timelineUtils->CreateRelationship(profiling::ProfilingRelationshipType::RetentionLink,
- networkGuid,
- inferenceGuid,
- profiling::LabelsAndEventClasses::EXECUTION_OF_GUID);
- timelineUtils->RecordEvent(inferenceGuid, profiling::LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
- }
-
- bool executionSucceeded = true;
-
- if (timelineUtils)
- {
- // Add end of life of the inference timeline if profiling is enabled.
- timelineUtils->RecordEvent(inferenceGuid, profiling::LabelsAndEventClasses::ARMNN_PROFILING_EOL_EVENT_CLASS);
- timelineUtils->Commit();
- }
- WorkingMemHandle& workingMemHandle = dynamic_cast<WorkingMemHandle&>(iWorkingMemHandle);
- std::lock_guard<std::mutex> lockGuard(workingMemHandle.GetMutex());
-
- if (!workingMemHandle.IsAllocated())
- {
- workingMemHandle.Allocate();
- }
-
- {
- ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "PrepareInputs");
- unsigned int i = 0;
-
- for (const BindableLayer* inputLayer : graph.GetInputLayers())
- {
- EnqueueInput(*inputLayer, inputTensors[i].second, workingMemHandle);
- ++i;
- }
- }
-
- auto Fail = [&](const std::exception& error)
- {
- ARMNN_LOG(error) << "An error occurred attempting to execute a workload: " << error.what();
- executionSucceeded = false;
- };
- profiling::ProfilingDynamicGuid workloadInferenceID(0);
-
- try
- {
- for (unsigned int i = 0; i < m_WorkloadQueue.size(); ++i)
- {
- auto& workload = m_WorkloadQueue[i];
- if (timelineUtils)
- {
- workloadInferenceID = timelineUtils->RecordWorkloadInferenceAndStartOfLifeEvent(workload->GetGuid(),
- inferenceGuid);
- }
- workload->ExecuteAsync(workingMemHandle.GetWorkingMemDescriptorAt(i));
-
- if (timelineUtils)
- {
- timelineUtils->RecordEndOfLifeEvent(workloadInferenceID);
- }
- }
- }
- catch (const RuntimeException& error)
- {
- Fail(error);
- }
- catch (const std::runtime_error& error)
- {
- Fail(error);
- }
- // For each output to the network, call EnqueueOutput with the data passed by the user.
- {
- ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "PrepareOutputs");
- unsigned int i = static_cast<unsigned int>(m_WorkloadQueue.size() - graph.GetNumOutputs());
-
- for (const BindableLayer* outputLayer : graph.GetOutputLayers())
- {
- EnqueueOutput(*outputLayer, outputTensors[i].second, workingMemHandle);
- ++i;
- }
- }
- return executionSucceeded ? Status::Success : Status::Failure;
-}
-
-/// Get the profiler used for this network
-std::shared_ptr<IProfiler> AsyncNetworkImpl::GetProfiler() const
-{
- return m_Profiler;
-}
-
-void AsyncNetworkImpl::RegisterDebugCallback(const DebugCallbackFunction& func)
-{
- for (auto&& workloadPtr: m_WorkloadQueue)
- {
- workloadPtr.get()->RegisterDebugCallback(func);
- }
-}
-
-/// Create a new unique WorkingMemHandle object. Create multiple handles if you wish to have
-/// overlapped Execution by calling this function from different threads.
-std::unique_ptr<IWorkingMemHandle> AsyncNetworkImpl::CreateWorkingMemHandle()
-{
- Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
- std::unordered_map<LayerGuid, std::vector<ITensorHandle*> > tensorHandles;
- std::vector<WorkingMemDescriptor> workingMemDescriptors;
- std::unordered_map<LayerGuid, WorkingMemDescriptor> workingMemDescriptorMap;
-
- for (auto&& layer : order)
- {
- if (layer->GetType() == LayerType::Input || layer->GetType() == LayerType::Output)
- {
- continue;
- }
- WorkingMemDescriptor workingMemDescriptor;
- // Look for the layer with 1 OutputSlot which has 1 connection and that connection is an Output Layer
- // If Export is enabled disable memory management so we can export, otherwise we do a copy
- if((layer->GetNumOutputSlots() == 1) &&
- (layer->GetOutputSlots()[0].GetNumConnections() == 1) &&
- (layer->GetOutputSlots()[0].GetConnection(0)->GetOwningLayer().GetType() == LayerType::Output))
- {
- CollectInputTensorHandles(tensorHandles,
- workingMemDescriptor.m_Inputs,
- layer,
- m_TensorHandleFactoryRegistry,
- !m_NetworkProperties.m_ExportEnabled);
- CreateOutputTensorHandles(tensorHandles,
- workingMemDescriptor.m_Outputs,
- layer,
- m_TensorHandleFactoryRegistry,
- !m_NetworkProperties.m_ExportEnabled);
- }
- else
- {
- CollectInputTensorHandles(tensorHandles,
- workingMemDescriptor.m_Inputs,
- layer,
- m_TensorHandleFactoryRegistry);
- CreateOutputTensorHandles(tensorHandles,
- workingMemDescriptor.m_Outputs,
- layer,
- m_TensorHandleFactoryRegistry);
- }
- workingMemDescriptorMap.insert({layer->GetGuid(), workingMemDescriptor});
- workingMemDescriptors.push_back(workingMemDescriptor);
- }
- return std::make_unique<WorkingMemHandle>(workingMemDescriptors, workingMemDescriptorMap);
-}
-
-void AsyncNetworkImpl::FreeWorkingMemory()
-{
- // Informs the memory managers to release memory in it's respective memory group
- for (auto&& workloadFactory : m_WorkloadFactories)
- {
- IBackendInternal::IMemoryManagerSharedPtr memoryManager = workloadFactory.second.second;
- if (memoryManager)
- {
- memoryManager->Release();
- }
- }
- m_TensorHandleFactoryRegistry.ReleaseMemory();
-}
-
-} // end experimental namespace
-
-} // end armnn namespace
diff --git a/src/armnn/AsyncNetwork.hpp b/src/armnn/AsyncNetwork.hpp
deleted file mode 100644
index 9bdc7eebd7..0000000000
--- a/src/armnn/AsyncNetwork.hpp
+++ /dev/null
@@ -1,106 +0,0 @@
-//
-// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
-// SPDX-License-Identifier: MIT
-//
-
-#pragma once
-
-#include <armnn/IAsyncNetwork.hpp>
-#include <armnn/Tensor.hpp>
-#include <armnn/Types.hpp>
-
-#include "LayerFwd.hpp"
-#include "Network.hpp"
-#include "Profiling.hpp"
-#include "WorkingMemHandle.hpp"
-
-#include <armnn/backends/IBackendInternal.hpp>
-#include <backendsCommon/TensorHandleFactoryRegistry.hpp>
-#include <backendsCommon/Workload.hpp>
-#include <backendsCommon/WorkloadFactory.hpp>
-#include <ProfilingService.hpp>
-#include <TimelineUtilityMethods.hpp>
-
-#include <unordered_map>
-
-namespace armnn
-{
-
-namespace experimental
-{
-
-class AsyncNetworkImpl final
-{
-public:
- using WorkloadQueue = std::vector<std::unique_ptr<IWorkload>>;
-
- AsyncNetworkImpl(std::unique_ptr<IOptimizedNetwork> net,
- const INetworkProperties &networkProperties,
- profiling::ProfilingService &profilingService);
-
- ~AsyncNetworkImpl() { FreeWorkingMemory(); }
-
- TensorInfo GetInputTensorInfo(LayerBindingId layerId) const;
- TensorInfo GetOutputTensorInfo(LayerBindingId layerId) const;
-
- /// Thread safe execution of the network. Returns once execution is complete.
- /// Will block until this and any other thread using the same workingMem object completes.
- virtual Status Execute(const InputTensors& inputTensors,
- const OutputTensors& outputTensors,
- IWorkingMemHandle& workingMemHandle);
-
- /// Create a new unique WorkingMemHandle object. Create multiple handles if you wish to have
- /// overlapped Execution by calling this function from different threads.
- std::unique_ptr<IWorkingMemHandle> CreateWorkingMemHandle();
-
- /// Get the profiler used for this network
- std::shared_ptr<IProfiler> GetProfiler() const;
-
- /// Register a debug callback function to be used with this network
- void RegisterDebugCallback(const DebugCallbackFunction& func);
-
-private:
- void FreeWorkingMemory();
-
- void CollectInputTensorHandles(std::unordered_map<LayerGuid, std::vector<ITensorHandle*> >& tensorHandles,
- std::vector<ITensorHandle*>& inputs,
- const armnn::Layer* layer,
- const TensorHandleFactoryRegistry& registry,
- const bool isMemoryManaged = false);
-
- void CreateOutputTensorHandles(std::unordered_map<LayerGuid, std::vector<ITensorHandle*> >& tensorHandles,
- std::vector<ITensorHandle*>& outputs,
- const armnn::Layer* layer,
- const TensorHandleFactoryRegistry& registry,
- const bool isMemoryManaged = false);
-
- void EnqueueInput(const BindableLayer& layer, const ConstTensor& inputTensor, WorkingMemHandle& handle);
-
- void EnqueueOutput(const BindableLayer& layer, const Tensor& outputTensor, WorkingMemHandle& handle);
-
- using BackendPtrMap = std::unordered_map<BackendId, IBackendInternalUniquePtr>;
-
- using WorkloadFactoryWithMemoryManager =
- std::pair<IBackendInternal::IWorkloadFactoryPtr, IBackendInternal::IMemoryManagerSharedPtr>;
-
- using WorkloadFactoryMap = std::unordered_map<BackendId, WorkloadFactoryWithMemoryManager>;
-
- const IWorkloadFactory& GetWorkloadFactory(const Layer& layer) const;
-
- BackendPtrMap m_Backends;
- WorkloadFactoryMap m_WorkloadFactories;
-
- std::unique_ptr<IOptimizedNetwork> m_OptimizedNetwork;
- INetworkProperties m_NetworkProperties;
- WorkloadQueue m_WorkloadQueue;
- std::shared_ptr<IProfiler> m_Profiler;
-
- TensorHandleFactoryRegistry m_TensorHandleFactoryRegistry;
-
- /// Profiling Service Instance
- profiling::ProfilingService& m_ProfilingService;
-};
-
-} // end experimental namespace
-
-} // end armnn namespace
diff --git a/src/armnn/LoadedNetwork.cpp b/src/armnn/LoadedNetwork.cpp
index ea09231c3c..d75a2021b2 100644
--- a/src/armnn/LoadedNetwork.cpp
+++ b/src/armnn/LoadedNetwork.cpp
@@ -10,6 +10,7 @@
#include <Processes.hpp>
#include "Profiling.hpp"
#include "HeapProfiling.hpp"
+#include "WorkingMemHandle.hpp"
#include <armnn/BackendRegistry.hpp>
#include <armnn/Logging.hpp>
@@ -119,8 +120,7 @@ LoadedNetwork::LoadedNetwork(std::unique_ptr<IOptimizedNetwork> net,
const INetworkProperties& networkProperties,
profiling::ProfilingService& profilingService) :
m_OptimizedNetwork(std::move(net)),
- m_IsImportEnabled(networkProperties.m_ImportEnabled),
- m_IsExportEnabled(networkProperties.m_ExportEnabled),
+ m_NetworkProperties(networkProperties),
m_TensorHandleFactoryRegistry(),
m_ProfilingService(profilingService)
{
@@ -172,7 +172,8 @@ LoadedNetwork::LoadedNetwork(std::unique_ptr<IOptimizedNetwork> net,
case LayerType::MemImport:
{
// If IsImportEnabled is true then we need to set IsMemoryManaged to false when creating TensorHandles
- layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory, !m_IsImportEnabled);
+ layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory,
+ !m_NetworkProperties.m_ImportEnabled);
break;
}
default:
@@ -183,7 +184,8 @@ LoadedNetwork::LoadedNetwork(std::unique_ptr<IOptimizedNetwork> net,
(layer->GetOutputSlots()[0].GetNumConnections() == 1) &&
(layer->GetOutputSlots()[0].GetConnection(0)->GetOwningLayer().GetType() == LayerType::Output))
{
- layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory, !m_IsExportEnabled);
+ layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory,
+ !m_NetworkProperties.m_ExportEnabled);
}
else
{
@@ -576,7 +578,7 @@ void LoadedNetwork::EnqueueInput(const BindableLayer& layer, ITensorHandle* tens
MemorySourceFlags importFlags = outputTensorHandle->GetImportFlags();
bool needMemCopy = true;
- if (m_IsImportEnabled) // Try import the input tensor
+ if (m_NetworkProperties.m_ImportEnabled) // Try import the input tensor
{
if(CheckFlag(importFlags, MemorySource::Malloc) )
{
@@ -647,7 +649,8 @@ void LoadedNetwork::EnqueueOutput(const BindableLayer& layer, ITensorHandle* ten
// d) The output pointer is allocated via malloc. (Other types will be supported in a later release)
// e) m_IsExportEnabled must be set to true
bool needMemCopy = true;
- if (m_IsExportEnabled && (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetNumConnections() == 1))
+ if (m_NetworkProperties.m_ExportEnabled &&
+ (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetNumConnections() == 1))
{
if(layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOwningLayer().GetType() != LayerType::Input)
{
@@ -792,6 +795,353 @@ bool LoadedNetwork::Execute(std::unique_ptr<TimelineUtilityMethods>& timelineUti
return success;
}
+void LoadedNetwork::EnqueueInput(const BindableLayer& layer,
+ const ConstTensor& inputTensor,
+ WorkingMemHandle& context)
+{
+ if (layer.GetType() != LayerType::Input)
+ {
+ throw InvalidArgumentException("EnqueueInput: given layer not an InputLayer");
+ }
+ LayerGuid id = layer.GetOutputSlot(0).GetConnection(0)->GetOwningLayer().GetGuid();
+ WorkingMemDescriptor descriptor = context.GetWorkingMemDescriptor(id);
+ ARMNN_ASSERT_MSG(descriptor.m_Outputs.size() == 1, "Can only handle Input Layer with one output");
+
+ MemorySourceFlags importFlags = descriptor.m_Outputs[0]->GetImportFlags();
+ if (m_NetworkProperties.m_ImportEnabled) // Try import the input tensor
+ {
+ if (CheckFlag(importFlags, MemorySource::Malloc) )
+ {
+ // This assumes a CPU Tensor handle
+ std::unique_ptr<ITensorHandle> tensorHandle =
+ std::make_unique<ConstPassthroughCpuTensorHandle>(inputTensor.GetInfo(),
+ inputTensor.GetMemoryArea());
+
+ void* mem = tensorHandle->Map(false);
+ if (descriptor.m_Outputs[0]->Import(mem, MemorySource::Malloc))
+ {
+ tensorHandle->Unmap();
+ return;
+ }
+ tensorHandle->Unmap();
+ throw MemoryImportException("EnqueueInput: Memory Import failed");
+ }
+ else
+ {
+ throw MemoryImportException("EnqueueInput: Memory Import failed, backend does not support Import");
+ }
+ }
+ else
+ {
+ std::unique_ptr<ITensorHandle> tensorHandle =
+ std::make_unique<ConstPassthroughCpuTensorHandle>(inputTensor.GetInfo(), inputTensor.GetMemoryArea());
+
+ auto copyFunc = [](void* dst, const void* src, size_t size)
+ {
+ memcpy(dst, src, size);
+ };
+
+ for (const auto& input : descriptor.m_Inputs)
+ {
+ CopyTensorContentsGeneric(tensorHandle.get(), input, copyFunc);
+ }
+ }
+}
+
+void LoadedNetwork::EnqueueOutput(const BindableLayer& layer, const Tensor& outputTensor, WorkingMemHandle& handle)
+{
+ if (layer.GetType() != LayerType::Output)
+ {
+ throw InvalidArgumentException("EnqueueOutput: given layer not an OutputLayer");
+ }
+ ARMNN_ASSERT_MSG(layer.GetNumInputSlots() == 1, "Output Layer should have exactly one input.");
+
+ LayerGuid id = layer.GetInputSlot(0).GetConnectedOutputSlot()->GetOwningLayerGuid();
+ WorkingMemDescriptor descriptor = handle.GetWorkingMemDescriptor(id);
+
+ ITensorHandle* inputTensorHandle = descriptor.m_Inputs[0];
+ ARMNN_ASSERT_MSG(inputTensorHandle != nullptr, "Data should have been allocated.");
+
+ // Try import the output tensor.
+ // Note: We can only import the output pointer if all of the following hold true:
+ // a) The imported pointer is aligned sufficiently
+ // b) The tensor has zero padding
+ // c) There is only one connection to the OutputSlot and it is to an OutputLayer.
+ // d) The output pointer is allocated via malloc. (Other types will be supported in a later release)
+ // e) m_IsExportEnabled must be set to true
+ if (m_NetworkProperties.m_ExportEnabled &&
+ (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetNumConnections() == 1))
+ {
+ if (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOwningLayer().GetType() != LayerType::Input)
+ {
+ MemorySourceFlags importFlags = inputTensorHandle->GetImportFlags();
+ if (CheckFlag(importFlags, MemorySource::Malloc))
+ {
+ std::unique_ptr<ITensorHandle> tensorHandle =
+ std::make_unique<PassthroughCpuTensorHandle>(outputTensor.GetInfo(),
+ outputTensor.GetMemoryArea());
+
+ void* mem = tensorHandle->Map(false);
+ bool importOk = inputTensorHandle->Import(mem, MemorySource::Malloc);
+ tensorHandle->Unmap();
+
+ if (importOk)
+ {
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "SyncMemGeneric_Execute");
+ descriptor.m_Inputs[0]->Map(true);
+ descriptor.m_Inputs[0]->Unmap();
+ }
+ else
+ {
+ throw MemoryExportException("EnqueueOutput: Memory Export failed");
+ }
+ }
+ else
+ {
+ throw MemoryExportException("EnqueueOutput: Memory Export failed, backend does not support Export");
+ }
+ }
+ else
+ {
+ throw MemoryExportException("EnqueueOutput: Memory Export failed, attempting to export Input Layer");
+ }
+ }
+ else
+ {
+ auto copyFunc = [](void* dst, const void* src, size_t size)
+ {
+ memcpy(dst, src, size);
+ };
+
+ std::unique_ptr<ITensorHandle> tensorHandle =
+ std::make_unique<PassthroughCpuTensorHandle>(outputTensor.GetInfo(), outputTensor.GetMemoryArea());
+
+ CopyTensorContentsGeneric(descriptor.m_Outputs[0], tensorHandle.get(), copyFunc);
+ }
+}
+
+Status LoadedNetwork::Execute(const InputTensors& inputTensors,
+ const OutputTensors& outputTensors,
+ IWorkingMemHandle& iWorkingMemHandle)
+{
+ const Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
+
+ // Walk graph to determine the order of execution.
+ if (graph.GetNumLayers() < 2)
+ {
+ ARMNN_LOG(warning) << "IRuntime::EnqueueWorkload()::Less than two nodes in graph";
+ return Status::Failure;
+ }
+
+ if (graph.GetNumInputs() != inputTensors.size())
+ {
+ throw InvalidArgumentException("Number of inputs provided does not match network.");
+ }
+
+ std::unique_ptr<profiling::TimelineUtilityMethods> timelineUtils =
+ profiling::TimelineUtilityMethods::GetTimelineUtils(m_ProfilingService);
+ profiling::ProfilingGuid inferenceGuid = m_ProfilingService.GetNextGuid();
+ if (timelineUtils)
+ {
+ // Add inference timeline trace if profiling is enabled.
+ profiling::ProfilingGuid networkGuid = m_OptimizedNetwork->GetGuid();
+ timelineUtils->CreateTypedEntity(inferenceGuid, profiling::LabelsAndEventClasses::INFERENCE_GUID);
+ timelineUtils->CreateRelationship(profiling::ProfilingRelationshipType::RetentionLink,
+ networkGuid,
+ inferenceGuid,
+ profiling::LabelsAndEventClasses::EXECUTION_OF_GUID);
+ timelineUtils->RecordEvent(inferenceGuid, profiling::LabelsAndEventClasses::ARMNN_PROFILING_SOL_EVENT_CLASS);
+ }
+
+ bool executionSucceeded = true;
+
+ if (timelineUtils)
+ {
+ // Add end of life of the inference timeline if profiling is enabled.
+ timelineUtils->RecordEvent(inferenceGuid, profiling::LabelsAndEventClasses::ARMNN_PROFILING_EOL_EVENT_CLASS);
+ timelineUtils->Commit();
+ }
+ WorkingMemHandle& workingMemHandle = dynamic_cast<WorkingMemHandle&>(iWorkingMemHandle);
+ std::lock_guard<std::mutex> lockGuard(workingMemHandle.GetMutex());
+
+ if (!workingMemHandle.IsAllocated())
+ {
+ workingMemHandle.Allocate();
+ }
+
+ {
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "PrepareInputs");
+ unsigned int i = 0;
+
+ for (const BindableLayer* inputLayer : graph.GetInputLayers())
+ {
+ EnqueueInput(*inputLayer, inputTensors[i].second, workingMemHandle);
+ ++i;
+ }
+ }
+
+ auto Fail = [&](const std::exception& error)
+ {
+ ARMNN_LOG(error) << "An error occurred attempting to execute a workload: " << error.what();
+ executionSucceeded = false;
+ };
+ profiling::ProfilingDynamicGuid workloadInferenceID(0);
+
+ try
+ {
+ for (unsigned int i = 0; i < m_WorkloadQueue.size(); ++i)
+ {
+ auto& workload = m_WorkloadQueue[i];
+ if (timelineUtils)
+ {
+ workloadInferenceID = timelineUtils->RecordWorkloadInferenceAndStartOfLifeEvent(workload->GetGuid(),
+ inferenceGuid);
+ }
+ workload->ExecuteAsync(workingMemHandle.GetWorkingMemDescriptorAt(i));
+
+ if (timelineUtils)
+ {
+ timelineUtils->RecordEndOfLifeEvent(workloadInferenceID);
+ }
+ }
+ }
+ catch (const RuntimeException& error)
+ {
+ Fail(error);
+ }
+ catch (const std::runtime_error& error)
+ {
+ Fail(error);
+ }
+ // For each output to the network, call EnqueueOutput with the data passed by the user.
+ {
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "PrepareOutputs");
+ unsigned int i = static_cast<unsigned int>(m_WorkloadQueue.size() - graph.GetNumOutputs());
+
+ for (const BindableLayer* outputLayer : graph.GetOutputLayers())
+ {
+ EnqueueOutput(*outputLayer, outputTensors[i].second, workingMemHandle);
+ ++i;
+ }
+ }
+ return executionSucceeded ? Status::Success : Status::Failure;
+}
+// Need something like the collectors to get the correct tensors for the inputs
+void LoadedNetwork::CollectInputTensorHandles(
+ std::unordered_map<LayerGuid, std::vector<ITensorHandle*> >& tensorHandles,
+ std::vector<ITensorHandle*>& inputs,
+ const armnn::Layer* layer,
+ const TensorHandleFactoryRegistry& registry,
+ const bool isMemoryManaged)
+{
+ for (auto&& inputSlot : layer->GetInputSlots())
+ {
+ // The graph must be well-formed at this point.
+ ARMNN_ASSERT(inputSlot.GetConnection());
+ auto outputSlot = inputSlot.GetConnectedOutputSlot();
+ auto key = outputSlot->GetOwningLayer().GetGuid();
+ auto search = tensorHandles.find(key);
+
+ if (search == tensorHandles.end())
+ {
+ ITensorHandleFactory::FactoryId factoryId = outputSlot->GetTensorHandleFactoryId();
+ const TensorInfo& tensorInfo = outputSlot->GetTensorInfo();
+
+ ARMNN_ASSERT(factoryId != ITensorHandleFactory::LegacyFactoryId);
+ ITensorHandleFactory* handleFactory = registry.GetFactory(factoryId);
+ ARMNN_ASSERT(handleFactory);
+ std::unique_ptr<ITensorHandle> tensor = handleFactory->CreateTensorHandle(tensorInfo, isMemoryManaged);
+ ITensorHandle* tensorPtr = tensor.release();
+ inputs.push_back(tensorPtr);
+ }
+ else
+ {
+ unsigned int index = outputSlot->CalculateIndexOnOwner();
+ inputs.push_back(search->second[index]);
+ }
+ }
+}
+
+void LoadedNetwork::CreateOutputTensorHandles(
+ std::unordered_map<LayerGuid, std::vector<ITensorHandle*> >& tensorHandles,
+ std::vector<ITensorHandle*>& outputs,
+ const armnn::Layer* layer,
+ const TensorHandleFactoryRegistry& registry,
+ const bool isMemoryManaged)
+{
+ auto guid = layer->GetGuid();
+ std::vector<ITensorHandle*> tensorHandleVectors;
+ tensorHandleVectors.reserve(layer->GetNumOutputSlots());
+
+ for (unsigned int idx=0; idx < layer->GetNumOutputSlots(); idx++)
+ {
+ const OutputSlot& slot = layer->GetOutputSlot(idx);
+ ITensorHandleFactory::FactoryId factoryId = slot.GetTensorHandleFactoryId();
+ const TensorInfo& tensorInfo = slot.GetTensorInfo();
+
+ ARMNN_ASSERT(factoryId != ITensorHandleFactory::LegacyFactoryId);
+ ITensorHandleFactory* handleFactory = registry.GetFactory(factoryId);
+ ARMNN_ASSERT(handleFactory);
+ std::unique_ptr<ITensorHandle> tensor = handleFactory->CreateTensorHandle(tensorInfo, isMemoryManaged);
+ ITensorHandle* tensorPtr = tensor.release();
+ outputs.push_back(tensorPtr);
+ tensorHandleVectors.push_back(tensorPtr);
+ }
+ tensorHandles.insert({guid, tensorHandleVectors});
+}
+
+/// Create a new unique WorkingMemHandle object. Create multiple handles if you wish to have
+/// overlapped Execution by calling this function from different threads.
+std::unique_ptr<IWorkingMemHandle> LoadedNetwork::CreateWorkingMemHandle(NetworkId networkId)
+{
+ Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph();
+ std::unordered_map<LayerGuid, std::vector<ITensorHandle*> > tensorHandles;
+ std::vector<WorkingMemDescriptor> workingMemDescriptors;
+ std::unordered_map<LayerGuid, WorkingMemDescriptor> workingMemDescriptorMap;
+
+ for (auto&& layer : order)
+ {
+ if (layer->GetType() == LayerType::Input || layer->GetType() == LayerType::Output)
+ {
+ continue;
+ }
+ WorkingMemDescriptor workingMemDescriptor;
+ // Look for the layer with 1 OutputSlot which has 1 connection and that connection is an Output Layer
+ // If Export is enabled disable memory management so we can export, otherwise we do a copy
+ if((layer->GetNumOutputSlots() == 1) &&
+ (layer->GetOutputSlots()[0].GetNumConnections() == 1) &&
+ (layer->GetOutputSlots()[0].GetConnection(0)->GetOwningLayer().GetType() == LayerType::Output))
+ {
+ CollectInputTensorHandles(tensorHandles,
+ workingMemDescriptor.m_Inputs,
+ layer,
+ m_TensorHandleFactoryRegistry,
+ !m_NetworkProperties.m_ExportEnabled);
+ CreateOutputTensorHandles(tensorHandles,
+ workingMemDescriptor.m_Outputs,
+ layer,
+ m_TensorHandleFactoryRegistry,
+ !m_NetworkProperties.m_ExportEnabled);
+ }
+ else
+ {
+ CollectInputTensorHandles(tensorHandles,
+ workingMemDescriptor.m_Inputs,
+ layer,
+ m_TensorHandleFactoryRegistry);
+ CreateOutputTensorHandles(tensorHandles,
+ workingMemDescriptor.m_Outputs,
+ layer,
+ m_TensorHandleFactoryRegistry);
+ }
+ workingMemDescriptorMap.insert({layer->GetGuid(), workingMemDescriptor});
+ workingMemDescriptors.push_back(workingMemDescriptor);
+ }
+ return std::make_unique<WorkingMemHandle>(networkId,
+ workingMemDescriptors,
+ workingMemDescriptorMap);
+}
+
void LoadedNetwork::RegisterDebugCallback(const DebugCallbackFunction& func)
{
for (auto&& workloadPtr: m_WorkloadQueue)
diff --git a/src/armnn/LoadedNetwork.hpp b/src/armnn/LoadedNetwork.hpp
index c7dd37fdea..2bcf5c8c08 100644
--- a/src/armnn/LoadedNetwork.hpp
+++ b/src/armnn/LoadedNetwork.hpp
@@ -37,11 +37,19 @@ public:
using WorkloadQueue = std::vector< std::unique_ptr<IWorkload> >;
~LoadedNetwork(){ FreeWorkingMemory(); }
+ /// Create a new unique WorkingMemHandle object. Create multiple handles if you wish to have
+ /// overlapped Execution by calling this function from different threads.
+ std::unique_ptr<IWorkingMemHandle> CreateWorkingMemHandle(NetworkId networkId);
+
TensorInfo GetInputTensorInfo(LayerBindingId layerId) const;
TensorInfo GetOutputTensorInfo(LayerBindingId layerId) const;
Status EnqueueWorkload(const InputTensors& inputTensors, const OutputTensors& outputTensors);
+ Status Execute(const InputTensors& inputTensors,
+ const OutputTensors& outputTensors,
+ IWorkingMemHandle& workingMemHandle);
+
static std::unique_ptr<LoadedNetwork> MakeLoadedNetwork(std::unique_ptr<IOptimizedNetwork> net,
std::string & errorMessage,
const INetworkProperties& networkProperties,
@@ -58,6 +66,11 @@ public:
void SendNetworkStructure();
+ bool IsAsyncEnabled()
+ {
+ return m_NetworkProperties.m_AsyncEnabled;
+ }
+
profiling::ProfilingGuid GetNetworkGuid();
private:
@@ -67,14 +80,29 @@ private:
const INetworkProperties& networkProperties,
profiling::ProfilingService& profilingService);
+ void CollectInputTensorHandles(std::unordered_map<LayerGuid, std::vector<ITensorHandle*> >& tensorHandles,
+ std::vector<ITensorHandle*>& inputs,
+ const armnn::Layer* layer,
+ const TensorHandleFactoryRegistry& registry,
+ const bool isMemoryManaged = false);
+
+ void CreateOutputTensorHandles(std::unordered_map<LayerGuid, std::vector<ITensorHandle*> >& tensorHandles,
+ std::vector<ITensorHandle*>& outputs,
+ const armnn::Layer* layer,
+ const TensorHandleFactoryRegistry& registry,
+ const bool isMemoryManaged = false);
+
void EnqueueInput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo);
void EnqueueOutput(const BindableLayer& layer, ITensorHandle* tensorHandle, const TensorInfo& tensorInfo);
+ void EnqueueInput(const BindableLayer& layer, const ConstTensor& inputTensor, WorkingMemHandle& handle);
+
+ void EnqueueOutput(const BindableLayer& layer, const Tensor& outputTensor, WorkingMemHandle& handle);
+
bool Execute(std::unique_ptr<profiling::TimelineUtilityMethods>& timelineUtils,
profiling::ProfilingGuid inferenceGuid);
-
const IWorkloadFactory& GetWorkloadFactory(const Layer& layer) const;
using BackendPtrMap = std::unordered_map<BackendId, IBackendInternalUniquePtr>;
@@ -96,8 +124,7 @@ private:
mutable std::mutex m_WorkingMemMutex;
bool m_IsWorkingMemAllocated=false;
- bool m_IsImportEnabled=false;
- bool m_IsExportEnabled=false;
+ INetworkProperties m_NetworkProperties;
TensorHandleFactoryRegistry m_TensorHandleFactoryRegistry;
diff --git a/src/armnn/Runtime.cpp b/src/armnn/Runtime.cpp
index 57aaabd277..91a21d4b53 100644
--- a/src/armnn/Runtime.cpp
+++ b/src/armnn/Runtime.cpp
@@ -64,14 +64,6 @@ Status IRuntime::LoadNetwork(NetworkId& networkIdOut,
return pRuntimeImpl->LoadNetwork(networkIdOut, std::move(network), errorMessage, networkProperties);
}
-std::unique_ptr<IAsyncNetwork> IRuntime::CreateAsyncNetwork(NetworkId& networkIdOut,
- IOptimizedNetworkPtr network,
- std::string& errorMessage,
- const INetworkProperties& networkProperties)
-{
- return pRuntimeImpl->CreateAsyncNetwork(networkIdOut, std::move(network), errorMessage, networkProperties);
-}
-
TensorInfo IRuntime::GetInputTensorInfo(NetworkId networkId, LayerBindingId layerId) const
{
return pRuntimeImpl->GetInputTensorInfo(networkId, layerId);
@@ -89,6 +81,13 @@ Status IRuntime::EnqueueWorkload(NetworkId networkId,
return pRuntimeImpl->EnqueueWorkload(networkId, inputTensors, outputTensors);
}
+Status IRuntime::Execute(IWorkingMemHandle& workingMemHandle,
+ const InputTensors& inputTensors,
+ const OutputTensors& outputTensors)
+{
+ return pRuntimeImpl->Execute(workingMemHandle, inputTensors, outputTensors);
+}
+
Status IRuntime::UnloadNetwork(NetworkId networkId)
{
return pRuntimeImpl->UnloadNetwork(networkId);
@@ -99,6 +98,11 @@ const IDeviceSpec& IRuntime::GetDeviceSpec() const
return pRuntimeImpl->GetDeviceSpec();
}
+std::unique_ptr<IWorkingMemHandle> IRuntime::CreateWorkingMemHandle(NetworkId networkId)
+{
+ return pRuntimeImpl->CreateWorkingMemHandle(networkId);
+}
+
const std::shared_ptr<IProfiler> IRuntime::GetProfiler(NetworkId networkId) const
{
return pRuntimeImpl->GetProfiler(networkId);
@@ -173,43 +177,6 @@ Status RuntimeImpl::LoadNetwork(NetworkId& networkIdOut,
return Status::Success;
}
-std::unique_ptr<IAsyncNetwork> RuntimeImpl::CreateAsyncNetwork(NetworkId& networkIdOut,
- IOptimizedNetworkPtr network,
- std::string&,
- const INetworkProperties& networkProperties)
-{
- IOptimizedNetwork* rawNetwork = network.release();
-
- networkIdOut = GenerateNetworkId();
-
- for (auto&& context : m_BackendContexts)
- {
- context.second->BeforeLoadNetwork(networkIdOut);
- }
-
- unique_ptr<IAsyncNetwork> asyncNetwork = std::make_unique<IAsyncNetwork>(
- std::unique_ptr<IOptimizedNetwork>(rawNetwork),
- networkProperties,
- m_ProfilingService);
-
- if (!asyncNetwork)
- {
- return nullptr;
- }
-
- for (auto&& context : m_BackendContexts)
- {
- context.second->AfterLoadNetwork(networkIdOut);
- }
-
- if (m_ProfilingService.IsProfilingEnabled())
- {
- m_ProfilingService.IncrementCounterValue(armnn::profiling::NETWORK_LOADS);
- }
-
- return asyncNetwork;
-}
-
Status RuntimeImpl::UnloadNetwork(NetworkId networkId)
{
bool unloadOk = true;
@@ -430,6 +397,17 @@ Status RuntimeImpl::EnqueueWorkload(NetworkId networkId,
const OutputTensors& outputTensors)
{
LoadedNetwork* loadedNetwork = GetLoadedNetworkPtr(networkId);
+
+ if (!loadedNetwork)
+ {
+ ARMNN_LOG(error) << "A Network with an id of " << networkId << " does not exist.\n";
+ return Status::Failure;
+ }
+ if (loadedNetwork->IsAsyncEnabled())
+ {
+ ARMNN_LOG(error) << "Network " << networkId << " is async enabled.\n";
+ return Status::Failure;
+ }
ProfilerManager::GetInstance().RegisterProfiler(loadedNetwork->GetProfiler().get());
ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "EnqueueWorkload");
@@ -447,6 +425,73 @@ Status RuntimeImpl::EnqueueWorkload(NetworkId networkId,
return loadedNetwork->EnqueueWorkload(inputTensors, outputTensors);
}
+Status RuntimeImpl::Execute(IWorkingMemHandle& iWorkingMemHandle,
+ const InputTensors& inputTensors,
+ const OutputTensors& outputTensors)
+{
+ NetworkId networkId = iWorkingMemHandle.GetNetworkId();
+ LoadedNetwork* loadedNetwork = GetLoadedNetworkPtr(networkId);
+
+ if (!loadedNetwork)
+ {
+ ARMNN_LOG(error) << "A Network with an id of " << networkId << " does not exist.\n";
+ return Status::Failure;
+ }
+ if (!loadedNetwork->IsAsyncEnabled())
+ {
+ ARMNN_LOG(error) << "Network " << networkId << " is not async enabled.\n";
+ return Status::Failure;
+ }
+ ProfilerManager::GetInstance().RegisterProfiler(loadedNetwork->GetProfiler().get());
+
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Execute");
+
+ static thread_local NetworkId lastId = networkId;
+ if (lastId != networkId)
+ {
+ LoadedNetworkFuncSafe(lastId, [](LoadedNetwork* network)
+ {
+ network->FreeWorkingMemory();
+ });
+ }
+    lastId = networkId;
+
+ return loadedNetwork->Execute(inputTensors, outputTensors, iWorkingMemHandle);
+}
+
+/// Create a new unique WorkingMemHandle object. Create multiple handles if you wish to have
+/// overlapped Execution by calling this function from different threads.
+std::unique_ptr<IWorkingMemHandle> RuntimeImpl::CreateWorkingMemHandle(NetworkId networkId)
+{
+ LoadedNetwork* loadedNetwork = GetLoadedNetworkPtr(networkId);
+
+ if (!loadedNetwork)
+ {
+ ARMNN_LOG(error) << "A Network with an id of " << networkId << " does not exist.\n";
+ return nullptr;
+ }
+ if (!loadedNetwork->IsAsyncEnabled())
+ {
+ ARMNN_LOG(error) << "Network " << networkId << " is not async enabled.\n";
+ return nullptr;
+ }
+ ProfilerManager::GetInstance().RegisterProfiler(loadedNetwork->GetProfiler().get());
+
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "CreateWorkingMemHandle");
+
+ static thread_local NetworkId lastId = networkId;
+ if (lastId != networkId)
+ {
+ LoadedNetworkFuncSafe(lastId, [](LoadedNetwork* network)
+ {
+ network->FreeWorkingMemory();
+ });
+ }
+    lastId = networkId;
+
+ return loadedNetwork->CreateWorkingMemHandle(networkId);
+}
+
void RuntimeImpl::RegisterDebugCallback(NetworkId networkId, const DebugCallbackFunction& func)
{
LoadedNetwork* loadedNetwork = GetLoadedNetworkPtr(networkId);
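
Editor's note: with IAsyncNetwork removed, asynchronous inference now goes through the same LoadNetwork path as synchronous inference. The runtime dispatches on LoadedNetwork::IsAsyncEnabled(), so EnqueueWorkload refuses async-enabled networks and Execute refuses synchronous ones, and the thread_local lastId bookkeeping above frees the previous network's working memory when a thread switches to a different network. A minimal sketch of the new call sequence follows; it assumes the same headers and using-declarations as the test file further down, that `runtime`, `optNet` and the input/output tensors already exist, and that the third INetworkProperties flag is the async switch (as the test change suggests):

    // Sketch only: `runtime` (IRuntimePtr), `optNet` (IOptimizedNetworkPtr) and the
    // populated inputTensors/outputTensors are assumed to exist already.
    NetworkId networkId = 0;
    std::string errorMessage;
    INetworkProperties networkProperties(false, false, true); // third flag assumed to enable async

    runtime->LoadNetwork(networkId, std::move(optNet), errorMessage, networkProperties);

    // Async path: working memory lives in the handle, not in the LoadedNetwork.
    std::unique_ptr<IWorkingMemHandle> handle = runtime->CreateWorkingMemHandle(networkId);
    Status status = runtime->Execute(*handle, inputTensors, outputTensors);

    // Synchronous path; returns Status::Failure for async-enabled networks:
    // Status syncStatus = runtime->EnqueueWorkload(networkId, inputTensors, outputTensors);

Note that the argument order also changes relative to the removed IAsyncNetwork::Execute: the working-memory handle now comes first, followed by the tensors.
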
diff --git a/src/armnn/Runtime.hpp b/src/armnn/Runtime.hpp
index 150012eb61..da5445383f 100644
--- a/src/armnn/Runtime.hpp
+++ b/src/armnn/Runtime.hpp
@@ -4,7 +4,6 @@
//
#pragma once
-#include "AsyncNetwork.hpp"
#include "LoadedNetwork.hpp"
#include "DeviceSpec.hpp"
@@ -56,17 +55,14 @@ public:
TensorInfo GetInputTensorInfo(NetworkId networkId, LayerBindingId layerId) const;
TensorInfo GetOutputTensorInfo(NetworkId networkId, LayerBindingId layerId) const;
- // Create Aysnchronous Network from the IOptimizedNetowrkPtr
- std::unique_ptr<IAsyncNetwork> CreateAsyncNetwork(NetworkId& networkIdOut,
- IOptimizedNetworkPtr network,
- std::string& errorMessage,
- const INetworkProperties& networkProperties);
-
-
// Evaluates network using input in inputTensors, outputs filled into outputTensors.
Status EnqueueWorkload(NetworkId networkId,
- const InputTensors& inputTensors,
- const OutputTensors& outputTensors);
+ const InputTensors& inputTensors,
+ const OutputTensors& outputTensors);
+
+ Status Execute(IWorkingMemHandle& workingMemHandle,
+ const InputTensors& inputTensors,
+ const OutputTensors& outputTensors);
/// Unloads a network from the Runtime.
/// At the moment this only removes the network from the m_Impl->m_Network.
@@ -82,6 +78,10 @@ public:
/// @return A pointer to the requested profiler, or nullptr if not found.
const std::shared_ptr<IProfiler> GetProfiler(NetworkId networkId) const;
+ /// Create a new unique WorkingMemHandle object. Create multiple handles if you wish to have
+ /// overlapped Execution by calling this function from different threads.
+ std::unique_ptr<IWorkingMemHandle> CreateWorkingMemHandle(NetworkId networkId);
+
/// Registers a callback function to debug layers performing custom computations on intermediate tensors.
/// @param networkId The id of the network to register the callback.
/// @param func callback function to pass to the debug layer.
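
Editor's note: the doc comment on CreateWorkingMemHandle states the intended concurrency model, one handle per thread, all bound to the same loaded network, with executions overlapping freely. A hedged sketch of that pattern, assuming an async-enabled `networkId` is already loaded on `runtime` and that MakeTensorsForThread is a hypothetical helper building per-thread tensors:

    #include <thread>
    #include <vector>

    std::vector<std::thread> workers;
    for (unsigned int i = 0; i < 2; ++i)
    {
        workers.emplace_back([&runtime, networkId, i]()
        {
            // Each thread owns its own working-memory handle, so executions can overlap.
            auto handle = runtime->CreateWorkingMemHandle(networkId);
            InputTensors inputs;
            OutputTensors outputs;
            MakeTensorsForThread(i, inputs, outputs); // hypothetical helper
            runtime->Execute(*handle, inputs, outputs);
        });
    }
    for (auto& worker : workers)
    {
        worker.join();
    }
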
diff --git a/src/armnn/WorkingMemHandle.cpp b/src/armnn/WorkingMemHandle.cpp
index 7a901b296b..c1a48d482f 100644
--- a/src/armnn/WorkingMemHandle.cpp
+++ b/src/armnn/WorkingMemHandle.cpp
@@ -13,8 +13,10 @@ namespace armnn
namespace experimental
{
-WorkingMemHandle::WorkingMemHandle(std::vector<WorkingMemDescriptor> workingMemDescriptors,
+WorkingMemHandle::WorkingMemHandle(NetworkId networkId,
+ std::vector<WorkingMemDescriptor> workingMemDescriptors,
std::unordered_map<LayerGuid, WorkingMemDescriptor> workingMemDescriptorMap) :
+ m_NetworkId(networkId),
m_WorkingMemDescriptors(workingMemDescriptors),
m_WorkingMemDescriptorMap(workingMemDescriptorMap),
m_IsAllocated(false),
diff --git a/src/armnn/WorkingMemHandle.hpp b/src/armnn/WorkingMemHandle.hpp
index 090f180206..cef6fb6fd3 100644
--- a/src/armnn/WorkingMemHandle.hpp
+++ b/src/armnn/WorkingMemHandle.hpp
@@ -24,10 +24,17 @@ class WorkingMemHandle final : public IWorkingMemHandle
{
public:
- WorkingMemHandle(std::vector<WorkingMemDescriptor> workingMemDescriptors,
+ WorkingMemHandle(NetworkId networkId,
+ std::vector<WorkingMemDescriptor> workingMemDescriptors,
std::unordered_map<LayerGuid, WorkingMemDescriptor> workingMemDescriptorMap);
- ~WorkingMemHandle() { FreeWorkingMemory(); }
+ ~WorkingMemHandle()
+ { FreeWorkingMemory(); }
+
+ NetworkId GetNetworkId() override
+ {
+ return m_NetworkId;
+ }
/// Allocate the backing memory required for execution. If this is not called, then allocation will be
/// deferred to execution time. The mutex must be locked.
@@ -106,6 +113,7 @@ public:
private:
void FreeWorkingMemory();
+ NetworkId m_NetworkId;
std::shared_ptr<ProfilerImpl> m_Profiler;
std::vector<WorkingMemDescriptor> m_WorkingMemDescriptors;
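
Editor's note: storing the NetworkId inside the handle is what lets IRuntime::Execute take only a handle plus tensors; the owning LoadedNetwork is recovered via GetNetworkId(), so callers never pass a network id and a handle that could disagree. A short sketch, reusing the runtime and async-enabled networkId assumed in the earlier example:

    auto handle = runtime->CreateWorkingMemHandle(networkId);
    NetworkId owner = handle->GetNetworkId(); // same value as networkId
    // Execute resolves the LoadedNetwork from the handle itself.
    runtime->Execute(*handle, inputTensors, outputTensors);
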
diff --git a/src/backends/backendsCommon/test/StridedSliceAsyncEndToEndTest.hpp b/src/backends/backendsCommon/test/StridedSliceAsyncEndToEndTest.hpp
index 2ccd2b13af..66ccdbf1d9 100644
--- a/src/backends/backendsCommon/test/StridedSliceAsyncEndToEndTest.hpp
+++ b/src/backends/backendsCommon/test/StridedSliceAsyncEndToEndTest.hpp
@@ -40,15 +40,15 @@ void AsyncEndToEndTestImpl(INetworkPtr network,
// Creates AsyncNetwork
NetworkId networkId = 0;
std::string errorMessage;
- const INetworkProperties networkProperties;
- auto asyncNetwork = runtime->CreateAsyncNetwork(networkId, std::move(optNet), errorMessage, networkProperties);
+ const INetworkProperties networkProperties(false, false, true);
+ runtime->LoadNetwork(networkId, std::move(optNet), errorMessage, networkProperties);
InputTensors inputTensors;
inputTensors.reserve(inputTensorData.size());
for (auto&& it : inputTensorData)
{
inputTensors.push_back({it.first,
- ConstTensor(asyncNetwork->GetInputTensorInfo(it.first), it.second.data())});
+ ConstTensor(runtime->GetInputTensorInfo(networkId, it.first), it.second.data())});
}
OutputTensors outputTensors;
@@ -59,16 +59,16 @@ void AsyncEndToEndTestImpl(INetworkPtr network,
std::vector<TOutput> out(it.second.size());
outputStorage.emplace(it.first, out);
outputTensors.push_back({it.first,
- Tensor(asyncNetwork->GetOutputTensorInfo(it.first),
+ Tensor(runtime->GetOutputTensorInfo(networkId, it.first),
outputStorage.at(it.first).data())});
}
// Create WorkingMemHandle for this async network
- std::unique_ptr<IWorkingMemHandle> workingMemHandle = asyncNetwork->CreateWorkingMemHandle();
+ std::unique_ptr<IWorkingMemHandle> workingMemHandle = runtime->CreateWorkingMemHandle(networkId);
IWorkingMemHandle& workingMemHandleRef = *workingMemHandle.get();
// Run the async network
- asyncNetwork->Execute(inputTensors, outputTensors, workingMemHandleRef);
+ runtime->Execute(workingMemHandleRef, inputTensors, outputTensors);
// Checks the results.
for (auto&& it : expectedOutputData)