diff options
author | Derek Lamberti <derek.lamberti@arm.com> | 2019-06-13 11:40:08 +0100 |
---|---|---|
committer | Derek Lamberti <derek.lamberti@arm.com> | 2019-06-24 15:00:15 +0000 |
commit | 84da38b0f11ca3db0a439e510514be780f3933ff (patch) | |
tree | 56532f4842abc1ad00ae57bc20ddc72cada59b4c | |
parent | 9515c7ec4f4535fff2c8f2d3f88974474d3f3468 (diff) | |
download | armnn-84da38b0f11ca3db0a439e510514be780f3933ff.tar.gz |
IVGCVSW-3277 Refactor TensorHandle factory API
* Added backend support for multiple types of TensorHandle factories
* Refactored the backend API to enable new tensor strategies
* Added mechanism to determine memory strategies during optimization
* Perform mem-copy only when Direct access is not found
* Explicitly deleted the copy-constructor from OutputSlot to prevent
  accidental local copies that would cause DisconnectAll to be
  called by the destructor
Change-Id: I7e812c8e5e6c1c20db1c5932749ac70fd93db7f8
Signed-off-by: Derek Lamberti <derek.lamberti@arm.com>
Signed-off-by: Matteo Martincigh <matteo.martincigh@arm.com>
29 files changed, 1220 insertions, 103 deletions
diff --git a/Android.mk b/Android.mk index 0eb2e011f8..b516e696f9 100644 --- a/Android.mk +++ b/Android.mk @@ -263,7 +263,8 @@ LOCAL_SRC_FILES := \ src/armnn/test/ProfilingEventTest.cpp \ src/armnn/test/ObservableTest.cpp \ src/armnn/test/OptionalTest.cpp \ - src/armnn/test/TestUtils.cpp + src/armnn/test/TestUtils.cpp \ + src/armnn/test/TensorHandleStrategyTest.cpp LOCAL_STATIC_LIBRARIES := \ libneuralnetworks_common \ diff --git a/CMakeLists.txt b/CMakeLists.txt index 8fa2bf904d..836465ab08 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -484,6 +484,7 @@ if(BUILD_UNIT_TESTS) src/armnn/test/RuntimeTests.cpp src/armnn/test/RuntimeTests.hpp src/armnn/test/SubgraphViewTests.cpp + src/armnn/test/TensorHandleStrategyTest.cpp src/armnn/test/TensorHelpers.hpp src/armnn/test/TensorTest.cpp src/armnn/test/TestInputOutputLayerVisitor.cpp diff --git a/src/armnn/Graph.cpp b/src/armnn/Graph.cpp index a5c85092b0..e521623737 100644 --- a/src/armnn/Graph.cpp +++ b/src/armnn/Graph.cpp @@ -7,6 +7,9 @@ #include "SubgraphView.hpp" #include "LayersFwd.hpp" +#include <backendsCommon/IBackendInternal.hpp> + +#include <armnn/BackendId.hpp> #include <armnn/Utils.hpp> #include <armnn/TypesUtils.hpp> @@ -252,53 +255,96 @@ const Graph& Graph::TopologicalSort() const return *this; } -void Graph::AddCopyLayers() +void Graph::AddCopyLayers(std::map<BackendId, std::unique_ptr<IBackendInternal>>& backends, + TensorHandleFactoryRegistry& registry) { // Returns true if the given layer could potentially need an intermediate copy layer (depending on its // connections to other layers). At the time of writing, copy layers will be inserted in the following situations: // CPU -> CL (and viceversa) // CPU -> Neon (and viceversa) auto MayNeedCopyLayer = [](const Layer& layer) - { - // All layers should have been associated with a valid compute device at this point. - BOOST_ASSERT(layer.GetBackendId() != Compute::Undefined); - // Does not need another copy layer if a copy layer is already present. 
- return layer.GetType() != LayerType::MemCopy && - // Input and Output layers can perform their own copies internally. - layer.GetType() != LayerType::Input && - layer.GetType() != LayerType::Output; - }; - - for (auto&& srcLayer : m_Layers) { - if (MayNeedCopyLayer(*srcLayer)) + // All layers should have been associated with a valid compute device at this point. + BOOST_ASSERT(layer.GetBackendId() != Compute::Undefined); + // Does not need another copy layer if a copy layer is already present. + return layer.GetType() != LayerType::MemCopy; + }; + + ForEachLayer([this, &backends, ®istry, MayNeedCopyLayer](Layer* srcLayer) + { + BOOST_ASSERT(srcLayer); + + if (!MayNeedCopyLayer(*srcLayer)) + { + // The current layer does not need copy layers, move to the next one + return; + } + + const std::vector<OutputSlot>& srcOutputSlots = srcLayer->GetOutputSlots(); + for (unsigned int srcOutputIndex = 0; srcOutputIndex < srcOutputSlots.size(); srcOutputIndex++) { - unsigned int srcOutputIndex = 0; - for (auto&& srcOutput : srcLayer->GetOutputSlots()) + OutputSlot& srcOutputSlot = srcLayer->GetOutputSlot(srcOutputIndex); + const std::vector<InputSlot*> srcConnections = srcOutputSlot.GetConnections(); + for (unsigned int srcConnectionIndex = 0; srcConnectionIndex < srcConnections.size(); srcConnectionIndex++) { - std::vector<InputSlot*> connectionCopy = srcOutput.GetConnections(); - for (auto&& dstInput : connectionCopy) + InputSlot* dstInputSlot = srcConnections[srcConnectionIndex]; + BOOST_ASSERT(dstInputSlot); + + auto strategy = srcOutputSlot.GetMemoryStrategyForConnection(srcConnectionIndex); + BOOST_ASSERT_MSG(strategy != MemoryStrategy::Undefined, + "Undefined memory strategy found while adding copy layers for compatibility"); + + const Layer& dstLayer = dstInputSlot->GetOwningLayer(); + if (MayNeedCopyLayer(dstLayer) && + strategy == MemoryStrategy::CopyToTarget) { - Layer& dstLayer = dstInput->GetOwningLayer(); - if (MayNeedCopyLayer(dstLayer) && 
(dstLayer.GetBackendId() != srcLayer->GetBackendId())) + // A copy layer is needed in between the source and destination layers. + // Record the operation rather than attempting to modify the graph as we go. + // (invalidating iterators) + const std::string copyLayerName = boost::str(boost::format("[ %1% (%2%) -> %3% (%4%) ]") + % srcLayer->GetName() + % srcOutputIndex + % dstLayer.GetName() + % dstInputSlot->GetSlotIndex()); + + MemCopyLayer* const copyLayer = InsertNewLayer<MemCopyLayer>(*dstInputSlot, copyLayerName.c_str()); + copyLayer->SetBackendId(dstLayer.GetBackendId()); + + OutputSlot& copyOutputSlot = copyLayer->GetOutputSlot(0); + auto backendIt = backends.find(dstLayer.GetBackendId()); + if (backendIt != backends.end() && + backendIt->second && + backendIt->second->SupportsTensorAllocatorAPI()) + { + auto backend = backendIt->second.get(); + auto tensorHandleFactoryIds = backend->GetHandleFactoryPreferences(); + bool found = false; + boost::ignore_unused(found); + + for (auto preference : tensorHandleFactoryIds) + { + auto factory = registry.GetFactory(preference); + if (factory && factory->SupportsMapUnmap()) + { + copyOutputSlot.SetTensorHandleFactory(preference); + found = true; + break; + } + } + + BOOST_ASSERT_MSG(found, "Could not find a mappable TensorHandle for copy layer"); + } + else { - // A copy layer is needed in between the source and destination layers. - // Record the operation rather than attempting to modify the graph as we go. 
- // (invalidating iterators) - const std::string copyLayerName = boost::str(boost::format("[ %1% (%2%) -> %3% (%4%) ]") - % srcLayer->GetName() - % srcOutputIndex - % dstLayer.GetName() - % dstInput->GetSlotIndex()); - - MemCopyLayer* const copyLayer = InsertNewLayer<MemCopyLayer>(*dstInput, copyLayerName.c_str()); - copyLayer->SetBackendId(dstLayer.GetBackendId()); + copyOutputSlot.SetTensorHandleFactory(ITensorHandleFactory::LegacyFactoryId); } + + copyOutputSlot.SetMemoryStrategy(0, MemoryStrategy::DirectCompatibility); + srcOutputSlot.SetMemoryStrategy(srcConnectionIndex, MemoryStrategy::DirectCompatibility); } - ++srcOutputIndex; } } - } + }); } void Graph::SubstituteSubgraph(SubgraphView& subgraph, IConnectableLayer* substituteLayer) diff --git a/src/armnn/Graph.hpp b/src/armnn/Graph.hpp index 47e0e3b317..f8113375c9 100644 --- a/src/armnn/Graph.hpp +++ b/src/armnn/Graph.hpp @@ -191,7 +191,8 @@ public: /// Modifies the graph in-place, removing edges connecting layers using different compute devices, /// and relinking them via an intermediary copy layers. - void AddCopyLayers(); + void AddCopyLayers(std::map<BackendId, std::unique_ptr<class IBackendInternal>>& backends, + TensorHandleFactoryRegistry& registry); /// Substitutes the given sub-graph with either a new layer or a new sub-graph. /// In either case, the given layer or all the layers in the given sub-graph must belong to this graph. diff --git a/src/armnn/Layer.cpp b/src/armnn/Layer.cpp index ced87b095c..a287220702 100644 --- a/src/armnn/Layer.cpp +++ b/src/armnn/Layer.cpp @@ -30,7 +30,8 @@ void InputSlot::Insert(Layer& layer) // Connects inserted layer to parent. BOOST_ASSERT(layer.GetNumInputSlots() == 1); - prevSlot->Connect(layer.GetInputSlot(0)); + int idx = prevSlot->Connect(layer.GetInputSlot(0)); + prevSlot->SetMemoryStrategy(boost::numeric_cast<unsigned int>(idx), MemoryStrategy::Undefined); // Sets tensor info for inserted layer. 
const TensorInfo& tensorInfo = prevSlot->GetTensorInfo(); @@ -39,6 +40,7 @@ void InputSlot::Insert(Layer& layer) // Connects inserted layer to this. layer.GetOutputSlot(0).Connect(*this); + layer.GetOutputSlot(0).SetMemoryStrategy(0, MemoryStrategy::Undefined); } const InputSlot* OutputSlot::GetConnection(unsigned int index) const @@ -78,13 +80,24 @@ int OutputSlot::Connect(InputSlot& destination) { destination.SetConnection(this); m_Connections.push_back(&destination); + m_MemoryStrategies.push_back(MemoryStrategy::Undefined); return boost::numeric_cast<int>(m_Connections.size() - 1); } void OutputSlot::Disconnect(InputSlot& slot) { slot.SetConnection(nullptr); + auto it = std::find(m_Connections.begin(), m_Connections.end(), &slot); + + if (it == m_Connections.end()) + { + return; + } + + auto idx = std::distance(m_Connections.begin(), it); m_Connections.erase(std::remove(m_Connections.begin(), m_Connections.end(), &slot), m_Connections.end()); + + m_MemoryStrategies.erase(m_MemoryStrategies.begin() + idx); } void OutputSlot::DisconnectAll() @@ -100,6 +113,9 @@ void OutputSlot::MoveAllConnections(OutputSlot& destination) { while (GetNumConnections() > 0) { + BOOST_ASSERT_MSG(m_MemoryStrategies[0] == MemoryStrategy::Undefined, + "Cannot move connections once memory strategies have be established."); + InputSlot& connection = *GetConnection(0); Disconnect(connection); destination.Connect(connection); @@ -148,6 +164,26 @@ LayerGuid OutputSlot::GetOwningLayerGuid() const return GetOwningLayer().GetGuid(); } +void OutputSlot::SetTensorHandleFactory(const ITensorHandleFactory::FactoryId& id) +{ + m_TensorHandleFactoryId = id; +} + +ITensorHandleFactory::FactoryId OutputSlot::GetTensorHandleFactoryId() const +{ + return m_TensorHandleFactoryId; +} + +void OutputSlot::SetMemoryStrategy(unsigned int connectionIndex, MemoryStrategy strategy) +{ + m_MemoryStrategies[connectionIndex] = strategy; +} + +MemoryStrategy OutputSlot::GetMemoryStrategyForConnection(unsigned int 
connectionIdx) const +{ + return m_MemoryStrategies[connectionIdx]; +} + namespace { LayerGuid GenerateLayerGuid() { @@ -208,11 +244,26 @@ void Layer::CollectWorkloadOutputs(WorkloadDataCollector& dataCollector, const G } } -void Layer::CreateTensorHandles(Graph& graph, const IWorkloadFactory& factory) +void Layer::CreateTensorHandles(const TensorHandleFactoryRegistry& registry, const IWorkloadFactory& workloadFactory) { - for (auto&& outputHandler : m_OutputHandlers) + for (unsigned int idx=0; idx < GetNumOutputSlots(); idx++) { - outputHandler.CreateTensorHandles(factory); + + OutputSlot& slot = GetOutputSlot(idx); + ITensorHandleFactory::FactoryId factoryId = slot.GetTensorHandleFactoryId(); + + OutputHandler& handler = GetOutputHandler(idx); + if (factoryId == ITensorHandleFactory::LegacyFactoryId) + { + handler.CreateTensorHandles(workloadFactory); + } + else + { + ITensorHandleFactory* handleFactory = registry.GetFactory(factoryId); + BOOST_ASSERT(handleFactory); + + handler.CreateTensorHandles(*handleFactory); + } } } diff --git a/src/armnn/Layer.hpp b/src/armnn/Layer.hpp index cbb1771668..1ddbc00bc7 100644 --- a/src/armnn/Layer.hpp +++ b/src/armnn/Layer.hpp @@ -6,7 +6,9 @@ #include "LayerFwd.hpp" +#include <backendsCommon/ITensorHandleFactory.hpp> #include <backendsCommon/OutputHandler.hpp> +#include <backendsCommon/TensorHandleFactoryRegistry.hpp> #include <backendsCommon/WorkloadDataCollector.hpp> #include <backendsCommon/WorkloadInfo.hpp> #include "InternalTypes.hpp" @@ -84,8 +86,15 @@ public: explicit OutputSlot(Layer& owner, OutputHandler& outputHandler) : m_OwningLayer(owner) , m_OutputHandler(outputHandler) + , m_TensorHandleFactoryId(ITensorHandleFactory::LegacyFactoryId) {} + OutputSlot(const OutputSlot&) = delete; + OutputSlot& operator=(const OutputSlot&) = delete; + + OutputSlot(OutputSlot&&) = default; + OutputSlot& operator=(OutputSlot&&) = default; + ~OutputSlot() { try @@ -147,12 +156,21 @@ public: bool operator==(const OutputSlot& other) 
const; + void SetTensorHandleFactory(const ITensorHandleFactory::FactoryId& id); + ITensorHandleFactory::FactoryId GetTensorHandleFactoryId() const; + + void SetMemoryStrategy(unsigned int connectionIndex, MemoryStrategy strategy); + MemoryStrategy GetMemoryStrategyForConnection(unsigned int connectionIdx) const; + private: void ValidateConnectionIndex(unsigned int index) const; Layer& m_OwningLayer; OutputHandler& m_OutputHandler; std::vector<InputSlot*> m_Connections; + + ITensorHandleFactory::FactoryId m_TensorHandleFactoryId; + std::vector<MemoryStrategy> m_MemoryStrategies; }; // InputSlot inlines that need OutputSlot declaration. @@ -248,7 +266,7 @@ public: virtual std::unique_ptr<IWorkload> CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const = 0; - virtual void CreateTensorHandles(Graph& graph, const IWorkloadFactory& factory); + virtual void CreateTensorHandles(const TensorHandleFactoryRegistry& registry, const IWorkloadFactory& factory); /// Creates a dynamically-allocated copy of this layer. /// @param graph - The Graph into which this Layer is being cloned. diff --git a/src/armnn/LoadedNetwork.cpp b/src/armnn/LoadedNetwork.cpp index 3c7dfb07a9..7873e48780 100644 --- a/src/armnn/LoadedNetwork.cpp +++ b/src/armnn/LoadedNetwork.cpp @@ -85,19 +85,37 @@ LoadedNetwork::LoadedNetwork(std::unique_ptr<OptimizedNetwork> net) //(for example the splitter and concat layers). 
for (auto&& layer : order) { - auto const& backend = layer->GetBackendId(); - if (m_Backends.count(backend) == 0) + auto const& backendId = layer->GetBackendId(); + if (m_Backends.count(backendId) == 0) { - auto createBackend = BackendRegistryInstance().GetFactory(backend); - auto it = m_Backends.emplace(std::make_pair(backend, createBackend())); + auto createBackend = BackendRegistryInstance().GetFactory(backendId); + auto it = m_Backends.emplace(std::make_pair(backendId, createBackend())); - IBackendInternal::IMemoryManagerSharedPtr memoryManager = it.first->second->CreateMemoryManager(); - auto workloadFactory = it.first->second->CreateWorkloadFactory(memoryManager); + IBackendInternal* backend = it.first->second.get(); - m_WorkloadFactories.emplace(std::make_pair(backend, - std::make_pair(std::move(workloadFactory), memoryManager))); + if (backend->SupportsTensorAllocatorAPI()) + { + backend->RegisterTensorHandleFactories(m_TensorHandleFactoryRegistry); + + auto workloadFactory = backend->CreateWorkloadFactory(); + m_WorkloadFactories.emplace( + std::make_pair(backendId, std::make_pair(std::move(workloadFactory), nullptr))); + } + else + { + IBackendInternal::IMemoryManagerSharedPtr memoryManager = backend->CreateMemoryManager(); + auto workloadFactory = backend->CreateWorkloadFactory(memoryManager); + + m_WorkloadFactories.emplace( + std::make_pair(backendId, std::make_pair(std::move(workloadFactory), memoryManager))); + } } - layer->CreateTensorHandles(m_OptimizedNetwork->GetGraph(), GetWorkloadFactory(*layer)); + } + + for (auto&& layer : order) + { + auto& workloadFacory = GetWorkloadFactory(*layer); + layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFacory); } //Then create workloads. 
diff --git a/src/armnn/LoadedNetwork.hpp b/src/armnn/LoadedNetwork.hpp index 75af4a4e28..808a93222a 100644 --- a/src/armnn/LoadedNetwork.hpp +++ b/src/armnn/LoadedNetwork.hpp @@ -12,6 +12,7 @@ #include "Profiling.hpp" #include <backendsCommon/IBackendInternal.hpp> +#include <backendsCommon/TensorHandleFactoryRegistry.hpp> #include <backendsCommon/Workload.hpp> #include <backendsCommon/WorkloadFactory.hpp> @@ -83,6 +84,8 @@ private: mutable std::mutex m_WorkingMemMutex; bool m_IsWorkingMemAllocated=false; + + TensorHandleFactoryRegistry m_TensorHandleFactoryRegistry; }; } diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp index 9436fc6f9c..58ccfb7813 100644 --- a/src/armnn/Network.cpp +++ b/src/armnn/Network.cpp @@ -16,6 +16,7 @@ #include <backendsCommon/WorkloadFactory.hpp> #include <backendsCommon/BackendRegistry.hpp> #include <backendsCommon/IBackendInternal.hpp> +#include <backendsCommon/TensorHandleFactoryRegistry.hpp> #include <armnn/Exceptions.hpp> #include <armnn/Utils.hpp> @@ -74,16 +75,7 @@ Status OptimizedNetwork::SerializeToDot(std::ostream& stream) const return m_Graph->SerializeToDot(stream); } -struct OptimizationResult -{ - bool m_Warning; - bool m_Error; - OptimizationResult() - : m_Warning(false) - , m_Error(false) - {} -}; void ReportError(const std::string& errorMessage, Optional<std::vector<std::string>&> errorMessages) @@ -323,8 +315,28 @@ OptimizationResult AssignBackends(OptimizedNetwork* optNetObjPtr, errMessages); } +BackendsMap CreateSupportedBackends(TensorHandleFactoryRegistry& handleFactoryRegistry, + BackendSettings& backendSettings) +{ + BackendsMap backends; + auto const& backendRegistry = BackendRegistryInstance(); + for (auto&& selectedBackend : backendSettings.m_SupportedBackends) + { + auto backendFactory = backendRegistry.GetFactory(selectedBackend); + auto backendObjPtr = backendFactory(); + BOOST_ASSERT(backendObjPtr); + + backendObjPtr->RegisterTensorHandleFactories(handleFactoryRegistry); + + 
backends[backendObjPtr->GetId()] = std::move(backendObjPtr); + } + + return backends; +} + OptimizationResult ApplyBackendOptimizations(OptimizedNetwork* optNetObjPtr, BackendSettings& backendSettings, + BackendsMap& backends, Optional<std::vector<std::string>&> errMessages) { BOOST_ASSERT(optNetObjPtr); @@ -338,11 +350,9 @@ OptimizationResult ApplyBackendOptimizations(OptimizedNetwork* optNetObjPtr, SubgraphView mainSubgraph(optGraph); // Run backend specific optimizations - auto const& backendRegistry = BackendRegistryInstance(); for (auto&& selectedBackend : backendSettings.m_SelectedBackends) { - auto backendFactory = backendRegistry.GetFactory(selectedBackend); - auto backendObjPtr = backendFactory(); + auto backendObjPtr = backends.find(selectedBackend)->second.get(); BOOST_ASSERT(backendObjPtr); // Select sub-graphs based on backend @@ -425,6 +435,359 @@ OptimizationResult ApplyBackendOptimizations(OptimizedNetwork* optNetObjPtr, return result; } +bool RequiresCopy(ITensorHandleFactory::FactoryId src, + ITensorHandleFactory::FactoryId dst, + TensorHandleFactoryRegistry& registry) +{ + if (src != dst) + { + ITensorHandleFactory* srcFactory = registry.GetFactory(src); + ITensorHandleFactory* dstFactory = registry.GetFactory(dst); + + if (srcFactory->SupportsExport() && dstFactory->SupportsImport()) + { + return false; + } + return true; + } + return false; +} + +// Find the handle factory for the input layer which results in fewest required copies. +ITensorHandleFactory::FactoryId CalculateSlotOptionForInput(BackendsMap& backends, + OutputSlot& slot, + TensorHandleFactoryRegistry& registry) +{ + Layer& layer = slot.GetOwningLayer(); + BOOST_ASSERT(layer.GetType() == LayerType::Input); + + // Explicitly select the tensorhandle factory for InputLayer because the rules for it are slightly different. It + // doesn't matter which backend it is assigned to because they all use the same implementation, which + // requires Map/Unmap support. 
This means that, so long as the handle type supports map/unmap semantics, we can + // select a factory with maximum compatibility with the layers connected to the InputLayer. + + // First ensure the from backends can support the TensorHandeAPI + auto frmBackend = backends.find(layer.GetBackendId()); + if (frmBackend == backends.end() || + !frmBackend->second->SupportsTensorAllocatorAPI()) + { + return ITensorHandleFactory::LegacyFactoryId; + } + + // Go through all connections to the output slot and determine the TensorHandleFactory which results in the + // fewest copies. + std::map<ITensorHandleFactory::FactoryId, int> factoryScores; + int topScore = 0; + ITensorHandleFactory::FactoryId topChoice = ITensorHandleFactory::LegacyFactoryId; + + for (auto&& connection : slot.GetConnections()) + { + const Layer& connectedLayer = connection->GetOwningLayer(); + + auto toBackend = backends.find(connectedLayer.GetBackendId()); + BOOST_ASSERT_MSG(toBackend != backends.end(), "Backend id not found for the connected layer"); + + if (!toBackend->second.get()->SupportsTensorAllocatorAPI()) + { + // The destination backend does not support the tensor allocator API, move to the next one + continue; + } + + auto dstPrefs = toBackend->second.get()->GetHandleFactoryPreferences(); + for (auto&& dst : dstPrefs) + { + // Input layers use the mem copy workload, so the selected factory must support map/unmap API + ITensorHandleFactory* factory = registry.GetFactory(dst); + if (!factory->SupportsMapUnmap()) + { + // The current tensor handle factory does not support the map/unmap strategy, move to the next one + continue; + } + + auto it = factoryScores.find(dst); + if (it == factoryScores.end()) + { + // Add new score to the table + factoryScores[dst] = 0; + if (topChoice == ITensorHandleFactory::LegacyFactoryId) + { + topChoice = dst; + } + } + else + { + // Increase the score + factoryScores[dst]++; + + // Track the best option + if (factoryScores[dst] > topScore) + { + topScore = 
factoryScores[dst]; + topChoice = dst; + } + } + } + } + + return topChoice; +} + +// Find the handle factory for the output layer which results in fewest required copies. +ITensorHandleFactory::FactoryId CalculateSlotOptionForOutput(BackendsMap& backends, + OutputSlot& slot, + TensorHandleFactoryRegistry& registry) +{ + return ITensorHandleFactory::DeferredFactoryId; +} + +// For all handle factories supported on the source backend, we wish to find the one which requires the fewest copies +// when considering all connections. +ITensorHandleFactory::FactoryId CalculateSlotOption(BackendsMap& backends, + OutputSlot& outputSlot, + TensorHandleFactoryRegistry& registry) +{ + // First ensure the from backends can support the TensorHandeAPI + Layer& layer = outputSlot.GetOwningLayer(); + auto frmBackend = backends.find(layer.GetBackendId()); + if (frmBackend == backends.end() || + !frmBackend->second->SupportsTensorAllocatorAPI()) + { + return ITensorHandleFactory::LegacyFactoryId; + } + + // Connections to Output Layers requires support for map/unmap on the TensorHandle. 
+ bool requiresMapUnmap = false; + for (auto&& connection : outputSlot.GetConnections()) + { + const Layer& connectedLayer = connection->GetOwningLayer(); + if (connectedLayer.GetType() == LayerType::Output) + { + requiresMapUnmap = true; + } + } + + IBackendInternal* srcBackend = frmBackend->second.get(); + auto srcPrefs = srcBackend->GetHandleFactoryPreferences(); + + // Initialize the scores + std::map<ITensorHandleFactory::FactoryId, int> factoryScores; + for (auto&& pref : srcPrefs) + { + if (requiresMapUnmap) // Only consider factories that support map/unmap if required + { + ITensorHandleFactory* factory = registry.GetFactory(pref); + if (!factory->SupportsMapUnmap()) + { + // The current tensor handle factory does not support the map/unmap strategy, move to the next one + continue; + } + } + + auto it = factoryScores.find(pref); + if (it == factoryScores.end()) + { + // Add new score to the table + factoryScores[pref] = 0; + } + } + + // Score each handle factory based on how many times it requires copies on the slot connections + for (auto&& connection : outputSlot.GetConnections()) + { + const Layer& connectedLayer = connection->GetOwningLayer(); + + auto toBackend = backends.find(connectedLayer.GetBackendId()); + BOOST_ASSERT_MSG(toBackend != backends.end(), "Backend id not found for the connected layer"); + + auto dstPrefs = toBackend->second.get()->GetHandleFactoryPreferences(); + for (auto&& src : srcPrefs) + { + if (factoryScores.find(src) == factoryScores.end()) // Don't consider excluded factories + { + continue; + } + + for (auto&& dst : dstPrefs) + { + if (RequiresCopy(src, dst, registry)) + { + // Copy avoided, increase the score + factoryScores[src]++; + break; + } + } + } + } + + // Find the lowest score + int minScore = std::numeric_limits<int>::max(); + for (auto it : factoryScores) + { + minScore = std::min(minScore, it.second); + } + + // Collect factories matching the best(lowest) score + std::vector<ITensorHandleFactory::FactoryId> 
optimalFactories; + for (auto it : factoryScores) + { + if (it.second == minScore) + { + optimalFactories.push_back(it.first); + } + } + + // For all compatible Factories matching the best score, find the preferred one for the current layer. + for (auto&& srcPref : srcPrefs) + { + for (auto&& comp : optimalFactories) + { + if (comp == srcPref) + { + return comp; + } + } + } + + return ITensorHandleFactory::LegacyFactoryId; +} + +MemoryStrategy CalculateStrategy(BackendsMap& backends, + ITensorHandleFactory::FactoryId srcFactoryId, + const Layer& layer, + const Layer& connectedLayer, + TensorHandleFactoryRegistry& registry) +{ + auto toBackend = backends.find(connectedLayer.GetBackendId()); + BOOST_ASSERT_MSG(toBackend != backends.end(), "Backend id not found for the connected layer"); + + auto dstPrefs = toBackend->second.get()->GetHandleFactoryPreferences(); + + // Legacy API check for backward compatibility + if (srcFactoryId == ITensorHandleFactory::LegacyFactoryId || dstPrefs.empty()) + { + if (layer.GetBackendId() != connectedLayer.GetBackendId()) + { + return MemoryStrategy::CopyToTarget; + } + else + { + return MemoryStrategy::DirectCompatibility; + } + } + + // TensorHandleFactory API present, so perform more sophisticated strategies. 
+ // Dst Output layers don't require copy because they use map/unmap + if (connectedLayer.GetType() == LayerType::Output) + { + return MemoryStrategy::DirectCompatibility; + } + + // Search for direct match in prefs + for (auto&& pref : dstPrefs) + { + if (pref == srcFactoryId) + { + return MemoryStrategy::DirectCompatibility; + } + } + + // Search for export/import options + ITensorHandleFactory* srcFactory = registry.GetFactory(srcFactoryId); + if (srcFactory->SupportsExport()) + { + for (auto&& pref : dstPrefs) + { + ITensorHandleFactory* dstFactory = registry.GetFactory(pref); + if (dstFactory->SupportsImport()) + { + return MemoryStrategy::ExportToTarget; + } + } + } + + // Search for copy options via map/unmap + if (srcFactory->SupportsMapUnmap()) + { + for (auto&& pref : dstPrefs) + { + ITensorHandleFactory* dstFactory = registry.GetFactory(pref); + if (dstFactory->SupportsMapUnmap()) + { + return MemoryStrategy::CopyToTarget; + } + } + } + + return MemoryStrategy::Undefined; +} + +// Select the TensorHandleFactories and the corresponding memory strategy +OptimizationResult SelectTensorHandleStrategy(Graph& optGraph, + BackendsMap& backends, + TensorHandleFactoryRegistry& registry, + Optional<std::vector<std::string>&> errMessages) +{ + OptimizationResult result; + + optGraph.ForEachLayer([&backends, ®istry, &result, &errMessages](Layer* layer) + { + BOOST_ASSERT(layer); + + // Lets make sure the backend is in our list of supported backends. Something went wrong during backend + // assignment if this check fails + BOOST_ASSERT(backends.find(layer->GetBackendId()) != backends.end()); + + // Check each output separately + for (unsigned int slotIdx = 0; slotIdx < layer->GetNumOutputSlots(); slotIdx++) + { + OutputSlot& outputSlot = layer->GetOutputSlot(slotIdx); + + ITensorHandleFactory::FactoryId slotOption = ITensorHandleFactory::LegacyFactoryId; + + // Calculate the factory to use which results in the fewest copies being made. 
+ switch(layer->GetType()) + { + case LayerType::Input: + slotOption = CalculateSlotOptionForInput(backends, outputSlot, registry); + break; + case LayerType::Output: + slotOption = CalculateSlotOptionForOutput(backends, outputSlot, registry); + break; + default: + slotOption = CalculateSlotOption(backends, outputSlot, registry); + break; + } + outputSlot.SetTensorHandleFactory(slotOption); + + // Now determine the "best" memory strategy for each connection given the slotOption. + unsigned int connectionIdx = 0; + for (auto&& connection : outputSlot.GetConnections()) + { + const Layer& connectedLayer = connection->GetOwningLayer(); + + MemoryStrategy strategy = CalculateStrategy(backends, slotOption, *layer, connectedLayer, registry); + + if (strategy == MemoryStrategy::Undefined) + { + result.m_Error = true; + if (errMessages) + { + errMessages.value().emplace_back("Could not find valid strategy required for compatibility" + " between backends."); + } + return; + } + + outputSlot.SetMemoryStrategy(connectionIdx, strategy); + + connectionIdx++; + } + } + }); + + return result; +} + IOptimizedNetworkPtr Optimize(const INetwork& inNetwork, const std::vector<BackendId>& backendPreferences, const IDeviceSpec& deviceSpec, @@ -476,15 +839,19 @@ IOptimizedNetworkPtr Optimize(const INetwork& inNetwork, return IOptimizedNetworkPtr(nullptr, &IOptimizedNetwork::Destroy); } + // Create a map to temporarily hold initialized backend objects + TensorHandleFactoryRegistry tensorHandleFactoryRegistry; + BackendsMap backends = CreateSupportedBackends(tensorHandleFactoryRegistry, backendSettings); + // Assign an available backend to each layer Graph::Iterator firstLayer = optGraph.begin(); Graph::Iterator lastLayer = optGraph.end(); - OptimizationResult assigBackendsResult = AssignBackends(optNetObjPtr, - backendSettings, - firstLayer, - lastLayer, - errMessages); - if (assigBackendsResult.m_Error) + OptimizationResult assignBackendsResult = AssignBackends(optNetObjPtr, + 
backendSettings, + firstLayer, + lastLayer, + errMessages); + if (assignBackendsResult.m_Error) { // Failed to assign a backend to each layer return IOptimizedNetworkPtr(nullptr, &IOptimizedNetwork::Destroy); @@ -496,6 +863,7 @@ IOptimizedNetworkPtr Optimize(const INetwork& inNetwork, // Apply the backend-specific optimizations OptimizationResult backendOptimizationResult = ApplyBackendOptimizations(optNetObjPtr, backendSettings, + backends, errMessages); if (backendOptimizationResult.m_Error) { @@ -510,13 +878,25 @@ IOptimizedNetworkPtr Optimize(const INetwork& inNetwork, Optimizer::Pass(optGraph, MakeOptimizations(InsertDebugLayer())); } - optGraph.AddCopyLayers(); + // Calculate the compatibility strategies for tensor handles + OptimizationResult strategyResult = SelectTensorHandleStrategy(optGraph, + backends, + tensorHandleFactoryRegistry, + errMessages); + if (strategyResult.m_Error) + { + // Failed to apply the backend-specific optimizations + return IOptimizedNetworkPtr(nullptr, &IOptimizedNetwork::Destroy); + } + + // Based on the tensor handle strategy determined above, insert copy layers where required. 
+ optGraph.AddCopyLayers(backends, tensorHandleFactoryRegistry); // Convert constants Optimizer::Pass(optGraph, MakeOptimizations(ConvertConstantsFloatToHalf())); Optimizer::Pass(optGraph, MakeOptimizations(ConvertConstantsHalfToFloat())); - // Run backend specific optimizations + // Run backend specific optimizations (deprecated) for (auto&& chosenBackend : backendSettings.m_SelectedBackends) { auto factoryFun = BackendRegistryInstance().GetFactory(chosenBackend); diff --git a/src/armnn/Network.hpp b/src/armnn/Network.hpp index b90e3c2f8d..8db968a3f9 100644 --- a/src/armnn/Network.hpp +++ b/src/armnn/Network.hpp @@ -13,6 +13,7 @@ #include <string> #include <vector> +#include <map> #include <memory> #include "Layer.hpp" @@ -229,4 +230,27 @@ private: std::unique_ptr<Graph> m_Graph; }; + + +struct OptimizationResult +{ + bool m_Warning; + bool m_Error; + + OptimizationResult() + : m_Warning(false) + , m_Error(false) + {} +}; + +using BackendsMap = std::map<BackendId, std::unique_ptr<class IBackendInternal>>; + +BackendsMap CreateSupportedBackends(TensorHandleFactoryRegistry& handleFactoryRegistry, + struct BackendSettings& backendSettings); + +OptimizationResult SelectTensorHandleStrategy(Graph& optGraph, + BackendsMap& backends, + TensorHandleFactoryRegistry& registry, + Optional<std::vector<std::string>&> errMessages); + } // namespace armnn diff --git a/src/armnn/Optimizer.cpp b/src/armnn/Optimizer.cpp index 4d0aae8491..0a31f84654 100644 --- a/src/armnn/Optimizer.cpp +++ b/src/armnn/Optimizer.cpp @@ -28,6 +28,7 @@ void Optimizer::Pass(Graph& graph, const Optimizations& optimizations) --it; for (auto&& optimization : optimizations) { + BOOST_ASSERT(*it); optimization->Run(graph, **it); if ((*it)->IsOutputUnconnected()) diff --git a/src/armnn/layers/ConcatLayer.cpp b/src/armnn/layers/ConcatLayer.cpp index 1d2641cd60..24051a24d2 100644 --- a/src/armnn/layers/ConcatLayer.cpp +++ b/src/armnn/layers/ConcatLayer.cpp @@ -34,7 +34,8 @@ std::unique_ptr<IWorkload> 
ConcatLayer::CreateWorkload(const Graph& graph, const return factory.CreateConcat(descriptor, PrepInfoAndDesc(descriptor, graph)); } -void ConcatLayer::CreateTensorHandles(Graph& graph, const IWorkloadFactory& factory) +template<typename FactoryType> +void ConcatLayer::CreateTensors(const FactoryType& factory) { //If sub tensors are supported then the concat //just needs to make sure that the outputs of the prev layer @@ -43,6 +44,8 @@ void ConcatLayer::CreateTensorHandles(Graph& graph, const IWorkloadFactory& fact if (factory.SupportsSubTensors()) { + ITensorHandleFactory::FactoryId factoryId = GetOutputSlot(0).GetTensorHandleFactoryId(); + std::queue<ConcatLayer*> m_ConcatLayers; m_ConcatLayers.push(this); @@ -66,7 +69,8 @@ void ConcatLayer::CreateTensorHandles(Graph& graph, const IWorkloadFactory& fact auto CreateSubTensor = [&]() { // Make sure quantization parameters are in the same space - if (parentInfo.IsTypeSpaceMatch(info)) + if (parentInfo.IsTypeSpaceMatch(info) && + factoryId == slot->GetTensorHandleFactoryId()) { return factory.CreateSubTensorHandle(*parentTensor, info.GetShape(), @@ -114,6 +118,24 @@ void ConcatLayer::CreateTensorHandles(Graph& graph, const IWorkloadFactory& fact } } +void ConcatLayer::CreateTensorHandles(const TensorHandleFactoryRegistry& registry, + const IWorkloadFactory& workloadFactory) +{ + OutputSlot& slot = GetOutputSlot(0); + ITensorHandleFactory::FactoryId factoryId = slot.GetTensorHandleFactoryId(); + + if (factoryId == ITensorHandleFactory::LegacyFactoryId) + { + CreateTensors(workloadFactory); + } + else + { + ITensorHandleFactory* handleFactory = registry.GetFactory(factoryId); + BOOST_ASSERT(handleFactory); + CreateTensors(*handleFactory); + } +} + ConcatLayer* ConcatLayer::Clone(Graph& graph) const { return CloneBase<ConcatLayer>(graph, m_Param, GetName()); diff --git a/src/armnn/layers/ConcatLayer.hpp b/src/armnn/layers/ConcatLayer.hpp index 4268291916..eb7d93ce14 100644 --- a/src/armnn/layers/ConcatLayer.hpp +++ 
b/src/armnn/layers/ConcatLayer.hpp @@ -22,9 +22,11 @@ public: /// Set the outputs to be appropriate sub tensors of the input if sub tensors are supported /// otherwise creates tensor handlers. - /// @param [in] graph The graph where this layer can be found. + /// @param [in] registry Contains all the registered tensor handle factories available for use. /// @param [in] factory The workload factory which will create the workload. - virtual void CreateTensorHandles(Graph& graph, const IWorkloadFactory& factory) override; +// virtual void CreateTensorHandles(Graph& graph, const IWorkloadFactory& factory) override; + virtual void CreateTensorHandles(const TensorHandleFactoryRegistry& registry, + const IWorkloadFactory& factory) override; /// Creates a dynamically-allocated copy of this layer. /// @param [in] graph The graph into which this layer is being cloned. @@ -50,6 +52,11 @@ protected: /// Default destructor ~ConcatLayer() = default; + +private: + template <typename FactoryType> + void CreateTensors(const FactoryType& factory); + }; } // namespace diff --git a/src/armnn/layers/OutputLayer.hpp b/src/armnn/layers/OutputLayer.hpp index b86f8e2dfe..2aa2dbd6c9 100644 --- a/src/armnn/layers/OutputLayer.hpp +++ b/src/armnn/layers/OutputLayer.hpp @@ -22,11 +22,12 @@ public: /// Set the outputs to be appropriate sub tensors of the input if sub tensors are supported /// otherwise creates tensor handlers by default. Ignores parameters for Output type. - /// @param [in] graph The graph where this layer can be found. + /// @param [in] registry Contains all the registered tensor handle factories available for use. /// @param [in] factory The workload factory which will create the workload. 
- virtual void CreateTensorHandles(Graph& graph, const IWorkloadFactory& factory) override + virtual void CreateTensorHandles(const TensorHandleFactoryRegistry& registry, + const IWorkloadFactory& factory) override { - boost::ignore_unused(graph, factory); + boost::ignore_unused(registry, factory); } /// Creates a dynamically-allocated copy of this layer. diff --git a/src/armnn/layers/SplitterLayer.cpp b/src/armnn/layers/SplitterLayer.cpp index 4a6b2220a7..dc04b3fd15 100644 --- a/src/armnn/layers/SplitterLayer.cpp +++ b/src/armnn/layers/SplitterLayer.cpp @@ -32,7 +32,8 @@ std::unique_ptr<IWorkload> SplitterLayer::CreateWorkload(const Graph& graph, con return factory.CreateSplitter(descriptor, PrepInfoAndDesc(descriptor, graph)); } -void SplitterLayer::CreateTensorHandles(Graph& graph, const IWorkloadFactory& factory) +template<typename FactoryType> +void SplitterLayer::CreateTensors(const FactoryType& factory) { //If sub tensors are supported than all the "splitter" need to do is to //set the outputs to be appropriate sub tensors of the input. 
@@ -40,6 +41,7 @@ void SplitterLayer::CreateTensorHandles(Graph& graph, const IWorkloadFactory& fa if (useSubTensors) { + const OutputSlot* slot = GetInputSlots()[0].GetConnectedOutputSlot(); const OutputHandler& outputHandler = GetInputSlots()[0].GetConnectedOutputSlot()->GetOutputHandler(); const TensorInfo& parentInfo = outputHandler.GetTensorInfo(); @@ -53,10 +55,13 @@ void SplitterLayer::CreateTensorHandles(Graph& graph, const IWorkloadFactory& fa { const TensorInfo& info = m_OutputHandlers[i].GetTensorInfo(); + OutputSlot& outSlot = GetOutputSlot(i); + ITensorHandleFactory::FactoryId factoryId = outSlot.GetTensorHandleFactoryId(); auto CreateSubTensor = [&]() { // Make sure quantization parameters are in the same space - if (parentInfo.IsTypeSpaceMatch(info)) + if (parentInfo.IsTypeSpaceMatch(info) && + factoryId == slot->GetTensorHandleFactoryId()) { return factory.CreateSubTensorHandle(*inputData, info.GetShape(), @@ -95,6 +100,24 @@ void SplitterLayer::CreateTensorHandles(Graph& graph, const IWorkloadFactory& fa } } +void SplitterLayer::CreateTensorHandles(const TensorHandleFactoryRegistry& registry, + const IWorkloadFactory& workloadFactory) +{ + OutputSlot& slot = GetOutputSlot(0); + ITensorHandleFactory::FactoryId factoryId = slot.GetTensorHandleFactoryId(); + + if (factoryId == ITensorHandleFactory::LegacyFactoryId) + { + CreateTensors(workloadFactory); + } + else + { + ITensorHandleFactory* handleFactory = registry.GetFactory(factoryId); + BOOST_ASSERT(handleFactory); + CreateTensors(*handleFactory); + } +} + SplitterLayer* SplitterLayer::Clone(Graph& graph) const { return CloneBase<SplitterLayer>(graph, m_Param, GetName()); diff --git a/src/armnn/layers/SplitterLayer.hpp b/src/armnn/layers/SplitterLayer.hpp index 19b05562e8..9c684d479f 100644 --- a/src/armnn/layers/SplitterLayer.hpp +++ b/src/armnn/layers/SplitterLayer.hpp @@ -22,9 +22,11 @@ public: /// Set the outputs to be appropriate sub tensors of the input if sub tensors are supported /// 
otherwise creates tensor handlers. - /// @param [in] graph The graph where this layer can be found. + /// @param [in] registry Contains all the registered tensor handle factories available for use. /// @param [in] factory The workload factory which will create the workload. - virtual void CreateTensorHandles(Graph& graph, const IWorkloadFactory& factory) override; + //virtual void CreateTensorHandles(Graph& graph, const IWorkloadFactory& factory) override; + virtual void CreateTensorHandles(const TensorHandleFactoryRegistry& registry, + const IWorkloadFactory& factory) override; /// Creates a dynamically-allocated copy of this layer. /// @param [in] graph The graph into which this layer is being cloned. @@ -50,6 +52,10 @@ protected: /// Default destructor ~SplitterLayer() = default; + +private: + template <typename FactoryType> + void CreateTensors(const FactoryType& factory); }; } // namespace diff --git a/src/armnn/test/CreateWorkload.hpp b/src/armnn/test/CreateWorkload.hpp index b07197797c..47af4a89b5 100644 --- a/src/armnn/test/CreateWorkload.hpp +++ b/src/armnn/test/CreateWorkload.hpp @@ -41,11 +41,13 @@ std::unique_ptr<Workload> MakeAndCheckWorkload(Layer& layer, Graph& graph, const } // Helper function to create tensor handlers for workloads, assuming they all use the same factory. 
-void CreateTensorHandles(armnn::Graph& graph, armnn::IWorkloadFactory& factory) +void CreateTensorHandles(armnn::Graph& graph, + armnn::IWorkloadFactory& factory) { + TensorHandleFactoryRegistry tmpRegistry; for (auto&& layer : graph.TopologicalSort()) { - layer->CreateTensorHandles(graph, factory); + layer->CreateTensorHandles(tmpRegistry, factory); } } diff --git a/src/armnn/test/GraphTests.cpp b/src/armnn/test/GraphTests.cpp index 44cfa2eaec..ae5ff2232b 100644 --- a/src/armnn/test/GraphTests.cpp +++ b/src/armnn/test/GraphTests.cpp @@ -14,6 +14,8 @@ #include <Layer.hpp> #include <backendsCommon/CpuTensorHandle.hpp> +#include <backendsCommon/IBackendInternal.hpp> +#include <backendsCommon/TensorHandleFactoryRegistry.hpp> /// Checks that first comes before second in the order. @@ -477,10 +479,21 @@ struct CopyLayersFixture outputLayer->SetBackendId(armnn::Compute::CpuRef); softmaxLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0)); + + // Set the memory strategies + inputLayer->GetOutputSlot(0).SetMemoryStrategy(0, MemoryStrategy::DirectCompatibility); + convLayer1->GetOutputSlot(0).SetMemoryStrategy(0, MemoryStrategy::DirectCompatibility); + convLayer1->GetOutputSlot(0).SetMemoryStrategy(1, MemoryStrategy::DirectCompatibility); + convLayer2->GetOutputSlot(0).SetMemoryStrategy(0, MemoryStrategy::DirectCompatibility); + concatLayer->GetOutputSlot(0).SetMemoryStrategy(0, MemoryStrategy::DirectCompatibility); + actLayer->GetOutputSlot(0).SetMemoryStrategy(0, MemoryStrategy::DirectCompatibility); + softmaxLayer->GetOutputSlot(0).SetMemoryStrategy(0, MemoryStrategy::DirectCompatibility); } armnn::TensorInfo m_TensorDesc; armnn::Graph m_Graph; + std::map<armnn::BackendId, std::unique_ptr<armnn::IBackendInternal>> m_Backends; + armnn::TensorHandleFactoryRegistry m_FactoryRegistry; private: @@ -501,26 +514,26 @@ private: BOOST_FIXTURE_TEST_CASE(AddCopyLayers, CopyLayersFixture) { const armnn::Graph origGraph(m_Graph); - m_Graph.AddCopyLayers(); + 
m_Graph.AddCopyLayers(m_Backends, m_FactoryRegistry); TestGraphAfterAddingCopyLayers(m_Graph, origGraph); } BOOST_FIXTURE_TEST_CASE(AddCopyLayersSeveralTimes, CopyLayersFixture) { - m_Graph.AddCopyLayers(); + m_Graph.AddCopyLayers(m_Backends, m_FactoryRegistry); // Calling AddCopyLayers() several times should not change the connections. const std::vector<Edge> edges = GetEdgeList(m_Graph); for (int i = 0; i < 4; ++i) { - m_Graph.AddCopyLayers(); + m_Graph.AddCopyLayers(m_Backends, m_FactoryRegistry); const std::vector<Edge> otherEdges = GetEdgeList(m_Graph); BOOST_TEST((edges == otherEdges)); } } -BOOST_AUTO_TEST_CASE(CopyLayersAddedBetweenSameLayersHaveDifferentNames) +BOOST_FIXTURE_TEST_CASE(CopyLayersAddedBetweenSameLayersHaveDifferentNames, CopyLayersFixture) { armnn::Graph graph; @@ -542,7 +555,12 @@ BOOST_AUTO_TEST_CASE(CopyLayersAddedBetweenSameLayersHaveDifferentNames) splitterLayer->GetOutputSlot(1).Connect(additionLayer->GetInputSlot(1)); additionLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0)); - graph.AddCopyLayers(); + inputLayer->GetOutputSlot(0).SetMemoryStrategy(0, armnn::MemoryStrategy::DirectCompatibility); + splitterLayer->GetOutputSlot(0).SetMemoryStrategy(0, armnn::MemoryStrategy::CopyToTarget); + splitterLayer->GetOutputSlot(1).SetMemoryStrategy(0, armnn::MemoryStrategy::CopyToTarget); + additionLayer->GetOutputSlot(0).SetMemoryStrategy(0, armnn::MemoryStrategy::DirectCompatibility); + + graph.AddCopyLayers(m_Backends, m_FactoryRegistry); std::vector<Edge> edges = GetEdgeList(graph); BOOST_CHECK(edges.size() == 6u); diff --git a/src/armnn/test/TensorHandleStrategyTest.cpp b/src/armnn/test/TensorHandleStrategyTest.cpp new file mode 100644 index 0000000000..3bb1c68169 --- /dev/null +++ b/src/armnn/test/TensorHandleStrategyTest.cpp @@ -0,0 +1,274 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// +#include <boost/test/unit_test.hpp> + +#include <armnn/LayerVisitorBase.hpp> + +#include <backendsCommon/IBackendContext.hpp> +#include <backendsCommon/IBackendInternal.hpp> +#include <backendsCommon/IMemoryManager.hpp> +#include <backendsCommon/ITensorHandleFactory.hpp> +#include <backendsCommon/TensorHandleFactoryRegistry.hpp> + +#include <optimizations/Optimization.hpp> + +#include <Network.hpp> + +#include <vector> +#include <string> + +using namespace armnn; + +class TestMemMgr : public IMemoryManager +{ +public: + TestMemMgr() = default; + + void Acquire() override {} + void Release() override {} +}; + +class TestFactory1 : public ITensorHandleFactory +{ +public: + TestFactory1(std::weak_ptr<IMemoryManager> mgr, ITensorHandleFactory::FactoryId id) + : m_Id(id) + , m_MemMgr(mgr) + {} + + std::unique_ptr<ITensorHandle> CreateSubTensorHandle(ITensorHandle& parent, + TensorShape const& subTensorShape, + unsigned int const* subTensorOrigin) const override + { + return nullptr; + } + + std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo) const override + { + return nullptr; + } + + virtual const FactoryId GetId() const override { return m_Id; } + + virtual bool SupportsSubTensors() const override { return true; } + +private: + FactoryId m_Id = "UninitializedId"; + + std::weak_ptr<IMemoryManager> m_MemMgr; +}; + +class TestBackendA : public IBackendInternal +{ +public: + TestBackendA() = default; + + const BackendId& GetId() const override { return m_Id; } + + IWorkloadFactoryPtr CreateWorkloadFactory(const IMemoryManagerSharedPtr& memoryManager = nullptr) const override + { + return IWorkloadFactoryPtr{}; + } + + IBackendInternal::ILayerSupportSharedPtr GetLayerSupport() const override + { + return ILayerSupportSharedPtr{}; + } + + std::vector<ITensorHandleFactory::FactoryId> GetHandleFactoryPreferences() const override + { + return std::vector<ITensorHandleFactory::FactoryId> + { + 
"TestHandleFactoryA1", + "TestHandleFactoryA2", + "TestHandleFactoryB1" + }; + } + + void RegisterTensorHandleFactories(TensorHandleFactoryRegistry& registry) override + { + auto mgr = std::make_shared<TestMemMgr>(); + + registry.RegisterMemoryManager(mgr); + registry.RegisterFactory(std::make_unique<TestFactory1>(mgr, "TestHandleFactoryA1")); + registry.RegisterFactory(std::make_unique<TestFactory1>(mgr, "TestHandleFactoryA2")); + } + +private: + BackendId m_Id = "BackendA"; +}; + +class TestBackendB : public IBackendInternal +{ +public: + TestBackendB() = default; + + const BackendId& GetId() const override { return m_Id; } + + IWorkloadFactoryPtr CreateWorkloadFactory(const IMemoryManagerSharedPtr& memoryManager = nullptr) const override + { + return IWorkloadFactoryPtr{}; + } + + IBackendInternal::ILayerSupportSharedPtr GetLayerSupport() const override + { + return ILayerSupportSharedPtr{}; + } + + std::vector<ITensorHandleFactory::FactoryId> GetHandleFactoryPreferences() const override + { + return std::vector<ITensorHandleFactory::FactoryId> + { + "TestHandleFactoryB1" + }; + } + + void RegisterTensorHandleFactories(TensorHandleFactoryRegistry& registry) override + { + auto mgr = std::make_shared<TestMemMgr>(); + + registry.RegisterMemoryManager(mgr); + registry.RegisterFactory(std::make_unique<TestFactory1>(mgr, "TestHandleFactoryB1")); + } + +private: + BackendId m_Id = "BackendB"; +}; + +class TestBackendC : public IBackendInternal +{ +public: + TestBackendC() = default; + + const BackendId& GetId() const override { return m_Id; } + + IWorkloadFactoryPtr CreateWorkloadFactory(const IMemoryManagerSharedPtr& memoryManager = nullptr) const override + { + return IWorkloadFactoryPtr{}; + } + + IBackendInternal::ILayerSupportSharedPtr GetLayerSupport() const override + { + return ILayerSupportSharedPtr{}; + } + + std::vector<ITensorHandleFactory::FactoryId> GetHandleFactoryPreferences() const override + { + return std::vector<ITensorHandleFactory::FactoryId>{ + 
"TestHandleFactoryC1" + }; + } + + void RegisterTensorHandleFactories(TensorHandleFactoryRegistry& registry) override + { + auto mgr = std::make_shared<TestMemMgr>(); + + registry.RegisterMemoryManager(mgr); + registry.RegisterFactory(std::make_unique<TestFactory1>(mgr, "TestHandleFactoryC1")); + } + +private: + BackendId m_Id = "BackendC"; +}; + + +BOOST_AUTO_TEST_SUITE(TensorHandle) + +BOOST_AUTO_TEST_CASE(RegisterFactories) +{ + TestBackendA backendA; + TestBackendB backendB; + + BOOST_TEST(backendA.GetHandleFactoryPreferences()[0] == "TestHandleFactoryA1"); + BOOST_TEST(backendA.GetHandleFactoryPreferences()[1] == "TestHandleFactoryA2"); + BOOST_TEST(backendA.GetHandleFactoryPreferences()[2] == "TestHandleFactoryB1"); + + TensorHandleFactoryRegistry registry; + backendA.RegisterTensorHandleFactories(registry); + backendB.RegisterTensorHandleFactories(registry); + + BOOST_TEST((registry.GetFactory("Non-existing Backend") == nullptr)); + BOOST_TEST((registry.GetFactory("TestHandleFactoryA1") != nullptr)); + BOOST_TEST((registry.GetFactory("TestHandleFactoryA2") != nullptr)); + BOOST_TEST((registry.GetFactory("TestHandleFactoryB1") != nullptr)); +} + +BOOST_AUTO_TEST_CASE(TensorHandleSelectionStrategy) +{ + auto backendA = std::make_unique<TestBackendA>(); + auto backendB = std::make_unique<TestBackendB>(); + auto backendC = std::make_unique<TestBackendC>(); + + TensorHandleFactoryRegistry registry; + backendA->RegisterTensorHandleFactories(registry); + backendB->RegisterTensorHandleFactories(registry); + backendC->RegisterTensorHandleFactories(registry); + + BackendsMap backends; + backends["BackendA"] = std::move(backendA); + backends["BackendB"] = std::move(backendB); + backends["BackendC"] = std::move(backendC); + + armnn::Graph graph; + + armnn::InputLayer* const inputLayer = graph.AddLayer<armnn::InputLayer>(0, "input"); + inputLayer->SetBackendId("BackendA"); + + armnn::SoftmaxDescriptor smDesc; + armnn::SoftmaxLayer* const softmaxLayer1 = 
graph.AddLayer<armnn::SoftmaxLayer>(smDesc, "softmax1"); + softmaxLayer1->SetBackendId("BackendA"); + + armnn::SoftmaxLayer* const softmaxLayer2 = graph.AddLayer<armnn::SoftmaxLayer>(smDesc, "softmax2"); + softmaxLayer2->SetBackendId("BackendB"); + + armnn::SoftmaxLayer* const softmaxLayer3 = graph.AddLayer<armnn::SoftmaxLayer>(smDesc, "softmax3"); + softmaxLayer3->SetBackendId("BackendC"); + + armnn::OutputLayer* const outputLayer = graph.AddLayer<armnn::OutputLayer>(0, "output"); + outputLayer->SetBackendId("BackendA"); + + inputLayer->GetOutputSlot(0).Connect(softmaxLayer1->GetInputSlot(0)); + softmaxLayer1->GetOutputSlot(0).Connect(softmaxLayer2->GetInputSlot(0)); + softmaxLayer2->GetOutputSlot(0).Connect(softmaxLayer3->GetInputSlot(0)); + softmaxLayer3->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0)); + + graph.TopologicalSort(); + + std::vector<std::string> errors; + auto result = SelectTensorHandleStrategy(graph, backends, registry, errors); + + BOOST_TEST(result.m_Error == false); + BOOST_TEST(result.m_Warning == false); + + OutputSlot& inputLayerOut = inputLayer->GetOutputSlot(0); + OutputSlot& softmaxLayer1Out = softmaxLayer1->GetOutputSlot(0); + OutputSlot& softmaxLayer2Out = softmaxLayer2->GetOutputSlot(0); + OutputSlot& softmaxLayer3Out = softmaxLayer3->GetOutputSlot(0); + + // Check that the correct factory was selected + BOOST_TEST(inputLayerOut.GetTensorHandleFactoryId() == "TestHandleFactoryA1"); + BOOST_TEST(softmaxLayer1Out.GetTensorHandleFactoryId() == "TestHandleFactoryB1"); + BOOST_TEST(softmaxLayer2Out.GetTensorHandleFactoryId() == "TestHandleFactoryB1"); + BOOST_TEST(softmaxLayer3Out.GetTensorHandleFactoryId() == "TestHandleFactoryC1"); + + // Check that the correct strategy was selected + BOOST_TEST((inputLayerOut.GetMemoryStrategyForConnection(0) == MemoryStrategy::DirectCompatibility)); + BOOST_TEST((softmaxLayer1Out.GetMemoryStrategyForConnection(0) == MemoryStrategy::DirectCompatibility)); + 
BOOST_TEST((softmaxLayer2Out.GetMemoryStrategyForConnection(0) == MemoryStrategy::CopyToTarget)); + BOOST_TEST((softmaxLayer3Out.GetMemoryStrategyForConnection(0) == MemoryStrategy::DirectCompatibility)); + + graph.AddCopyLayers(backends, registry); + int count= 0; + graph.ForEachLayer([&count](Layer* layer) + { + if (layer->GetType() == LayerType::MemCopy) + { + count++; + } + }); + BOOST_TEST(count == 1); +} + +BOOST_AUTO_TEST_SUITE_END() diff --git a/src/backends/aclCommon/test/CreateWorkloadClNeon.hpp b/src/backends/aclCommon/test/CreateWorkloadClNeon.hpp index f544c12c30..03bcf32387 100644 --- a/src/backends/aclCommon/test/CreateWorkloadClNeon.hpp +++ b/src/backends/aclCommon/test/CreateWorkloadClNeon.hpp @@ -62,6 +62,7 @@ boost::test_tools::predicate_result CompareTensorHandleShape(IComputeTensorHandl template<typename IComputeTensorHandle> void CreateMemCopyWorkloads(IWorkloadFactory& factory) { + TensorHandleFactoryRegistry registry; Graph graph; RefWorkloadFactory refFactory; @@ -79,10 +80,10 @@ void CreateMemCopyWorkloads(IWorkloadFactory& factory) Connect(layer1, layer2, tensorInfo); Connect(layer2, output, tensorInfo); - input->CreateTensorHandles(graph, refFactory); - layer1->CreateTensorHandles(graph, factory); - layer2->CreateTensorHandles(graph, refFactory); - output->CreateTensorHandles(graph, refFactory); + input->CreateTensorHandles(registry, refFactory); + layer1->CreateTensorHandles(registry, factory); + layer2->CreateTensorHandles(registry, refFactory); + output->CreateTensorHandles(registry, refFactory); // make the workloads and check them auto workload1 = MakeAndCheckWorkload<CopyMemGenericWorkload>(*layer1, graph, factory); diff --git a/src/backends/backendsCommon/CMakeLists.txt b/src/backends/backendsCommon/CMakeLists.txt index e1e387bc6f..bc1c15beef 100644 --- a/src/backends/backendsCommon/CMakeLists.txt +++ b/src/backends/backendsCommon/CMakeLists.txt @@ -11,6 +11,8 @@ list(APPEND armnnBackendsCommon_sources CpuTensorHandle.hpp 
IBackendInternal.hpp IBackendContext.hpp + ITensorHandleFactory.cpp + ITensorHandleFactory.hpp LayerSupportBase.cpp LayerSupportBase.hpp IMemoryManager.hpp @@ -22,6 +24,8 @@ list(APPEND armnnBackendsCommon_sources OptimizationViews.hpp OutputHandler.cpp OutputHandler.hpp + TensorHandleFactoryRegistry.cpp + TensorHandleFactoryRegistry.hpp WorkloadDataCollector.hpp WorkloadData.cpp WorkloadDataFwd.hpp diff --git a/src/backends/backendsCommon/IBackendInternal.hpp b/src/backends/backendsCommon/IBackendInternal.hpp index fe9d620278..a0d6569949 100644 --- a/src/backends/backendsCommon/IBackendInternal.hpp +++ b/src/backends/backendsCommon/IBackendInternal.hpp @@ -10,7 +10,11 @@ #include <ISubgraphViewConverter.hpp> #include <SubgraphView.hpp> +#include <optimizations/Optimization.hpp> +#include "IBackendContext.hpp" +#include "IMemoryManager.hpp" +#include "ITensorHandleFactory.hpp" #include "OptimizationViews.hpp" #include <vector> @@ -18,9 +22,7 @@ namespace armnn { class IWorkloadFactory; -class IBackendContext; class IMemoryManager; -class Optimization; class ILayerSupport; class IBackendInternal : public IBackend @@ -60,7 +62,10 @@ public: } ARMNN_DEPRECATED_MSG("Use \"OptimizationViews OptimizeSubgraphView(const SubgraphView&)\" instead") - virtual Optimizations GetOptimizations() const = 0; + virtual Optimizations GetOptimizations() const + { + return Optimizations{}; + } ARMNN_DEPRECATED_MSG("Use \"OptimizationViews OptimizeSubgraphView(const SubgraphView&)\" instead") virtual SubGraphUniquePtr OptimizeSubGraph(const SubGraph& subGraph, bool& optimizationAttempted) const @@ -70,12 +75,19 @@ public: } ARMNN_NO_DEPRECATE_WARN_END - virtual IMemoryManagerUniquePtr CreateMemoryManager() const = 0; + + virtual IMemoryManagerUniquePtr CreateMemoryManager() const + { + return IMemoryManagerUniquePtr(); + }; virtual IWorkloadFactoryPtr CreateWorkloadFactory( const IMemoryManagerSharedPtr& memoryManager = nullptr) const = 0; - virtual IBackendContextPtr 
CreateBackendContext(const IRuntime::CreationOptions&) const = 0; + virtual IBackendContextPtr CreateBackendContext(const IRuntime::CreationOptions&) const + { + return IBackendContextPtr{}; + } virtual ILayerSupportSharedPtr GetLayerSupport() const = 0; @@ -107,6 +119,29 @@ public: } return result; } + + bool SupportsTensorAllocatorAPI() const { return GetHandleFactoryPreferences().empty() == false; } + + ITensorHandleFactory::FactoryId GetBackwardCompatibleFavoriteHandleFactory() + { + auto favorites = GetHandleFactoryPreferences(); + if (favorites.empty()) + { + return ITensorHandleFactory::LegacyFactoryId; + } + return favorites[0]; + } + + /// (Optional) Returns a vector of supported TensorHandleFactory ids in preference order. + virtual std::vector<ITensorHandleFactory::FactoryId> GetHandleFactoryPreferences() const + { + return std::vector<ITensorHandleFactory::FactoryId>(); + } + + /// (Optional) Register TensorHandleFactories + /// Either this method or CreateMemoryManager() and + /// IWorkloadFactory::CreateTensor()/IWorkloadFactory::CreateSubtensor() methods must be implemented. + virtual void RegisterTensorHandleFactories(class TensorHandleFactoryRegistry& registry) {} }; using IBackendInternalUniquePtr = std::unique_ptr<IBackendInternal>; diff --git a/src/backends/backendsCommon/ITensorHandleFactory.cpp b/src/backends/backendsCommon/ITensorHandleFactory.cpp new file mode 100644 index 0000000000..91f5692723 --- /dev/null +++ b/src/backends/backendsCommon/ITensorHandleFactory.cpp @@ -0,0 +1,14 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "ITensorHandleFactory.hpp" + +namespace armnn +{ + +const ITensorHandleFactory::FactoryId ITensorHandleFactory::LegacyFactoryId = "armnn_legacy_factory"; +const ITensorHandleFactory::FactoryId ITensorHandleFactory::DeferredFactoryId = "armnn_deferred_factory"; + +} // namespace armnn diff --git a/src/backends/backendsCommon/ITensorHandleFactory.hpp b/src/backends/backendsCommon/ITensorHandleFactory.hpp new file mode 100644 index 0000000000..7685061eb3 --- /dev/null +++ b/src/backends/backendsCommon/ITensorHandleFactory.hpp @@ -0,0 +1,49 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <armnn/Types.hpp> +#include <armnn/IRuntime.hpp> + +namespace armnn +{ + +class ITensorHandleFactory +{ +public: + using FactoryId = std::string; + static const FactoryId LegacyFactoryId; // Use the workload factory to create the tensor handle + static const FactoryId DeferredFactoryId; // Some TensorHandleFactory decisions are deferred to run-time + + virtual ~ITensorHandleFactory() {} + + + virtual std::unique_ptr<ITensorHandle> CreateSubTensorHandle(ITensorHandle& parent, + TensorShape const& subTensorShape, + unsigned int const* subTensorOrigin) const = 0; + + virtual std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo) const = 0; + + virtual const FactoryId GetId() const = 0; + + virtual bool SupportsSubTensors() const = 0; + + virtual bool SupportsMapUnmap() const final { return true; } + + virtual bool SupportsExport() const final { return false; } + + virtual bool SupportsImport() const final { return false; } +}; + +enum class MemoryStrategy +{ + Undefined, + DirectCompatibility, // Only allocate the tensorhandle using the assigned factory + CopyToTarget, // Default + Insert MemCopy node before target + ExportToTarget, // Default + Insert Import node +}; + +} //namespace armnn diff --git 
a/src/backends/backendsCommon/OutputHandler.cpp b/src/backends/backendsCommon/OutputHandler.cpp index 2df2fb5181..8f4942d8ba 100644 --- a/src/backends/backendsCommon/OutputHandler.cpp +++ b/src/backends/backendsCommon/OutputHandler.cpp @@ -27,9 +27,9 @@ void OutputHandler::CreateTensorHandles(const IWorkloadFactory& factory) m_TensorHandle = factory.CreateTensorHandle(m_TensorInfo); } -void OutputHandler::CreateTensorHandles(const IWorkloadFactory& factory, DataLayout dataLayout) +void OutputHandler::CreateTensorHandles(const ITensorHandleFactory& factory) { - m_TensorHandle = factory.CreateTensorHandle(m_TensorInfo, dataLayout); + m_TensorHandle = factory.CreateTensorHandle(m_TensorInfo); } void OutputHandler::CollectWorkloadOutputs(WorkloadDataCollector& dataCollector) const diff --git a/src/backends/backendsCommon/OutputHandler.hpp b/src/backends/backendsCommon/OutputHandler.hpp index 240b369fab..01e255deaa 100644 --- a/src/backends/backendsCommon/OutputHandler.hpp +++ b/src/backends/backendsCommon/OutputHandler.hpp @@ -5,6 +5,7 @@ #pragma once #include "ITensorHandle.hpp" +#include "ITensorHandleFactory.hpp" #include <armnn/Descriptors.hpp> #include <armnn/INetwork.hpp> @@ -35,14 +36,10 @@ public: /// @param tensorInfo - TensorInfo for the output. void SetTensorInfo(const TensorInfo& tensorInfo); - /// @brief - Creates tensor handlers used by the intermediate tensors. Does not allocate memory. + /// @brief - Creates tensor handles used by the intermediate tensors. Does not allocate memory. /// @param factory - Factory to be used for handler creation. void CreateTensorHandles(const IWorkloadFactory& factory); - - /// @brief - Creates tensor handlers used by the intermediate tensors. Does not allocate memory. - /// @param factory - Factory to be used for handler creation. - /// @param dataLayout - Data Layout to be used for handler creation. 
- void CreateTensorHandles(const IWorkloadFactory& factory, DataLayout dataLayout); + void CreateTensorHandles(const ITensorHandleFactory& factory); /// @brief - Gets the matching TensorInfo for the output. /// @return - References to the output TensorInfo. diff --git a/src/backends/backendsCommon/TensorHandleFactoryRegistry.cpp b/src/backends/backendsCommon/TensorHandleFactoryRegistry.cpp new file mode 100644 index 0000000000..4692b9f960 --- /dev/null +++ b/src/backends/backendsCommon/TensorHandleFactoryRegistry.cpp @@ -0,0 +1,69 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "TensorHandleFactoryRegistry.hpp" +#include "IMemoryManager.hpp" + + +namespace armnn +{ + +void TensorHandleFactoryRegistry::RegisterFactory(std::unique_ptr <ITensorHandleFactory> newFactory) +{ + if (!newFactory) + { + return; + } + + ITensorHandleFactory::FactoryId id = newFactory->GetId(); + + // Don't register duplicates + for (auto& registeredFactory : m_Factories) + { + if (id == registeredFactory->GetId()) + { + return; + } + } + + // Take ownership of the new allocator + m_Factories.push_back(std::move(newFactory)); +} + +void TensorHandleFactoryRegistry::RegisterMemoryManager(std::shared_ptr<armnn::IMemoryManager> memoryManger) +{ + m_MemoryManagers.push_back(memoryManger); +} + +ITensorHandleFactory* TensorHandleFactoryRegistry::GetFactory(ITensorHandleFactory::FactoryId id) const +{ + for (auto& factory : m_Factories) + { + if (factory->GetId() == id) + { + return factory.get(); + } + } + + return nullptr; +} + +void TensorHandleFactoryRegistry::AquireMemory() +{ + for (auto& mgr : m_MemoryManagers) + { + mgr->Acquire(); + } +} + +void TensorHandleFactoryRegistry::ReleaseMemory() +{ + for (auto& mgr : m_MemoryManagers) + { + mgr->Release(); + } +} + +} // namespace armnn diff --git a/src/backends/backendsCommon/TensorHandleFactoryRegistry.hpp b/src/backends/backendsCommon/TensorHandleFactoryRegistry.hpp new file mode 
100644 index 0000000000..9e02985301 --- /dev/null +++ b/src/backends/backendsCommon/TensorHandleFactoryRegistry.hpp @@ -0,0 +1,49 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "ITensorHandleFactory.hpp" + +#include <memory> +#include <vector> + +namespace armnn +{ + +//Forward +class IMemoryManager; + +/// +class TensorHandleFactoryRegistry +{ +public: + TensorHandleFactoryRegistry() = default; + + TensorHandleFactoryRegistry(const TensorHandleFactoryRegistry& other) = delete; + TensorHandleFactoryRegistry(TensorHandleFactoryRegistry&& other) = delete; + + /// Register a TensorHandleFactory and transfer ownership + void RegisterFactory(std::unique_ptr<ITensorHandleFactory> allocator); + + /// Register a memory manager with shared ownership + void RegisterMemoryManager(std::shared_ptr<IMemoryManager> memoryManger); + + /// Find a TensorHandleFactory by Id + /// Returns nullptr if not found + ITensorHandleFactory* GetFactory(ITensorHandleFactory::FactoryId id) const; + + /// Aquire memory required for inference + void AquireMemory(); + + /// Release memory required for inference + void ReleaseMemory(); + +private: + std::vector<std::unique_ptr<ITensorHandleFactory>> m_Factories; + std::vector<std::shared_ptr<IMemoryManager>> m_MemoryManagers; +}; + +} // namespace armnn diff --git a/src/backends/backendsCommon/common.mk b/src/backends/backendsCommon/common.mk index 90d3d16ebb..8df5ab9203 100644 --- a/src/backends/backendsCommon/common.mk +++ b/src/backends/backendsCommon/common.mk @@ -10,10 +10,12 @@ COMMON_SOURCES := \ BackendRegistry.cpp \ CpuTensorHandle.cpp \ + ITensorHandleFactory.cpp \ LayerSupportBase.cpp \ MemCopyWorkload.cpp \ OptimizationViews.cpp \ OutputHandler.cpp \ + TensorHandleFactoryRegistry.cpp \ WorkloadData.cpp \ WorkloadFactory.cpp \ WorkloadUtils.cpp |