commit     84da38b0f11ca3db0a439e510514be780f3933ff (patch)
author     Derek Lamberti <derek.lamberti@arm.com>    2019-06-13 11:40:08 +0100
committer  Derek Lamberti <derek.lamberti@arm.com>    2019-06-24 15:00:15 +0000
tree       56532f4842abc1ad00ae57bc20ddc72cada59b4c
parent     9515c7ec4f4535fff2c8f2d3f88974474d3f3468 (diff)
download   armnn-84da38b0f11ca3db0a439e510514be780f3933ff.tar.gz
IVGCVSW-3277 Refactor TensorHandle factory API
* Added backend support for multiple types of TensorHandle factories
* Refactored the backend API to enable new tensor strategies
* Added mechanism to determine memory strategies during optimization
* Perform mem-copy only when Direct access is not found
* Explicitly deleted the copy-constructor from OutputSlot to prevent accidental
  local copies that would cause the DisconnectAll to be called by the destructor

Change-Id: I7e812c8e5e6c1c20db1c5932749ac70fd93db7f8
Signed-off-by: Derek Lamberti <derek.lamberti@arm.com>
Signed-off-by: Matteo Martincigh <matteo.martincigh@arm.com>
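For readers skimming the patch: a backend opts into the new tensor handle API by overriding two IBackendInternal hooks, GetHandleFactoryPreferences() and RegisterTensorHandleFactories(). The sketch below is illustrative only; CustomBackend, CustomFactory and CustomMemoryManager are hypothetical names, and the pattern simply mirrors the TestBackendA / TestFactory1 / TestMemMgr helpers this patch adds in src/armnn/test/TensorHandleStrategyTest.cpp (within this change, only those test backends actually register factories).

// Hedged sketch, not part of this patch: all "Custom*" names are hypothetical.
#include <backendsCommon/IBackendInternal.hpp>
#include <backendsCommon/IMemoryManager.hpp>
#include <backendsCommon/ITensorHandleFactory.hpp>
#include <backendsCommon/TensorHandleFactoryRegistry.hpp>

#include <memory>
#include <vector>

using namespace armnn;

class CustomMemoryManager : public IMemoryManager
{
public:
    void Acquire() override {}
    void Release() override {}
};

class CustomFactory : public ITensorHandleFactory
{
public:
    CustomFactory(std::weak_ptr<IMemoryManager> mgr, FactoryId id)
        : m_Id(id)
        , m_MemMgr(mgr)
    {}

    std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo) const override
    {
        return nullptr; // a real factory allocates a backend-specific handle here
    }

    std::unique_ptr<ITensorHandle> CreateSubTensorHandle(ITensorHandle& parent,
                                                         TensorShape const& subTensorShape,
                                                         unsigned int const* subTensorOrigin) const override
    {
        return nullptr; // and, optionally, sub-tensor views for concat/splitter
    }

    const FactoryId GetId() const override { return m_Id; }
    bool SupportsSubTensors() const override { return true; }

private:
    FactoryId m_Id;
    std::weak_ptr<IMemoryManager> m_MemMgr;
};

class CustomBackend : public IBackendInternal
{
public:
    const BackendId& GetId() const override { return m_Id; }

    IWorkloadFactoryPtr CreateWorkloadFactory(
        const IMemoryManagerSharedPtr& memoryManager = nullptr) const override
    {
        return IWorkloadFactoryPtr{}; // a real backend returns its workload factory
    }

    IBackendInternal::ILayerSupportSharedPtr GetLayerSupport() const override
    {
        return ILayerSupportSharedPtr{};
    }

    // A non-empty preference list makes SupportsTensorAllocatorAPI() return true,
    // so the optimizer scores these factories instead of defaulting to mem-copies.
    std::vector<ITensorHandleFactory::FactoryId> GetHandleFactoryPreferences() const override
    {
        return { "CustomFactory" };
    }

    void RegisterTensorHandleFactories(TensorHandleFactoryRegistry& registry) override
    {
        auto mgr = std::make_shared<CustomMemoryManager>();
        registry.RegisterMemoryManager(mgr);
        registry.RegisterFactory(std::make_unique<CustomFactory>(mgr, "CustomFactory"));
    }

private:
    BackendId m_Id = "CustomBackend";
};

Backends that do not override GetHandleFactoryPreferences() keep the legacy behaviour: SupportsTensorAllocatorAPI() returns false, OutputSlots stay on ITensorHandleFactory::LegacyFactoryId, and LoadedNetwork falls back to the existing CreateMemoryManager()/mem-copy path.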
-rw-r--r--  Android.mk  3
-rw-r--r--  CMakeLists.txt  1
-rw-r--r--  src/armnn/Graph.cpp  110
-rw-r--r--  src/armnn/Graph.hpp  3
-rw-r--r--  src/armnn/Layer.cpp  59
-rw-r--r--  src/armnn/Layer.hpp  20
-rw-r--r--  src/armnn/LoadedNetwork.cpp  36
-rw-r--r--  src/armnn/LoadedNetwork.hpp  3
-rw-r--r--  src/armnn/Network.cpp  420
-rw-r--r--  src/armnn/Network.hpp  24
-rw-r--r--  src/armnn/Optimizer.cpp  1
-rw-r--r--  src/armnn/layers/ConcatLayer.cpp  26
-rw-r--r--  src/armnn/layers/ConcatLayer.hpp  11
-rw-r--r--  src/armnn/layers/OutputLayer.hpp  7
-rw-r--r--  src/armnn/layers/SplitterLayer.cpp  27
-rw-r--r--  src/armnn/layers/SplitterLayer.hpp  10
-rw-r--r--  src/armnn/test/CreateWorkload.hpp  6
-rw-r--r--  src/armnn/test/GraphTests.cpp  28
-rw-r--r--  src/armnn/test/TensorHandleStrategyTest.cpp  274
-rw-r--r--  src/backends/aclCommon/test/CreateWorkloadClNeon.hpp  9
-rw-r--r--  src/backends/backendsCommon/CMakeLists.txt  4
-rw-r--r--  src/backends/backendsCommon/IBackendInternal.hpp  45
-rw-r--r--  src/backends/backendsCommon/ITensorHandleFactory.cpp  14
-rw-r--r--  src/backends/backendsCommon/ITensorHandleFactory.hpp  49
-rw-r--r--  src/backends/backendsCommon/OutputHandler.cpp  4
-rw-r--r--  src/backends/backendsCommon/OutputHandler.hpp  9
-rw-r--r--  src/backends/backendsCommon/TensorHandleFactoryRegistry.cpp  69
-rw-r--r--  src/backends/backendsCommon/TensorHandleFactoryRegistry.hpp  49
-rw-r--r--  src/backends/backendsCommon/common.mk  2
29 files changed, 1220 insertions, 103 deletions
diff --git a/Android.mk b/Android.mk
index 0eb2e011f8..b516e696f9 100644
--- a/Android.mk
+++ b/Android.mk
@@ -263,7 +263,8 @@ LOCAL_SRC_FILES := \
src/armnn/test/ProfilingEventTest.cpp \
src/armnn/test/ObservableTest.cpp \
src/armnn/test/OptionalTest.cpp \
- src/armnn/test/TestUtils.cpp
+ src/armnn/test/TestUtils.cpp \
+ src/armnn/test/TensorHandleStrategyTest.cpp
LOCAL_STATIC_LIBRARIES := \
libneuralnetworks_common \
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8fa2bf904d..836465ab08 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -484,6 +484,7 @@ if(BUILD_UNIT_TESTS)
src/armnn/test/RuntimeTests.cpp
src/armnn/test/RuntimeTests.hpp
src/armnn/test/SubgraphViewTests.cpp
+ src/armnn/test/TensorHandleStrategyTest.cpp
src/armnn/test/TensorHelpers.hpp
src/armnn/test/TensorTest.cpp
src/armnn/test/TestInputOutputLayerVisitor.cpp
diff --git a/src/armnn/Graph.cpp b/src/armnn/Graph.cpp
index a5c85092b0..e521623737 100644
--- a/src/armnn/Graph.cpp
+++ b/src/armnn/Graph.cpp
@@ -7,6 +7,9 @@
#include "SubgraphView.hpp"
#include "LayersFwd.hpp"
+#include <backendsCommon/IBackendInternal.hpp>
+
+#include <armnn/BackendId.hpp>
#include <armnn/Utils.hpp>
#include <armnn/TypesUtils.hpp>
@@ -252,53 +255,96 @@ const Graph& Graph::TopologicalSort() const
return *this;
}
-void Graph::AddCopyLayers()
+void Graph::AddCopyLayers(std::map<BackendId, std::unique_ptr<IBackendInternal>>& backends,
+ TensorHandleFactoryRegistry& registry)
{
// Returns true if the given layer could potentially need an intermediate copy layer (depending on its
// connections to other layers). At the time of writing, copy layers will be inserted in the following situations:
// CPU -> CL (and vice versa)
// CPU -> Neon (and vice versa)
auto MayNeedCopyLayer = [](const Layer& layer)
- {
- // All layers should have been associated with a valid compute device at this point.
- BOOST_ASSERT(layer.GetBackendId() != Compute::Undefined);
- // Does not need another copy layer if a copy layer is already present.
- return layer.GetType() != LayerType::MemCopy &&
- // Input and Output layers can perform their own copies internally.
- layer.GetType() != LayerType::Input &&
- layer.GetType() != LayerType::Output;
- };
-
- for (auto&& srcLayer : m_Layers)
{
- if (MayNeedCopyLayer(*srcLayer))
+ // All layers should have been associated with a valid compute device at this point.
+ BOOST_ASSERT(layer.GetBackendId() != Compute::Undefined);
+ // Does not need another copy layer if a copy layer is already present.
+ return layer.GetType() != LayerType::MemCopy;
+ };
+
+ ForEachLayer([this, &backends, &registry, MayNeedCopyLayer](Layer* srcLayer)
+ {
+ BOOST_ASSERT(srcLayer);
+
+ if (!MayNeedCopyLayer(*srcLayer))
+ {
+ // The current layer does not need copy layers, move to the next one
+ return;
+ }
+
+ const std::vector<OutputSlot>& srcOutputSlots = srcLayer->GetOutputSlots();
+ for (unsigned int srcOutputIndex = 0; srcOutputIndex < srcOutputSlots.size(); srcOutputIndex++)
{
- unsigned int srcOutputIndex = 0;
- for (auto&& srcOutput : srcLayer->GetOutputSlots())
+ OutputSlot& srcOutputSlot = srcLayer->GetOutputSlot(srcOutputIndex);
+ const std::vector<InputSlot*> srcConnections = srcOutputSlot.GetConnections();
+ for (unsigned int srcConnectionIndex = 0; srcConnectionIndex < srcConnections.size(); srcConnectionIndex++)
{
- std::vector<InputSlot*> connectionCopy = srcOutput.GetConnections();
- for (auto&& dstInput : connectionCopy)
+ InputSlot* dstInputSlot = srcConnections[srcConnectionIndex];
+ BOOST_ASSERT(dstInputSlot);
+
+ auto strategy = srcOutputSlot.GetMemoryStrategyForConnection(srcConnectionIndex);
+ BOOST_ASSERT_MSG(strategy != MemoryStrategy::Undefined,
+ "Undefined memory strategy found while adding copy layers for compatibility");
+
+ const Layer& dstLayer = dstInputSlot->GetOwningLayer();
+ if (MayNeedCopyLayer(dstLayer) &&
+ strategy == MemoryStrategy::CopyToTarget)
{
- Layer& dstLayer = dstInput->GetOwningLayer();
- if (MayNeedCopyLayer(dstLayer) && (dstLayer.GetBackendId() != srcLayer->GetBackendId()))
+ // A copy layer is needed in between the source and destination layers.
+ // Record the operation rather than attempting to modify the graph as we go.
+ // (invalidating iterators)
+ const std::string copyLayerName = boost::str(boost::format("[ %1% (%2%) -> %3% (%4%) ]")
+ % srcLayer->GetName()
+ % srcOutputIndex
+ % dstLayer.GetName()
+ % dstInputSlot->GetSlotIndex());
+
+ MemCopyLayer* const copyLayer = InsertNewLayer<MemCopyLayer>(*dstInputSlot, copyLayerName.c_str());
+ copyLayer->SetBackendId(dstLayer.GetBackendId());
+
+ OutputSlot& copyOutputSlot = copyLayer->GetOutputSlot(0);
+ auto backendIt = backends.find(dstLayer.GetBackendId());
+ if (backendIt != backends.end() &&
+ backendIt->second &&
+ backendIt->second->SupportsTensorAllocatorAPI())
+ {
+ auto backend = backendIt->second.get();
+ auto tensorHandleFactoryIds = backend->GetHandleFactoryPreferences();
+ bool found = false;
+ boost::ignore_unused(found);
+
+ for (auto preference : tensorHandleFactoryIds)
+ {
+ auto factory = registry.GetFactory(preference);
+ if (factory && factory->SupportsMapUnmap())
+ {
+ copyOutputSlot.SetTensorHandleFactory(preference);
+ found = true;
+ break;
+ }
+ }
+
+ BOOST_ASSERT_MSG(found, "Could not find a mappable TensorHandle for copy layer");
+ }
+ else
{
- // A copy layer is needed in between the source and destination layers.
- // Record the operation rather than attempting to modify the graph as we go.
- // (invalidating iterators)
- const std::string copyLayerName = boost::str(boost::format("[ %1% (%2%) -> %3% (%4%) ]")
- % srcLayer->GetName()
- % srcOutputIndex
- % dstLayer.GetName()
- % dstInput->GetSlotIndex());
-
- MemCopyLayer* const copyLayer = InsertNewLayer<MemCopyLayer>(*dstInput, copyLayerName.c_str());
- copyLayer->SetBackendId(dstLayer.GetBackendId());
+ copyOutputSlot.SetTensorHandleFactory(ITensorHandleFactory::LegacyFactoryId);
}
+
+ copyOutputSlot.SetMemoryStrategy(0, MemoryStrategy::DirectCompatibility);
+ srcOutputSlot.SetMemoryStrategy(srcConnectionIndex, MemoryStrategy::DirectCompatibility);
}
- ++srcOutputIndex;
}
}
- }
+ });
}
void Graph::SubstituteSubgraph(SubgraphView& subgraph, IConnectableLayer* substituteLayer)
diff --git a/src/armnn/Graph.hpp b/src/armnn/Graph.hpp
index 47e0e3b317..f8113375c9 100644
--- a/src/armnn/Graph.hpp
+++ b/src/armnn/Graph.hpp
@@ -191,7 +191,8 @@ public:
/// Modifies the graph in-place, removing edges connecting layers using different compute devices,
/// and relinking them via intermediary copy layers.
- void AddCopyLayers();
+ void AddCopyLayers(std::map<BackendId, std::unique_ptr<class IBackendInternal>>& backends,
+ TensorHandleFactoryRegistry& registry);
/// Substitutes the given sub-graph with either a new layer or a new sub-graph.
/// In either case, the given layer or all the layers in the given sub-graph must belong to this graph.
diff --git a/src/armnn/Layer.cpp b/src/armnn/Layer.cpp
index ced87b095c..a287220702 100644
--- a/src/armnn/Layer.cpp
+++ b/src/armnn/Layer.cpp
@@ -30,7 +30,8 @@ void InputSlot::Insert(Layer& layer)
// Connects inserted layer to parent.
BOOST_ASSERT(layer.GetNumInputSlots() == 1);
- prevSlot->Connect(layer.GetInputSlot(0));
+ int idx = prevSlot->Connect(layer.GetInputSlot(0));
+ prevSlot->SetMemoryStrategy(boost::numeric_cast<unsigned int>(idx), MemoryStrategy::Undefined);
// Sets tensor info for inserted layer.
const TensorInfo& tensorInfo = prevSlot->GetTensorInfo();
@@ -39,6 +40,7 @@ void InputSlot::Insert(Layer& layer)
// Connects inserted layer to this.
layer.GetOutputSlot(0).Connect(*this);
+ layer.GetOutputSlot(0).SetMemoryStrategy(0, MemoryStrategy::Undefined);
}
const InputSlot* OutputSlot::GetConnection(unsigned int index) const
@@ -78,13 +80,24 @@ int OutputSlot::Connect(InputSlot& destination)
{
destination.SetConnection(this);
m_Connections.push_back(&destination);
+ m_MemoryStrategies.push_back(MemoryStrategy::Undefined);
return boost::numeric_cast<int>(m_Connections.size() - 1);
}
void OutputSlot::Disconnect(InputSlot& slot)
{
slot.SetConnection(nullptr);
+ auto it = std::find(m_Connections.begin(), m_Connections.end(), &slot);
+
+ if (it == m_Connections.end())
+ {
+ return;
+ }
+
+ auto idx = std::distance(m_Connections.begin(), it);
m_Connections.erase(std::remove(m_Connections.begin(), m_Connections.end(), &slot), m_Connections.end());
+
+ m_MemoryStrategies.erase(m_MemoryStrategies.begin() + idx);
}
void OutputSlot::DisconnectAll()
@@ -100,6 +113,9 @@ void OutputSlot::MoveAllConnections(OutputSlot& destination)
{
while (GetNumConnections() > 0)
{
+ BOOST_ASSERT_MSG(m_MemoryStrategies[0] == MemoryStrategy::Undefined,
+ "Cannot move connections once memory strategies have be established.");
+
InputSlot& connection = *GetConnection(0);
Disconnect(connection);
destination.Connect(connection);
@@ -148,6 +164,26 @@ LayerGuid OutputSlot::GetOwningLayerGuid() const
return GetOwningLayer().GetGuid();
}
+void OutputSlot::SetTensorHandleFactory(const ITensorHandleFactory::FactoryId& id)
+{
+ m_TensorHandleFactoryId = id;
+}
+
+ITensorHandleFactory::FactoryId OutputSlot::GetTensorHandleFactoryId() const
+{
+ return m_TensorHandleFactoryId;
+}
+
+void OutputSlot::SetMemoryStrategy(unsigned int connectionIndex, MemoryStrategy strategy)
+{
+ m_MemoryStrategies[connectionIndex] = strategy;
+}
+
+MemoryStrategy OutputSlot::GetMemoryStrategyForConnection(unsigned int connectionIdx) const
+{
+ return m_MemoryStrategies[connectionIdx];
+}
+
namespace {
LayerGuid GenerateLayerGuid()
{
@@ -208,11 +244,26 @@ void Layer::CollectWorkloadOutputs(WorkloadDataCollector& dataCollector, const G
}
}
-void Layer::CreateTensorHandles(Graph& graph, const IWorkloadFactory& factory)
+void Layer::CreateTensorHandles(const TensorHandleFactoryRegistry& registry, const IWorkloadFactory& workloadFactory)
{
- for (auto&& outputHandler : m_OutputHandlers)
+ for (unsigned int idx=0; idx < GetNumOutputSlots(); idx++)
{
- outputHandler.CreateTensorHandles(factory);
+
+ OutputSlot& slot = GetOutputSlot(idx);
+ ITensorHandleFactory::FactoryId factoryId = slot.GetTensorHandleFactoryId();
+
+ OutputHandler& handler = GetOutputHandler(idx);
+ if (factoryId == ITensorHandleFactory::LegacyFactoryId)
+ {
+ handler.CreateTensorHandles(workloadFactory);
+ }
+ else
+ {
+ ITensorHandleFactory* handleFactory = registry.GetFactory(factoryId);
+ BOOST_ASSERT(handleFactory);
+
+ handler.CreateTensorHandles(*handleFactory);
+ }
}
}
diff --git a/src/armnn/Layer.hpp b/src/armnn/Layer.hpp
index cbb1771668..1ddbc00bc7 100644
--- a/src/armnn/Layer.hpp
+++ b/src/armnn/Layer.hpp
@@ -6,7 +6,9 @@
#include "LayerFwd.hpp"
+#include <backendsCommon/ITensorHandleFactory.hpp>
#include <backendsCommon/OutputHandler.hpp>
+#include <backendsCommon/TensorHandleFactoryRegistry.hpp>
#include <backendsCommon/WorkloadDataCollector.hpp>
#include <backendsCommon/WorkloadInfo.hpp>
#include "InternalTypes.hpp"
@@ -84,8 +86,15 @@ public:
explicit OutputSlot(Layer& owner, OutputHandler& outputHandler)
: m_OwningLayer(owner)
, m_OutputHandler(outputHandler)
+ , m_TensorHandleFactoryId(ITensorHandleFactory::LegacyFactoryId)
{}
+ OutputSlot(const OutputSlot&) = delete;
+ OutputSlot& operator=(const OutputSlot&) = delete;
+
+ OutputSlot(OutputSlot&&) = default;
+ OutputSlot& operator=(OutputSlot&&) = default;
+
~OutputSlot()
{
try
@@ -147,12 +156,21 @@ public:
bool operator==(const OutputSlot& other) const;
+ void SetTensorHandleFactory(const ITensorHandleFactory::FactoryId& id);
+ ITensorHandleFactory::FactoryId GetTensorHandleFactoryId() const;
+
+ void SetMemoryStrategy(unsigned int connectionIndex, MemoryStrategy strategy);
+ MemoryStrategy GetMemoryStrategyForConnection(unsigned int connectionIdx) const;
+
private:
void ValidateConnectionIndex(unsigned int index) const;
Layer& m_OwningLayer;
OutputHandler& m_OutputHandler;
std::vector<InputSlot*> m_Connections;
+
+ ITensorHandleFactory::FactoryId m_TensorHandleFactoryId;
+ std::vector<MemoryStrategy> m_MemoryStrategies;
};
// InputSlot inlines that need OutputSlot declaration.
@@ -248,7 +266,7 @@ public:
virtual std::unique_ptr<IWorkload> CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const = 0;
- virtual void CreateTensorHandles(Graph& graph, const IWorkloadFactory& factory);
+ virtual void CreateTensorHandles(const TensorHandleFactoryRegistry& registry, const IWorkloadFactory& factory);
/// Creates a dynamically-allocated copy of this layer.
/// @param graph - The Graph into which this Layer is being cloned.
diff --git a/src/armnn/LoadedNetwork.cpp b/src/armnn/LoadedNetwork.cpp
index 3c7dfb07a9..7873e48780 100644
--- a/src/armnn/LoadedNetwork.cpp
+++ b/src/armnn/LoadedNetwork.cpp
@@ -85,19 +85,37 @@ LoadedNetwork::LoadedNetwork(std::unique_ptr<OptimizedNetwork> net)
//(for example the splitter and concat layers).
for (auto&& layer : order)
{
- auto const& backend = layer->GetBackendId();
- if (m_Backends.count(backend) == 0)
+ auto const& backendId = layer->GetBackendId();
+ if (m_Backends.count(backendId) == 0)
{
- auto createBackend = BackendRegistryInstance().GetFactory(backend);
- auto it = m_Backends.emplace(std::make_pair(backend, createBackend()));
+ auto createBackend = BackendRegistryInstance().GetFactory(backendId);
+ auto it = m_Backends.emplace(std::make_pair(backendId, createBackend()));
- IBackendInternal::IMemoryManagerSharedPtr memoryManager = it.first->second->CreateMemoryManager();
- auto workloadFactory = it.first->second->CreateWorkloadFactory(memoryManager);
+ IBackendInternal* backend = it.first->second.get();
- m_WorkloadFactories.emplace(std::make_pair(backend,
- std::make_pair(std::move(workloadFactory), memoryManager)));
+ if (backend->SupportsTensorAllocatorAPI())
+ {
+ backend->RegisterTensorHandleFactories(m_TensorHandleFactoryRegistry);
+
+ auto workloadFactory = backend->CreateWorkloadFactory();
+ m_WorkloadFactories.emplace(
+ std::make_pair(backendId, std::make_pair(std::move(workloadFactory), nullptr)));
+ }
+ else
+ {
+ IBackendInternal::IMemoryManagerSharedPtr memoryManager = backend->CreateMemoryManager();
+ auto workloadFactory = backend->CreateWorkloadFactory(memoryManager);
+
+ m_WorkloadFactories.emplace(
+ std::make_pair(backendId, std::make_pair(std::move(workloadFactory), memoryManager)));
+ }
}
- layer->CreateTensorHandles(m_OptimizedNetwork->GetGraph(), GetWorkloadFactory(*layer));
+ }
+
+ for (auto&& layer : order)
+ {
+ auto& workloadFactory = GetWorkloadFactory(*layer);
+ layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory);
}
//Then create workloads.
diff --git a/src/armnn/LoadedNetwork.hpp b/src/armnn/LoadedNetwork.hpp
index 75af4a4e28..808a93222a 100644
--- a/src/armnn/LoadedNetwork.hpp
+++ b/src/armnn/LoadedNetwork.hpp
@@ -12,6 +12,7 @@
#include "Profiling.hpp"
#include <backendsCommon/IBackendInternal.hpp>
+#include <backendsCommon/TensorHandleFactoryRegistry.hpp>
#include <backendsCommon/Workload.hpp>
#include <backendsCommon/WorkloadFactory.hpp>
@@ -83,6 +84,8 @@ private:
mutable std::mutex m_WorkingMemMutex;
bool m_IsWorkingMemAllocated=false;
+
+ TensorHandleFactoryRegistry m_TensorHandleFactoryRegistry;
};
}
diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp
index 9436fc6f9c..58ccfb7813 100644
--- a/src/armnn/Network.cpp
+++ b/src/armnn/Network.cpp
@@ -16,6 +16,7 @@
#include <backendsCommon/WorkloadFactory.hpp>
#include <backendsCommon/BackendRegistry.hpp>
#include <backendsCommon/IBackendInternal.hpp>
+#include <backendsCommon/TensorHandleFactoryRegistry.hpp>
#include <armnn/Exceptions.hpp>
#include <armnn/Utils.hpp>
@@ -74,16 +75,7 @@ Status OptimizedNetwork::SerializeToDot(std::ostream& stream) const
return m_Graph->SerializeToDot(stream);
}
-struct OptimizationResult
-{
- bool m_Warning;
- bool m_Error;
- OptimizationResult()
- : m_Warning(false)
- , m_Error(false)
- {}
-};
void ReportError(const std::string& errorMessage,
Optional<std::vector<std::string>&> errorMessages)
@@ -323,8 +315,28 @@ OptimizationResult AssignBackends(OptimizedNetwork* optNetObjPtr,
errMessages);
}
+BackendsMap CreateSupportedBackends(TensorHandleFactoryRegistry& handleFactoryRegistry,
+ BackendSettings& backendSettings)
+{
+ BackendsMap backends;
+ auto const& backendRegistry = BackendRegistryInstance();
+ for (auto&& selectedBackend : backendSettings.m_SupportedBackends)
+ {
+ auto backendFactory = backendRegistry.GetFactory(selectedBackend);
+ auto backendObjPtr = backendFactory();
+ BOOST_ASSERT(backendObjPtr);
+
+ backendObjPtr->RegisterTensorHandleFactories(handleFactoryRegistry);
+
+ backends[backendObjPtr->GetId()] = std::move(backendObjPtr);
+ }
+
+ return backends;
+}
+
OptimizationResult ApplyBackendOptimizations(OptimizedNetwork* optNetObjPtr,
BackendSettings& backendSettings,
+ BackendsMap& backends,
Optional<std::vector<std::string>&> errMessages)
{
BOOST_ASSERT(optNetObjPtr);
@@ -338,11 +350,9 @@ OptimizationResult ApplyBackendOptimizations(OptimizedNetwork* optNetObjPtr,
SubgraphView mainSubgraph(optGraph);
// Run backend specific optimizations
- auto const& backendRegistry = BackendRegistryInstance();
for (auto&& selectedBackend : backendSettings.m_SelectedBackends)
{
- auto backendFactory = backendRegistry.GetFactory(selectedBackend);
- auto backendObjPtr = backendFactory();
+ auto backendObjPtr = backends.find(selectedBackend)->second.get();
BOOST_ASSERT(backendObjPtr);
// Select sub-graphs based on backend
@@ -425,6 +435,359 @@ OptimizationResult ApplyBackendOptimizations(OptimizedNetwork* optNetObjPtr,
return result;
}
+bool RequiresCopy(ITensorHandleFactory::FactoryId src,
+ ITensorHandleFactory::FactoryId dst,
+ TensorHandleFactoryRegistry& registry)
+{
+ if (src != dst)
+ {
+ ITensorHandleFactory* srcFactory = registry.GetFactory(src);
+ ITensorHandleFactory* dstFactory = registry.GetFactory(dst);
+
+ if (srcFactory->SupportsExport() && dstFactory->SupportsImport())
+ {
+ return false;
+ }
+ return true;
+ }
+ return false;
+}
+
+// Find the handle factory for the input layer which results in fewest required copies.
+ITensorHandleFactory::FactoryId CalculateSlotOptionForInput(BackendsMap& backends,
+ OutputSlot& slot,
+ TensorHandleFactoryRegistry& registry)
+{
+ Layer& layer = slot.GetOwningLayer();
+ BOOST_ASSERT(layer.GetType() == LayerType::Input);
+
+ // Explicitly select the TensorHandle factory for InputLayer because the rules for it are slightly different. It
+ // doesn't matter which backend it is assigned to because they all use the same implementation, which
+ // requires Map/Unmap support. This means that, so long as the handle type supports map/unmap semantics, we can
+ // select a factory with maximum compatibility with the layers connected to the InputLayer.
+
+ // First ensure the source backend can support the TensorHandle factory API
+ auto frmBackend = backends.find(layer.GetBackendId());
+ if (frmBackend == backends.end() ||
+ !frmBackend->second->SupportsTensorAllocatorAPI())
+ {
+ return ITensorHandleFactory::LegacyFactoryId;
+ }
+
+ // Go through all connections to the output slot and determine the TensorHandleFactory which results in the
+ // fewest copies.
+ std::map<ITensorHandleFactory::FactoryId, int> factoryScores;
+ int topScore = 0;
+ ITensorHandleFactory::FactoryId topChoice = ITensorHandleFactory::LegacyFactoryId;
+
+ for (auto&& connection : slot.GetConnections())
+ {
+ const Layer& connectedLayer = connection->GetOwningLayer();
+
+ auto toBackend = backends.find(connectedLayer.GetBackendId());
+ BOOST_ASSERT_MSG(toBackend != backends.end(), "Backend id not found for the connected layer");
+
+ if (!toBackend->second.get()->SupportsTensorAllocatorAPI())
+ {
+ // The destination backend does not support the tensor allocator API, move to the next one
+ continue;
+ }
+
+ auto dstPrefs = toBackend->second.get()->GetHandleFactoryPreferences();
+ for (auto&& dst : dstPrefs)
+ {
+ // Input layers use the mem copy workload, so the selected factory must support map/unmap API
+ ITensorHandleFactory* factory = registry.GetFactory(dst);
+ if (!factory->SupportsMapUnmap())
+ {
+ // The current tensor handle factory does not support the map/unmap strategy, move to the next one
+ continue;
+ }
+
+ auto it = factoryScores.find(dst);
+ if (it == factoryScores.end())
+ {
+ // Add new score to the table
+ factoryScores[dst] = 0;
+ if (topChoice == ITensorHandleFactory::LegacyFactoryId)
+ {
+ topChoice = dst;
+ }
+ }
+ else
+ {
+ // Increase the score
+ factoryScores[dst]++;
+
+ // Track the best option
+ if (factoryScores[dst] > topScore)
+ {
+ topScore = factoryScores[dst];
+ topChoice = dst;
+ }
+ }
+ }
+ }
+
+ return topChoice;
+}
+
+// Find the handle factory for the output layer which results in fewest required copies.
+ITensorHandleFactory::FactoryId CalculateSlotOptionForOutput(BackendsMap& backends,
+ OutputSlot& slot,
+ TensorHandleFactoryRegistry& registry)
+{
+ return ITensorHandleFactory::DeferredFactoryId;
+}
+
+// For all handle factories supported on the source backend, we wish to find the one which requires the fewest copies
+// when considering all connections.
+ITensorHandleFactory::FactoryId CalculateSlotOption(BackendsMap& backends,
+ OutputSlot& outputSlot,
+ TensorHandleFactoryRegistry& registry)
+{
+ // First ensure the source backend can support the TensorHandle factory API
+ Layer& layer = outputSlot.GetOwningLayer();
+ auto frmBackend = backends.find(layer.GetBackendId());
+ if (frmBackend == backends.end() ||
+ !frmBackend->second->SupportsTensorAllocatorAPI())
+ {
+ return ITensorHandleFactory::LegacyFactoryId;
+ }
+
+ // Connections to Output Layers require support for map/unmap on the TensorHandle.
+ bool requiresMapUnmap = false;
+ for (auto&& connection : outputSlot.GetConnections())
+ {
+ const Layer& connectedLayer = connection->GetOwningLayer();
+ if (connectedLayer.GetType() == LayerType::Output)
+ {
+ requiresMapUnmap = true;
+ }
+ }
+
+ IBackendInternal* srcBackend = frmBackend->second.get();
+ auto srcPrefs = srcBackend->GetHandleFactoryPreferences();
+
+ // Initialize the scores
+ std::map<ITensorHandleFactory::FactoryId, int> factoryScores;
+ for (auto&& pref : srcPrefs)
+ {
+ if (requiresMapUnmap) // Only consider factories that support map/unmap if required
+ {
+ ITensorHandleFactory* factory = registry.GetFactory(pref);
+ if (!factory->SupportsMapUnmap())
+ {
+ // The current tensor handle factory does not support the map/unmap strategy, move to the next one
+ continue;
+ }
+ }
+
+ auto it = factoryScores.find(pref);
+ if (it == factoryScores.end())
+ {
+ // Add new score to the table
+ factoryScores[pref] = 0;
+ }
+ }
+
+ // Score each handle factory based on how many times it requires copies on the slot connections
+ for (auto&& connection : outputSlot.GetConnections())
+ {
+ const Layer& connectedLayer = connection->GetOwningLayer();
+
+ auto toBackend = backends.find(connectedLayer.GetBackendId());
+ BOOST_ASSERT_MSG(toBackend != backends.end(), "Backend id not found for the connected layer");
+
+ auto dstPrefs = toBackend->second.get()->GetHandleFactoryPreferences();
+ for (auto&& src : srcPrefs)
+ {
+ if (factoryScores.find(src) == factoryScores.end()) // Don't consider excluded factories
+ {
+ continue;
+ }
+
+ for (auto&& dst : dstPrefs)
+ {
+ if (RequiresCopy(src, dst, registry))
+ {
+ // A copy would be required for this connection, so increase the score
+ factoryScores[src]++;
+ break;
+ }
+ }
+ }
+ }
+
+ // Find the lowest score
+ int minScore = std::numeric_limits<int>::max();
+ for (auto it : factoryScores)
+ {
+ minScore = std::min(minScore, it.second);
+ }
+
+ // Collect factories matching the best(lowest) score
+ std::vector<ITensorHandleFactory::FactoryId> optimalFactories;
+ for (auto it : factoryScores)
+ {
+ if (it.second == minScore)
+ {
+ optimalFactories.push_back(it.first);
+ }
+ }
+
+ // For all compatible Factories matching the best score, find the preferred one for the current layer.
+ for (auto&& srcPref : srcPrefs)
+ {
+ for (auto&& comp : optimalFactories)
+ {
+ if (comp == srcPref)
+ {
+ return comp;
+ }
+ }
+ }
+
+ return ITensorHandleFactory::LegacyFactoryId;
+}
+
+MemoryStrategy CalculateStrategy(BackendsMap& backends,
+ ITensorHandleFactory::FactoryId srcFactoryId,
+ const Layer& layer,
+ const Layer& connectedLayer,
+ TensorHandleFactoryRegistry& registry)
+{
+ auto toBackend = backends.find(connectedLayer.GetBackendId());
+ BOOST_ASSERT_MSG(toBackend != backends.end(), "Backend id not found for the connected layer");
+
+ auto dstPrefs = toBackend->second.get()->GetHandleFactoryPreferences();
+
+ // Legacy API check for backward compatibility
+ if (srcFactoryId == ITensorHandleFactory::LegacyFactoryId || dstPrefs.empty())
+ {
+ if (layer.GetBackendId() != connectedLayer.GetBackendId())
+ {
+ return MemoryStrategy::CopyToTarget;
+ }
+ else
+ {
+ return MemoryStrategy::DirectCompatibility;
+ }
+ }
+
+ // TensorHandleFactory API present, so perform more sophisticated strategies.
+ // Dst Output layers don't require copy because they use map/unmap
+ if (connectedLayer.GetType() == LayerType::Output)
+ {
+ return MemoryStrategy::DirectCompatibility;
+ }
+
+ // Search for direct match in prefs
+ for (auto&& pref : dstPrefs)
+ {
+ if (pref == srcFactoryId)
+ {
+ return MemoryStrategy::DirectCompatibility;
+ }
+ }
+
+ // Search for export/import options
+ ITensorHandleFactory* srcFactory = registry.GetFactory(srcFactoryId);
+ if (srcFactory->SupportsExport())
+ {
+ for (auto&& pref : dstPrefs)
+ {
+ ITensorHandleFactory* dstFactory = registry.GetFactory(pref);
+ if (dstFactory->SupportsImport())
+ {
+ return MemoryStrategy::ExportToTarget;
+ }
+ }
+ }
+
+ // Search for copy options via map/unmap
+ if (srcFactory->SupportsMapUnmap())
+ {
+ for (auto&& pref : dstPrefs)
+ {
+ ITensorHandleFactory* dstFactory = registry.GetFactory(pref);
+ if (dstFactory->SupportsMapUnmap())
+ {
+ return MemoryStrategy::CopyToTarget;
+ }
+ }
+ }
+
+ return MemoryStrategy::Undefined;
+}
+
+// Select the TensorHandleFactories and the corresponding memory strategy
+OptimizationResult SelectTensorHandleStrategy(Graph& optGraph,
+ BackendsMap& backends,
+ TensorHandleFactoryRegistry& registry,
+ Optional<std::vector<std::string>&> errMessages)
+{
+ OptimizationResult result;
+
+ optGraph.ForEachLayer([&backends, &registry, &result, &errMessages](Layer* layer)
+ {
+ BOOST_ASSERT(layer);
+
+ // Let's make sure the backend is in our list of supported backends. Something went wrong during backend
+ // assignment if this check fails
+ BOOST_ASSERT(backends.find(layer->GetBackendId()) != backends.end());
+
+ // Check each output separately
+ for (unsigned int slotIdx = 0; slotIdx < layer->GetNumOutputSlots(); slotIdx++)
+ {
+ OutputSlot& outputSlot = layer->GetOutputSlot(slotIdx);
+
+ ITensorHandleFactory::FactoryId slotOption = ITensorHandleFactory::LegacyFactoryId;
+
+ // Calculate the factory to use which results in the fewest copies being made.
+ switch(layer->GetType())
+ {
+ case LayerType::Input:
+ slotOption = CalculateSlotOptionForInput(backends, outputSlot, registry);
+ break;
+ case LayerType::Output:
+ slotOption = CalculateSlotOptionForOutput(backends, outputSlot, registry);
+ break;
+ default:
+ slotOption = CalculateSlotOption(backends, outputSlot, registry);
+ break;
+ }
+ outputSlot.SetTensorHandleFactory(slotOption);
+
+ // Now determine the "best" memory strategy for each connection given the slotOption.
+ unsigned int connectionIdx = 0;
+ for (auto&& connection : outputSlot.GetConnections())
+ {
+ const Layer& connectedLayer = connection->GetOwningLayer();
+
+ MemoryStrategy strategy = CalculateStrategy(backends, slotOption, *layer, connectedLayer, registry);
+
+ if (strategy == MemoryStrategy::Undefined)
+ {
+ result.m_Error = true;
+ if (errMessages)
+ {
+ errMessages.value().emplace_back("Could not find valid strategy required for compatibility"
+ " between backends.");
+ }
+ return;
+ }
+
+ outputSlot.SetMemoryStrategy(connectionIdx, strategy);
+
+ connectionIdx++;
+ }
+ }
+ });
+
+ return result;
+}
+
IOptimizedNetworkPtr Optimize(const INetwork& inNetwork,
const std::vector<BackendId>& backendPreferences,
const IDeviceSpec& deviceSpec,
@@ -476,15 +839,19 @@ IOptimizedNetworkPtr Optimize(const INetwork& inNetwork,
return IOptimizedNetworkPtr(nullptr, &IOptimizedNetwork::Destroy);
}
+ // Create a map to temporarily hold initialized backend objects
+ TensorHandleFactoryRegistry tensorHandleFactoryRegistry;
+ BackendsMap backends = CreateSupportedBackends(tensorHandleFactoryRegistry, backendSettings);
+
// Assign an available backend to each layer
Graph::Iterator firstLayer = optGraph.begin();
Graph::Iterator lastLayer = optGraph.end();
- OptimizationResult assigBackendsResult = AssignBackends(optNetObjPtr,
- backendSettings,
- firstLayer,
- lastLayer,
- errMessages);
- if (assigBackendsResult.m_Error)
+ OptimizationResult assignBackendsResult = AssignBackends(optNetObjPtr,
+ backendSettings,
+ firstLayer,
+ lastLayer,
+ errMessages);
+ if (assignBackendsResult.m_Error)
{
// Failed to assign a backend to each layer
return IOptimizedNetworkPtr(nullptr, &IOptimizedNetwork::Destroy);
@@ -496,6 +863,7 @@ IOptimizedNetworkPtr Optimize(const INetwork& inNetwork,
// Apply the backend-specific optimizations
OptimizationResult backendOptimizationResult = ApplyBackendOptimizations(optNetObjPtr,
backendSettings,
+ backends,
errMessages);
if (backendOptimizationResult.m_Error)
{
@@ -510,13 +878,25 @@ IOptimizedNetworkPtr Optimize(const INetwork& inNetwork,
Optimizer::Pass(optGraph, MakeOptimizations(InsertDebugLayer()));
}
- optGraph.AddCopyLayers();
+ // Calculate the compatibility strategies for tensor handles
+ OptimizationResult strategyResult = SelectTensorHandleStrategy(optGraph,
+ backends,
+ tensorHandleFactoryRegistry,
+ errMessages);
+ if (strategyResult.m_Error)
+ {
+ // Failed to apply the backend-specific optimizations
+ return IOptimizedNetworkPtr(nullptr, &IOptimizedNetwork::Destroy);
+ }
+
+ // Based on the tensor handle strategy determined above, insert copy layers where required.
+ optGraph.AddCopyLayers(backends, tensorHandleFactoryRegistry);
// Convert constants
Optimizer::Pass(optGraph, MakeOptimizations(ConvertConstantsFloatToHalf()));
Optimizer::Pass(optGraph, MakeOptimizations(ConvertConstantsHalfToFloat()));
- // Run backend specific optimizations
+ // Run backend specific optimizations (deprecated)
for (auto&& chosenBackend : backendSettings.m_SelectedBackends)
{
auto factoryFun = BackendRegistryInstance().GetFactory(chosenBackend);
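Taken together, the Network.cpp changes above re-order the optimizer pipeline: build the backends once, score tensor handle factories per output slot, pick a memory strategy per connection, and only then insert copy layers. The sketch below is a hedged summary rather than code from this patch; RunTensorHandlePasses is a hypothetical free function (assumed to sit in namespace armnn with Network.hpp and Graph.hpp available), and it presumes AssignBackends() has already run, since both SelectTensorHandleStrategy() and AddCopyLayers() assert that every layer has a backend assigned.

// Hypothetical helper, condensing the new steps Optimize() performs above.
OptimizationResult RunTensorHandlePasses(Graph& optGraph,
                                         BackendSettings& backendSettings,
                                         Optional<std::vector<std::string>&> errMessages)
{
    // Instantiate each supported backend once; backends implementing the new API
    // register their ITensorHandleFactory objects and memory managers here.
    TensorHandleFactoryRegistry registry;
    BackendsMap backends = CreateSupportedBackends(registry, backendSettings);

    // Choose a tensor handle factory for every OutputSlot and a MemoryStrategy
    // (DirectCompatibility, ExportToTarget or CopyToTarget) for every connection.
    OptimizationResult result = SelectTensorHandleStrategy(optGraph, backends, registry, errMessages);
    if (result.m_Error)
    {
        return result;
    }

    // Insert MemCopy layers only on connections whose strategy is CopyToTarget.
    optGraph.AddCopyLayers(backends, registry);
    return result;
}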
diff --git a/src/armnn/Network.hpp b/src/armnn/Network.hpp
index b90e3c2f8d..8db968a3f9 100644
--- a/src/armnn/Network.hpp
+++ b/src/armnn/Network.hpp
@@ -13,6 +13,7 @@
#include <string>
#include <vector>
+#include <map>
#include <memory>
#include "Layer.hpp"
@@ -229,4 +230,27 @@ private:
std::unique_ptr<Graph> m_Graph;
};
+
+
+struct OptimizationResult
+{
+ bool m_Warning;
+ bool m_Error;
+
+ OptimizationResult()
+ : m_Warning(false)
+ , m_Error(false)
+ {}
+};
+
+using BackendsMap = std::map<BackendId, std::unique_ptr<class IBackendInternal>>;
+
+BackendsMap CreateSupportedBackends(TensorHandleFactoryRegistry& handleFactoryRegistry,
+ struct BackendSettings& backendSettings);
+
+OptimizationResult SelectTensorHandleStrategy(Graph& optGraph,
+ BackendsMap& backends,
+ TensorHandleFactoryRegistry& registry,
+ Optional<std::vector<std::string>&> errMessages);
+
} // namespace armnn
diff --git a/src/armnn/Optimizer.cpp b/src/armnn/Optimizer.cpp
index 4d0aae8491..0a31f84654 100644
--- a/src/armnn/Optimizer.cpp
+++ b/src/armnn/Optimizer.cpp
@@ -28,6 +28,7 @@ void Optimizer::Pass(Graph& graph, const Optimizations& optimizations)
--it;
for (auto&& optimization : optimizations)
{
+ BOOST_ASSERT(*it);
optimization->Run(graph, **it);
if ((*it)->IsOutputUnconnected())
diff --git a/src/armnn/layers/ConcatLayer.cpp b/src/armnn/layers/ConcatLayer.cpp
index 1d2641cd60..24051a24d2 100644
--- a/src/armnn/layers/ConcatLayer.cpp
+++ b/src/armnn/layers/ConcatLayer.cpp
@@ -34,7 +34,8 @@ std::unique_ptr<IWorkload> ConcatLayer::CreateWorkload(const Graph& graph, const
return factory.CreateConcat(descriptor, PrepInfoAndDesc(descriptor, graph));
}
-void ConcatLayer::CreateTensorHandles(Graph& graph, const IWorkloadFactory& factory)
+template<typename FactoryType>
+void ConcatLayer::CreateTensors(const FactoryType& factory)
{
//If sub tensors are supported then the concat
//just needs to make sure that the outputs of the prev layer
@@ -43,6 +44,8 @@ void ConcatLayer::CreateTensorHandles(Graph& graph, const IWorkloadFactory& fact
if (factory.SupportsSubTensors())
{
+ ITensorHandleFactory::FactoryId factoryId = GetOutputSlot(0).GetTensorHandleFactoryId();
+
std::queue<ConcatLayer*> m_ConcatLayers;
m_ConcatLayers.push(this);
@@ -66,7 +69,8 @@ void ConcatLayer::CreateTensorHandles(Graph& graph, const IWorkloadFactory& fact
auto CreateSubTensor = [&]()
{
// Make sure quantization parameters are in the same space
- if (parentInfo.IsTypeSpaceMatch(info))
+ if (parentInfo.IsTypeSpaceMatch(info) &&
+ factoryId == slot->GetTensorHandleFactoryId())
{
return factory.CreateSubTensorHandle(*parentTensor,
info.GetShape(),
@@ -114,6 +118,24 @@ void ConcatLayer::CreateTensorHandles(Graph& graph, const IWorkloadFactory& fact
}
}
+void ConcatLayer::CreateTensorHandles(const TensorHandleFactoryRegistry& registry,
+ const IWorkloadFactory& workloadFactory)
+{
+ OutputSlot& slot = GetOutputSlot(0);
+ ITensorHandleFactory::FactoryId factoryId = slot.GetTensorHandleFactoryId();
+
+ if (factoryId == ITensorHandleFactory::LegacyFactoryId)
+ {
+ CreateTensors(workloadFactory);
+ }
+ else
+ {
+ ITensorHandleFactory* handleFactory = registry.GetFactory(factoryId);
+ BOOST_ASSERT(handleFactory);
+ CreateTensors(*handleFactory);
+ }
+}
+
ConcatLayer* ConcatLayer::Clone(Graph& graph) const
{
return CloneBase<ConcatLayer>(graph, m_Param, GetName());
diff --git a/src/armnn/layers/ConcatLayer.hpp b/src/armnn/layers/ConcatLayer.hpp
index 4268291916..eb7d93ce14 100644
--- a/src/armnn/layers/ConcatLayer.hpp
+++ b/src/armnn/layers/ConcatLayer.hpp
@@ -22,9 +22,11 @@ public:
/// Set the outputs to be appropriate sub tensors of the input if sub tensors are supported
/// otherwise creates tensor handlers.
- /// @param [in] graph The graph where this layer can be found.
+ /// @param [in] registry Contains all the registered tensor handle factories available for use.
/// @param [in] factory The workload factory which will create the workload.
- virtual void CreateTensorHandles(Graph& graph, const IWorkloadFactory& factory) override;
+// virtual void CreateTensorHandles(Graph& graph, const IWorkloadFactory& factory) override;
+ virtual void CreateTensorHandles(const TensorHandleFactoryRegistry& registry,
+ const IWorkloadFactory& factory) override;
/// Creates a dynamically-allocated copy of this layer.
/// @param [in] graph The graph into which this layer is being cloned.
@@ -50,6 +52,11 @@ protected:
/// Default destructor
~ConcatLayer() = default;
+
+private:
+ template <typename FactoryType>
+ void CreateTensors(const FactoryType& factory);
+
};
} // namespace
diff --git a/src/armnn/layers/OutputLayer.hpp b/src/armnn/layers/OutputLayer.hpp
index b86f8e2dfe..2aa2dbd6c9 100644
--- a/src/armnn/layers/OutputLayer.hpp
+++ b/src/armnn/layers/OutputLayer.hpp
@@ -22,11 +22,12 @@ public:
/// Set the outputs to be appropriate sub tensors of the input if sub tensors are supported
/// otherwise creates tensor handlers by default. Ignores parameters for Output type.
- /// @param [in] graph The graph where this layer can be found.
+ /// @param [in] registry Contains all the registered tensor handle factories available for use.
/// @param [in] factory The workload factory which will create the workload.
- virtual void CreateTensorHandles(Graph& graph, const IWorkloadFactory& factory) override
+ virtual void CreateTensorHandles(const TensorHandleFactoryRegistry& registry,
+ const IWorkloadFactory& factory) override
{
- boost::ignore_unused(graph, factory);
+ boost::ignore_unused(registry, factory);
}
/// Creates a dynamically-allocated copy of this layer.
diff --git a/src/armnn/layers/SplitterLayer.cpp b/src/armnn/layers/SplitterLayer.cpp
index 4a6b2220a7..dc04b3fd15 100644
--- a/src/armnn/layers/SplitterLayer.cpp
+++ b/src/armnn/layers/SplitterLayer.cpp
@@ -32,7 +32,8 @@ std::unique_ptr<IWorkload> SplitterLayer::CreateWorkload(const Graph& graph, con
return factory.CreateSplitter(descriptor, PrepInfoAndDesc(descriptor, graph));
}
-void SplitterLayer::CreateTensorHandles(Graph& graph, const IWorkloadFactory& factory)
+template<typename FactoryType>
+void SplitterLayer::CreateTensors(const FactoryType& factory)
{
//If sub tensors are supported then all the "splitter" needs to do is to
//set the outputs to be appropriate sub tensors of the input.
@@ -40,6 +41,7 @@ void SplitterLayer::CreateTensorHandles(Graph& graph, const IWorkloadFactory& fa
if (useSubTensors)
{
+ const OutputSlot* slot = GetInputSlots()[0].GetConnectedOutputSlot();
const OutputHandler& outputHandler = GetInputSlots()[0].GetConnectedOutputSlot()->GetOutputHandler();
const TensorInfo& parentInfo = outputHandler.GetTensorInfo();
@@ -53,10 +55,13 @@ void SplitterLayer::CreateTensorHandles(Graph& graph, const IWorkloadFactory& fa
{
const TensorInfo& info = m_OutputHandlers[i].GetTensorInfo();
+ OutputSlot& outSlot = GetOutputSlot(i);
+ ITensorHandleFactory::FactoryId factoryId = outSlot.GetTensorHandleFactoryId();
auto CreateSubTensor = [&]()
{
// Make sure quantization parameters are in the same space
- if (parentInfo.IsTypeSpaceMatch(info))
+ if (parentInfo.IsTypeSpaceMatch(info) &&
+ factoryId == slot->GetTensorHandleFactoryId())
{
return factory.CreateSubTensorHandle(*inputData,
info.GetShape(),
@@ -95,6 +100,24 @@ void SplitterLayer::CreateTensorHandles(Graph& graph, const IWorkloadFactory& fa
}
}
+void SplitterLayer::CreateTensorHandles(const TensorHandleFactoryRegistry& registry,
+ const IWorkloadFactory& workloadFactory)
+{
+ OutputSlot& slot = GetOutputSlot(0);
+ ITensorHandleFactory::FactoryId factoryId = slot.GetTensorHandleFactoryId();
+
+ if (factoryId == ITensorHandleFactory::LegacyFactoryId)
+ {
+ CreateTensors(workloadFactory);
+ }
+ else
+ {
+ ITensorHandleFactory* handleFactory = registry.GetFactory(factoryId);
+ BOOST_ASSERT(handleFactory);
+ CreateTensors(*handleFactory);
+ }
+}
+
SplitterLayer* SplitterLayer::Clone(Graph& graph) const
{
return CloneBase<SplitterLayer>(graph, m_Param, GetName());
diff --git a/src/armnn/layers/SplitterLayer.hpp b/src/armnn/layers/SplitterLayer.hpp
index 19b05562e8..9c684d479f 100644
--- a/src/armnn/layers/SplitterLayer.hpp
+++ b/src/armnn/layers/SplitterLayer.hpp
@@ -22,9 +22,11 @@ public:
/// Set the outputs to be appropriate sub tensors of the input if sub tensors are supported
/// otherwise creates tensor handlers.
- /// @param [in] graph The graph where this layer can be found.
+ /// @param [in] registry Contains all the registered tensor handle factories available for use.
/// @param [in] factory The workload factory which will create the workload.
- virtual void CreateTensorHandles(Graph& graph, const IWorkloadFactory& factory) override;
+ //virtual void CreateTensorHandles(Graph& graph, const IWorkloadFactory& factory) override;
+ virtual void CreateTensorHandles(const TensorHandleFactoryRegistry& registry,
+ const IWorkloadFactory& factory) override;
/// Creates a dynamically-allocated copy of this layer.
/// @param [in] graph The graph into which this layer is being cloned.
@@ -50,6 +52,10 @@ protected:
/// Default destructor
~SplitterLayer() = default;
+
+private:
+ template <typename FactoryType>
+ void CreateTensors(const FactoryType& factory);
};
} // namespace
diff --git a/src/armnn/test/CreateWorkload.hpp b/src/armnn/test/CreateWorkload.hpp
index b07197797c..47af4a89b5 100644
--- a/src/armnn/test/CreateWorkload.hpp
+++ b/src/armnn/test/CreateWorkload.hpp
@@ -41,11 +41,13 @@ std::unique_ptr<Workload> MakeAndCheckWorkload(Layer& layer, Graph& graph, const
}
// Helper function to create tensor handlers for workloads, assuming they all use the same factory.
-void CreateTensorHandles(armnn::Graph& graph, armnn::IWorkloadFactory& factory)
+void CreateTensorHandles(armnn::Graph& graph,
+ armnn::IWorkloadFactory& factory)
{
+ TensorHandleFactoryRegistry tmpRegistry;
for (auto&& layer : graph.TopologicalSort())
{
- layer->CreateTensorHandles(graph, factory);
+ layer->CreateTensorHandles(tmpRegistry, factory);
}
}
diff --git a/src/armnn/test/GraphTests.cpp b/src/armnn/test/GraphTests.cpp
index 44cfa2eaec..ae5ff2232b 100644
--- a/src/armnn/test/GraphTests.cpp
+++ b/src/armnn/test/GraphTests.cpp
@@ -14,6 +14,8 @@
#include <Layer.hpp>
#include <backendsCommon/CpuTensorHandle.hpp>
+#include <backendsCommon/IBackendInternal.hpp>
+#include <backendsCommon/TensorHandleFactoryRegistry.hpp>
/// Checks that first comes before second in the order.
@@ -477,10 +479,21 @@ struct CopyLayersFixture
outputLayer->SetBackendId(armnn::Compute::CpuRef);
softmaxLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
+
+ // Set the memory strategies
+ inputLayer->GetOutputSlot(0).SetMemoryStrategy(0, MemoryStrategy::DirectCompatibility);
+ convLayer1->GetOutputSlot(0).SetMemoryStrategy(0, MemoryStrategy::DirectCompatibility);
+ convLayer1->GetOutputSlot(0).SetMemoryStrategy(1, MemoryStrategy::DirectCompatibility);
+ convLayer2->GetOutputSlot(0).SetMemoryStrategy(0, MemoryStrategy::DirectCompatibility);
+ concatLayer->GetOutputSlot(0).SetMemoryStrategy(0, MemoryStrategy::DirectCompatibility);
+ actLayer->GetOutputSlot(0).SetMemoryStrategy(0, MemoryStrategy::DirectCompatibility);
+ softmaxLayer->GetOutputSlot(0).SetMemoryStrategy(0, MemoryStrategy::DirectCompatibility);
}
armnn::TensorInfo m_TensorDesc;
armnn::Graph m_Graph;
+ std::map<armnn::BackendId, std::unique_ptr<armnn::IBackendInternal>> m_Backends;
+ armnn::TensorHandleFactoryRegistry m_FactoryRegistry;
private:
@@ -501,26 +514,26 @@ private:
BOOST_FIXTURE_TEST_CASE(AddCopyLayers, CopyLayersFixture)
{
const armnn::Graph origGraph(m_Graph);
- m_Graph.AddCopyLayers();
+ m_Graph.AddCopyLayers(m_Backends, m_FactoryRegistry);
TestGraphAfterAddingCopyLayers(m_Graph, origGraph);
}
BOOST_FIXTURE_TEST_CASE(AddCopyLayersSeveralTimes, CopyLayersFixture)
{
- m_Graph.AddCopyLayers();
+ m_Graph.AddCopyLayers(m_Backends, m_FactoryRegistry);
// Calling AddCopyLayers() several times should not change the connections.
const std::vector<Edge> edges = GetEdgeList(m_Graph);
for (int i = 0; i < 4; ++i)
{
- m_Graph.AddCopyLayers();
+ m_Graph.AddCopyLayers(m_Backends, m_FactoryRegistry);
const std::vector<Edge> otherEdges = GetEdgeList(m_Graph);
BOOST_TEST((edges == otherEdges));
}
}
-BOOST_AUTO_TEST_CASE(CopyLayersAddedBetweenSameLayersHaveDifferentNames)
+BOOST_FIXTURE_TEST_CASE(CopyLayersAddedBetweenSameLayersHaveDifferentNames, CopyLayersFixture)
{
armnn::Graph graph;
@@ -542,7 +555,12 @@ BOOST_AUTO_TEST_CASE(CopyLayersAddedBetweenSameLayersHaveDifferentNames)
splitterLayer->GetOutputSlot(1).Connect(additionLayer->GetInputSlot(1));
additionLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
- graph.AddCopyLayers();
+ inputLayer->GetOutputSlot(0).SetMemoryStrategy(0, armnn::MemoryStrategy::DirectCompatibility);
+ splitterLayer->GetOutputSlot(0).SetMemoryStrategy(0, armnn::MemoryStrategy::CopyToTarget);
+ splitterLayer->GetOutputSlot(1).SetMemoryStrategy(0, armnn::MemoryStrategy::CopyToTarget);
+ additionLayer->GetOutputSlot(0).SetMemoryStrategy(0, armnn::MemoryStrategy::DirectCompatibility);
+
+ graph.AddCopyLayers(m_Backends, m_FactoryRegistry);
std::vector<Edge> edges = GetEdgeList(graph);
BOOST_CHECK(edges.size() == 6u);
diff --git a/src/armnn/test/TensorHandleStrategyTest.cpp b/src/armnn/test/TensorHandleStrategyTest.cpp
new file mode 100644
index 0000000000..3bb1c68169
--- /dev/null
+++ b/src/armnn/test/TensorHandleStrategyTest.cpp
@@ -0,0 +1,274 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#include <boost/test/unit_test.hpp>
+
+#include <armnn/LayerVisitorBase.hpp>
+
+#include <backendsCommon/IBackendContext.hpp>
+#include <backendsCommon/IBackendInternal.hpp>
+#include <backendsCommon/IMemoryManager.hpp>
+#include <backendsCommon/ITensorHandleFactory.hpp>
+#include <backendsCommon/TensorHandleFactoryRegistry.hpp>
+
+#include <optimizations/Optimization.hpp>
+
+#include <Network.hpp>
+
+#include <vector>
+#include <string>
+
+using namespace armnn;
+
+class TestMemMgr : public IMemoryManager
+{
+public:
+ TestMemMgr() = default;
+
+ void Acquire() override {}
+ void Release() override {}
+};
+
+class TestFactory1 : public ITensorHandleFactory
+{
+public:
+ TestFactory1(std::weak_ptr<IMemoryManager> mgr, ITensorHandleFactory::FactoryId id)
+ : m_Id(id)
+ , m_MemMgr(mgr)
+ {}
+
+ std::unique_ptr<ITensorHandle> CreateSubTensorHandle(ITensorHandle& parent,
+ TensorShape const& subTensorShape,
+ unsigned int const* subTensorOrigin) const override
+ {
+ return nullptr;
+ }
+
+ std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo) const override
+ {
+ return nullptr;
+ }
+
+ virtual const FactoryId GetId() const override { return m_Id; }
+
+ virtual bool SupportsSubTensors() const override { return true; }
+
+private:
+ FactoryId m_Id = "UninitializedId";
+
+ std::weak_ptr<IMemoryManager> m_MemMgr;
+};
+
+class TestBackendA : public IBackendInternal
+{
+public:
+ TestBackendA() = default;
+
+ const BackendId& GetId() const override { return m_Id; }
+
+ IWorkloadFactoryPtr CreateWorkloadFactory(const IMemoryManagerSharedPtr& memoryManager = nullptr) const override
+ {
+ return IWorkloadFactoryPtr{};
+ }
+
+ IBackendInternal::ILayerSupportSharedPtr GetLayerSupport() const override
+ {
+ return ILayerSupportSharedPtr{};
+ }
+
+ std::vector<ITensorHandleFactory::FactoryId> GetHandleFactoryPreferences() const override
+ {
+ return std::vector<ITensorHandleFactory::FactoryId>
+ {
+ "TestHandleFactoryA1",
+ "TestHandleFactoryA2",
+ "TestHandleFactoryB1"
+ };
+ }
+
+ void RegisterTensorHandleFactories(TensorHandleFactoryRegistry& registry) override
+ {
+ auto mgr = std::make_shared<TestMemMgr>();
+
+ registry.RegisterMemoryManager(mgr);
+ registry.RegisterFactory(std::make_unique<TestFactory1>(mgr, "TestHandleFactoryA1"));
+ registry.RegisterFactory(std::make_unique<TestFactory1>(mgr, "TestHandleFactoryA2"));
+ }
+
+private:
+ BackendId m_Id = "BackendA";
+};
+
+class TestBackendB : public IBackendInternal
+{
+public:
+ TestBackendB() = default;
+
+ const BackendId& GetId() const override { return m_Id; }
+
+ IWorkloadFactoryPtr CreateWorkloadFactory(const IMemoryManagerSharedPtr& memoryManager = nullptr) const override
+ {
+ return IWorkloadFactoryPtr{};
+ }
+
+ IBackendInternal::ILayerSupportSharedPtr GetLayerSupport() const override
+ {
+ return ILayerSupportSharedPtr{};
+ }
+
+ std::vector<ITensorHandleFactory::FactoryId> GetHandleFactoryPreferences() const override
+ {
+ return std::vector<ITensorHandleFactory::FactoryId>
+ {
+ "TestHandleFactoryB1"
+ };
+ }
+
+ void RegisterTensorHandleFactories(TensorHandleFactoryRegistry& registry) override
+ {
+ auto mgr = std::make_shared<TestMemMgr>();
+
+ registry.RegisterMemoryManager(mgr);
+ registry.RegisterFactory(std::make_unique<TestFactory1>(mgr, "TestHandleFactoryB1"));
+ }
+
+private:
+ BackendId m_Id = "BackendB";
+};
+
+class TestBackendC : public IBackendInternal
+{
+public:
+ TestBackendC() = default;
+
+ const BackendId& GetId() const override { return m_Id; }
+
+ IWorkloadFactoryPtr CreateWorkloadFactory(const IMemoryManagerSharedPtr& memoryManager = nullptr) const override
+ {
+ return IWorkloadFactoryPtr{};
+ }
+
+ IBackendInternal::ILayerSupportSharedPtr GetLayerSupport() const override
+ {
+ return ILayerSupportSharedPtr{};
+ }
+
+ std::vector<ITensorHandleFactory::FactoryId> GetHandleFactoryPreferences() const override
+ {
+ return std::vector<ITensorHandleFactory::FactoryId>{
+ "TestHandleFactoryC1"
+ };
+ }
+
+ void RegisterTensorHandleFactories(TensorHandleFactoryRegistry& registry) override
+ {
+ auto mgr = std::make_shared<TestMemMgr>();
+
+ registry.RegisterMemoryManager(mgr);
+ registry.RegisterFactory(std::make_unique<TestFactory1>(mgr, "TestHandleFactoryC1"));
+ }
+
+private:
+ BackendId m_Id = "BackendC";
+};
+
+
+BOOST_AUTO_TEST_SUITE(TensorHandle)
+
+BOOST_AUTO_TEST_CASE(RegisterFactories)
+{
+ TestBackendA backendA;
+ TestBackendB backendB;
+
+ BOOST_TEST(backendA.GetHandleFactoryPreferences()[0] == "TestHandleFactoryA1");
+ BOOST_TEST(backendA.GetHandleFactoryPreferences()[1] == "TestHandleFactoryA2");
+ BOOST_TEST(backendA.GetHandleFactoryPreferences()[2] == "TestHandleFactoryB1");
+
+ TensorHandleFactoryRegistry registry;
+ backendA.RegisterTensorHandleFactories(registry);
+ backendB.RegisterTensorHandleFactories(registry);
+
+ BOOST_TEST((registry.GetFactory("Non-existing Backend") == nullptr));
+ BOOST_TEST((registry.GetFactory("TestHandleFactoryA1") != nullptr));
+ BOOST_TEST((registry.GetFactory("TestHandleFactoryA2") != nullptr));
+ BOOST_TEST((registry.GetFactory("TestHandleFactoryB1") != nullptr));
+}
+
+BOOST_AUTO_TEST_CASE(TensorHandleSelectionStrategy)
+{
+ auto backendA = std::make_unique<TestBackendA>();
+ auto backendB = std::make_unique<TestBackendB>();
+ auto backendC = std::make_unique<TestBackendC>();
+
+ TensorHandleFactoryRegistry registry;
+ backendA->RegisterTensorHandleFactories(registry);
+ backendB->RegisterTensorHandleFactories(registry);
+ backendC->RegisterTensorHandleFactories(registry);
+
+ BackendsMap backends;
+ backends["BackendA"] = std::move(backendA);
+ backends["BackendB"] = std::move(backendB);
+ backends["BackendC"] = std::move(backendC);
+
+ armnn::Graph graph;
+
+ armnn::InputLayer* const inputLayer = graph.AddLayer<armnn::InputLayer>(0, "input");
+ inputLayer->SetBackendId("BackendA");
+
+ armnn::SoftmaxDescriptor smDesc;
+ armnn::SoftmaxLayer* const softmaxLayer1 = graph.AddLayer<armnn::SoftmaxLayer>(smDesc, "softmax1");
+ softmaxLayer1->SetBackendId("BackendA");
+
+ armnn::SoftmaxLayer* const softmaxLayer2 = graph.AddLayer<armnn::SoftmaxLayer>(smDesc, "softmax2");
+ softmaxLayer2->SetBackendId("BackendB");
+
+ armnn::SoftmaxLayer* const softmaxLayer3 = graph.AddLayer<armnn::SoftmaxLayer>(smDesc, "softmax3");
+ softmaxLayer3->SetBackendId("BackendC");
+
+ armnn::OutputLayer* const outputLayer = graph.AddLayer<armnn::OutputLayer>(0, "output");
+ outputLayer->SetBackendId("BackendA");
+
+ inputLayer->GetOutputSlot(0).Connect(softmaxLayer1->GetInputSlot(0));
+ softmaxLayer1->GetOutputSlot(0).Connect(softmaxLayer2->GetInputSlot(0));
+ softmaxLayer2->GetOutputSlot(0).Connect(softmaxLayer3->GetInputSlot(0));
+ softmaxLayer3->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
+
+ graph.TopologicalSort();
+
+ std::vector<std::string> errors;
+ auto result = SelectTensorHandleStrategy(graph, backends, registry, errors);
+
+ BOOST_TEST(result.m_Error == false);
+ BOOST_TEST(result.m_Warning == false);
+
+ OutputSlot& inputLayerOut = inputLayer->GetOutputSlot(0);
+ OutputSlot& softmaxLayer1Out = softmaxLayer1->GetOutputSlot(0);
+ OutputSlot& softmaxLayer2Out = softmaxLayer2->GetOutputSlot(0);
+ OutputSlot& softmaxLayer3Out = softmaxLayer3->GetOutputSlot(0);
+
+ // Check that the correct factory was selected
+ BOOST_TEST(inputLayerOut.GetTensorHandleFactoryId() == "TestHandleFactoryA1");
+ BOOST_TEST(softmaxLayer1Out.GetTensorHandleFactoryId() == "TestHandleFactoryB1");
+ BOOST_TEST(softmaxLayer2Out.GetTensorHandleFactoryId() == "TestHandleFactoryB1");
+ BOOST_TEST(softmaxLayer3Out.GetTensorHandleFactoryId() == "TestHandleFactoryC1");
+
+ // Check that the correct strategy was selected
+ BOOST_TEST((inputLayerOut.GetMemoryStrategyForConnection(0) == MemoryStrategy::DirectCompatibility));
+ BOOST_TEST((softmaxLayer1Out.GetMemoryStrategyForConnection(0) == MemoryStrategy::DirectCompatibility));
+ BOOST_TEST((softmaxLayer2Out.GetMemoryStrategyForConnection(0) == MemoryStrategy::CopyToTarget));
+ BOOST_TEST((softmaxLayer3Out.GetMemoryStrategyForConnection(0) == MemoryStrategy::DirectCompatibility));
+
+ graph.AddCopyLayers(backends, registry);
+ int count = 0;
+ graph.ForEachLayer([&count](Layer* layer)
+ {
+ if (layer->GetType() == LayerType::MemCopy)
+ {
+ count++;
+ }
+ });
+ BOOST_TEST(count == 1);
+}
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/backends/aclCommon/test/CreateWorkloadClNeon.hpp b/src/backends/aclCommon/test/CreateWorkloadClNeon.hpp
index f544c12c30..03bcf32387 100644
--- a/src/backends/aclCommon/test/CreateWorkloadClNeon.hpp
+++ b/src/backends/aclCommon/test/CreateWorkloadClNeon.hpp
@@ -62,6 +62,7 @@ boost::test_tools::predicate_result CompareTensorHandleShape(IComputeTensorHandl
template<typename IComputeTensorHandle>
void CreateMemCopyWorkloads(IWorkloadFactory& factory)
{
+ TensorHandleFactoryRegistry registry;
Graph graph;
RefWorkloadFactory refFactory;
@@ -79,10 +80,10 @@ void CreateMemCopyWorkloads(IWorkloadFactory& factory)
Connect(layer1, layer2, tensorInfo);
Connect(layer2, output, tensorInfo);
- input->CreateTensorHandles(graph, refFactory);
- layer1->CreateTensorHandles(graph, factory);
- layer2->CreateTensorHandles(graph, refFactory);
- output->CreateTensorHandles(graph, refFactory);
+ input->CreateTensorHandles(registry, refFactory);
+ layer1->CreateTensorHandles(registry, factory);
+ layer2->CreateTensorHandles(registry, refFactory);
+ output->CreateTensorHandles(registry, refFactory);
// make the workloads and check them
auto workload1 = MakeAndCheckWorkload<CopyMemGenericWorkload>(*layer1, graph, factory);
diff --git a/src/backends/backendsCommon/CMakeLists.txt b/src/backends/backendsCommon/CMakeLists.txt
index e1e387bc6f..bc1c15beef 100644
--- a/src/backends/backendsCommon/CMakeLists.txt
+++ b/src/backends/backendsCommon/CMakeLists.txt
@@ -11,6 +11,8 @@ list(APPEND armnnBackendsCommon_sources
CpuTensorHandle.hpp
IBackendInternal.hpp
IBackendContext.hpp
+ ITensorHandleFactory.cpp
+ ITensorHandleFactory.hpp
LayerSupportBase.cpp
LayerSupportBase.hpp
IMemoryManager.hpp
@@ -22,6 +24,8 @@ list(APPEND armnnBackendsCommon_sources
OptimizationViews.hpp
OutputHandler.cpp
OutputHandler.hpp
+ TensorHandleFactoryRegistry.cpp
+ TensorHandleFactoryRegistry.hpp
WorkloadDataCollector.hpp
WorkloadData.cpp
WorkloadDataFwd.hpp
diff --git a/src/backends/backendsCommon/IBackendInternal.hpp b/src/backends/backendsCommon/IBackendInternal.hpp
index fe9d620278..a0d6569949 100644
--- a/src/backends/backendsCommon/IBackendInternal.hpp
+++ b/src/backends/backendsCommon/IBackendInternal.hpp
@@ -10,7 +10,11 @@
#include <ISubgraphViewConverter.hpp>
#include <SubgraphView.hpp>
+#include <optimizations/Optimization.hpp>
+#include "IBackendContext.hpp"
+#include "IMemoryManager.hpp"
+#include "ITensorHandleFactory.hpp"
#include "OptimizationViews.hpp"
#include <vector>
@@ -18,9 +22,7 @@
namespace armnn
{
class IWorkloadFactory;
-class IBackendContext;
class IMemoryManager;
-class Optimization;
class ILayerSupport;
class IBackendInternal : public IBackend
@@ -60,7 +62,10 @@ public:
}
ARMNN_DEPRECATED_MSG("Use \"OptimizationViews OptimizeSubgraphView(const SubgraphView&)\" instead")
- virtual Optimizations GetOptimizations() const = 0;
+ virtual Optimizations GetOptimizations() const
+ {
+ return Optimizations{};
+ }
ARMNN_DEPRECATED_MSG("Use \"OptimizationViews OptimizeSubgraphView(const SubgraphView&)\" instead")
virtual SubGraphUniquePtr OptimizeSubGraph(const SubGraph& subGraph, bool& optimizationAttempted) const
@@ -70,12 +75,19 @@ public:
}
ARMNN_NO_DEPRECATE_WARN_END
- virtual IMemoryManagerUniquePtr CreateMemoryManager() const = 0;
+
+ virtual IMemoryManagerUniquePtr CreateMemoryManager() const
+ {
+ return IMemoryManagerUniquePtr();
+ };
virtual IWorkloadFactoryPtr CreateWorkloadFactory(
const IMemoryManagerSharedPtr& memoryManager = nullptr) const = 0;
- virtual IBackendContextPtr CreateBackendContext(const IRuntime::CreationOptions&) const = 0;
+ virtual IBackendContextPtr CreateBackendContext(const IRuntime::CreationOptions&) const
+ {
+ return IBackendContextPtr{};
+ }
virtual ILayerSupportSharedPtr GetLayerSupport() const = 0;
@@ -107,6 +119,29 @@ public:
}
return result;
}
+
+ bool SupportsTensorAllocatorAPI() const { return GetHandleFactoryPreferences().empty() == false; }
+
+ ITensorHandleFactory::FactoryId GetBackwardCompatibleFavoriteHandleFactory()
+ {
+ auto favorites = GetHandleFactoryPreferences();
+ if (favorites.empty())
+ {
+ return ITensorHandleFactory::LegacyFactoryId;
+ }
+ return favorites[0];
+ }
+
+ /// (Optional) Returns a vector of supported TensorHandleFactory ids in preference order.
+ virtual std::vector<ITensorHandleFactory::FactoryId> GetHandleFactoryPreferences() const
+ {
+ return std::vector<ITensorHandleFactory::FactoryId>();
+ }
+
+ /// (Optional) Register TensorHandleFactories
+ /// Either this method must be implemented, or CreateMemoryManager() together with
+ /// IWorkloadFactory::CreateTensorHandle()/IWorkloadFactory::CreateSubTensorHandle().
+ virtual void RegisterTensorHandleFactories(class TensorHandleFactoryRegistry& registry) {}
};
using IBackendInternalUniquePtr = std::unique_ptr<IBackendInternal>;
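
The two optional hooks added above are how a backend opts in to the new tensor-handle path: it advertises its preferred factory ids and registers the corresponding factories with the registry. Below is a rough sketch of such overrides inside a hypothetical IBackendInternal subclass; ExampleBackend, ExampleHandleFactory and the id string are illustrative assumptions (the factory class is sketched after the ITensorHandleFactory header further down), not part of this change.

    // Hypothetical backend (names illustrative); only the new hooks are shown.
    class ExampleBackend : public IBackendInternal
    {
    public:
        std::vector<ITensorHandleFactory::FactoryId> GetHandleFactoryPreferences() const override
        {
            return { "ExampleHandleFactory" }; // most-preferred factory id first
        }

        void RegisterTensorHandleFactories(TensorHandleFactoryRegistry& registry) override
        {
            registry.RegisterFactory(std::make_unique<ExampleHandleFactory>());
        }

        // ... the remaining IBackendInternal virtuals (CreateWorkloadFactory,
        // GetLayerSupport, ...) would still need to be implemented.
    };
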
diff --git a/src/backends/backendsCommon/ITensorHandleFactory.cpp b/src/backends/backendsCommon/ITensorHandleFactory.cpp
new file mode 100644
index 0000000000..91f5692723
--- /dev/null
+++ b/src/backends/backendsCommon/ITensorHandleFactory.cpp
@@ -0,0 +1,14 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ITensorHandleFactory.hpp"
+
+namespace armnn
+{
+
+const ITensorHandleFactory::FactoryId ITensorHandleFactory::LegacyFactoryId = "armnn_legacy_factory";
+const ITensorHandleFactory::FactoryId ITensorHandleFactory::DeferredFactoryId = "armnn_deferred_factory";
+
+} // namespace armnn
diff --git a/src/backends/backendsCommon/ITensorHandleFactory.hpp b/src/backends/backendsCommon/ITensorHandleFactory.hpp
new file mode 100644
index 0000000000..7685061eb3
--- /dev/null
+++ b/src/backends/backendsCommon/ITensorHandleFactory.hpp
@@ -0,0 +1,49 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <armnn/Types.hpp>
+#include <armnn/IRuntime.hpp>
+
+namespace armnn
+{
+
+class ITensorHandleFactory
+{
+public:
+ using FactoryId = std::string;
+ static const FactoryId LegacyFactoryId; // Use the workload factory to create the tensor handle
+ static const FactoryId DeferredFactoryId; // Some TensorHandleFactory decisions are deferred to run-time
+
+ virtual ~ITensorHandleFactory() {}
+
+
+ virtual std::unique_ptr<ITensorHandle> CreateSubTensorHandle(ITensorHandle& parent,
+ TensorShape const& subTensorShape,
+ unsigned int const* subTensorOrigin) const = 0;
+
+ virtual std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo) const = 0;
+
+ virtual const FactoryId GetId() const = 0;
+
+ virtual bool SupportsSubTensors() const = 0;
+
+ virtual bool SupportsMapUnmap() const final { return true; }
+
+ virtual bool SupportsExport() const final { return false; }
+
+ virtual bool SupportsImport() const final { return false; }
+};
+
+enum class MemoryStrategy
+{
+ Undefined,
+ DirectCompatibility, // Only allocate the tensor handle using the assigned factory
+ CopyToTarget, // As DirectCompatibility, plus insert a MemCopy layer before the target
+ ExportToTarget, // As DirectCompatibility, plus insert an Import node
+};
+
+} //namespace armnn
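
To make the contract concrete, a minimal factory satisfying the pure-virtual surface above could look like the sketch below. The creation bodies are placeholders only (a real backend would return its own ITensorHandle implementations), and SupportsMapUnmap/SupportsExport/SupportsImport are declared final, so a derived factory cannot change them. The class name and id are assumptions for illustration.

    // Minimal sketch of an ITensorHandleFactory implementation (placeholder bodies).
    // Assumes the includes from ITensorHandleFactory.hpp above are available.
    class ExampleHandleFactory : public ITensorHandleFactory
    {
    public:
        std::unique_ptr<ITensorHandle> CreateSubTensorHandle(ITensorHandle& parent,
                                                             TensorShape const& subTensorShape,
                                                             unsigned int const* subTensorOrigin) const override
        {
            return nullptr; // sub-tensors are not supported by this sketch
        }

        std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo) const override
        {
            return nullptr; // a real factory would allocate a backend-specific handle here
        }

        const FactoryId GetId() const override { return "ExampleHandleFactory"; } // assumed id

        bool SupportsSubTensors() const override { return false; }
    };
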
diff --git a/src/backends/backendsCommon/OutputHandler.cpp b/src/backends/backendsCommon/OutputHandler.cpp
index 2df2fb5181..8f4942d8ba 100644
--- a/src/backends/backendsCommon/OutputHandler.cpp
+++ b/src/backends/backendsCommon/OutputHandler.cpp
@@ -27,9 +27,9 @@ void OutputHandler::CreateTensorHandles(const IWorkloadFactory& factory)
m_TensorHandle = factory.CreateTensorHandle(m_TensorInfo);
}
-void OutputHandler::CreateTensorHandles(const IWorkloadFactory& factory, DataLayout dataLayout)
+void OutputHandler::CreateTensorHandles(const ITensorHandleFactory& factory)
{
- m_TensorHandle = factory.CreateTensorHandle(m_TensorInfo, dataLayout);
+ m_TensorHandle = factory.CreateTensorHandle(m_TensorInfo);
}
void OutputHandler::CollectWorkloadOutputs(WorkloadDataCollector& dataCollector) const
diff --git a/src/backends/backendsCommon/OutputHandler.hpp b/src/backends/backendsCommon/OutputHandler.hpp
index 240b369fab..01e255deaa 100644
--- a/src/backends/backendsCommon/OutputHandler.hpp
+++ b/src/backends/backendsCommon/OutputHandler.hpp
@@ -5,6 +5,7 @@
#pragma once
#include "ITensorHandle.hpp"
+#include "ITensorHandleFactory.hpp"
#include <armnn/Descriptors.hpp>
#include <armnn/INetwork.hpp>
@@ -35,14 +36,10 @@ public:
/// @param tensorInfo - TensorInfo for the output.
void SetTensorInfo(const TensorInfo& tensorInfo);
- /// @brief - Creates tensor handlers used by the intermediate tensors. Does not allocate memory.
+ /// @brief - Creates tensor handles used by the intermediate tensors. Does not allocate memory.
/// @param factory - Factory to be used for handler creation.
void CreateTensorHandles(const IWorkloadFactory& factory);
-
- /// @brief - Creates tensor handlers used by the intermediate tensors. Does not allocate memory.
- /// @param factory - Factory to be used for handler creation.
- /// @param dataLayout - Data Layout to be used for handler creation.
- void CreateTensorHandles(const IWorkloadFactory& factory, DataLayout dataLayout);
+ void CreateTensorHandles(const ITensorHandleFactory& factory);
/// @brief - Gets the matching TensorInfo for the output.
/// @return - References to the output TensorInfo.
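
The replacement overload drops the DataLayout parameter because ITensorHandleFactory::CreateTensorHandle takes only the TensorInfo (see the factory interface above). A short usage sketch follows, with 'factory' assumed to be an ITensorHandleFactory reference obtained elsewhere, for example from a TensorHandleFactoryRegistry.

    // Sketch: create the handle for an intermediate output via a handle factory.
    OutputHandler handler;
    handler.SetTensorInfo(TensorInfo({ 1, 8, 8, 16 }, DataType::Float32));
    handler.CreateTensorHandles(factory); // 'factory' is an ITensorHandleFactory&, assumed in scope
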
diff --git a/src/backends/backendsCommon/TensorHandleFactoryRegistry.cpp b/src/backends/backendsCommon/TensorHandleFactoryRegistry.cpp
new file mode 100644
index 0000000000..4692b9f960
--- /dev/null
+++ b/src/backends/backendsCommon/TensorHandleFactoryRegistry.cpp
@@ -0,0 +1,69 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "TensorHandleFactoryRegistry.hpp"
+#include "IMemoryManager.hpp"
+
+
+namespace armnn
+{
+
+void TensorHandleFactoryRegistry::RegisterFactory(std::unique_ptr<ITensorHandleFactory> newFactory)
+{
+ if (!newFactory)
+ {
+ return;
+ }
+
+ ITensorHandleFactory::FactoryId id = newFactory->GetId();
+
+ // Don't register duplicates
+ for (auto& registeredFactory : m_Factories)
+ {
+ if (id == registeredFactory->GetId())
+ {
+ return;
+ }
+ }
+
+ // Take ownership of the new factory
+ m_Factories.push_back(std::move(newFactory));
+}
+
+void TensorHandleFactoryRegistry::RegisterMemoryManager(std::shared_ptr<armnn::IMemoryManager> memoryManager)
+{
+ m_MemoryManagers.push_back(memoryManager);
+}
+
+ITensorHandleFactory* TensorHandleFactoryRegistry::GetFactory(ITensorHandleFactory::FactoryId id) const
+{
+ for (auto& factory : m_Factories)
+ {
+ if (factory->GetId() == id)
+ {
+ return factory.get();
+ }
+ }
+
+ return nullptr;
+}
+
+void TensorHandleFactoryRegistry::AquireMemory()
+{
+ for (auto& mgr : m_MemoryManagers)
+ {
+ mgr->Acquire();
+ }
+}
+
+void TensorHandleFactoryRegistry::ReleaseMemory()
+{
+ for (auto& mgr : m_MemoryManagers)
+ {
+ mgr->Release();
+ }
+}
+
+} // namespace armnn
diff --git a/src/backends/backendsCommon/TensorHandleFactoryRegistry.hpp b/src/backends/backendsCommon/TensorHandleFactoryRegistry.hpp
new file mode 100644
index 0000000000..9e02985301
--- /dev/null
+++ b/src/backends/backendsCommon/TensorHandleFactoryRegistry.hpp
@@ -0,0 +1,49 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "ITensorHandleFactory.hpp"
+
+#include <memory>
+#include <vector>
+
+namespace armnn
+{
+
+// Forward declaration
+class IMemoryManager;
+
+/// Registry that owns the registered ITensorHandleFactory instances and shares the memory managers they use.
+class TensorHandleFactoryRegistry
+{
+public:
+ TensorHandleFactoryRegistry() = default;
+
+ TensorHandleFactoryRegistry(const TensorHandleFactoryRegistry& other) = delete;
+ TensorHandleFactoryRegistry(TensorHandleFactoryRegistry&& other) = delete;
+
+ /// Register a TensorHandleFactory and transfer ownership
+ void RegisterFactory(std::unique_ptr<ITensorHandleFactory> factory);
+
+ /// Register a memory manager with shared ownership
+ void RegisterMemoryManager(std::shared_ptr<IMemoryManager> memoryManager);
+
+ /// Find a TensorHandleFactory by Id
+ /// Returns nullptr if not found
+ ITensorHandleFactory* GetFactory(ITensorHandleFactory::FactoryId id) const;
+
+ /// Acquire memory required for inference
+ void AquireMemory();
+
+ /// Release memory required for inference
+ void ReleaseMemory();
+
+private:
+ std::vector<std::unique_ptr<ITensorHandleFactory>> m_Factories;
+ std::vector<std::shared_ptr<IMemoryManager>> m_MemoryManagers;
+};
+
+} // namespace armnn
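
Putting the registry together with the factory interface, a typical load-time flow might look like the sketch below. ExampleHandleFactory is the illustrative factory from the earlier sketch, ExampleMemoryManager stands for some IMemoryManager implementation, and the id string matches what the sketched GetId() returns; none of these names are part of the change. Note that RegisterFactory silently ignores a factory whose id is already registered, and GetFactory returns nullptr for an unknown id.

    // Sketch of registry usage during network loading (illustrative names only).
    TensorHandleFactoryRegistry registry;
    registry.RegisterFactory(std::make_unique<ExampleHandleFactory>());
    registry.RegisterMemoryManager(std::make_shared<ExampleMemoryManager>()); // hypothetical IMemoryManager

    ITensorHandleFactory* factory = registry.GetFactory("ExampleHandleFactory");
    if (factory != nullptr)
    {
        auto handle = factory->CreateTensorHandle(TensorInfo({ 1, 16 }, DataType::Float32));
    }

    registry.AquireMemory();  // acquires every registered memory manager
    // ... execute workloads ...
    registry.ReleaseMemory(); // releases them again
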
diff --git a/src/backends/backendsCommon/common.mk b/src/backends/backendsCommon/common.mk
index 90d3d16ebb..8df5ab9203 100644
--- a/src/backends/backendsCommon/common.mk
+++ b/src/backends/backendsCommon/common.mk
@@ -10,10 +10,12 @@
COMMON_SOURCES := \
BackendRegistry.cpp \
CpuTensorHandle.cpp \
+ ITensorHandleFactory.cpp \
LayerSupportBase.cpp \
MemCopyWorkload.cpp \
OptimizationViews.cpp \
OutputHandler.cpp \
+ TensorHandleFactoryRegistry.cpp \
WorkloadData.cpp \
WorkloadFactory.cpp \
WorkloadUtils.cpp