author    Derek Lamberti <derek.lamberti@arm.com>        2019-08-01 15:56:25 +0100
committer Áron Virginás-Tar <aron.virginas-tar@arm.com>  2019-08-05 13:51:42 +0000
commit    f674aa0fd2809126debdaaeb8067067790d86907 (patch)
tree      d86d0261c7a25149217918986043c76d0823ee44 /src/armnn
parent    737d9ff58b348b11234b6c2363390607d576177d (diff)
download  armnn-f674aa0fd2809126debdaaeb8067067790d86907.tar.gz
IVGCVSW-3277 Mem export/import support for Tensors
* Rename MemoryStrategy to EdgeStrategy
* Add MemImportLayer
* Import memory rather than copy when possible

Change-Id: I1d3a9414f2cbe517dc2aae9bbd4fdd92712b38ef
Signed-off-by: Derek Lamberti <derek.lamberti@arm.com>
Diffstat (limited to 'src/armnn')
-rw-r--r--  src/armnn/Graph.cpp                           85
-rw-r--r--  src/armnn/Graph.hpp                            4
-rw-r--r--  src/armnn/InternalTypes.hpp                    1
-rw-r--r--  src/armnn/Layer.cpp                           18
-rw-r--r--  src/armnn/Layer.hpp                            8
-rw-r--r--  src/armnn/LayerSupport.cpp                     9
-rw-r--r--  src/armnn/LayersFwd.hpp                        2
-rw-r--r--  src/armnn/LoadedNetwork.cpp                   54
-rw-r--r--  src/armnn/Network.cpp                         51
-rw-r--r--  src/armnn/layers/MemImportLayer.cpp           54
-rw-r--r--  src/armnn/layers/MemImportLayer.hpp           42
-rw-r--r--  src/armnn/test/GraphTests.cpp                 34
-rw-r--r--  src/armnn/test/TensorHandleStrategyTest.cpp  119
13 files changed, 376 insertions, 105 deletions
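
The central rename in this patch is MemoryStrategy to EdgeStrategy, with a new ExportToTarget value alongside the existing copy and direct options. As a minimal orientation sketch (an assumption about the defining header, which is not part of this diff), the strategy space looks like this:

// Illustrative sketch only; the real enum is declared in an Arm NN header
// outside this diff. Names match those used throughout the patch.
enum class EdgeStrategy
{
    Undefined,            // optimizer has not yet decided how this edge moves data
    DirectCompatibility,  // producer and consumer can share the same tensor handle
    CopyToTarget,         // insert a MemCopyLayer and copy across the edge
    ExportToTarget        // insert a MemImportLayer; consumer adopts the producer's buffer
};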
diff --git a/src/armnn/Graph.cpp b/src/armnn/Graph.cpp
index 9e00f5ec01..6212c49eba 100644
--- a/src/armnn/Graph.cpp
+++ b/src/armnn/Graph.cpp
@@ -255,26 +255,31 @@ const Graph& Graph::TopologicalSort() const
return *this;
}
-void Graph::AddCopyLayers(std::map<BackendId, std::unique_ptr<IBackendInternal>>& backends,
- TensorHandleFactoryRegistry& registry)
+void Graph::AddCompatibilityLayers(std::map<BackendId, std::unique_ptr<IBackendInternal>>& backends,
+ TensorHandleFactoryRegistry& registry)
{
- // Returns true if the given layer could potentially need an intermediate copy layer (depending on its
- // connections to other layers). At the time of writing, copy layers will be inserted in the following situations:
- // CPU -> CL (and viceversa)
- // CPU -> Neon (and viceversa)
- auto MayNeedCopyLayer = [](const Layer& layer)
+ // Returns true if the given layer could potentially need an intermediate copy/import layer (depending on its
+ // connections to other layers).
+ auto MayNeedCompatibilityLayer = [](const Layer& layer)
{
// All layers should have been associated with a valid compute device at this point.
BOOST_ASSERT(layer.GetBackendId() != Compute::Undefined);
- // Does not need another copy layer if a copy layer is already present.
- return layer.GetType() != LayerType::MemCopy;
+ // Does not need another compatibility layer if a copy or import layer is already present.
+ return layer.GetType() != LayerType::MemCopy &&
+ layer.GetType() != LayerType::MemImport;
};
- ForEachLayer([this, &backends, &registry, MayNeedCopyLayer](Layer* srcLayer)
+ auto IsCompatibilityStrategy = [](EdgeStrategy strategy)
+ {
+ return strategy == EdgeStrategy::CopyToTarget ||
+ strategy == EdgeStrategy::ExportToTarget;
+ };
+
+ ForEachLayer([this, &backends, &registry, MayNeedCompatibilityLayer, IsCompatibilityStrategy](Layer* srcLayer)
{
BOOST_ASSERT(srcLayer);
- if (!MayNeedCopyLayer(*srcLayer))
+ if (!MayNeedCompatibilityLayer(*srcLayer))
{
// The current layer does not need copy layers, move to the next one
return;
@@ -285,33 +290,43 @@ void Graph::AddCopyLayers(std::map<BackendId, std::unique_ptr<IBackendInternal>>
{
OutputSlot& srcOutputSlot = srcLayer->GetOutputSlot(srcOutputIndex);
const std::vector<InputSlot*> srcConnections = srcOutputSlot.GetConnections();
- const std::vector<MemoryStrategy> srcMemoryStrategies = srcOutputSlot.GetMemoryStrategies();
+ const std::vector<EdgeStrategy> srcEdgeStrategies = srcOutputSlot.GetEdgeStrategies();
for (unsigned int srcConnectionIndex = 0; srcConnectionIndex < srcConnections.size(); srcConnectionIndex++)
{
InputSlot* dstInputSlot = srcConnections[srcConnectionIndex];
BOOST_ASSERT(dstInputSlot);
- MemoryStrategy strategy = srcMemoryStrategies[srcConnectionIndex];
- BOOST_ASSERT_MSG(strategy != MemoryStrategy::Undefined,
+ EdgeStrategy strategy = srcEdgeStrategies[srcConnectionIndex];
+ BOOST_ASSERT_MSG(strategy != EdgeStrategy::Undefined,
"Undefined memory strategy found while adding copy layers for compatibility");
const Layer& dstLayer = dstInputSlot->GetOwningLayer();
- if (MayNeedCopyLayer(dstLayer) &&
- strategy == MemoryStrategy::CopyToTarget)
+ if (MayNeedCompatibilityLayer(dstLayer) &&
+ IsCompatibilityStrategy(strategy))
{
// A copy layer is needed in between the source and destination layers.
// Record the operation rather than attempting to modify the graph as we go.
// (invalidating iterators)
- const std::string copyLayerName = boost::str(boost::format("[ %1% (%2%) -> %3% (%4%) ]")
+ const std::string compLayerName = boost::str(boost::format("[ %1% (%2%) -> %3% (%4%) ]")
% srcLayer->GetName()
% srcOutputIndex
% dstLayer.GetName()
% dstInputSlot->GetSlotIndex());
- MemCopyLayer* const copyLayer = InsertNewLayer<MemCopyLayer>(*dstInputSlot, copyLayerName.c_str());
- copyLayer->SetBackendId(dstLayer.GetBackendId());
+ Layer* compLayer = nullptr;
+ if (strategy == EdgeStrategy::CopyToTarget)
+ {
+ compLayer = InsertNewLayer<MemCopyLayer>(*dstInputSlot, compLayerName.c_str());
+ }
+ else
+ {
+ BOOST_ASSERT_MSG(strategy == EdgeStrategy::ExportToTarget, "Invalid edge strategy found.");
+ compLayer = InsertNewLayer<MemImportLayer>(*dstInputSlot, compLayerName.c_str());
+ }
+
+ compLayer->SetBackendId(dstLayer.GetBackendId());
- OutputSlot& copyOutputSlot = copyLayer->GetOutputSlot(0);
+ OutputSlot& compOutputSlot = compLayer->GetOutputSlot(0);
auto backendIt = backends.find(dstLayer.GetBackendId());
if (backendIt != backends.end() &&
backendIt->second &&
@@ -325,34 +340,40 @@ void Graph::AddCopyLayers(std::map<BackendId, std::unique_ptr<IBackendInternal>>
for (auto preference : tensorHandleFactoryIds)
{
auto factory = registry.GetFactory(preference);
- if (factory && factory->SupportsMapUnmap())
+ if (factory)
{
- copyOutputSlot.SetTensorHandleFactory(preference);
- found = true;
- break;
+ auto srcPref = srcOutputSlot.GetTensorHandleFactoryId();
+ auto srcFactory = registry.GetFactory(srcPref);
+ bool canExportImport = (factory->GetImportFlags() & srcFactory->GetExportFlags()) != 0;
+ if (factory->SupportsMapUnmap() || canExportImport)
+ {
+ compOutputSlot.SetTensorHandleFactory(preference);
+ found = true;
+ break;
+ }
}
}
- BOOST_ASSERT_MSG(found, "Could not find a mappable TensorHandle for copy layer");
+ BOOST_ASSERT_MSG(found, "Could not find a valid TensorHandle for compatibility layer");
}
else
{
- copyOutputSlot.SetTensorHandleFactory(ITensorHandleFactory::LegacyFactoryId);
+ compOutputSlot.SetTensorHandleFactory(ITensorHandleFactory::LegacyFactoryId);
}
- // The output strategy of a copy layer is always DirectCompatibility.
- copyOutputSlot.SetMemoryStrategy(0, MemoryStrategy::DirectCompatibility);
+ // The output strategy of a compatibility layer is always DirectCompatibility.
+ compOutputSlot.SetEdgeStrategy(0, EdgeStrategy::DirectCompatibility);
// Recalculate the connection index on the previous layer as we have just inserted into it.
const std::vector<InputSlot*>& newSourceConnections = srcOutputSlot.GetConnections();
long newSrcConnectionIndex = std::distance(newSourceConnections.begin(),
std::find(newSourceConnections.begin(),
newSourceConnections.end(),
- &copyLayer->GetInputSlot(0)));
+ &compLayer->GetInputSlot(0)));
- // The input strategy of a copy layer is always DirectCompatibilty.
- srcOutputSlot.SetMemoryStrategy(boost::numeric_cast<unsigned int>(newSrcConnectionIndex),
- MemoryStrategy::DirectCompatibility);
+ // The input strategy of a compatibility layer is always DirectCompatibility.
+ srcOutputSlot.SetEdgeStrategy(boost::numeric_cast<unsigned int>(newSrcConnectionIndex),
+ EdgeStrategy::DirectCompatibility);
}
}
}
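
The factory-selection loop above now accepts a destination factory either when it supports map/unmap or when its import flags overlap the source factory's export flags. A self-contained sketch of that overlap test, assuming MemorySourceFlags is the plain bitmask the patch treats it as:

#include <cstdint>

using MemorySourceFlags = uint32_t; // assumption: a bitmask of acceptable memory sources

// Export/import is viable when at least one memory source is both exportable
// by the producer's factory and importable by the consumer's factory.
bool CanExportImport(MemorySourceFlags srcExportFlags, MemorySourceFlags dstImportFlags)
{
    return (srcExportFlags & dstImportFlags) != 0;
}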
diff --git a/src/armnn/Graph.hpp b/src/armnn/Graph.hpp
index f8113375c9..c65f12bbc3 100644
--- a/src/armnn/Graph.hpp
+++ b/src/armnn/Graph.hpp
@@ -191,8 +191,8 @@ public:
/// Modifies the graph in-place, removing edges connecting layers using different compute devices,
/// and relinking them via intermediary copy layers.
- void AddCopyLayers(std::map<BackendId, std::unique_ptr<class IBackendInternal>>& backends,
- TensorHandleFactoryRegistry& registry);
+ void AddCompatibilityLayers(std::map<BackendId, std::unique_ptr<class IBackendInternal>>& backends,
+ TensorHandleFactoryRegistry& registry);
/// Substitutes the given sub-graph with either a new layer or a new sub-graph.
/// In either case, the given layer or all the layers in the given sub-graph must belong to this graph.
diff --git a/src/armnn/InternalTypes.hpp b/src/armnn/InternalTypes.hpp
index b0fea7c8c2..7a0f9a1cb0 100644
--- a/src/armnn/InternalTypes.hpp
+++ b/src/armnn/InternalTypes.hpp
@@ -40,6 +40,7 @@ enum class LayerType
Maximum,
Mean,
MemCopy,
+ MemImport,
Merge,
Minimum,
Multiplication,
diff --git a/src/armnn/Layer.cpp b/src/armnn/Layer.cpp
index a287220702..528020bab5 100644
--- a/src/armnn/Layer.cpp
+++ b/src/armnn/Layer.cpp
@@ -31,7 +31,7 @@ void InputSlot::Insert(Layer& layer)
// Connects inserted layer to parent.
BOOST_ASSERT(layer.GetNumInputSlots() == 1);
int idx = prevSlot->Connect(layer.GetInputSlot(0));
- prevSlot->SetMemoryStrategy(boost::numeric_cast<unsigned int>(idx), MemoryStrategy::Undefined);
+ prevSlot->SetEdgeStrategy(boost::numeric_cast<unsigned int>(idx), EdgeStrategy::Undefined);
// Sets tensor info for inserted layer.
const TensorInfo& tensorInfo = prevSlot->GetTensorInfo();
@@ -40,7 +40,7 @@ void InputSlot::Insert(Layer& layer)
// Connects inserted layer to this.
layer.GetOutputSlot(0).Connect(*this);
- layer.GetOutputSlot(0).SetMemoryStrategy(0, MemoryStrategy::Undefined);
+ layer.GetOutputSlot(0).SetEdgeStrategy(0, EdgeStrategy::Undefined);
}
const InputSlot* OutputSlot::GetConnection(unsigned int index) const
@@ -80,7 +80,7 @@ int OutputSlot::Connect(InputSlot& destination)
{
destination.SetConnection(this);
m_Connections.push_back(&destination);
- m_MemoryStrategies.push_back(MemoryStrategy::Undefined);
+ m_EdgeStrategies.push_back(EdgeStrategy::Undefined);
return boost::numeric_cast<int>(m_Connections.size() - 1);
}
@@ -97,7 +97,7 @@ void OutputSlot::Disconnect(InputSlot& slot)
auto idx = std::distance(m_Connections.begin(), it);
m_Connections.erase(std::remove(m_Connections.begin(), m_Connections.end(), &slot), m_Connections.end());
- m_MemoryStrategies.erase(m_MemoryStrategies.begin() + idx);
+ m_EdgeStrategies.erase(m_EdgeStrategies.begin() + idx);
}
void OutputSlot::DisconnectAll()
@@ -113,7 +113,7 @@ void OutputSlot::MoveAllConnections(OutputSlot& destination)
{
while (GetNumConnections() > 0)
{
- BOOST_ASSERT_MSG(m_MemoryStrategies[0] == MemoryStrategy::Undefined,
+ BOOST_ASSERT_MSG(m_EdgeStrategies[0] == EdgeStrategy::Undefined,
"Cannot move connections once memory strategies have be established.");
InputSlot& connection = *GetConnection(0);
@@ -174,14 +174,14 @@ ITensorHandleFactory::FactoryId OutputSlot::GetTensorHandleFactoryId() const
return m_TensorHandleFactoryId;
}
-void OutputSlot::SetMemoryStrategy(unsigned int connectionIndex, MemoryStrategy strategy)
+void OutputSlot::SetEdgeStrategy(unsigned int connectionIndex, EdgeStrategy strategy)
{
- m_MemoryStrategies[connectionIndex] = strategy;
+ m_EdgeStrategies[connectionIndex] = strategy;
}
-MemoryStrategy OutputSlot::GetMemoryStrategyForConnection(unsigned int connectionIdx) const
+EdgeStrategy OutputSlot::GetEdgeStrategyForConnection(unsigned int connectionIdx) const
{
- return m_MemoryStrategies[connectionIdx];
+ return m_EdgeStrategies[connectionIdx];
}
namespace {
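
Throughout Layer.cpp the renamed m_EdgeStrategies vector stays in lockstep with m_Connections: Connect() pushes an Undefined entry and Disconnect() erases the entry at the matching index. A stand-alone model of that invariant, using stand-in types rather than the Arm NN classes:

#include <algorithm>
#include <cassert>
#include <vector>

enum class EdgeStrategy { Undefined, DirectCompatibility, CopyToTarget, ExportToTarget };

struct OutputSlotModel
{
    std::vector<int>          connections; // stand-in for the InputSlot* entries
    std::vector<EdgeStrategy> strategies;  // one entry per connection, as in the patch

    int Connect(int dst)
    {
        connections.push_back(dst);
        strategies.push_back(EdgeStrategy::Undefined);
        return static_cast<int>(connections.size()) - 1;
    }

    void Disconnect(int dst)
    {
        auto it = std::find(connections.begin(), connections.end(), dst);
        if (it == connections.end()) { return; }
        strategies.erase(strategies.begin() + (it - connections.begin()));
        connections.erase(it);
        assert(connections.size() == strategies.size()); // the invariant both methods preserve
    }
};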
diff --git a/src/armnn/Layer.hpp b/src/armnn/Layer.hpp
index b90d040475..5944ea83ed 100644
--- a/src/armnn/Layer.hpp
+++ b/src/armnn/Layer.hpp
@@ -123,7 +123,7 @@ public:
void Disconnect(InputSlot& slot);
const std::vector<InputSlot*>& GetConnections() const { return m_Connections; }
- const std::vector<MemoryStrategy>& GetMemoryStrategies() const { return m_MemoryStrategies; }
+ const std::vector<EdgeStrategy>& GetEdgeStrategies() const { return m_EdgeStrategies; }
bool ValidateTensorShape(const TensorShape& shape) const;
@@ -160,8 +160,8 @@ public:
void SetTensorHandleFactory(const ITensorHandleFactory::FactoryId& id);
ITensorHandleFactory::FactoryId GetTensorHandleFactoryId() const;
- void SetMemoryStrategy(unsigned int connectionIndex, MemoryStrategy strategy);
- MemoryStrategy GetMemoryStrategyForConnection(unsigned int connectionIdx) const;
+ void SetEdgeStrategy(unsigned int connectionIndex, EdgeStrategy strategy);
+ EdgeStrategy GetEdgeStrategyForConnection(unsigned int connectionIdx) const;
private:
void ValidateConnectionIndex(unsigned int index) const;
@@ -171,7 +171,7 @@ private:
std::vector<InputSlot*> m_Connections;
ITensorHandleFactory::FactoryId m_TensorHandleFactoryId;
- std::vector<MemoryStrategy> m_MemoryStrategies;
+ std::vector<EdgeStrategy> m_EdgeStrategies;
};
// InputSlot inlines that need OutputSlot declaration.
diff --git a/src/armnn/LayerSupport.cpp b/src/armnn/LayerSupport.cpp
index 047c80a8c4..5a756b9544 100644
--- a/src/armnn/LayerSupport.cpp
+++ b/src/armnn/LayerSupport.cpp
@@ -371,6 +371,15 @@ bool IsMemCopySupported(const BackendId &backend,
FORWARD_LAYER_SUPPORT_FUNC(backend, IsMemCopySupported, input, output);
}
+bool IsMemImportSupported(const BackendId &backend,
+ const TensorInfo &input,
+ const TensorInfo &output,
+ char *reasonIfUnsupported,
+ size_t reasonIfUnsupportedMaxLength)
+{
+ FORWARD_LAYER_SUPPORT_FUNC(backend, IsMemImportSupported, input, output);
+}
+
bool IsMergeSupported(const BackendId& backend,
const TensorInfo& input0,
const TensorInfo& input1,
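
A hedged usage sketch for the new query, assuming the declaration is exposed next to IsMemCopySupported in armnn/LayerSupport.hpp (the public header is not part of this diff); the backend id and tensor shape are arbitrary examples:

#include <armnn/BackendId.hpp>
#include <armnn/LayerSupport.hpp>
#include <armnn/Tensor.hpp>
#include <armnn/Types.hpp>

bool MemImportPossible()
{
    unsigned int dims[] = { 1, 3, 224, 224 };
    armnn::TensorInfo info(4, dims, armnn::DataType::Float32);

    char reason[1024] = { 0 };
    return armnn::IsMemImportSupported(armnn::BackendId("CpuAcc"), // example backend only
                                       info,   // input
                                       info,   // output
                                       reason,
                                       sizeof(reason));
}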
diff --git a/src/armnn/LayersFwd.hpp b/src/armnn/LayersFwd.hpp
index 2c8d5d2e07..cadcd49acb 100644
--- a/src/armnn/LayersFwd.hpp
+++ b/src/armnn/LayersFwd.hpp
@@ -32,6 +32,7 @@
#include "layers/MaximumLayer.hpp"
#include "layers/MeanLayer.hpp"
#include "layers/MemCopyLayer.hpp"
+#include "layers/MemImportLayer.hpp"
#include "layers/MergeLayer.hpp"
#include "layers/MinimumLayer.hpp"
#include "layers/MultiplicationLayer.hpp"
@@ -110,6 +111,7 @@ DECLARE_LAYER(Lstm)
DECLARE_LAYER(Maximum)
DECLARE_LAYER(Mean)
DECLARE_LAYER(MemCopy)
+DECLARE_LAYER(MemImport)
DECLARE_LAYER(Merge)
DECLARE_LAYER(Minimum)
DECLARE_LAYER(Multiplication)
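
DECLARE_LAYER is defined near the top of LayersFwd.hpp, outside this hunk; its effect is to map a LayerType enum value onto the concrete layer class at compile time. A hypothetical sketch of what the new entry expands to (an assumption, not a copy of the real macro):

// Hypothetical expansion sketch; the macro in LayersFwd.hpp may differ in
// detail, but the net effect is a compile-time LayerType -> class mapping.
template <LayerType Type>
struct LayerTypeOfImpl;

template <>
struct LayerTypeOfImpl<LayerType::MemImport>
{
    using Type = MemImportLayer;
};

template <LayerType Type>
using LayerTypeOf = typename LayerTypeOfImpl<Type>::Type;
// LayerTypeOf<LayerType::MemImport> is MemImportLayer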
diff --git a/src/armnn/LoadedNetwork.cpp b/src/armnn/LoadedNetwork.cpp
index 7873e48780..a81528aa65 100644
--- a/src/armnn/LoadedNetwork.cpp
+++ b/src/armnn/LoadedNetwork.cpp
@@ -14,6 +14,8 @@
#include <backendsCommon/CpuTensorHandle.hpp>
#include <backendsCommon/BackendRegistry.hpp>
#include <backendsCommon/IMemoryManager.hpp>
+#include <backendsCommon/MemCopyWorkload.hpp>
+#include <backendsCommon/MemSyncWorkload.hpp>
#include <boost/polymorphic_cast.hpp>
#include <boost/assert.hpp>
@@ -389,8 +391,22 @@ void LoadedNetwork::EnqueueInput(const BindableLayer& layer, ITensorHandle* tens
inputQueueDescriptor.m_Outputs.push_back(outputTensorHandle);
info.m_OutputTensorInfos.push_back(outputTensorInfo);
- const IWorkloadFactory& workloadFactory = GetWorkloadFactory(layer);
- auto inputWorkload = workloadFactory.CreateInput(inputQueueDescriptor, info);
+ MemorySourceFlags importFlags = outputTensorHandle->GetImportFlags();
+ if (CheckFlag(importFlags, MemorySource::Malloc)) // Try to import the input tensor
+ {
+ // This assumes a CPU Tensor handle
+ void* mem = tensorHandle->Map(false);
+ if (outputTensorHandle->Import(mem, MemorySource::Malloc))
+ {
+ tensorHandle->Unmap();
+ return; // No need for a workload since the import has been done.
+ }
+ tensorHandle->Unmap();
+ }
+
+ // Create a mem copy workload for input since we could not import
+ auto inputWorkload = std::make_unique<CopyMemGenericWorkload>(inputQueueDescriptor, info);
+
BOOST_ASSERT_MSG(inputWorkload, "No input workload created");
m_InputQueue.push_back(move(inputWorkload));
}
@@ -422,11 +438,41 @@ void LoadedNetwork::EnqueueOutput(const BindableLayer& layer, ITensorHandle* ten
ITensorHandle* inputTensorHandle = outputHandler.GetData();
BOOST_ASSERT_MSG(inputTensorHandle != nullptr, "Data should have been allocated.");
+ // Try to import the output tensor.
+ // Note: We can only import the output pointer if all of the following hold true:
+ // a) The imported pointer is aligned sufficiently
+ // b) The tensor has zero padding
+ // c) There is only one connection to the OutputSlot and it is to an OutputLayer.
+ // d) The output pointer is allocated via malloc. (Other types will be supported in a later release)
+ if (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetNumConnections() == 1)
+ {
+ MemorySourceFlags importFlags = inputTensorHandle->GetImportFlags();
+ if (CheckFlag(importFlags, MemorySource::Malloc))
+ {
+ void* mem = tensorHandle->Map(false);
+ bool importOk = inputTensorHandle->Import(mem, MemorySource::Malloc);
+ tensorHandle->Unmap();
+
+ if (importOk)
+ {
+ // Insert synchronization workload
+ MemSyncQueueDescriptor syncDesc;
+ syncDesc.m_Inputs.push_back(inputTensorHandle);
+ info.m_InputTensorInfos.push_back(inputTensorInfo);
+ auto syncWorkload = std::make_unique<SyncMemGenericWorkload>(syncDesc, info);
+ BOOST_ASSERT_MSG(syncWorkload, "No sync workload created");
+ m_OutputQueue.push_back(move(syncWorkload));
+
+ return; //No need to add the output workload below
+ }
+ }
+ }
+
+ // If we got here then we couldn't import the memory, so add an output workload which performs a memcopy.
outputQueueDescriptor.m_Inputs.push_back(inputTensorHandle);
info.m_InputTensorInfos.push_back(inputTensorInfo);
- const IWorkloadFactory& workloadFactory = GetWorkloadFactory(layer);
- auto outputWorkload = workloadFactory.CreateOutput(outputQueueDescriptor, info);
+ auto outputWorkload = std::make_unique<CopyMemGenericWorkload>(outputQueueDescriptor, info);
BOOST_ASSERT_MSG(outputWorkload, "No output workload created");
m_OutputQueue.push_back(move(outputWorkload));
}
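
Both queue paths above follow the same pattern: map the user's CPU tensor, ask the backend-side handle to adopt the pointer, and fall back to a copy workload only if the import is refused. A minimal model of that pattern with stand-in types (the real ITensorHandle::Import also takes a MemorySource argument):

// Stand-in interface for illustration; not the Arm NN ITensorHandle class.
struct HandleModel
{
    virtual void* Map(bool blocking) = 0;
    virtual void  Unmap() = 0;
    virtual bool  Import(void* memory) = 0; // false: handle cannot adopt this pointer
    virtual ~HandleModel() = default;
};

// Returns true on zero-copy success; on false the caller must enqueue a
// CopyMemGenericWorkload instead, exactly as EnqueueInput/EnqueueOutput do.
bool TryZeroCopy(HandleModel& userTensor, HandleModel& backendTensor)
{
    void* mem = userTensor.Map(/*blocking=*/false);
    bool imported = backendTensor.Import(mem);
    userTensor.Unmap();
    return imported;
}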
diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp
index 2195c71735..b30cd9f3c2 100644
--- a/src/armnn/Network.cpp
+++ b/src/armnn/Network.cpp
@@ -441,7 +441,7 @@ bool RequiresCopy(ITensorHandleFactory::FactoryId src,
ITensorHandleFactory* srcFactory = registry.GetFactory(src);
ITensorHandleFactory* dstFactory = registry.GetFactory(dst);
- if (srcFactory->SupportsExport() && dstFactory->SupportsImport())
+ if ((srcFactory->GetExportFlags() & dstFactory->GetImportFlags()) != 0)
{
return false;
}
@@ -493,11 +493,14 @@ ITensorHandleFactory::FactoryId CalculateSlotOptionForInput(BackendsMap& backend
auto dstPrefs = toBackend->second.get()->GetHandleFactoryPreferences();
for (auto&& dst : dstPrefs)
{
- // Input layers use the mem copy workload, so the selected factory must support map/unmap API
+ // Input layers use the mem copy workload or import, so the selected factory must
+ // support either the map/unmap API or Import API
ITensorHandleFactory* factory = registry.GetFactory(dst);
- if (!factory->SupportsMapUnmap())
+ if (!factory->SupportsMapUnmap() &&
+ !CheckFlag(factory->GetImportFlags(), MemorySource::Malloc)) // Just support cpu mem imports for now
{
- // The current tensor handle factory does not support the map/unmap strategy, move to the next one
+ // The current tensor handle factory does not support the map/unmap or import
+ // strategy, move to the next one
continue;
}
@@ -648,11 +651,11 @@ ITensorHandleFactory::FactoryId CalculateSlotOption(BackendsMap& backends,
return ITensorHandleFactory::LegacyFactoryId;
}
-MemoryStrategy CalculateStrategy(BackendsMap& backends,
- ITensorHandleFactory::FactoryId srcFactoryId,
- const Layer& layer,
- const Layer& connectedLayer,
- TensorHandleFactoryRegistry& registry)
+EdgeStrategy CalculateEdgeStrategy(BackendsMap& backends,
+ ITensorHandleFactory::FactoryId srcFactoryId,
+ const Layer& layer,
+ const Layer& connectedLayer,
+ TensorHandleFactoryRegistry& registry)
{
auto toBackend = backends.find(connectedLayer.GetBackendId());
BOOST_ASSERT_MSG(toBackend != backends.end(), "Backend id not found for the connected layer");
@@ -664,19 +667,19 @@ MemoryStrategy CalculateStrategy(BackendsMap& backends,
{
if (layer.GetBackendId() != connectedLayer.GetBackendId())
{
- return MemoryStrategy::CopyToTarget;
+ return EdgeStrategy::CopyToTarget;
}
else
{
- return MemoryStrategy::DirectCompatibility;
+ return EdgeStrategy::DirectCompatibility;
}
}
// TensorHandleFactory API present, so perform more sophisticated strategies.
- // Dst Output layers don't require copy because they use map/unmap
+ // Dst Output layers don't require copy because they use import or map/unmap
if (connectedLayer.GetType() == LayerType::Output)
{
- return MemoryStrategy::DirectCompatibility;
+ return EdgeStrategy::DirectCompatibility;
}
// Search for direct match in prefs
@@ -684,20 +687,20 @@ MemoryStrategy CalculateStrategy(BackendsMap& backends,
{
if (pref == srcFactoryId)
{
- return MemoryStrategy::DirectCompatibility;
+ return EdgeStrategy::DirectCompatibility;
}
}
// Search for export/import options
ITensorHandleFactory* srcFactory = registry.GetFactory(srcFactoryId);
- if (srcFactory->SupportsExport())
+ if (srcFactory->GetExportFlags() != 0)
{
for (auto&& pref : dstPrefs)
{
ITensorHandleFactory* dstFactory = registry.GetFactory(pref);
- if (dstFactory->SupportsImport())
+ if ((dstFactory->GetImportFlags() & srcFactory->GetExportFlags()) != 0)
{
- return MemoryStrategy::ExportToTarget;
+ return EdgeStrategy::ExportToTarget;
}
}
}
@@ -710,12 +713,12 @@ MemoryStrategy CalculateStrategy(BackendsMap& backends,
ITensorHandleFactory* dstFactory = registry.GetFactory(pref);
if (dstFactory->SupportsMapUnmap())
{
- return MemoryStrategy::CopyToTarget;
+ return EdgeStrategy::CopyToTarget;
}
}
}
- return MemoryStrategy::Undefined;
+ return EdgeStrategy::Undefined;
}
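
Condensing CalculateEdgeStrategy above: an edge's strategy is decided in a fixed order: direct factory match first, then export/import when the producer's export flags intersect a destination preference's import flags, then a map/unmap copy, with Undefined (an optimizer error) as the last resort. A sketch of that cascade, with the preference loops collapsed into booleans and masks:

#include <cstdint>

enum class EdgeStrategy { Undefined, DirectCompatibility, CopyToTarget, ExportToTarget };
using MemorySourceFlags = uint32_t;

// Condensed decision order; the real function iterates the destination
// backend's factory preferences rather than taking pre-collapsed arguments.
EdgeStrategy ChooseEdgeStrategy(bool directFactoryMatch,
                                MemorySourceFlags srcExportFlags,
                                MemorySourceFlags dstImportFlags,
                                bool dstSupportsMapUnmap)
{
    if (directFactoryMatch)                     { return EdgeStrategy::DirectCompatibility; }
    if ((srcExportFlags & dstImportFlags) != 0) { return EdgeStrategy::ExportToTarget; }
    if (dstSupportsMapUnmap)                    { return EdgeStrategy::CopyToTarget; }
    return EdgeStrategy::Undefined; // SelectTensorHandleStrategy reports this as an error
}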
// Select the TensorHandleFactories and the corresponding memory strategy
@@ -756,15 +759,15 @@ OptimizationResult SelectTensorHandleStrategy(Graph& optGraph,
}
outputSlot.SetTensorHandleFactory(slotOption);
- // Now determine the "best" memory strategy for each connection given the slotOption.
+ // Now determine the "best" edge strategy for each connection given the slotOption.
unsigned int connectionIdx = 0;
for (auto&& connection : outputSlot.GetConnections())
{
const Layer& connectedLayer = connection->GetOwningLayer();
- MemoryStrategy strategy = CalculateStrategy(backends, slotOption, *layer, connectedLayer, registry);
+ EdgeStrategy strategy = CalculateEdgeStrategy(backends, slotOption, *layer, connectedLayer, registry);
- if (strategy == MemoryStrategy::Undefined)
+ if (strategy == EdgeStrategy::Undefined)
{
result.m_Error = true;
if (errMessages)
@@ -775,7 +778,7 @@ OptimizationResult SelectTensorHandleStrategy(Graph& optGraph,
return;
}
- outputSlot.SetMemoryStrategy(connectionIdx, strategy);
+ outputSlot.SetEdgeStrategy(connectionIdx, strategy);
connectionIdx++;
}
@@ -887,7 +890,7 @@ IOptimizedNetworkPtr Optimize(const INetwork& inNetwork,
}
// Based on the tensor handle strategy determined above, insert copy layers where required.
- optGraph.AddCopyLayers(backends, tensorHandleFactoryRegistry);
+ optGraph.AddCompatibilityLayers(backends, tensorHandleFactoryRegistry);
// Convert constants
Optimizer::Pass(optGraph, MakeOptimizations(ConvertConstantsFloatToHalf()));
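
For orientation, the pass ordering this patch relies on inside Optimize(): edge strategies are computed first, then materialized as copy or import layers. A condensed sketch of that slice of the function (other passes elided; exact error-handling spelling is an assumption):

// Condensed sketch, not the full Optimize() body.
OptimizationResult strategyResult = SelectTensorHandleStrategy(optGraph,
                                                               backends,
                                                               tensorHandleFactoryRegistry,
                                                               errMessages);
// Every connection now carries an EdgeStrategy; insert MemCopyLayer /
// MemImportLayer instances wherever an edge recorded one.
optGraph.AddCompatibilityLayers(backends, tensorHandleFactoryRegistry);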
diff --git a/src/armnn/layers/MemImportLayer.cpp b/src/armnn/layers/MemImportLayer.cpp
new file mode 100644
index 0000000000..7a922f5a7c
--- /dev/null
+++ b/src/armnn/layers/MemImportLayer.cpp
@@ -0,0 +1,54 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#include "MemImportLayer.hpp"
+
+#include "LayerCloneBase.hpp"
+
+#include <armnn/TypesUtils.hpp>
+#include <backendsCommon/WorkloadData.hpp>
+#include <backendsCommon/WorkloadFactory.hpp>
+#include <backendsCommon/MemImportWorkload.hpp>
+
+namespace armnn
+{
+
+MemImportLayer::MemImportLayer(const char* name)
+ : Layer(1, 1, LayerType::MemImport, name)
+{
+}
+
+MemImportLayer* MemImportLayer::Clone(Graph& graph) const
+{
+ return CloneBase<MemImportLayer>(graph, GetName());
+}
+
+std::unique_ptr<IWorkload> MemImportLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const
+{
+ MemImportQueueDescriptor descriptor;
+
+ // This is different from other workloads: it does not get created by the workload factory.
+ return std::make_unique<ImportMemGenericWorkload>(descriptor, PrepInfoAndDesc(descriptor, graph));
+}
+
+void MemImportLayer::ValidateTensorShapesFromInputs()
+{
+ VerifyLayerConnections(1, CHECK_LOCATION());
+
+ auto inferredShapes = InferOutputShapes({ GetInputSlot(0).GetConnection()->GetTensorInfo().GetShape() });
+
+ BOOST_ASSERT(inferredShapes.size() == 1);
+
+ ConditionalThrowIfNotEqual<LayerValidationException>(
+ "MemImportLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.",
+ GetOutputSlot(0).GetTensorInfo().GetShape(),
+ inferredShapes[0]);
+}
+
+void MemImportLayer::Accept(ILayerVisitor& visitor) const
+{
+ throw armnn::Exception("MemImportLayer should not appear in an input graph");
+}
+
+} // namespace armnn
diff --git a/src/armnn/layers/MemImportLayer.hpp b/src/armnn/layers/MemImportLayer.hpp
new file mode 100644
index 0000000000..2d02c1fb41
--- /dev/null
+++ b/src/armnn/layers/MemImportLayer.hpp
@@ -0,0 +1,42 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#pragma once
+
+#include <Layer.hpp>
+
+namespace armnn
+{
+
+/// This layer represents a memory import operation.
+class MemImportLayer : public Layer
+{
+public:
+ /// Makes a workload for the MemImport type.
+ /// @param [in] graph The graph where this layer can be found.
+ /// @param [in] factory The workload factory which will create the workload.
+ /// @return A pointer to the created workload, or nullptr if not created.
+ virtual std::unique_ptr<IWorkload> CreateWorkload(const Graph& graph,
+ const IWorkloadFactory& factory) const override;
+
+ /// Creates a dynamically-allocated copy of this layer.
+ /// @param [in] graph The graph into which this layer is being cloned.
+ MemImportLayer* Clone(Graph& graph) const override;
+
+ /// Check if the input tensor shape(s)
+ /// will lead to a valid configuration of @ref MemImportLayer.
+ void ValidateTensorShapesFromInputs() override;
+
+ void Accept(ILayerVisitor& visitor) const override;
+
+protected:
+ /// Constructor to create a MemImportLayer.
+ /// @param [in] name Optional name for the layer.
+ MemImportLayer(const char* name);
+
+ /// Default destructor
+ ~MemImportLayer() = default;
+};
+
+} // namespace
diff --git a/src/armnn/test/GraphTests.cpp b/src/armnn/test/GraphTests.cpp
index 7950ec49f4..7bd6aac98b 100644
--- a/src/armnn/test/GraphTests.cpp
+++ b/src/armnn/test/GraphTests.cpp
@@ -495,13 +495,13 @@ struct CopyLayersFixture
// Set the memory strategies - for this test should be DirectCompatibility for same backends,
// and CopyToTarget for different backends
- inputLayer->GetOutputSlot(0).SetMemoryStrategy(0, MemoryStrategy::DirectCompatibility);
- convLayer1->GetOutputSlot(0).SetMemoryStrategy(0, MemoryStrategy::CopyToTarget);
- convLayer1->GetOutputSlot(0).SetMemoryStrategy(1, MemoryStrategy::DirectCompatibility);
- convLayer2->GetOutputSlot(0).SetMemoryStrategy(0, MemoryStrategy::CopyToTarget);
- concatLayer->GetOutputSlot(0).SetMemoryStrategy(0, MemoryStrategy::DirectCompatibility);
- actLayer->GetOutputSlot(0).SetMemoryStrategy(0, MemoryStrategy::DirectCompatibility);
- softmaxLayer->GetOutputSlot(0).SetMemoryStrategy(0, MemoryStrategy::CopyToTarget);
+ inputLayer->GetOutputSlot(0).SetEdgeStrategy(0, EdgeStrategy::DirectCompatibility);
+ convLayer1->GetOutputSlot(0).SetEdgeStrategy(0, EdgeStrategy::CopyToTarget);
+ convLayer1->GetOutputSlot(0).SetEdgeStrategy(1, EdgeStrategy::DirectCompatibility);
+ convLayer2->GetOutputSlot(0).SetEdgeStrategy(0, EdgeStrategy::CopyToTarget);
+ concatLayer->GetOutputSlot(0).SetEdgeStrategy(0, EdgeStrategy::DirectCompatibility);
+ actLayer->GetOutputSlot(0).SetEdgeStrategy(0, EdgeStrategy::DirectCompatibility);
+ softmaxLayer->GetOutputSlot(0).SetEdgeStrategy(0, EdgeStrategy::CopyToTarget);
}
armnn::TensorInfo m_TensorDesc;
@@ -529,7 +529,7 @@ BOOST_FIXTURE_TEST_CASE(AddCopyLayers, CopyLayersFixture)
{
InitialiseTestGraph();
const armnn::Graph origGraph(m_Graph);
- m_Graph.AddCopyLayers(m_Backends, m_FactoryRegistry);
+ m_Graph.AddCompatibilityLayers(m_Backends, m_FactoryRegistry);
TestGraphAfterAddingCopyLayers(m_Graph, origGraph);
}
@@ -537,13 +537,13 @@ BOOST_FIXTURE_TEST_CASE(AddCopyLayers, CopyLayersFixture)
BOOST_FIXTURE_TEST_CASE(AddCopyLayersSeveralTimes, CopyLayersFixture)
{
InitialiseTestGraph();
- m_Graph.AddCopyLayers(m_Backends, m_FactoryRegistry);
+ m_Graph.AddCompatibilityLayers(m_Backends, m_FactoryRegistry);
- // Calling AddCopyLayers() several times should not change the connections.
+ // Calling AddCompatibilityLayers() several times should not change the connections.
const std::vector<Edge> edges = GetEdgeList(m_Graph);
for (int i = 0; i < 4; ++i)
{
- m_Graph.AddCopyLayers(m_Backends, m_FactoryRegistry);
+ m_Graph.AddCompatibilityLayers(m_Backends, m_FactoryRegistry);
const std::vector<Edge> otherEdges = GetEdgeList(m_Graph);
BOOST_TEST((edges == otherEdges));
}
@@ -571,18 +571,18 @@ BOOST_FIXTURE_TEST_CASE(CopyLayersAddedBetweenSameLayersHaveDifferentNames, Copy
splitterLayer->GetOutputSlot(1).Connect(additionLayer->GetInputSlot(1));
additionLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
- inputLayer->GetOutputSlot(0).SetMemoryStrategy(0, armnn::MemoryStrategy::DirectCompatibility);
- splitterLayer->GetOutputSlot(0).SetMemoryStrategy(0, armnn::MemoryStrategy::CopyToTarget);
- splitterLayer->GetOutputSlot(1).SetMemoryStrategy(0, armnn::MemoryStrategy::CopyToTarget);
- additionLayer->GetOutputSlot(0).SetMemoryStrategy(0, armnn::MemoryStrategy::DirectCompatibility);
+ inputLayer->GetOutputSlot(0).SetEdgeStrategy(0, armnn::EdgeStrategy::DirectCompatibility);
+ splitterLayer->GetOutputSlot(0).SetEdgeStrategy(0, armnn::EdgeStrategy::CopyToTarget);
+ splitterLayer->GetOutputSlot(1).SetEdgeStrategy(0, armnn::EdgeStrategy::CopyToTarget);
+ additionLayer->GetOutputSlot(0).SetEdgeStrategy(0, armnn::EdgeStrategy::DirectCompatibility);
- graph.AddCopyLayers(m_Backends, m_FactoryRegistry);
+ graph.AddCompatibilityLayers(m_Backends, m_FactoryRegistry);
std::vector<Edge> edges = GetEdgeList(graph);
BOOST_CHECK(edges.size() == 6u);
std::sort(edges.begin(), edges.end());
auto last = std::unique(edges.begin(), edges.end());
- BOOST_CHECK_MESSAGE(last == edges.end(), "Found duplicated edges after AddCopyLayers()");
+ BOOST_CHECK_MESSAGE(last == edges.end(), "Found duplicated edges after AddCompatibilityLayers()");
}
BOOST_AUTO_TEST_CASE(DuplicateLayerNames)
diff --git a/src/armnn/test/TensorHandleStrategyTest.cpp b/src/armnn/test/TensorHandleStrategyTest.cpp
index 3bb1c68169..c391b04d97 100644
--- a/src/armnn/test/TensorHandleStrategyTest.cpp
+++ b/src/armnn/test/TensorHandleStrategyTest.cpp
@@ -50,9 +50,11 @@ public:
return nullptr;
}
- virtual const FactoryId GetId() const override { return m_Id; }
+ const FactoryId GetId() const override { return m_Id; }
- virtual bool SupportsSubTensors() const override { return true; }
+ bool SupportsSubTensors() const override { return true; }
+
+ MemorySourceFlags GetExportFlags() const override { return 1; }
private:
FactoryId m_Id = "UninitializedId";
@@ -60,6 +62,38 @@ private:
std::weak_ptr<IMemoryManager> m_MemMgr;
};
+class TestFactoryImport : public ITensorHandleFactory
+{
+public:
+ TestFactoryImport(std::weak_ptr<IMemoryManager> mgr, ITensorHandleFactory::FactoryId id)
+ : m_Id(id)
+ , m_MemMgr(mgr)
+ {}
+
+ std::unique_ptr<ITensorHandle> CreateSubTensorHandle(ITensorHandle& parent,
+ TensorShape const& subTensorShape,
+ unsigned int const* subTensorOrigin) const override
+ {
+ return nullptr;
+ }
+
+ std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo) const override
+ {
+ return nullptr;
+ }
+
+ const FactoryId GetId() const override { return m_Id; }
+
+ bool SupportsSubTensors() const override { return true; }
+
+ MemorySourceFlags GetImportFlags() const override { return 1; }
+
+private:
+ FactoryId m_Id = "ImporterId";
+
+ std::weak_ptr<IMemoryManager> m_MemMgr;
+};
+
class TestBackendA : public IBackendInternal
{
public:
@@ -173,6 +207,42 @@ private:
BackendId m_Id = "BackendC";
};
+class TestBackendD : public IBackendInternal
+{
+public:
+ TestBackendD() = default;
+
+ const BackendId& GetId() const override { return m_Id; }
+
+ IWorkloadFactoryPtr CreateWorkloadFactory(const IMemoryManagerSharedPtr& memoryManager = nullptr) const override
+ {
+ return IWorkloadFactoryPtr{};
+ }
+
+ IBackendInternal::ILayerSupportSharedPtr GetLayerSupport() const override
+ {
+ return ILayerSupportSharedPtr{};
+ }
+
+ std::vector<ITensorHandleFactory::FactoryId> GetHandleFactoryPreferences() const override
+ {
+ return std::vector<ITensorHandleFactory::FactoryId>{
+ "TestHandleFactoryD1"
+ };
+ }
+
+ void RegisterTensorHandleFactories(TensorHandleFactoryRegistry& registry) override
+ {
+ auto mgr = std::make_shared<TestMemMgr>();
+
+ registry.RegisterMemoryManager(mgr);
+ registry.RegisterFactory(std::make_unique<TestFactoryImport>(mgr, "TestHandleFactoryD1"));
+ }
+
+private:
+ BackendId m_Id = "BackendD";
+};
+
BOOST_AUTO_TEST_SUITE(TensorHandle)
@@ -200,16 +270,19 @@ BOOST_AUTO_TEST_CASE(TensorHandleSelectionStrategy)
auto backendA = std::make_unique<TestBackendA>();
auto backendB = std::make_unique<TestBackendB>();
auto backendC = std::make_unique<TestBackendC>();
+ auto backendD = std::make_unique<TestBackendD>();
TensorHandleFactoryRegistry registry;
backendA->RegisterTensorHandleFactories(registry);
backendB->RegisterTensorHandleFactories(registry);
backendC->RegisterTensorHandleFactories(registry);
+ backendD->RegisterTensorHandleFactories(registry);
BackendsMap backends;
backends["BackendA"] = std::move(backendA);
backends["BackendB"] = std::move(backendB);
backends["BackendC"] = std::move(backendC);
+ backends["BackendD"] = std::move(backendD);
armnn::Graph graph;
@@ -226,13 +299,17 @@ BOOST_AUTO_TEST_CASE(TensorHandleSelectionStrategy)
armnn::SoftmaxLayer* const softmaxLayer3 = graph.AddLayer<armnn::SoftmaxLayer>(smDesc, "softmax3");
softmaxLayer3->SetBackendId("BackendC");
+ armnn::SoftmaxLayer* const softmaxLayer4 = graph.AddLayer<armnn::SoftmaxLayer>(smDesc, "softmax4");
+ softmaxLayer4->SetBackendId("BackendD");
+
armnn::OutputLayer* const outputLayer = graph.AddLayer<armnn::OutputLayer>(0, "output");
outputLayer->SetBackendId("BackendA");
inputLayer->GetOutputSlot(0).Connect(softmaxLayer1->GetInputSlot(0));
softmaxLayer1->GetOutputSlot(0).Connect(softmaxLayer2->GetInputSlot(0));
softmaxLayer2->GetOutputSlot(0).Connect(softmaxLayer3->GetInputSlot(0));
- softmaxLayer3->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
+ softmaxLayer3->GetOutputSlot(0).Connect(softmaxLayer4->GetInputSlot(0));
+ softmaxLayer4->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
graph.TopologicalSort();
@@ -246,29 +323,45 @@ BOOST_AUTO_TEST_CASE(TensorHandleSelectionStrategy)
OutputSlot& softmaxLayer1Out = softmaxLayer1->GetOutputSlot(0);
OutputSlot& softmaxLayer2Out = softmaxLayer2->GetOutputSlot(0);
OutputSlot& softmaxLayer3Out = softmaxLayer3->GetOutputSlot(0);
+ OutputSlot& softmaxLayer4Out = softmaxLayer4->GetOutputSlot(0);
// Check that the correct factory was selected
BOOST_TEST(inputLayerOut.GetTensorHandleFactoryId() == "TestHandleFactoryA1");
BOOST_TEST(softmaxLayer1Out.GetTensorHandleFactoryId() == "TestHandleFactoryB1");
BOOST_TEST(softmaxLayer2Out.GetTensorHandleFactoryId() == "TestHandleFactoryB1");
BOOST_TEST(softmaxLayer3Out.GetTensorHandleFactoryId() == "TestHandleFactoryC1");
+ BOOST_TEST(softmaxLayer4Out.GetTensorHandleFactoryId() == "TestHandleFactoryD1");
// Check that the correct strategy was selected
- BOOST_TEST((inputLayerOut.GetMemoryStrategyForConnection(0) == MemoryStrategy::DirectCompatibility));
- BOOST_TEST((softmaxLayer1Out.GetMemoryStrategyForConnection(0) == MemoryStrategy::DirectCompatibility));
- BOOST_TEST((softmaxLayer2Out.GetMemoryStrategyForConnection(0) == MemoryStrategy::CopyToTarget));
- BOOST_TEST((softmaxLayer3Out.GetMemoryStrategyForConnection(0) == MemoryStrategy::DirectCompatibility));
-
- graph.AddCopyLayers(backends, registry);
- int count= 0;
- graph.ForEachLayer([&count](Layer* layer)
+ BOOST_TEST((inputLayerOut.GetEdgeStrategyForConnection(0) == EdgeStrategy::DirectCompatibility));
+ BOOST_TEST((softmaxLayer1Out.GetEdgeStrategyForConnection(0) == EdgeStrategy::DirectCompatibility));
+ BOOST_TEST((softmaxLayer2Out.GetEdgeStrategyForConnection(0) == EdgeStrategy::CopyToTarget));
+ BOOST_TEST((softmaxLayer3Out.GetEdgeStrategyForConnection(0) == EdgeStrategy::ExportToTarget));
+ BOOST_TEST((softmaxLayer4Out.GetEdgeStrategyForConnection(0) == EdgeStrategy::DirectCompatibility));
+
+ graph.AddCompatibilityLayers(backends, registry);
+
+ // Test for copy layers
+ int copyCount = 0;
+ graph.ForEachLayer([&copyCount](Layer* layer)
{
if (layer->GetType() == LayerType::MemCopy)
{
- count++;
+ copyCount++;
+ }
+ });
+ BOOST_TEST(copyCount == 1);
+
+ // Test for import layers
+ int importCount = 0;
+ graph.ForEachLayer([&importCount](Layer* layer)
+ {
+ if (layer->GetType() == LayerType::MemImport)
+ {
+ importCount++;
}
});
- BOOST_TEST(count == 1);
+ BOOST_TEST(importCount == 1);
}
BOOST_AUTO_TEST_SUITE_END()