about summary refs log tree commit diff
path: root/src/armnn
diff options
context:
space:
mode:
Diffstat (limited to 'src/armnn')
-rw-r--r-- src/armnn/Layer.cpp 12
-rw-r--r-- src/armnn/Layer.hpp 3
-rw-r--r-- src/armnn/LoadedNetwork.cpp 31
-rw-r--r-- src/armnn/Network.cpp 75
-rw-r--r-- src/armnn/layers/ConcatLayer.cpp 13
-rw-r--r-- src/armnn/layers/ConcatLayer.hpp 3
-rw-r--r-- src/armnn/layers/OutputLayer.hpp 5
-rw-r--r-- src/armnn/layers/SplitterLayer.cpp 13
-rw-r--r-- src/armnn/layers/SplitterLayer.hpp 3
-rw-r--r-- src/armnn/test/TensorHandleStrategyTest.cpp 8
10 files changed, 94 insertions, 72 deletions
diff --git a/src/armnn/Layer.cpp b/src/armnn/Layer.cpp
index e0d988d8ea..7761063650 100644
--- a/src/armnn/Layer.cpp
+++ b/src/armnn/Layer.cpp
@@ -249,8 +249,7 @@ void Layer::SetAdditionalInfo(QueueDescriptor& descriptor) const
void Layer::CreateTensorHandles(const TensorHandleFactoryRegistry& registry,
const IWorkloadFactory& workloadFactory,
- const bool IsMemoryManaged,
- MemorySource memSource)
+ const bool IsMemoryManaged)
{
for (unsigned int idx=0; idx < GetNumOutputSlots(); idx++)
{
@@ -266,14 +265,7 @@ void Layer::CreateTensorHandles(const TensorHandleFactoryRegistry& registry,
else
{
ITensorHandleFactory* handleFactory;
- if (memSource == MemorySource::Undefined )
- {
- handleFactory = registry.GetFactory(factoryId);
- }
- else
- {
- handleFactory = registry.GetFactory(factoryId, memSource);
- }
+ handleFactory = registry.GetFactory(factoryId);
ARMNN_ASSERT(handleFactory);
handler.CreateTensorHandles(*handleFactory, IsMemoryManaged);
}
diff --git a/src/armnn/Layer.hpp b/src/armnn/Layer.hpp
index 76f9b41f4c..0e0883c1cd 100644
--- a/src/armnn/Layer.hpp
+++ b/src/armnn/Layer.hpp
@@ -275,8 +275,7 @@ public:
virtual void CreateTensorHandles(const TensorHandleFactoryRegistry& registry,
const IWorkloadFactory& factory,
- const bool IsMemoryManaged = true,
- MemorySource memSource = MemorySource::Undefined);
+ const bool IsMemoryManaged = true);
/// Creates a dynamically-allocated copy of this layer.
/// @param graph - The Graph into which this Layer is being cloned.
diff --git a/src/armnn/LoadedNetwork.cpp b/src/armnn/LoadedNetwork.cpp
index 67de00f0f3..53a9e18863 100644
--- a/src/armnn/LoadedNetwork.cpp
+++ b/src/armnn/LoadedNetwork.cpp
@@ -150,7 +150,9 @@ LoadedNetwork::LoadedNetwork(std::unique_ptr<IOptimizedNetwork> net,
if (backend->SupportsTensorAllocatorAPI())
{
auto workloadFactory = backend->CreateWorkloadFactory(
- m_TensorHandleFactoryRegistry, m_OptimizedNetwork->pOptimizedNetworkImpl->GetModelOptions());
+ m_TensorHandleFactoryRegistry, m_OptimizedNetwork->pOptimizedNetworkImpl->GetModelOptions(),
+ static_cast<MemorySourceFlags>(m_NetworkProperties.m_InputSource),
+ static_cast<MemorySourceFlags>(m_NetworkProperties.m_OutputSource));
m_WorkloadFactories.emplace(
std::make_pair(backendId, std::make_pair(std::move(workloadFactory), nullptr)));
}
@@ -188,8 +190,7 @@ LoadedNetwork::LoadedNetwork(std::unique_ptr<IOptimizedNetwork> net,
// to false when creating TensorHandles
layer->CreateTensorHandles(m_TensorHandleFactoryRegistry,
workloadFactory,
- !m_NetworkProperties.m_ImportEnabled,
- m_NetworkProperties.m_InputSource);
+ !m_NetworkProperties.m_ImportEnabled);
break;
}
default:
@@ -202,8 +203,7 @@ LoadedNetwork::LoadedNetwork(std::unique_ptr<IOptimizedNetwork> net,
{
layer->CreateTensorHandles(m_TensorHandleFactoryRegistry,
workloadFactory,
- !m_NetworkProperties.m_ExportEnabled,
- m_NetworkProperties.m_OutputSource);
+ !m_NetworkProperties.m_ExportEnabled);
}
else
{
@@ -643,12 +643,12 @@ void LoadedNetwork::EnqueueInput(const BindableLayer& layer, ITensorHandle* tens
bool needMemCopy = true;
if (m_NetworkProperties.m_ImportEnabled) // Try import the input tensor
{
- if(CheckFlag(importFlags, MemorySource::Malloc) )
+ if(CheckFlag(importFlags, m_NetworkProperties.m_InputSource))
{
needMemCopy = false;
// This assumes a CPU Tensor handle
void* mem = tensorHandle->Map(false);
- if (outputTensorHandle->Import(mem, MemorySource::Malloc))
+ if (outputTensorHandle->Import(mem, m_NetworkProperties.m_InputSource))
{
tensorHandle->Unmap();
return; // No need for a workload since the import has been done.
@@ -718,11 +718,11 @@ void LoadedNetwork::EnqueueOutput(const BindableLayer& layer, ITensorHandle* ten
if(layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOwningLayer().GetType() != LayerType::Input)
{
MemorySourceFlags importFlags = inputTensorHandle->GetImportFlags();
- if (CheckFlag(importFlags, MemorySource::Malloc))
+ if (CheckFlag(importFlags, m_NetworkProperties.m_OutputSource))
{
needMemCopy = false;
void *mem = tensorHandle->Map(false);
- bool importOk = inputTensorHandle->Import(mem, MemorySource::Malloc);
+ bool importOk = inputTensorHandle->Import(mem, m_NetworkProperties.m_OutputSource);
tensorHandle->Unmap();
if (importOk)
@@ -1013,7 +1013,7 @@ void LoadedNetwork::EnqueueInput(const BindableLayer& layer,
MemorySourceFlags importFlags = descriptor.m_Outputs[0]->GetImportFlags();
if (m_NetworkProperties.m_ImportEnabled) // Try import the input tensor
{
- if (CheckFlag(importFlags, MemorySource::Malloc) )
+ if (CheckFlag(importFlags, m_NetworkProperties.m_InputSource) )
{
// This assumes a CPU Tensor handle
std::unique_ptr<ITensorHandle> tensorHandle =
@@ -1021,7 +1021,7 @@ void LoadedNetwork::EnqueueInput(const BindableLayer& layer,
inputTensor.GetMemoryArea());
void* mem = tensorHandle->Map(false);
- if (descriptor.m_Outputs[0]->Import(mem, MemorySource::Malloc))
+ if (descriptor.m_Outputs[0]->Import(mem, m_NetworkProperties.m_InputSource))
{
tensorHandle->Unmap();
return;
@@ -1078,14 +1078,14 @@ void LoadedNetwork::EnqueueOutput(const BindableLayer& layer, const Tensor& outp
if (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOwningLayer().GetType() != LayerType::Input)
{
MemorySourceFlags importFlags = inputTensorHandle->GetImportFlags();
- if (CheckFlag(importFlags, MemorySource::Malloc))
+ if (CheckFlag(importFlags, m_NetworkProperties.m_OutputSource))
{
std::unique_ptr<ITensorHandle> tensorHandle =
std::make_unique<PassthroughTensorHandle>(outputTensor.GetInfo(),
outputTensor.GetMemoryArea());
void* mem = tensorHandle->Map(false);
- bool importOk = inputTensorHandle->Import(mem, MemorySource::Malloc);
+ bool importOk = inputTensorHandle->Import(mem, m_NetworkProperties.m_OutputSource);
tensorHandle->Unmap();
if (importOk)
@@ -1270,7 +1270,10 @@ std::unique_ptr<IWorkingMemHandle> LoadedNetwork::CreateWorkingMemHandle(Network
{
if (backend.second->SupportsTensorAllocatorAPI())
{
- backend.second->RegisterTensorHandleFactories(tensorHandleFactoryRegistry);
+ backend.second->RegisterTensorHandleFactories(
+ tensorHandleFactoryRegistry,
+ static_cast<MemorySourceFlags>(m_NetworkProperties.m_InputSource),
+ static_cast<MemorySourceFlags>(m_NetworkProperties.m_OutputSource));
memoryManagers.emplace_back(tensorHandleFactoryRegistry.GetMemoryManagers().back());
}
else
diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp
index b79576c87e..f097e677d7 100644
--- a/src/armnn/Network.cpp
+++ b/src/armnn/Network.cpp
@@ -1165,7 +1165,8 @@ bool RequiresCopy(ITensorHandleFactory::FactoryId src,
// Find the handle factory for the input layer which results in fewest required copies.
ITensorHandleFactory::FactoryId CalculateSlotOptionForInput(BackendsMap& backends,
OutputSlot& slot,
- TensorHandleFactoryRegistry& registry)
+ TensorHandleFactoryRegistry& registry,
+ bool importEnabled)
{
Layer& layer = slot.GetOwningLayer();
ARMNN_ASSERT(layer.GetType() == LayerType::Input);
@@ -1191,6 +1192,7 @@ ITensorHandleFactory::FactoryId CalculateSlotOptionForInput(BackendsMap& backend
for (auto&& connection : slot.GetConnections())
{
+
const Layer& connectedLayer = connection->GetOwningLayer();
auto toBackend = backends.find(connectedLayer.GetBackendId());
@@ -1208,11 +1210,12 @@ ITensorHandleFactory::FactoryId CalculateSlotOptionForInput(BackendsMap& backend
// Input layers use the mem copy workload or import, so the selected factory must
// support either the map/unmap API or Import API
ITensorHandleFactory* factory = registry.GetFactory(dst);
- if (!factory->SupportsMapUnmap() &&
- !CheckFlag(factory->GetImportFlags(), MemorySource::Malloc)) // Just support cpu mem imports for now
+ if (importEnabled && factory->GetImportFlags() == 0)
+ {
+ continue;
+ }
+ else if (!importEnabled && !factory->SupportsMapUnmap())
{
- // The current tensor handle factory does not support the map/unmap or import
- // strategy, move to the next one
continue;
}
@@ -1257,7 +1260,8 @@ ITensorHandleFactory::FactoryId CalculateSlotOptionForOutput(BackendsMap& backen
// when considering all connections.
ITensorHandleFactory::FactoryId CalculateSlotOption(BackendsMap& backends,
OutputSlot& outputSlot,
- TensorHandleFactoryRegistry& registry)
+ TensorHandleFactoryRegistry& registry,
+ bool importEnabled)
{
// First ensure the from backends can support the TensorHandeAPI
Layer& layer = outputSlot.GetOwningLayer();
@@ -1268,14 +1272,13 @@ ITensorHandleFactory::FactoryId CalculateSlotOption(BackendsMap& backends,
return ITensorHandleFactory::LegacyFactoryId;
}
- // Connections to Output Layers requires support for map/unmap on the TensorHandle.
- bool requiresMapUnmap = false;
+ bool outputConnection = false;
for (auto&& connection : outputSlot.GetConnections())
{
const Layer& connectedLayer = connection->GetOwningLayer();
if (connectedLayer.GetType() == LayerType::Output)
{
- requiresMapUnmap = true;
+ outputConnection = true;
}
}
@@ -1286,8 +1289,48 @@ ITensorHandleFactory::FactoryId CalculateSlotOption(BackendsMap& backends,
std::map<ITensorHandleFactory::FactoryId, int> factoryScores;
for (auto&& pref : srcPrefs)
{
- if (requiresMapUnmap) // Only consider factories that support map/unmap if required
+ if (importEnabled)
+ {
+ ITensorHandleFactory* factory = registry.GetFactory(pref);
+ if (outputConnection)
+ {
+ // Check if this is fallback case
+ bool fallbackConnection = false;
+ for (auto&& inputSlot : layer.GetInputSlots())
+ {
+ if (inputSlot.GetConnectedOutputSlot()->GetOwningLayer().GetBackendId() != layer.GetBackendId())
+ {
+ fallbackConnection = true;
+ }
+ }
+ if (fallbackConnection)
+ {
+ auto factoryCap = factory->GetCapabilities(&layer, &layer, CapabilityClass::FallbackImportDisabled);
+ // Cannot use factory import if fallback import is not supported.
+ if (!factoryCap.empty())
+ {
+ continue;
+ }
+ }
+ else if (factory->GetExportFlags() == 0)
+ {
+ continue;
+ }
+ }
+ if (!outputConnection)
+ {
+ auto factoryCap = factory->GetCapabilities(&layer, &layer, CapabilityClass::FallbackImportDisabled);
+ // Cannot use factory import if fallback import is not supported.
+ if (!factoryCap.empty())
+ {
+ continue;
+ }
+ }
+
+ }
+ else
{
+ // Only consider factories that support map/unmap
ITensorHandleFactory* factory = registry.GetFactory(pref);
if (!factory->SupportsMapUnmap())
{
@@ -1296,6 +1339,7 @@ ITensorHandleFactory::FactoryId CalculateSlotOption(BackendsMap& backends,
}
}
+
auto it = factoryScores.find(pref);
if (it == factoryScores.end())
{
@@ -1417,15 +1461,18 @@ EdgeStrategy CalculateEdgeStrategy(BackendsMap& backends,
if (!dstFactory) {
continue;
}
-
if ((dstFactory->GetImportFlags() & srcFactory->GetExportFlags()) != 0)
{
auto srcCapability = srcFactory->GetCapabilities(&layer, &layer, CapabilityClass::PaddingRequired);
auto dstCapability = dstFactory->GetCapabilities(&connectedLayer,
&connectedLayer,
CapabilityClass::PaddingRequired);
+ auto srcFallback = srcFactory->GetCapabilities(&layer, &layer, CapabilityClass::FallbackImportDisabled);
+ auto dstFallback = dstFactory->GetCapabilities(&connectedLayer,
+ &connectedLayer,
+ CapabilityClass::FallbackImportDisabled);
// Do not require memory copy if the source and destination do not require padding.
- if (srcCapability.empty() && dstCapability.empty())
+ if (srcCapability.empty() && dstCapability.empty() && srcFallback.empty() && dstFallback.empty())
{
return EdgeStrategy::ExportToTarget;
}
@@ -1477,13 +1524,13 @@ OptimizationResult SelectTensorHandleStrategy(Graph& optGraph,
switch(layer->GetType())
{
case LayerType::Input:
- slotOption = CalculateSlotOptionForInput(backends, outputSlot, registry);
+ slotOption = CalculateSlotOptionForInput(backends, outputSlot, registry, importEnabled);
break;
case LayerType::Output:
slotOption = CalculateSlotOptionForOutput(backends, outputSlot, registry);
break;
default:
- slotOption = CalculateSlotOption(backends, outputSlot, registry);
+ slotOption = CalculateSlotOption(backends, outputSlot, registry, importEnabled);
break;
}
outputSlot.SetTensorHandleFactory(slotOption);
diff --git a/src/armnn/layers/ConcatLayer.cpp b/src/armnn/layers/ConcatLayer.cpp
index 3a20e1b3f6..238fdb66d9 100644
--- a/src/armnn/layers/ConcatLayer.cpp
+++ b/src/armnn/layers/ConcatLayer.cpp
@@ -179,8 +179,7 @@ void ConcatLayer::CreateTensors(const TensorHandleFactoryRegistry& registry,
void ConcatLayer::CreateTensorHandles(const TensorHandleFactoryRegistry& registry,
const IWorkloadFactory& workloadFactory,
- const bool isMemoryManaged,
- MemorySource memSource)
+ const bool isMemoryManaged)
{
OutputSlot& slot = GetOutputSlot(0);
ITensorHandleFactory::FactoryId factoryId = slot.GetTensorHandleFactoryId();
@@ -191,15 +190,7 @@ void ConcatLayer::CreateTensorHandles(const TensorHandleFactoryRegistry& registr
}
else
{
- ITensorHandleFactory* handleFactory;
- if (memSource == MemorySource::Undefined)
- {
- handleFactory = registry.GetFactory(factoryId);
- }
- else
- {
- handleFactory = registry.GetFactory(factoryId, memSource);
- }
+ ITensorHandleFactory* handleFactory = registry.GetFactory(factoryId);
ARMNN_ASSERT(handleFactory);
CreateTensors(registry, *handleFactory, isMemoryManaged);
}
diff --git a/src/armnn/layers/ConcatLayer.hpp b/src/armnn/layers/ConcatLayer.hpp
index 6a43318382..4315d66436 100644
--- a/src/armnn/layers/ConcatLayer.hpp
+++ b/src/armnn/layers/ConcatLayer.hpp
@@ -27,8 +27,7 @@ public:
/// @param [in] MemorySource Determine the source of memory e.g Malloc
virtual void CreateTensorHandles(const TensorHandleFactoryRegistry& registry,
const IWorkloadFactory& factory,
- const bool IsMemoryManaged = true,
- MemorySource memSource = MemorySource::Undefined) override;
+ const bool IsMemoryManaged = true) override;
/// Creates a dynamically-allocated copy of this layer.
/// @param [in] graph The graph into which this layer is being cloned.
diff --git a/src/armnn/layers/OutputLayer.hpp b/src/armnn/layers/OutputLayer.hpp
index fc6a8aa6b2..408a28a6f3 100644
--- a/src/armnn/layers/OutputLayer.hpp
+++ b/src/armnn/layers/OutputLayer.hpp
@@ -26,10 +26,9 @@ public:
/// @param [in] IsMemoryManaged Determine whether or not to assign a memory manager during creation
virtual void CreateTensorHandles(const TensorHandleFactoryRegistry& registry,
const IWorkloadFactory& factory,
- const bool isMemoryManaged = true,
- MemorySource memSource = MemorySource::Undefined) override
+ const bool isMemoryManaged = true) override
{
- IgnoreUnused(registry, factory, isMemoryManaged, memSource);
+ IgnoreUnused(registry, factory, isMemoryManaged);
}
/// Creates a dynamically-allocated copy of this layer.
diff --git a/src/armnn/layers/SplitterLayer.cpp b/src/armnn/layers/SplitterLayer.cpp
index adef9aa1a2..5e6622e13a 100644
--- a/src/armnn/layers/SplitterLayer.cpp
+++ b/src/armnn/layers/SplitterLayer.cpp
@@ -177,8 +177,7 @@ void SplitterLayer::CreateTensors(const TensorHandleFactoryRegistry& registry,
void SplitterLayer::CreateTensorHandles(const TensorHandleFactoryRegistry& registry,
const IWorkloadFactory& workloadFactory,
- const bool isMemoryManaged,
- MemorySource memSource)
+ const bool isMemoryManaged)
{
OutputSlot& slot = GetOutputSlot(0);
ITensorHandleFactory::FactoryId factoryId = slot.GetTensorHandleFactoryId();
@@ -189,15 +188,7 @@ void SplitterLayer::CreateTensorHandles(const TensorHandleFactoryRegistry& regis
}
else
{
- ITensorHandleFactory* handleFactory;
- if (memSource == MemorySource::Undefined)
- {
- handleFactory = registry.GetFactory(factoryId);
- }
- else
- {
- handleFactory = registry.GetFactory(factoryId, memSource);
- }
+ ITensorHandleFactory* handleFactory = registry.GetFactory(factoryId);
ARMNN_ASSERT(handleFactory);
CreateTensors(registry, *handleFactory, isMemoryManaged);
}
diff --git a/src/armnn/layers/SplitterLayer.hpp b/src/armnn/layers/SplitterLayer.hpp
index 075b136da9..f90696b1ad 100644
--- a/src/armnn/layers/SplitterLayer.hpp
+++ b/src/armnn/layers/SplitterLayer.hpp
@@ -26,8 +26,7 @@ public:
/// @param [in] IsMemoryManaged Determine whether or not to assign a memory manager during creation
virtual void CreateTensorHandles(const TensorHandleFactoryRegistry& registry,
const IWorkloadFactory& factory,
- const bool IsMemoryManaged = true,
- MemorySource memSource = MemorySource::Undefined) override;
+ const bool IsMemoryManaged = true) override;
/// Creates a dynamically-allocated copy of this layer.
/// @param [in] graph The graph into which this layer is being cloned.
diff --git a/src/armnn/test/TensorHandleStrategyTest.cpp b/src/armnn/test/TensorHandleStrategyTest.cpp
index c7aa30f701..47d0666414 100644
--- a/src/armnn/test/TensorHandleStrategyTest.cpp
+++ b/src/armnn/test/TensorHandleStrategyTest.cpp
@@ -139,7 +139,8 @@ public:
{
"TestHandleFactoryA1",
"TestHandleFactoryA2",
- "TestHandleFactoryB1"
+ "TestHandleFactoryB1",
+ "TestHandleFactoryD1"
};
}
@@ -252,7 +253,7 @@ public:
std::vector<ITensorHandleFactory::FactoryId> GetHandleFactoryPreferences() const override
{
return std::vector<ITensorHandleFactory::FactoryId>{
- "TestHandleFactoryD1"
+ "TestHandleFactoryD1",
};
}
@@ -279,6 +280,7 @@ BOOST_AUTO_TEST_CASE(RegisterFactories)
BOOST_TEST(backendA.GetHandleFactoryPreferences()[0] == "TestHandleFactoryA1");
BOOST_TEST(backendA.GetHandleFactoryPreferences()[1] == "TestHandleFactoryA2");
BOOST_TEST(backendA.GetHandleFactoryPreferences()[2] == "TestHandleFactoryB1");
+ BOOST_TEST(backendA.GetHandleFactoryPreferences()[3] == "TestHandleFactoryD1");
TensorHandleFactoryRegistry registry;
backendA.RegisterTensorHandleFactories(registry);
@@ -351,7 +353,7 @@ BOOST_AUTO_TEST_CASE(TensorHandleSelectionStrategy)
OutputSlot& softmaxLayer4Out = softmaxLayer4->GetOutputSlot(0);
// Check that the correct factory was selected
- BOOST_TEST(inputLayerOut.GetTensorHandleFactoryId() == "TestHandleFactoryA1");
+ BOOST_TEST(inputLayerOut.GetTensorHandleFactoryId() == "TestHandleFactoryD1");
BOOST_TEST(softmaxLayer1Out.GetTensorHandleFactoryId() == "TestHandleFactoryB1");
BOOST_TEST(softmaxLayer2Out.GetTensorHandleFactoryId() == "TestHandleFactoryB1");
BOOST_TEST(softmaxLayer3Out.GetTensorHandleFactoryId() == "TestHandleFactoryC1");