diff options
author | Narumol Prangnawarat <narumol.prangnawarat@arm.com> | 2021-05-07 17:52:36 +0100 |
---|---|---|
committer | Narumol Prangnawarat <narumol.prangnawarat@arm.com> | 2021-05-08 20:15:32 +0100 |
commit | e5f0b2409c2e557a5a78e2f4659d203154289b23 (patch) | |
tree | 0e32680ed15ed5157c78d5deeabda2c0ceeeb4a3 /src/armnn | |
parent | ae12306486efc55293a40048618abe5e8b19151b (diff) | |
download | armnn-e5f0b2409c2e557a5a78e2f4659d203154289b23.tar.gz |
IVGCVSW-5818 Enable import on GPU
Signed-off-by: Narumol Prangnawarat <narumol.prangnawarat@arm.com>
Change-Id: I4e4eb107aa2bfa09625840d738001f33152e6792
Diffstat (limited to 'src/armnn')
-rw-r--r-- | src/armnn/Layer.cpp | 12 | ||||
-rw-r--r-- | src/armnn/Layer.hpp | 3 | ||||
-rw-r--r-- | src/armnn/LoadedNetwork.cpp | 31 | ||||
-rw-r--r-- | src/armnn/Network.cpp | 75 | ||||
-rw-r--r-- | src/armnn/layers/ConcatLayer.cpp | 13 | ||||
-rw-r--r-- | src/armnn/layers/ConcatLayer.hpp | 3 | ||||
-rw-r--r-- | src/armnn/layers/OutputLayer.hpp | 5 | ||||
-rw-r--r-- | src/armnn/layers/SplitterLayer.cpp | 13 | ||||
-rw-r--r-- | src/armnn/layers/SplitterLayer.hpp | 3 | ||||
-rw-r--r-- | src/armnn/test/TensorHandleStrategyTest.cpp | 8 |
10 files changed, 94 insertions, 72 deletions
diff --git a/src/armnn/Layer.cpp b/src/armnn/Layer.cpp index e0d988d8ea..7761063650 100644 --- a/src/armnn/Layer.cpp +++ b/src/armnn/Layer.cpp @@ -249,8 +249,7 @@ void Layer::SetAdditionalInfo(QueueDescriptor& descriptor) const void Layer::CreateTensorHandles(const TensorHandleFactoryRegistry& registry, const IWorkloadFactory& workloadFactory, - const bool IsMemoryManaged, - MemorySource memSource) + const bool IsMemoryManaged) { for (unsigned int idx=0; idx < GetNumOutputSlots(); idx++) { @@ -266,14 +265,7 @@ void Layer::CreateTensorHandles(const TensorHandleFactoryRegistry& registry, else { ITensorHandleFactory* handleFactory; - if (memSource == MemorySource::Undefined ) - { - handleFactory = registry.GetFactory(factoryId); - } - else - { - handleFactory = registry.GetFactory(factoryId, memSource); - } + handleFactory = registry.GetFactory(factoryId); ARMNN_ASSERT(handleFactory); handler.CreateTensorHandles(*handleFactory, IsMemoryManaged); } diff --git a/src/armnn/Layer.hpp b/src/armnn/Layer.hpp index 76f9b41f4c..0e0883c1cd 100644 --- a/src/armnn/Layer.hpp +++ b/src/armnn/Layer.hpp @@ -275,8 +275,7 @@ public: virtual void CreateTensorHandles(const TensorHandleFactoryRegistry& registry, const IWorkloadFactory& factory, - const bool IsMemoryManaged = true, - MemorySource memSource = MemorySource::Undefined); + const bool IsMemoryManaged = true); /// Creates a dynamically-allocated copy of this layer. /// @param graph - The Graph into which this Layer is being cloned. diff --git a/src/armnn/LoadedNetwork.cpp b/src/armnn/LoadedNetwork.cpp index 67de00f0f3..53a9e18863 100644 --- a/src/armnn/LoadedNetwork.cpp +++ b/src/armnn/LoadedNetwork.cpp @@ -150,7 +150,9 @@ LoadedNetwork::LoadedNetwork(std::unique_ptr<IOptimizedNetwork> net, if (backend->SupportsTensorAllocatorAPI()) { auto workloadFactory = backend->CreateWorkloadFactory( - m_TensorHandleFactoryRegistry, m_OptimizedNetwork->pOptimizedNetworkImpl->GetModelOptions()); + m_TensorHandleFactoryRegistry, m_OptimizedNetwork->pOptimizedNetworkImpl->GetModelOptions(), + static_cast<MemorySourceFlags>(m_NetworkProperties.m_InputSource), + static_cast<MemorySourceFlags>(m_NetworkProperties.m_OutputSource)); m_WorkloadFactories.emplace( std::make_pair(backendId, std::make_pair(std::move(workloadFactory), nullptr))); } @@ -188,8 +190,7 @@ LoadedNetwork::LoadedNetwork(std::unique_ptr<IOptimizedNetwork> net, // to false when creating TensorHandles layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory, - !m_NetworkProperties.m_ImportEnabled, - m_NetworkProperties.m_InputSource); + !m_NetworkProperties.m_ImportEnabled); break; } default: @@ -202,8 +203,7 @@ LoadedNetwork::LoadedNetwork(std::unique_ptr<IOptimizedNetwork> net, { layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory, - !m_NetworkProperties.m_ExportEnabled, - m_NetworkProperties.m_OutputSource); + !m_NetworkProperties.m_ExportEnabled); } else { @@ -643,12 +643,12 @@ void LoadedNetwork::EnqueueInput(const BindableLayer& layer, ITensorHandle* tens bool needMemCopy = true; if (m_NetworkProperties.m_ImportEnabled) // Try import the input tensor { - if(CheckFlag(importFlags, MemorySource::Malloc) ) + if(CheckFlag(importFlags, m_NetworkProperties.m_InputSource)) { needMemCopy = false; // This assumes a CPU Tensor handle void* mem = tensorHandle->Map(false); - if (outputTensorHandle->Import(mem, MemorySource::Malloc)) + if (outputTensorHandle->Import(mem, m_NetworkProperties.m_InputSource)) { tensorHandle->Unmap(); return; // No need for a workload since the import has been done. @@ -718,11 +718,11 @@ void LoadedNetwork::EnqueueOutput(const BindableLayer& layer, ITensorHandle* ten if(layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOwningLayer().GetType() != LayerType::Input) { MemorySourceFlags importFlags = inputTensorHandle->GetImportFlags(); - if (CheckFlag(importFlags, MemorySource::Malloc)) + if (CheckFlag(importFlags, m_NetworkProperties.m_OutputSource)) { needMemCopy = false; void *mem = tensorHandle->Map(false); - bool importOk = inputTensorHandle->Import(mem, MemorySource::Malloc); + bool importOk = inputTensorHandle->Import(mem, m_NetworkProperties.m_OutputSource); tensorHandle->Unmap(); if (importOk) @@ -1013,7 +1013,7 @@ void LoadedNetwork::EnqueueInput(const BindableLayer& layer, MemorySourceFlags importFlags = descriptor.m_Outputs[0]->GetImportFlags(); if (m_NetworkProperties.m_ImportEnabled) // Try import the input tensor { - if (CheckFlag(importFlags, MemorySource::Malloc) ) + if (CheckFlag(importFlags, m_NetworkProperties.m_InputSource) ) { // This assumes a CPU Tensor handle std::unique_ptr<ITensorHandle> tensorHandle = @@ -1021,7 +1021,7 @@ void LoadedNetwork::EnqueueInput(const BindableLayer& layer, inputTensor.GetMemoryArea()); void* mem = tensorHandle->Map(false); - if (descriptor.m_Outputs[0]->Import(mem, MemorySource::Malloc)) + if (descriptor.m_Outputs[0]->Import(mem, m_NetworkProperties.m_InputSource)) { tensorHandle->Unmap(); return; @@ -1078,14 +1078,14 @@ void LoadedNetwork::EnqueueOutput(const BindableLayer& layer, const Tensor& outp if (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetOwningLayer().GetType() != LayerType::Input) { MemorySourceFlags importFlags = inputTensorHandle->GetImportFlags(); - if (CheckFlag(importFlags, MemorySource::Malloc)) + if (CheckFlag(importFlags, m_NetworkProperties.m_OutputSource)) { std::unique_ptr<ITensorHandle> tensorHandle = std::make_unique<PassthroughTensorHandle>(outputTensor.GetInfo(), outputTensor.GetMemoryArea()); void* mem = tensorHandle->Map(false); - bool importOk = inputTensorHandle->Import(mem, MemorySource::Malloc); + bool importOk = inputTensorHandle->Import(mem, m_NetworkProperties.m_OutputSource); tensorHandle->Unmap(); if (importOk) @@ -1270,7 +1270,10 @@ std::unique_ptr<IWorkingMemHandle> LoadedNetwork::CreateWorkingMemHandle(Network { if (backend.second->SupportsTensorAllocatorAPI()) { - backend.second->RegisterTensorHandleFactories(tensorHandleFactoryRegistry); + backend.second->RegisterTensorHandleFactories( + tensorHandleFactoryRegistry, + static_cast<MemorySourceFlags>(m_NetworkProperties.m_InputSource), + static_cast<MemorySourceFlags>(m_NetworkProperties.m_OutputSource)); memoryManagers.emplace_back(tensorHandleFactoryRegistry.GetMemoryManagers().back()); } else diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp index b79576c87e..f097e677d7 100644 --- a/src/armnn/Network.cpp +++ b/src/armnn/Network.cpp @@ -1165,7 +1165,8 @@ bool RequiresCopy(ITensorHandleFactory::FactoryId src, // Find the handle factory for the input layer which results in fewest required copies. ITensorHandleFactory::FactoryId CalculateSlotOptionForInput(BackendsMap& backends, OutputSlot& slot, - TensorHandleFactoryRegistry& registry) + TensorHandleFactoryRegistry& registry, + bool importEnabled) { Layer& layer = slot.GetOwningLayer(); ARMNN_ASSERT(layer.GetType() == LayerType::Input); @@ -1191,6 +1192,7 @@ ITensorHandleFactory::FactoryId CalculateSlotOptionForInput(BackendsMap& backend for (auto&& connection : slot.GetConnections()) { + const Layer& connectedLayer = connection->GetOwningLayer(); auto toBackend = backends.find(connectedLayer.GetBackendId()); @@ -1208,11 +1210,12 @@ ITensorHandleFactory::FactoryId CalculateSlotOptionForInput(BackendsMap& backend // Input layers use the mem copy workload or import, so the selected factory must // support either the map/unmap API or Import API ITensorHandleFactory* factory = registry.GetFactory(dst); - if (!factory->SupportsMapUnmap() && - !CheckFlag(factory->GetImportFlags(), MemorySource::Malloc)) // Just support cpu mem imports for now + if (importEnabled && factory->GetImportFlags() == 0) + { + continue; + } + else if (!importEnabled && !factory->SupportsMapUnmap()) { - // The current tensor handle factory does not support the map/unmap or import - // strategy, move to the next one continue; } @@ -1257,7 +1260,8 @@ ITensorHandleFactory::FactoryId CalculateSlotOptionForOutput(BackendsMap& backen // when considering all connections. ITensorHandleFactory::FactoryId CalculateSlotOption(BackendsMap& backends, OutputSlot& outputSlot, - TensorHandleFactoryRegistry& registry) + TensorHandleFactoryRegistry& registry, + bool importEnabled) { // First ensure the from backends can support the TensorHandeAPI Layer& layer = outputSlot.GetOwningLayer(); @@ -1268,14 +1272,13 @@ ITensorHandleFactory::FactoryId CalculateSlotOption(BackendsMap& backends, return ITensorHandleFactory::LegacyFactoryId; } - // Connections to Output Layers requires support for map/unmap on the TensorHandle. - bool requiresMapUnmap = false; + bool outputConnection = false; for (auto&& connection : outputSlot.GetConnections()) { const Layer& connectedLayer = connection->GetOwningLayer(); if (connectedLayer.GetType() == LayerType::Output) { - requiresMapUnmap = true; + outputConnection = true; } } @@ -1286,8 +1289,48 @@ ITensorHandleFactory::FactoryId CalculateSlotOption(BackendsMap& backends, std::map<ITensorHandleFactory::FactoryId, int> factoryScores; for (auto&& pref : srcPrefs) { - if (requiresMapUnmap) // Only consider factories that support map/unmap if required + if (importEnabled) + { + ITensorHandleFactory* factory = registry.GetFactory(pref); + if (outputConnection) + { + // Check if this is fallback case + bool fallbackConnection = false; + for (auto&& inputSlot : layer.GetInputSlots()) + { + if (inputSlot.GetConnectedOutputSlot()->GetOwningLayer().GetBackendId() != layer.GetBackendId()) + { + fallbackConnection = true; + } + } + if (fallbackConnection) + { + auto factoryCap = factory->GetCapabilities(&layer, &layer, CapabilityClass::FallbackImportDisabled); + // Cannot use factory import if fallback import is not supported. + if (!factoryCap.empty()) + { + continue; + } + } + else if (factory->GetExportFlags() == 0) + { + continue; + } + } + if (!outputConnection) + { + auto factoryCap = factory->GetCapabilities(&layer, &layer, CapabilityClass::FallbackImportDisabled); + // Cannot use factory import if fallback import is not supported. + if (!factoryCap.empty()) + { + continue; + } + } + + } + else { + // Only consider factories that support map/unmap ITensorHandleFactory* factory = registry.GetFactory(pref); if (!factory->SupportsMapUnmap()) { @@ -1296,6 +1339,7 @@ ITensorHandleFactory::FactoryId CalculateSlotOption(BackendsMap& backends, } } + auto it = factoryScores.find(pref); if (it == factoryScores.end()) { @@ -1417,15 +1461,18 @@ EdgeStrategy CalculateEdgeStrategy(BackendsMap& backends, if (!dstFactory) { continue; } - if ((dstFactory->GetImportFlags() & srcFactory->GetExportFlags()) != 0) { auto srcCapability = srcFactory->GetCapabilities(&layer, &layer, CapabilityClass::PaddingRequired); auto dstCapability = dstFactory->GetCapabilities(&connectedLayer, &connectedLayer, CapabilityClass::PaddingRequired); + auto srcFallback = srcFactory->GetCapabilities(&layer, &layer, CapabilityClass::FallbackImportDisabled); + auto dstFallback = dstFactory->GetCapabilities(&connectedLayer, + &connectedLayer, + CapabilityClass::FallbackImportDisabled); // Do not require memory copy if the source and destination do not require padding. - if (srcCapability.empty() && dstCapability.empty()) + if (srcCapability.empty() && dstCapability.empty() && srcFallback.empty() && dstFallback.empty()) { return EdgeStrategy::ExportToTarget; } @@ -1477,13 +1524,13 @@ OptimizationResult SelectTensorHandleStrategy(Graph& optGraph, switch(layer->GetType()) { case LayerType::Input: - slotOption = CalculateSlotOptionForInput(backends, outputSlot, registry); + slotOption = CalculateSlotOptionForInput(backends, outputSlot, registry, importEnabled); break; case LayerType::Output: slotOption = CalculateSlotOptionForOutput(backends, outputSlot, registry); break; default: - slotOption = CalculateSlotOption(backends, outputSlot, registry); + slotOption = CalculateSlotOption(backends, outputSlot, registry, importEnabled); break; } outputSlot.SetTensorHandleFactory(slotOption); diff --git a/src/armnn/layers/ConcatLayer.cpp b/src/armnn/layers/ConcatLayer.cpp index 3a20e1b3f6..238fdb66d9 100644 --- a/src/armnn/layers/ConcatLayer.cpp +++ b/src/armnn/layers/ConcatLayer.cpp @@ -179,8 +179,7 @@ void ConcatLayer::CreateTensors(const TensorHandleFactoryRegistry& registry, void ConcatLayer::CreateTensorHandles(const TensorHandleFactoryRegistry& registry, const IWorkloadFactory& workloadFactory, - const bool isMemoryManaged, - MemorySource memSource) + const bool isMemoryManaged) { OutputSlot& slot = GetOutputSlot(0); ITensorHandleFactory::FactoryId factoryId = slot.GetTensorHandleFactoryId(); @@ -191,15 +190,7 @@ void ConcatLayer::CreateTensorHandles(const TensorHandleFactoryRegistry& registr } else { - ITensorHandleFactory* handleFactory; - if (memSource == MemorySource::Undefined) - { - handleFactory = registry.GetFactory(factoryId); - } - else - { - handleFactory = registry.GetFactory(factoryId, memSource); - } + ITensorHandleFactory* handleFactory = registry.GetFactory(factoryId); ARMNN_ASSERT(handleFactory); CreateTensors(registry, *handleFactory, isMemoryManaged); } diff --git a/src/armnn/layers/ConcatLayer.hpp b/src/armnn/layers/ConcatLayer.hpp index 6a43318382..4315d66436 100644 --- a/src/armnn/layers/ConcatLayer.hpp +++ b/src/armnn/layers/ConcatLayer.hpp @@ -27,8 +27,7 @@ public: /// @param [in] MemorySource Determine the source of memory e.g Malloc virtual void CreateTensorHandles(const TensorHandleFactoryRegistry& registry, const IWorkloadFactory& factory, - const bool IsMemoryManaged = true, - MemorySource memSource = MemorySource::Undefined) override; + const bool IsMemoryManaged = true) override; /// Creates a dynamically-allocated copy of this layer. /// @param [in] graph The graph into which this layer is being cloned. diff --git a/src/armnn/layers/OutputLayer.hpp b/src/armnn/layers/OutputLayer.hpp index fc6a8aa6b2..408a28a6f3 100644 --- a/src/armnn/layers/OutputLayer.hpp +++ b/src/armnn/layers/OutputLayer.hpp @@ -26,10 +26,9 @@ public: /// @param [in] IsMemoryManaged Determine whether or not to assign a memory manager during creation virtual void CreateTensorHandles(const TensorHandleFactoryRegistry& registry, const IWorkloadFactory& factory, - const bool isMemoryManaged = true, - MemorySource memSource = MemorySource::Undefined) override + const bool isMemoryManaged = true) override { - IgnoreUnused(registry, factory, isMemoryManaged, memSource); + IgnoreUnused(registry, factory, isMemoryManaged); } /// Creates a dynamically-allocated copy of this layer. diff --git a/src/armnn/layers/SplitterLayer.cpp b/src/armnn/layers/SplitterLayer.cpp index adef9aa1a2..5e6622e13a 100644 --- a/src/armnn/layers/SplitterLayer.cpp +++ b/src/armnn/layers/SplitterLayer.cpp @@ -177,8 +177,7 @@ void SplitterLayer::CreateTensors(const TensorHandleFactoryRegistry& registry, void SplitterLayer::CreateTensorHandles(const TensorHandleFactoryRegistry& registry, const IWorkloadFactory& workloadFactory, - const bool isMemoryManaged, - MemorySource memSource) + const bool isMemoryManaged) { OutputSlot& slot = GetOutputSlot(0); ITensorHandleFactory::FactoryId factoryId = slot.GetTensorHandleFactoryId(); @@ -189,15 +188,7 @@ void SplitterLayer::CreateTensorHandles(const TensorHandleFactoryRegistry& regis } else { - ITensorHandleFactory* handleFactory; - if (memSource == MemorySource::Undefined) - { - handleFactory = registry.GetFactory(factoryId); - } - else - { - handleFactory = registry.GetFactory(factoryId, memSource); - } + ITensorHandleFactory* handleFactory = registry.GetFactory(factoryId); ARMNN_ASSERT(handleFactory); CreateTensors(registry, *handleFactory, isMemoryManaged); } diff --git a/src/armnn/layers/SplitterLayer.hpp b/src/armnn/layers/SplitterLayer.hpp index 075b136da9..f90696b1ad 100644 --- a/src/armnn/layers/SplitterLayer.hpp +++ b/src/armnn/layers/SplitterLayer.hpp @@ -26,8 +26,7 @@ public: /// @param [in] IsMemoryManaged Determine whether or not to assign a memory manager during creation virtual void CreateTensorHandles(const TensorHandleFactoryRegistry& registry, const IWorkloadFactory& factory, - const bool IsMemoryManaged = true, - MemorySource memSource = MemorySource::Undefined) override; + const bool IsMemoryManaged = true) override; /// Creates a dynamically-allocated copy of this layer. /// @param [in] graph The graph into which this layer is being cloned. diff --git a/src/armnn/test/TensorHandleStrategyTest.cpp b/src/armnn/test/TensorHandleStrategyTest.cpp index c7aa30f701..47d0666414 100644 --- a/src/armnn/test/TensorHandleStrategyTest.cpp +++ b/src/armnn/test/TensorHandleStrategyTest.cpp @@ -139,7 +139,8 @@ public: { "TestHandleFactoryA1", "TestHandleFactoryA2", - "TestHandleFactoryB1" + "TestHandleFactoryB1", + "TestHandleFactoryD1" }; } @@ -252,7 +253,7 @@ public: std::vector<ITensorHandleFactory::FactoryId> GetHandleFactoryPreferences() const override { return std::vector<ITensorHandleFactory::FactoryId>{ - "TestHandleFactoryD1" + "TestHandleFactoryD1", }; } @@ -279,6 +280,7 @@ BOOST_AUTO_TEST_CASE(RegisterFactories) BOOST_TEST(backendA.GetHandleFactoryPreferences()[0] == "TestHandleFactoryA1"); BOOST_TEST(backendA.GetHandleFactoryPreferences()[1] == "TestHandleFactoryA2"); BOOST_TEST(backendA.GetHandleFactoryPreferences()[2] == "TestHandleFactoryB1"); + BOOST_TEST(backendA.GetHandleFactoryPreferences()[3] == "TestHandleFactoryD1"); TensorHandleFactoryRegistry registry; backendA.RegisterTensorHandleFactories(registry); @@ -351,7 +353,7 @@ BOOST_AUTO_TEST_CASE(TensorHandleSelectionStrategy) OutputSlot& softmaxLayer4Out = softmaxLayer4->GetOutputSlot(0); // Check that the correct factory was selected - BOOST_TEST(inputLayerOut.GetTensorHandleFactoryId() == "TestHandleFactoryA1"); + BOOST_TEST(inputLayerOut.GetTensorHandleFactoryId() == "TestHandleFactoryD1"); BOOST_TEST(softmaxLayer1Out.GetTensorHandleFactoryId() == "TestHandleFactoryB1"); BOOST_TEST(softmaxLayer2Out.GetTensorHandleFactoryId() == "TestHandleFactoryB1"); BOOST_TEST(softmaxLayer3Out.GetTensorHandleFactoryId() == "TestHandleFactoryC1"); |