diff options
author | Derek Lamberti <derek.lamberti@arm.com> | 2019-08-01 15:56:25 +0100 |
---|---|---|
committer | Áron Virginás-Tar <aron.virginas-tar@arm.com> | 2019-08-05 13:51:42 +0000 |
commit | f674aa0fd2809126debdaaeb8067067790d86907 (patch) | |
tree | d86d0261c7a25149217918986043c76d0823ee44 /src/armnn/LoadedNetwork.cpp | |
parent | 737d9ff58b348b11234b6c2363390607d576177d (diff) | |
download | armnn-f674aa0fd2809126debdaaeb8067067790d86907.tar.gz |
IVGCVSW-3277 Mem export/import support for Tensors
* Rename MemoryStrategy to EdgeStrategy
* Add MemImportLayer
* Import memory rather than copy when possible
Change-Id: I1d3a9414f2cbe517dc2aae9bbd4fdd92712b38ef
Signed-off-by: Derek Lamberti <derek.lamberti@arm.com>
Diffstat (limited to 'src/armnn/LoadedNetwork.cpp')
-rw-r--r-- | src/armnn/LoadedNetwork.cpp | 54 |
1 file changed, 50 insertions, 4 deletions
diff --git a/src/armnn/LoadedNetwork.cpp b/src/armnn/LoadedNetwork.cpp index 7873e48780..a81528aa65 100644 --- a/src/armnn/LoadedNetwork.cpp +++ b/src/armnn/LoadedNetwork.cpp @@ -14,6 +14,8 @@ #include <backendsCommon/CpuTensorHandle.hpp> #include <backendsCommon/BackendRegistry.hpp> #include <backendsCommon/IMemoryManager.hpp> +#include <backendsCommon/MemCopyWorkload.hpp> +#include <backendsCommon/MemSyncWorkload.hpp> #include <boost/polymorphic_cast.hpp> #include <boost/assert.hpp> @@ -389,8 +391,22 @@ void LoadedNetwork::EnqueueInput(const BindableLayer& layer, ITensorHandle* tens inputQueueDescriptor.m_Outputs.push_back(outputTensorHandle); info.m_OutputTensorInfos.push_back(outputTensorInfo); - const IWorkloadFactory& workloadFactory = GetWorkloadFactory(layer); - auto inputWorkload = workloadFactory.CreateInput(inputQueueDescriptor, info); + MemorySourceFlags importFlags = outputTensorHandle->GetImportFlags(); + if (CheckFlag(importFlags, MemorySource::Malloc)) // Try import the input tensor + { + // This assumes a CPU Tensor handle + void* mem = tensorHandle->Map(false); + if (outputTensorHandle->Import(mem, MemorySource::Malloc)) + { + tensorHandle->Unmap(); + return; // No need for a workload since the import has been done. + } + tensorHandle->Unmap(); + } + + // Create a mem copy workload for input since we could not import + auto inputWorkload = std::make_unique<CopyMemGenericWorkload>(inputQueueDescriptor, info); + BOOST_ASSERT_MSG(inputWorkload, "No input workload created"); m_InputQueue.push_back(move(inputWorkload)); } @@ -422,11 +438,41 @@ void LoadedNetwork::EnqueueOutput(const BindableLayer& layer, ITensorHandle* ten ITensorHandle* inputTensorHandle = outputHandler.GetData(); BOOST_ASSERT_MSG(inputTensorHandle != nullptr, "Data should have been allocated."); + // Try import the output tensor. 
+ // Note: We can only import the output pointer if all of the following hold true: + // a) The imported pointer is aligned sufficiently + // b) The tensor has zero padding + // c) There is only one connection to the OutputSlot and it is to an OutputLayer. + // d) The output pointer is allocated via malloc. (Other types will be supported in a later release) + if (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetNumConnections() == 1) + { + MemorySourceFlags importFlags = inputTensorHandle->GetImportFlags(); + if (CheckFlag(importFlags, MemorySource::Malloc)) + { + void* mem = tensorHandle->Map(false); + bool importOk = inputTensorHandle->Import(mem, MemorySource::Malloc); + tensorHandle->Unmap(); + + if (importOk) + { + // Insert synchronization workload + MemSyncQueueDescriptor syncDesc; + syncDesc.m_Inputs.push_back(inputTensorHandle); + info.m_InputTensorInfos.push_back(inputTensorInfo); + auto syncWorkload = std::make_unique<SyncMemGenericWorkload>(syncDesc, info); + BOOST_ASSERT_MSG(syncWorkload, "No sync workload created"); + m_OutputQueue.push_back(move(syncWorkload)); + + return; //No need to add the output workload below + } + } + } + + // If we got here then we couldn't import the memory, so add an output workload which performs a memcopy. outputQueueDescriptor.m_Inputs.push_back(inputTensorHandle); info.m_InputTensorInfos.push_back(inputTensorInfo); - const IWorkloadFactory& workloadFactory = GetWorkloadFactory(layer); - auto outputWorkload = workloadFactory.CreateOutput(outputQueueDescriptor, info); + auto outputWorkload = std::make_unique<CopyMemGenericWorkload>(outputQueueDescriptor, info); BOOST_ASSERT_MSG(outputWorkload, "No output workload created"); m_OutputQueue.push_back(move(outputWorkload)); } |