author    Derek Lamberti <derek.lamberti@arm.com>        2019-08-01 15:56:25 +0100
committer Áron Virginás-Tar <aron.virginas-tar@arm.com>  2019-08-05 13:51:42 +0000
commit    f674aa0fd2809126debdaaeb8067067790d86907 (patch)
tree      d86d0261c7a25149217918986043c76d0823ee44 /src/armnn
parent    737d9ff58b348b11234b6c2363390607d576177d (diff)
download  armnn-f674aa0fd2809126debdaaeb8067067790d86907.tar.gz
IVGCVSW-3277 Mem export/import support for Tensors
* Rename MemoryStrategy to EdgeStrategy
* Add MemImportLayer
* Import memory rather than copy when possible

Change-Id: I1d3a9414f2cbe517dc2aae9bbd4fdd92712b38ef
Signed-off-by: Derek Lamberti <derek.lamberti@arm.com>
Diffstat (limited to 'src/armnn')
-rw-r--r--  src/armnn/Graph.cpp                           85
-rw-r--r--  src/armnn/Graph.hpp                            4
-rw-r--r--  src/armnn/InternalTypes.hpp                    1
-rw-r--r--  src/armnn/Layer.cpp                           18
-rw-r--r--  src/armnn/Layer.hpp                            8
-rw-r--r--  src/armnn/LayerSupport.cpp                     9
-rw-r--r--  src/armnn/LayersFwd.hpp                        2
-rw-r--r--  src/armnn/LoadedNetwork.cpp                   54
-rw-r--r--  src/armnn/Network.cpp                         51
-rw-r--r--  src/armnn/layers/MemImportLayer.cpp           54
-rw-r--r--  src/armnn/layers/MemImportLayer.hpp           42
-rw-r--r--  src/armnn/test/GraphTests.cpp                 34
-rw-r--r--  src/armnn/test/TensorHandleStrategyTest.cpp  119
13 files changed, 376 insertions, 105 deletions
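
The central rename in this patch is MemoryStrategy to EdgeStrategy, with a new ExportToTarget value alongside the existing copy and direct options. As a minimal orientation sketch (an assumption about the defining header, which is not part of this diff), the strategy space looks like this:

// Illustrative sketch only; the real enum is declared in an Arm NN header
// outside this diff. Names match those used throughout the patch.
enum class EdgeStrategy
{
    Undefined,            // optimizer has not yet decided how this edge moves data
    DirectCompatibility,  // producer and consumer can share the same tensor handle
    CopyToTarget,         // insert a MemCopyLayer and copy across the edge
    ExportToTarget        // insert a MemImportLayer; consumer adopts the producer's buffer
};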
diff --git a/src/armnn/Graph.cpp b/src/armnn/Graph.cpp
index 9e00f5ec01..6212c49eba 100644
--- a/src/armnn/Graph.cpp
+++ b/src/armnn/Graph.cpp
@@ -255,26 +255,31 @@ const Graph& Graph::TopologicalSort() const
return *this;
}
-void Graph::AddCopyLayers(std::map<BackendId, std::unique_ptr<IBackendInternal>>& backends,
- TensorHandleFactoryRegistry& registry)
+void Graph::AddCompatibilityLayers(std::map<BackendId, std::unique_ptr<IBackendInternal>>& backends,
+ TensorHandleFactoryRegistry& registry)
{
- // Returns true if the given layer could potentially need an intermediate copy layer (depending on its
- // connections to other layers). At the time of writing, copy layers will be inserted in the following situations:
- // CPU -> CL (and viceversa)
- // CPU -> Neon (and viceversa)
- auto MayNeedCopyLayer = [](const Layer& layer)
+ // Returns true if the given layer could potentially need an intermediate copy/import layer (depending on its
+ // connections to other layers).
+ auto MayNeedCompatibilityLayer = [](const Layer& layer)
{
// All layers should have been associated with a valid compute device at this point.
BOOST_ASSERT(layer.GetBackendId() != Compute::Undefined);
- // Does not need another copy layer if a copy layer is already present.
- return layer.GetType() != LayerType::MemCopy;
+ // Does not need another compatibility layer if a copy or import layer is already present.
+ return layer.GetType() != LayerType::MemCopy &&
+ layer.GetType() != LayerType::MemImport;
};
- ForEachLayer([this, &backends, &registry, MayNeedCopyLayer](Layer* srcLayer)
+ auto IsCompatibilityStrategy = [](EdgeStrategy strategy)
+ {
+ return strategy == EdgeStrategy::CopyToTarget ||
+ strategy == EdgeStrategy::ExportToTarget;
+ };
+
+ ForEachLayer([this, &backends, &registry, MayNeedCompatibilityLayer, IsCompatibilityStrategy](Layer* srcLayer)
{
BOOST_ASSERT(srcLayer);
- if (!MayNeedCopyLayer(*srcLayer))
+ if (!MayNeedCompatibilityLayer(*srcLayer))
{
// The current layer does not need copy layers, move to the next one
return;
@@ -285,33 +290,43 @@ void Graph::AddCopyLayers(std::map<BackendId, std::unique_ptr<IBackendInternal>>
{
OutputSlot& srcOutputSlot = srcLayer->GetOutputSlot(srcOutputIndex);
const std::vector<InputSlot*> srcConnections = srcOutputSlot.GetConnections();
- const std::vector<MemoryStrategy> srcMemoryStrategies = srcOutputSlot.GetMemoryStrategies();
+ const std::vector<EdgeStrategy> srcEdgeStrategies = srcOutputSlot.GetEdgeStrategies();
for (unsigned int srcConnectionIndex = 0; srcConnectionIndex < srcConnections.size(); srcConnectionIndex++)
{
InputSlot* dstInputSlot = srcConnections[srcConnectionIndex];
BOOST_ASSERT(dstInputSlot);
- MemoryStrategy strategy = srcMemoryStrategies[srcConnectionIndex];
- BOOST_ASSERT_MSG(strategy != MemoryStrategy::Undefined,
+ EdgeStrategy strategy = srcEdgeStrategies[srcConnectionIndex];
+ BOOST_ASSERT_MSG(strategy != EdgeStrategy::Undefined,
"Undefined memory strategy found while adding copy layers for compatibility");
const Layer& dstLayer = dstInputSlot->GetOwningLayer();
- if (MayNeedCopyLayer(dstLayer) &&
- strategy == MemoryStrategy::CopyToTarget)
+ if (MayNeedCompatibilityLayer(dstLayer) &&
+ IsCompatibilityStrategy(strategy))
{
// A copy layer is needed in between the source and destination layers.
// Record the operation rather than attempting to modify the graph as we go.
// (invalidating iterators)
- const std::string copyLayerName = boost::str(boost::format("[ %1% (%2%) -> %3% (%4%) ]")
+ const std::string compLayerName = boost::str(boost::format("[ %1% (%2%) -> %3% (%4%) ]")
% srcLayer->GetName()
% srcOutputIndex
% dstLayer.GetName()
% dstInputSlot->GetSlotIndex());
- MemCopyLayer* const copyLayer = InsertNewLayer<MemCopyLayer>(*dstInputSlot, copyLayerName.c_str());
- copyLayer->SetBackendId(dstLayer.GetBackendId());
+ Layer* compLayer = nullptr;
+ if (strategy == EdgeStrategy::CopyToTarget)
+ {
+ compLayer = InsertNewLayer<MemCopyLayer>(*dstInputSlot, compLayerName.c_str());
+ }
+ else
+ {
+ BOOST_ASSERT_MSG(strategy == EdgeStrategy::ExportToTarget, "Invalid edge strategy found.");
+ compLayer = InsertNewLayer<MemImportLayer>(*dstInputSlot, compLayerName.c_str());
+ }
+
+ compLayer->SetBackendId(dstLayer.GetBackendId());
- OutputSlot& copyOutputSlot = copyLayer->GetOutputSlot(0);
+ OutputSlot& compOutputSlot = compLayer->GetOutputSlot(0);
auto backendIt = backends.find(dstLayer.GetBackendId());
if (backendIt != backends.end() &&
backendIt->second &&
@@ -325,34 +340,40 @@ void Graph::AddCopyLayers(std::map<BackendId, std::unique_ptr<IBackendInternal>>
for (auto preference : tensorHandleFactoryIds)
{
auto factory = registry.GetFactory(preference);
- if (factory && factory->SupportsMapUnmap())
+ if (factory)
{
- copyOutputSlot.SetTensorHandleFactory(preference);
- found = true;
- break;
+ auto srcPref = srcOutputSlot.GetTensorHandleFactoryId();
+ auto srcFactory = registry.GetFactory(srcPref);
+ bool canExportImport = (factory->GetImportFlags() & srcFactory->GetExportFlags()) != 0;
+ if (factory->SupportsMapUnmap() || canExportImport)
+ {
+ compOutputSlot.SetTensorHandleFactory(preference);
+ found = true;
+ break;
+ }
}
}
- BOOST_ASSERT_MSG(found, "Could not find a mappable TensorHandle for copy layer");
+ BOOST_ASSERT_MSG(found, "Could not find a valid TensorHandle for compatibility layer");
}
else
{
- copyOutputSlot.SetTensorHandleFactory(ITensorHandleFactory::LegacyFactoryId);
+ compOutputSlot.SetTensorHandleFactory(ITensorHandleFactory::LegacyFactoryId);
}
- // The output strategy of a copy layer is always DirectCompatibility.
- copyOutputSlot.SetMemoryStrategy(0, MemoryStrategy::DirectCompatibility);
+ // The output strategy of a compatibility layer is always DirectCompatibility.
+ compOutputSlot.SetEdgeStrategy(0, EdgeStrategy::DirectCompatibility);
// Recalculate the connection index on the previous layer as we have just inserted into it.
const std::vector<InputSlot*>& newSourceConnections = srcOutputSlot.GetConnections();
long newSrcConnectionIndex = std::distance(newSourceConnections.begin(),
std::find(newSourceConnections.begin(),
newSourceConnections.end(),
- &copyLayer->GetInputSlot(0)));
+ &compLayer->GetInputSlot(0)));
- // The input strategy of a copy layer is always DirectCompatibilty.
- srcOutputSlot.SetMemoryStrategy(boost::numeric_cast<unsigned int>(newSrcConnectionIndex),
- MemoryStrategy::DirectCompatibility);
+ // The input strategy of a compatibility layer is always DirectCompatibility.
+ srcOutputSlot.SetEdgeStrategy(boost::numeric_cast<unsigned int>(newSrcConnectionIndex),
+ EdgeStrategy::DirectCompatibility);
}
}
}
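
The factory-selection loop above now accepts a destination factory either when it supports map/unmap or when its import flags overlap the source factory's export flags. A self-contained sketch of that overlap test, assuming MemorySourceFlags is the plain bitmask the patch treats it as:

#include <cstdint>

using MemorySourceFlags = uint32_t; // assumption: a bitmask of acceptable memory sources

// Export/import is viable when at least one memory source is both exportable
// by the producer's factory and importable by the consumer's factory.
bool CanExportImport(MemorySourceFlags srcExportFlags, MemorySourceFlags dstImportFlags)
{
    return (srcExportFlags & dstImportFlags) != 0;
}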
diff --git a/src/armnn/Graph.hpp b/src/armnn/Graph.hpp
index f8113375c9..c65f12bbc3 100644
--- a/src/armnn/Graph.hpp
+++ b/src/armnn/Graph.hpp
@@ -191,8 +191,8 @@ public:
/// Modifies the graph in-place, removing edges connecting layers using different compute devices,
/// and relinking them via intermediary copy layers.
- void AddCopyLayers(std::map<BackendId, std::unique_ptr<class IBackendInternal>>& backends,
- TensorHandleFactoryRegistry& registry);
+ void AddCompatibilityLayers(std::map<BackendId, std::unique_ptr<class IBackendInternal>>& backends,
+ TensorHandleFactoryRegistry& registry);
/// Substitutes the given sub-graph with either a new layer or a new sub-graph.
/// In either case, the given layer or all the layers in the given sub-graph must belong to this graph.
diff --git a/src/armnn/InternalTypes.hpp b/src/armnn/InternalTypes.hpp
index b0fea7c8c2..7a0f9a1cb0 100644
--- a/src/armnn/InternalTypes.hpp
+++ b/src/armnn/InternalTypes.hpp
@@ -40,6 +40,7 @@ enum class LayerType
Maximum,
Mean,
MemCopy,
+ MemImport,
Merge,
Minimum,
Multiplication,
diff --git a/src/armnn/Layer.cpp b/src/armnn/Layer.cpp
index a287220702..528020bab5 100644
--- a/src/armnn/Layer.cpp
+++ b/src/armnn/Layer.cpp
@@ -31,7 +31,7 @@ void InputSlot::Insert(Layer& layer)
// Connects inserted layer to parent.
BOOST_ASSERT(layer.GetNumInputSlots() == 1);
int idx = prevSlot->Connect(layer.GetInputSlot(0));
- prevSlot->SetMemoryStrategy(boost::numeric_cast<unsigned int>(idx), MemoryStrategy::Undefined);
+ prevSlot->SetEdgeStrategy(boost::numeric_cast<unsigned int>(idx), EdgeStrategy::Undefined);
// Sets tensor info for inserted layer.
const TensorInfo& tensorInfo = prevSlot->GetTensorInfo();
@@ -40,7 +40,7 @@ void InputSlot::Insert(Layer& layer)
// Connects inserted layer to this.
layer.GetOutputSlot(0).Connect(*this);
- layer.GetOutputSlot(0).SetMemoryStrategy(0, MemoryStrategy::Undefined);
+ layer.GetOutputSlot(0).SetEdgeStrategy(0, EdgeStrategy::Undefined);
}
const InputSlot* OutputSlot::GetConnection(unsigned int index) const
@@ -80,7 +80,7 @@ int OutputSlot::Connect(InputSlot& destination)
{
destination.SetConnection(this);
m_Connections.push_back(&destination);
- m_MemoryStrategies.push_back(MemoryStrategy::Undefined);
+ m_EdgeStrategies.push_back(EdgeStrategy::Undefined);
return boost::numeric_cast<int>(m_Connections.size() - 1);
}
@@ -97,7 +97,7 @@ void OutputSlot::Disconnect(InputSlot& slot)
auto idx = std::distance(m_Connections.begin(), it);
m_Connections.erase(std::remove(m_Connections.begin(), m_Connections.end(), &slot), m_Connections.end());
- m_MemoryStrategies.erase(m_MemoryStrategies.begin() + idx);
+ m_EdgeStrategies.erase(m_EdgeStrategies.begin() + idx);
}
void OutputSlot::DisconnectAll()
@@ -113,7 +113,7 @@ void OutputSlot::MoveAllConnections(OutputSlot& destination)
{
while (GetNumConnections() > 0)
{
- BOOST_ASSERT_MSG(m_MemoryStrategies[0] == MemoryStrategy::Undefined,
+ BOOST_ASSERT_MSG(m_EdgeStrategies[0] == EdgeStrategy::Undefined,
"Cannot move connections once memory strategies have be established.");
InputSlot& connection = *GetConnection(0);
@@ -174,14 +174,14 @@ ITensorHandleFactory::FactoryId OutputSlot::GetTensorHandleFactoryId() const
return m_TensorHandleFactoryId;
}
-void OutputSlot::SetMemoryStrategy(unsigned int connectionIndex, MemoryStrategy strategy)
+void OutputSlot::SetEdgeStrategy(unsigned int connectionIndex, EdgeStrategy strategy)
{
- m_MemoryStrategies[connectionIndex] = strategy;
+ m_EdgeStrategies[connectionIndex] = strategy;
}
-MemoryStrategy OutputSlot::GetMemoryStrategyForConnection(unsigned int connectionIdx) const
+EdgeStrategy OutputSlot::GetEdgeStrategyForConnection(unsigned int connectionIdx) const
{
- return m_MemoryStrategies[connectionIdx];
+ return m_EdgeStrategies[connectionIdx];
}
namespace {
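
Throughout Layer.cpp the renamed m_EdgeStrategies vector stays in lockstep with m_Connections: Connect() pushes an Undefined entry and Disconnect() erases the entry at the matching index. A stand-alone model of that invariant, using stand-in types rather than the Arm NN classes:

#include <algorithm>
#include <cassert>
#include <vector>

enum class EdgeStrategy { Undefined, DirectCompatibility, CopyToTarget, ExportToTarget };

struct OutputSlotModel
{
    std::vector<int>          connections; // stand-in for the InputSlot* entries
    std::vector<EdgeStrategy> strategies;  // one entry per connection, as in the patch

    int Connect(int dst)
    {
        connections.push_back(dst);
        strategies.push_back(EdgeStrategy::Undefined);
        return static_cast<int>(connections.size()) - 1;
    }

    void Disconnect(int dst)
    {
        auto it = std::find(connections.begin(), connections.end(), dst);
        if (it == connections.end()) { return; }
        strategies.erase(strategies.begin() + (it - connections.begin()));
        connections.erase(it);
        assert(connections.size() == strategies.size()); // the invariant both methods preserve
    }
};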
diff --git a/src/armnn/Layer.hpp b/src/armnn/Layer.hpp
index b90d040475..5944ea83ed 100644
--- a/src/armnn/Layer.hpp
+++ b/src/armnn/Layer.hpp
@@ -123,7 +123,7 @@ public:
void Disconnect(InputSlot& slot);
const std::vector<InputSlot*>& GetConnections() const { return m_Connections; }
- const std::vector<MemoryStrategy>& GetMemoryStrategies() const { return m_MemoryStrategies; }
+ const std::vector<EdgeStrategy>& GetEdgeStrategies() const { return m_EdgeStrategies; }
bool ValidateTensorShape(const TensorShape& shape) const;
@@ -160,8 +160,8 @@ public:
void SetTensorHandleFactory(const ITensorHandleFactory::FactoryId& id);
ITensorHandleFactory::FactoryId GetTensorHandleFactoryId() const;
- void SetMemoryStrategy(unsigned int connectionIndex, MemoryStrategy strategy);
- MemoryStrategy GetMemoryStrategyForConnection(unsigned int connectionIdx) const;
+ void SetEdgeStrategy(unsigned int connectionIndex, EdgeStrategy strategy);
+ EdgeStrategy GetEdgeStrategyForConnection(unsigned int connectionIdx) const;
private:
void ValidateConnectionIndex(unsigned int index) const;
@@ -171,7 +171,7 @@ private:
std::vector<InputSlot*> m_Connections;
ITensorHandleFactory::FactoryId m_TensorHandleFactoryId;
- std::vector<MemoryStrategy> m_MemoryStrategies;
+ std::vector<EdgeStrategy> m_EdgeStrategies;
};
// InputSlot inlines that need OutputSlot declaration.
diff --git a/src/armnn/LayerSupport.cpp b/src/armnn/LayerSupport.cpp
index 047c80a8c4..5a756b9544 100644
--- a/src/armnn/LayerSupport.cpp
+++ b/src/armnn/LayerSupport.cpp
@@ -371,6 +371,15 @@ bool IsMemCopySupported(const BackendId &backend,
FORWARD_LAYER_SUPPORT_FUNC(backend, IsMemCopySupported, input, output);
}
+bool IsMemImportSupported(const BackendId &backend,
+ const TensorInfo &input,
+ const TensorInfo &output,
+ char *reasonIfUnsupported,
+ size_t reasonIfUnsupportedMaxLength)
+{
+ FORWARD_LAYER_SUPPORT_FUNC(backend, IsMemImportSupported, input, output);
+}
+
bool IsMergeSupported(const BackendId& backend,
const TensorInfo& input0,
const TensorInfo& input1,
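
A hedged usage sketch for the new query, assuming the declaration is exposed next to IsMemCopySupported in armnn/LayerSupport.hpp (the public header is not part of this diff); the backend id and tensor shape are arbitrary examples:

#include <armnn/BackendId.hpp>
#include <armnn/LayerSupport.hpp>
#include <armnn/Tensor.hpp>
#include <armnn/Types.hpp>

bool MemImportPossible()
{
    unsigned int dims[] = { 1, 3, 224, 224 };
    armnn::TensorInfo info(4, dims, armnn::DataType::Float32);

    char reason[1024] = { 0 };
    return armnn::IsMemImportSupported(armnn::BackendId("CpuAcc"), // example backend only
                                       info,   // input
                                       info,   // output
                                       reason,
                                       sizeof(reason));
}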
diff --git a/src/armnn/LayersFwd.hpp b/src/armnn/LayersFwd.hpp
index 2c8d5d2e07..cadcd49acb 100644
--- a/src/armnn/LayersFwd.hpp
+++ b/src/armnn/LayersFwd.hpp
@@ -32,6 +32,7 @@
#include "layers/MaximumLayer.hpp"
#include "layers/MeanLayer.hpp"
#include "layers/MemCopyLayer.hpp"
+#include "layers/MemImportLayer.hpp"
#include "layers/MergeLayer.hpp"
#include "layers/MinimumLayer.hpp"
#include "layers/MultiplicationLayer.hpp"
@@ -110,6 +111,7 @@ DECLARE_LAYER(Lstm)
DECLARE_LAYER(Maximum)
DECLARE_LAYER(Mean)
DECLARE_LAYER(MemCopy)
+DECLARE_LAYER(MemImport)
DECLARE_LAYER(Merge)
DECLARE_LAYER(Minimum)
DECLARE_LAYER(Multiplication)
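
DECLARE_LAYER is defined near the top of LayersFwd.hpp, outside this hunk; its effect is to map a LayerType enum value onto the concrete layer class at compile time. A hypothetical sketch of what the new entry expands to (an assumption, not a copy of the real macro):

// Hypothetical expansion sketch; the macro in LayersFwd.hpp may differ in
// detail, but the net effect is a compile-time LayerType -> class mapping.
template <LayerType Type>
struct LayerTypeOfImpl;

template <>
struct LayerTypeOfImpl<LayerType::MemImport>
{
    using Type = MemImportLayer;
};

template <LayerType Type>
using LayerTypeOf = typename LayerTypeOfImpl<Type>::Type;
// LayerTypeOf<LayerType::MemImport> is MemImportLayer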
diff --git a/src/armnn/LoadedNetwork.cpp b/src/armnn/LoadedNetwork.cpp
index 7873e48780..a81528aa65 100644
--- a/src/armnn/LoadedNetwork.cpp
+++ b/src/armnn/LoadedNetwork.cpp
@@ -14,6 +14,8 @@
#include <backendsCommon/CpuTensorHandle.hpp>
#include <backendsCommon/BackendRegistry.hpp>
#include <backendsCommon/IMemoryManager.hpp>
+#include <backendsCommon/MemCopyWorkload.hpp>
+#include <backendsCommon/MemSyncWorkload.hpp>
#include <boost/polymorphic_cast.hpp>
#include <boost/assert.hpp>
@@ -389,8 +391,22 @@ void LoadedNetwork::EnqueueInput(const BindableLayer& layer, ITensorHandle* tens
inputQueueDescriptor.m_Outputs.push_back(outputTensorHandle);
info.m_OutputTensorInfos.push_back(outputTensorInfo);
- const IWorkloadFactory& workloadFactory = GetWorkloadFactory(layer);
- auto inputWorkload = workloadFactory.CreateInput(inputQueueDescriptor, info);
+ MemorySourceFlags importFlags = outputTensorHandle->GetImportFlags();
+ if (CheckFlag(importFlags, MemorySource::Malloc)) // Try to import the input tensor
+ {
+ // This assumes a CPU Tensor handle
+ void* mem = tensorHandle->Map(false);
+ if (outputTensorHandle->Import(mem, MemorySource::Malloc))
+ {
+ tensorHandle->Unmap();
+ return; // No need for a workload since the import has been done.
+ }
+ tensorHandle->Unmap();
+ }
+
+ // Create a mem copy workload for input since we could not import
+ auto inputWorkload = std::make_unique<CopyMemGenericWorkload>(inputQueueDescriptor, info);
+
BOOST_ASSERT_MSG(inputWorkload, "No input workload created");
m_InputQueue.push_back(move(inputWorkload));
}
@@ -422,11 +438,41 @@ void LoadedNetwork::EnqueueOutput(const BindableLayer& layer, ITensorHandle* ten
ITensorHandle* inputTensorHandle = outputHandler.GetData();
BOOST_ASSERT_MSG(inputTensorHandle != nullptr, "Data should have been allocated.");
+ // Try to import the output tensor.
+ // Note: We can only import the output pointer if all of the following hold true:
+ // a) The imported pointer is aligned sufficiently
+ // b) The tensor has zero padding
+ // c) There is only one connection to the OutputSlot and it is to an OutputLayer.
+ // d) The output pointer is allocated via malloc. (Other types will be supported in a later release)
+ if (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetNumConnections() == 1)
+ {
+ MemorySourceFlags importFlags = inputTensorHandle->GetImportFlags();
+ if (CheckFlag(importFlags, MemorySource::Malloc))
+ {
+ void* mem = tensorHandle->Map(false);
+ bool importOk = inputTensorHandle->Import(mem, MemorySource::Malloc);
+ tensorHandle->Unmap();
+
+ if (importOk)
+ {
+ // Insert synchronization workload
+ MemSyncQueueDescriptor syncDesc;
+ syncDesc.m_Inputs.push_back(inputTensorHandle);
+ info.m_InputTensorInfos.push_back(inputTensorInfo);
+ auto syncWorkload = std::make_unique<SyncMemGenericWorkload>(syncDesc, info);
+ BOOST_ASSERT_MSG(syncWorkload, "No sync workload created");
+ m_OutputQueue.push_back(move(syncWorkload));
+
+ return; //No need to add the output workload below
+ }
+ }
+ }
+
+ // If we got here then we couldn't import the memory, so add an output workload which performs a memcopy.
outputQueueDescriptor.m_Inputs.push_back(inputTensorHandle);
info.m_InputTensorInfos.push_back(inputTensorInfo);
- const IWorkloadFactory& workloadFactory = GetWorkloadFactory(layer);
- auto outputWorkload = workloadFactory.CreateOutput(outputQueueDescriptor, info);
+ auto outputWorkload = std::make_unique<CopyMemGenericWorkload>(outputQueueDescriptor, info);
BOOST_ASSERT_MSG(outputWorkload, "No output workload created");
m_OutputQueue.push_back(move(outputWorkload));
}
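
Both queue paths above follow the same pattern: map the user's CPU tensor, ask the backend-side handle to adopt the pointer, and fall back to a copy workload only if the import is refused. A minimal model of that pattern with stand-in types (the real ITensorHandle::Import also takes a MemorySource argument):

// Stand-in interface for illustration; not the Arm NN ITensorHandle class.
struct HandleModel
{
    virtual void* Map(bool blocking) = 0;
    virtual void  Unmap() = 0;
    virtual bool  Import(void* memory) = 0; // false: handle cannot adopt this pointer
    virtual ~HandleModel() = default;
};

// Returns true on zero-copy success; on false the caller must enqueue a
// CopyMemGenericWorkload instead, exactly as EnqueueInput/EnqueueOutput do.
bool TryZeroCopy(HandleModel& userTensor, HandleModel& backendTensor)
{
    void* mem = userTensor.Map(/*blocking=*/false);
    bool imported = backendTensor.Import(mem);
    userTensor.Unmap();
    return imported;
}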
diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp
index 2195c71735..b30cd9f3c2 100644
--- a/src/armnn/Network.cpp
+++ b/src/armnn/Network.cpp
@@ -441,7 +441,7 @@ bool RequiresCopy(ITensorHandleFactory::FactoryId src,
ITensorHandleFactory* srcFactory = registry.GetFactory(src);
ITensorHandleFactory* dstFactory = registry.GetFactory(dst);
- if (srcFactory->SupportsExport() && dstFactory->SupportsImport())
+ if ((srcFactory->GetExportFlags() & dstFactory->GetImportFlags()) != 0)
{
return false;
}
@@ -493,11 +493,14 @@ ITensorHandleFactory::FactoryId CalculateSlotOptionForInput(BackendsMap& backend
auto dstPrefs = toBackend->second.get()->GetHandleFactoryPreferences();
for (auto&& dst : dstPrefs)
{
- // Input layers use the mem copy workload, so the selected factory must support map/unmap API
+ // Input layers use the mem copy workload or import, so the selected factory must
+ // support either the map/unmap API or Import API
ITensorHandleFactory* factory = registry.GetFactory(dst);
- if (!factory->SupportsMapUnmap())
+ if (!factory->SupportsMapUnmap() &&
+ !CheckFlag(factory->GetImportFlags(), MemorySource::Malloc)) // Just support cpu mem imports for now
{
- // The current tensor handle factory does not support the map/unmap strategy, move to the next one
+ // The current tensor handle factory does not support the map/unmap or import
+ // strategy, move to the next one
continue;
}
@@ -648,11 +651,11 @@ ITensorHandleFactory::FactoryId CalculateSlotOption(BackendsMap& backends,
return ITensorHandleFactory::LegacyFactoryId;
}
-MemoryStrategy CalculateStrategy(BackendsMap& backends,
- ITensorHandleFactory::FactoryId srcFactoryId,
- const Layer& layer,
- const Layer& connectedLayer,
- TensorHandleFactoryRegistry& registry)
+EdgeStrategy CalculateEdgeStrategy(BackendsMap& backends,
+ ITensorHandleFactory::FactoryId srcFactoryId,
+ const Layer& layer,
+ const Layer& connectedLayer,
+ TensorHandleFactoryRegistry& registry)
{
auto toBackend = backends.find(connectedLayer.GetBackendId());
BOOST_ASSERT_MSG(toBackend != backends.end(), "Backend id not found for the connected layer");
@@ -664,19 +667,19 @@ MemoryStrategy CalculateStrategy(BackendsMap& backends,
{
if (layer.GetBackendId() != connectedLayer.GetBackendId())
{
- return MemoryStrategy::CopyToTarget;
+ return EdgeStrategy::CopyToTarget;
}
else
{
- return MemoryStrategy::DirectCompatibility;
+ return EdgeStrategy::DirectCompatibility;
}
}
// TensorHandleFactory API present, so perform more sophisticated strategies.
- // Dst Output layers don't require copy because they use map/unmap
+ // Dst Output layers don't require copy because they use import or map/unmap
if (connectedLayer.GetType() == LayerType::Output)
{
- return MemoryStrategy::DirectCompatibility;
+ return EdgeStrategy::DirectCompatibility;
}
// Search for direct match in prefs
@@ -684,20 +687,20 @@ MemoryStrategy CalculateStrategy(BackendsMap& backends,
{
if (pref == srcFactoryId)
{
- return MemoryStrategy::DirectCompatibility;
+ return EdgeStrategy::DirectCompatibility;
}
}
// Search for export/import options
ITensorHandleFactory* srcFactory = registry.GetFactory(srcFactoryId);
- if (srcFactory->SupportsExport())
+ if (srcFactory->GetExportFlags() != 0)
{
for (auto&& pref : dstPrefs)
{
ITensorHandleFactory* dstFactory = registry.GetFactory(pref);
- if (dstFactory->SupportsImport())
+ if ((dstFactory->GetImportFlags() & srcFactory->GetExportFlags()) != 0)
{
- return MemoryStrategy::ExportToTarget;
+ return EdgeStrategy::ExportToTarget;
}
}
}
@@ -710,12 +713,12 @@ MemoryStrategy CalculateStrategy(BackendsMap& backends,
ITensorHandleFactory* dstFactory = registry.GetFactory(pref);
if (dstFactory->SupportsMapUnmap())
{
- return MemoryStrategy::CopyToTarget;
+ return EdgeStrategy::CopyToTarget;
}
}
}
- return MemoryStrategy::Undefined;
+ return EdgeStrategy::Undefined;
}
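
Condensing CalculateEdgeStrategy above: an edge's strategy is decided in a fixed order: direct factory match first, then export/import when the producer's export flags intersect a destination preference's import flags, then a map/unmap copy, with Undefined (an optimizer error) as the last resort. A sketch of that cascade, with the preference loops collapsed into booleans and masks:

#include <cstdint>

enum class EdgeStrategy { Undefined, DirectCompatibility, CopyToTarget, ExportToTarget };
using MemorySourceFlags = uint32_t;

// Condensed decision order; the real function iterates the destination
// backend's factory preferences rather than taking pre-collapsed arguments.
EdgeStrategy ChooseEdgeStrategy(bool directFactoryMatch,
                                MemorySourceFlags srcExportFlags,
                                MemorySourceFlags dstImportFlags,
                                bool dstSupportsMapUnmap)
{
    if (directFactoryMatch)                     { return EdgeStrategy::DirectCompatibility; }
    if ((srcExportFlags & dstImportFlags) != 0) { return EdgeStrategy::ExportToTarget; }
    if (dstSupportsMapUnmap)                    { return EdgeStrategy::CopyToTarget; }
    return EdgeStrategy::Undefined; // SelectTensorHandleStrategy reports this as an error
}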
// Select the TensorHandleFactories and the corresponding memory strategy
@@ -756,15 +759,15 @@ OptimizationResult SelectTensorHandleStrategy(Graph& optGraph,
}
outputSlot.SetTensorHandleFactory(slotOption);
- // Now determine the "best" memory strategy for each connection given the slotOption.
+ // Now determine the "best" edge strategy for each connection given the slotOption.
unsigned int connectionIdx = 0;
for (auto&& connection : outputSlot.GetConnections())
{
const Layer& connectedLayer = connection->GetOwningLayer();
- MemoryStrategy strategy = CalculateStrategy(backends, slotOption, *layer, connectedLayer, registry);
+ EdgeStrategy strategy = CalculateEdgeStrategy(backends, slotOption, *layer, connectedLayer, registry);
- if (strategy == MemoryStrategy::Undefined)
+ if (strategy == EdgeStrategy::Undefined)
{
result.m_Error = true;
if (errMessages)
@@ -775,7 +778,7 @@ OptimizationResult SelectTensorHandleStrategy(Graph& optGraph,
return;
}
- outputSlot.SetMemoryStrategy(connectionIdx, strategy);
+ outputSlot.SetEdgeStrategy(connectionIdx, strategy);
connectionIdx++;
}
@@ -887,7 +890,7 @@ IOptimizedNetworkPtr Optimize(const INetwork& inNetwork,
}
// Based on the tensor handle strategy determined above, insert copy layers where required.
- optGraph.AddCopyLayers(backends, tensorHandleFactoryRegistry);
+ optGraph.AddCompatibilityLayers(backends, tensorHandleFactoryRegistry);
// Convert constants
Optimizer::Pass(optGraph, MakeOptimizations(ConvertConstantsFloatToHalf()));
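
For orientation, the pass ordering this patch relies on inside Optimize(): edge strategies are computed first, then materialized as copy or import layers. A condensed sketch of that slice of the function (other passes elided; exact error-handling spelling is an assumption):

// Condensed sketch, not the full Optimize() body.
OptimizationResult strategyResult = SelectTensorHandleStrategy(optGraph,
                                                               backends,
                                                               tensorHandleFactoryRegistry,
                                                               errMessages);
// Every connection now carries an EdgeStrategy; insert MemCopyLayer /
// MemImportLayer instances wherever an edge recorded one.
optGraph.AddCompatibilityLayers(backends, tensorHandleFactoryRegistry);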
diff --git a/src/armnn/layers/MemImportLayer.cpp b/src/armnn/layers/MemImportLayer.cpp
new file mode 100644
index 0000000000..7a922f5a7c
--- /dev/null
+++ b/src/armnn/layers/MemImportLayer.cpp
@@ -0,0 +1,54 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#include "MemImportLayer.hpp"
+
+#include "LayerCloneBase.hpp"
+
+#include <armnn/TypesUtils.hpp>
+#include <backendsCommon/WorkloadData.hpp>
+#include <backendsCommon/WorkloadFactory.hpp>
+#include <backendsCommon/MemImportWorkload.hpp>
+
+namespace armnn
+{
+
+MemImportLayer::MemImportLayer(const char* name)
+ : Layer(1, 1, LayerType::MemImport, name)
+{
+}
+
+MemImportLayer* MemImportLayer::Clone(Graph& graph) const
+{
+ return CloneBase<MemImportLayer>(graph, GetName());
+}
+
+std::unique_ptr<IWorkload> MemImportLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const
+{
+ MemImportQueueDescriptor descriptor;
+
+ // This is different from other workloads: it does not get created by the workload factory.
+ return std::make_unique<ImportMemGenericWorkload>(descriptor, PrepInfoAndDesc(descriptor, graph));
+}
+
+void MemImportLayer::ValidateTensorShapesFromInputs()
+{
+ VerifyLayerConnections(1, CHECK_LOCATION());
+
+ auto inferredShapes = InferOutputShapes({ GetInputSlot(0).GetConnection()->GetTensorInfo().GetShape() });
+
+ BOOST_ASSERT(inferredShapes.size() == 1);
+
+ ConditionalThrowIfNotEqual<LayerValidationException>(
+ "MemImportLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.",
+ GetOutputSlot(0).GetTensorInfo().GetShape(),
+ inferredShapes[0]);
+}
+
+void MemImportLayer::Accept(ILayerVisitor& visitor) const
+{
+ throw armnn::Exception("MemImportLayer should not appear in an input graph");
+}
+
+} // namespace armnn
diff --git a/src/armnn/layers/MemImportLayer.hpp b/src/armnn/layers/MemImportLayer.hpp
new file mode 100644
index 0000000000..2d02c1fb41
--- /dev/null
+++ b/src/armnn/layers/MemImportLayer.hpp
@@ -0,0 +1,42 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#pragma once
+
+#include <Layer.hpp>
+
+namespace armnn
+{
+
+/// This layer represents a memory import operation.
+class MemImportLayer : public Layer
+{
+public:
+ /// Makes a workload for the MemImport type.
+ /// @param [in] graph The graph where this layer can be found.
+ /// @param [in] factory The workload factory which will create the workload.
+ /// @return A pointer to the created workload, or nullptr if not created.
+ virtual std::unique_ptr<IWorkload> CreateWorkload(const Graph& graph,
+ const IWorkloadFactory& factory) const override;
+
+ /// Creates a dynamically-allocated copy of this layer.
+ /// @param [in] graph The graph into which this layer is being cloned.
+ MemImportLayer* Clone(Graph& graph) const override;
+
+ /// Check if the input tensor shape(s)
+ /// will lead to a valid configuration of @ref MemImportLayer.
+ void ValidateTensorShapesFromInputs() override;
+
+ void Accept(ILayerVisitor& visitor) const override;
+
+protected:
+ /// Constructor to create a MemImportLayer.
+ /// @param [in] name Optional name for the layer.
+ MemImportLayer(const char* name);
+
+ /// Default destructor
+ ~MemImportLayer() = default;
+};
+
+} // namespace
diff --git a/src/armnn/test/GraphTests.cpp b/src/armnn/test/GraphTests.cpp
index 7950ec49f4..7bd6aac98b 100644
--- a/src/armnn/test/GraphTests.cpp
+++ b/src/armnn/test/GraphTests.cpp
@@ -495,13 +495,13 @@ struct CopyLayersFixture
// Set the memory strategies - for this test should be DirectCompatibility for same backends,
// and CopyToTarget for different backends
- inputLayer->GetOutputSlot(0).SetMemoryStrategy(0, MemoryStrategy::DirectCompatibility);
- convLayer1->GetOutputSlot(0).SetMemoryStrategy(0, MemoryStrategy::CopyToTarget);
- convLayer1->GetOutputSlot(0).SetMemoryStrategy(1, MemoryStrategy::DirectCompatibility);
- convLayer2->GetOutputSlot(0).SetMemoryStrategy(0, MemoryStrategy::CopyToTarget);
- concatLayer->GetOutputSlot(0).SetMemoryStrategy(0, MemoryStrategy::DirectCompatibility);
- actLayer->GetOutputSlot(0).SetMemoryStrategy(0, MemoryStrategy::DirectCompatibility);
- softmaxLayer->GetOutputSlot(0).SetMemoryStrategy(0, MemoryStrategy::CopyToTarget);
+ inputLayer->GetOutputSlot(0).SetEdgeStrategy(0, EdgeStrategy::DirectCompatibility);
+ convLayer1->GetOutputSlot(0).SetEdgeStrategy(0, EdgeStrategy::CopyToTarget);
+ convLayer1->GetOutputSlot(0).SetEdgeStrategy(1, EdgeStrategy::DirectCompatibility);
+ convLayer2->GetOutputSlot(0).SetEdgeStrategy(0, EdgeStrategy::CopyToTarget);
+ concatLayer->GetOutputSlot(0).SetEdgeStrategy(0, EdgeStrategy::DirectCompatibility);
+ actLayer->GetOutputSlot(0).SetEdgeStrategy(0, EdgeStrategy::DirectCompatibility);
+ softmaxLayer->GetOutputSlot(0).SetEdgeStrategy(0, EdgeStrategy::CopyToTarget);
}
armnn::TensorInfo m_TensorDesc;
@@ -529,7 +529,7 @@ BOOST_FIXTURE_TEST_CASE(AddCopyLayers, CopyLayersFixture)
{
InitialiseTestGraph();
const armnn::Graph origGraph(m_Graph);
- m_Graph.AddCopyLayers(m_Backends, m_FactoryRegistry);
+ m_Graph.AddCompatibilityLayers(m_Backends, m_FactoryRegistry);
TestGraphAfterAddingCopyLayers(m_Graph, origGraph);
}
@@ -537,13 +537,13 @@ BOOST_FIXTURE_TEST_CASE(AddCopyLayers, CopyLayersFixture)
BOOST_FIXTURE_TEST_CASE(AddCopyLayersSeveralTimes, CopyLayersFixture)
{
InitialiseTestGraph();
- m_Graph.AddCopyLayers(m_Backends, m_FactoryRegistry);
+ m_Graph.AddCompatibilityLayers(m_Backends, m_FactoryRegistry);
- // Calling AddCopyLayers() several times should not change the connections.
+ // Calling AddCompatibilityLayers() several times should not change the connections.
const std::vector<Edge> edges = GetEdgeList(m_Graph);
for (int i = 0; i < 4; ++i)
{
- m_Graph.AddCopyLayers(m_Backends, m_FactoryRegistry);
+ m_Graph.AddCompatibilityLayers(m_Backends, m_FactoryRegistry);
const std::vector<Edge> otherEdges = GetEdgeList(m_Graph);
BOOST_TEST((edges == otherEdges));
}
@@ -571,18 +571,18 @@ BOOST_FIXTURE_TEST_CASE(CopyLayersAddedBetweenSameLayersHaveDifferentNames, Copy
splitterLayer->GetOutputSlot(1).Connect(additionLayer->GetInputSlot(1));
additionLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
- inputLayer->GetOutputSlot(0).SetMemoryStrategy(0, armnn::MemoryStrategy::DirectCompatibility);
- splitterLayer->GetOutputSlot(0).SetMemoryStrategy(0, armnn::MemoryStrategy::CopyToTarget);
- splitterLayer->GetOutputSlot(1).SetMemoryStrategy(0, armnn::MemoryStrategy::CopyToTarget);
- additionLayer->GetOutputSlot(0).SetMemoryStrategy(0, armnn::MemoryStrategy::DirectCompatibility);
+ inputLayer->GetOutputSlot(0).SetEdgeStrategy(0, armnn::EdgeStrategy::DirectCompatibility);
+ splitterLayer->GetOutputSlot(0).SetEdgeStrategy(0, armnn::EdgeStrategy::CopyToTarget);
+ splitterLayer->GetOutputSlot(1).SetEdgeStrategy(0, armnn::EdgeStrategy::CopyToTarget);
+ additionLayer->GetOutputSlot(0).SetEdgeStrategy(0, armnn::EdgeStrategy::DirectCompatibility);
- graph.AddCopyLayers(m_Backends, m_FactoryRegistry);
+ graph.AddCompatibilityLayers(m_Backends, m_FactoryRegistry);
std::vector<Edge> edges = GetEdgeList(graph);
BOOST_CHECK(edges.size() == 6u);
std::sort(edges.begin(), edges.end());
auto last = std::unique(edges.begin(), edges.end());
- BOOST_CHECK_MESSAGE(last == edges.end(), "Found duplicated edges after AddCopyLayers()");
+ BOOST_CHECK_MESSAGE(last == edges.end(), "Found duplicated edges after AddCompatibilityLayers()");
}
BOOST_AUTO_TEST_CASE(DuplicateLayerNames)
diff --git a/src/armnn/test/TensorHandleStrategyTest.cpp b/src/armnn/test/TensorHandleStrategyTest.cpp
index 3bb1c68169..c391b04d97 100644
--- a/src/armnn/test/TensorHandleStrategyTest.cpp
+++ b/src/armnn/test/TensorHandleStrategyTest.cpp
@@ -50,9 +50,11 @@ public:
return nullptr;
}
- virtual const FactoryId GetId() const override { return m_Id; }
+ const FactoryId GetId() const override { return m_Id; }
- virtual bool SupportsSubTensors() const override { return true; }
+ bool SupportsSubTensors() const override { return true; }
+
+ MemorySourceFlags GetExportFlags() const override { return 1; }
private:
FactoryId m_Id = "UninitializedId";
@@ -60,6 +62,38 @@ private:
std::weak_ptr<IMemoryManager> m_MemMgr;
};
+class TestFactoryImport : public ITensorHandleFactory
+{
+public:
+ TestFactoryImport(std::weak_ptr<IMemoryManager> mgr, ITensorHandleFactory::FactoryId id)
+ : m_Id(id)
+ , m_MemMgr(mgr)
+ {}
+
+ std::unique_ptr<ITensorHandle> CreateSubTensorHandle(ITensorHandle& parent,
+ TensorShape const& subTensorShape,
+ unsigned int const* subTensorOrigin) const override
+ {
+ return nullptr;
+ }
+
+ std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo) const override
+ {
+ return nullptr;
+ }
+
+ const FactoryId GetId() const override { return m_Id; }
+
+ bool SupportsSubTensors() const override { return true; }
+
+ MemorySourceFlags GetImportFlags() const override { return 1; }
+
+private:
+ FactoryId m_Id = "ImporterId";
+
+ std::weak_ptr<IMemoryManager> m_MemMgr;
+};
+
class TestBackendA : public IBackendInternal
{
public:
@@ -173,6 +207,42 @@ private:
BackendId m_Id = "BackendC";
};
+class TestBackendD : public IBackendInternal
+{
+public:
+ TestBackendD() = default;
+
+ const BackendId& GetId() const override { return m_Id; }
+
+ IWorkloadFactoryPtr CreateWorkloadFactory(const IMemoryManagerSharedPtr& memoryManager = nullptr) const override
+ {
+ return IWorkloadFactoryPtr{};
+ }
+
+ IBackendInternal::ILayerSupportSharedPtr GetLayerSupport() const override
+ {
+ return ILayerSupportSharedPtr{};
+ }
+
+ std::vector<ITensorHandleFactory::FactoryId> GetHandleFactoryPreferences() const override
+ {
+ return std::vector<ITensorHandleFactory::FactoryId>{
+ "TestHandleFactoryD1"
+ };
+ }
+
+ void RegisterTensorHandleFactories(TensorHandleFactoryRegistry& registry) override
+ {
+ auto mgr = std::make_shared<TestMemMgr>();
+
+ registry.RegisterMemoryManager(mgr);
+ registry.RegisterFactory(std::make_unique<TestFactoryImport>(mgr, "TestHandleFactoryD1"));
+ }
+
+private:
+ BackendId m_Id = "BackendD";
+};
+
BOOST_AUTO_TEST_SUITE(TensorHandle)
@@ -200,16 +270,19 @@ BOOST_AUTO_TEST_CASE(TensorHandleSelectionStrategy)
auto backendA = std::make_unique<TestBackendA>();
auto backendB = std::make_unique<TestBackendB>();
auto backendC = std::make_unique<TestBackendC>();
+ auto backendD = std::make_unique<TestBackendD>();
TensorHandleFactoryRegistry registry;
backendA->RegisterTensorHandleFactories(registry);
backendB->RegisterTensorHandleFactories(registry);
backendC->RegisterTensorHandleFactories(registry);
+ backendD->RegisterTensorHandleFactories(registry);
BackendsMap backends;
backends["BackendA"] = std::move(backendA);
backends["BackendB"] = std::move(backendB);
backends["BackendC"] = std::move(backendC);
+ backends["BackendD"] = std::move(backendD);
armnn::Graph graph;
@@ -226,13 +299,17 @@ BOOST_AUTO_TEST_CASE(TensorHandleSelectionStrategy)
armnn::SoftmaxLayer* const softmaxLayer3 = graph.AddLayer<armnn::SoftmaxLayer>(smDesc, "softmax3");
softmaxLayer3->SetBackendId("BackendC");
+ armnn::SoftmaxLayer* const softmaxLayer4 = graph.AddLayer<armnn::SoftmaxLayer>(smDesc, "softmax4");
+ softmaxLayer4->SetBackendId("BackendD");
+
armnn::OutputLayer* const outputLayer = graph.AddLayer<armnn::OutputLayer>(0, "output");
outputLayer->SetBackendId("BackendA");
inputLayer->GetOutputSlot(0).Connect(softmaxLayer1->GetInputSlot(0));
softmaxLayer1->GetOutputSlot(0).Connect(softmaxLayer2->GetInputSlot(0));
softmaxLayer2->GetOutputSlot(0).Connect(softmaxLayer3->GetInputSlot(0));
- softmaxLayer3->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
+ softmaxLayer3->GetOutputSlot(0).Connect(softmaxLayer4->GetInputSlot(0));
+ softmaxLayer4->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
graph.TopologicalSort();
@@ -246,29 +323,45 @@ BOOST_AUTO_TEST_CASE(TensorHandleSelectionStrategy)
OutputSlot& softmaxLayer1Out = softmaxLayer1->GetOutputSlot(0);
OutputSlot& softmaxLayer2Out = softmaxLayer2->GetOutputSlot(0);
OutputSlot& softmaxLayer3Out = softmaxLayer3->GetOutputSlot(0);
+ OutputSlot& softmaxLayer4Out = softmaxLayer4->GetOutputSlot(0);
// Check that the correct factory was selected
BOOST_TEST(inputLayerOut.GetTensorHandleFactoryId() == "TestHandleFactoryA1");
BOOST_TEST(softmaxLayer1Out.GetTensorHandleFactoryId() == "TestHandleFactoryB1");
BOOST_TEST(softmaxLayer2Out.GetTensorHandleFactoryId() == "TestHandleFactoryB1");
BOOST_TEST(softmaxLayer3Out.GetTensorHandleFactoryId() == "TestHandleFactoryC1");
+ BOOST_TEST(softmaxLayer4Out.GetTensorHandleFactoryId() == "TestHandleFactoryD1");
// Check that the correct strategy was selected
- BOOST_TEST((inputLayerOut.GetMemoryStrategyForConnection(0) == MemoryStrategy::DirectCompatibility));
- BOOST_TEST((softmaxLayer1Out.GetMemoryStrategyForConnection(0) == MemoryStrategy::DirectCompatibility));
- BOOST_TEST((softmaxLayer2Out.GetMemoryStrategyForConnection(0) == MemoryStrategy::CopyToTarget));
- BOOST_TEST((softmaxLayer3Out.GetMemoryStrategyForConnection(0) == MemoryStrategy::DirectCompatibility));
-
- graph.AddCopyLayers(backends, registry);
- int count= 0;
- graph.ForEachLayer([&count](Layer* layer)
+ BOOST_TEST((inputLayerOut.GetEdgeStrategyForConnection(0) == EdgeStrategy::DirectCompatibility));
+ BOOST_TEST((softmaxLayer1Out.GetEdgeStrategyForConnection(0) == EdgeStrategy::DirectCompatibility));
+ BOOST_TEST((softmaxLayer2Out.GetEdgeStrategyForConnection(0) == EdgeStrategy::CopyToTarget));
+ BOOST_TEST((softmaxLayer3Out.GetEdgeStrategyForConnection(0) == EdgeStrategy::ExportToTarget));
+ BOOST_TEST((softmaxLayer4Out.GetEdgeStrategyForConnection(0) == EdgeStrategy::DirectCompatibility));
+
+ graph.AddCompatibilityLayers(backends, registry);
+
+ // Test for copy layers
+ int copyCount = 0;
+ graph.ForEachLayer([&copyCount](Layer* layer)
{
if (layer->GetType() == LayerType::MemCopy)
{
- count++;
+ copyCount++;
+ }
+ });
+ BOOST_TEST(copyCount == 1);
+
+ // Test for import layers
+ int importCount = 0;
+ graph.ForEachLayer([&importCount](Layer* layer)
+ {
+ if (layer->GetType() == LayerType::MemImport)
+ {
+ importCount++;
}
});
- BOOST_TEST(count == 1);
+ BOOST_TEST(importCount == 1);
}
BOOST_AUTO_TEST_SUITE_END()