diff options
Diffstat (limited to 'src/armnn')
-rw-r--r-- | src/armnn/Graph.cpp | 85 | ||||
-rw-r--r-- | src/armnn/Graph.hpp | 4 | ||||
-rw-r--r-- | src/armnn/InternalTypes.hpp | 1 | ||||
-rw-r--r-- | src/armnn/Layer.cpp | 18 | ||||
-rw-r--r-- | src/armnn/Layer.hpp | 8 | ||||
-rw-r--r-- | src/armnn/LayerSupport.cpp | 9 | ||||
-rw-r--r-- | src/armnn/LayersFwd.hpp | 2 | ||||
-rw-r--r-- | src/armnn/LoadedNetwork.cpp | 54 | ||||
-rw-r--r-- | src/armnn/Network.cpp | 51 | ||||
-rw-r--r-- | src/armnn/layers/MemImportLayer.cpp | 54 | ||||
-rw-r--r-- | src/armnn/layers/MemImportLayer.hpp | 42 | ||||
-rw-r--r-- | src/armnn/test/GraphTests.cpp | 34 | ||||
-rw-r--r-- | src/armnn/test/TensorHandleStrategyTest.cpp | 119 |
13 files changed, 376 insertions, 105 deletions
diff --git a/src/armnn/Graph.cpp b/src/armnn/Graph.cpp index 9e00f5ec01..6212c49eba 100644 --- a/src/armnn/Graph.cpp +++ b/src/armnn/Graph.cpp @@ -255,26 +255,31 @@ const Graph& Graph::TopologicalSort() const return *this; } -void Graph::AddCopyLayers(std::map<BackendId, std::unique_ptr<IBackendInternal>>& backends, - TensorHandleFactoryRegistry& registry) +void Graph::AddCompatibilityLayers(std::map<BackendId, std::unique_ptr<IBackendInternal>>& backends, + TensorHandleFactoryRegistry& registry) { - // Returns true if the given layer could potentially need an intermediate copy layer (depending on its - // connections to other layers). At the time of writing, copy layers will be inserted in the following situations: - // CPU -> CL (and viceversa) - // CPU -> Neon (and viceversa) - auto MayNeedCopyLayer = [](const Layer& layer) + // Returns true if the given layer could potentially need an intermediate copy/import layer (depending on its + // connections to other layers). + auto MayNeedCompatibilityLayer = [](const Layer& layer) { // All layers should have been associated with a valid compute device at this point. BOOST_ASSERT(layer.GetBackendId() != Compute::Undefined); - // Does not need another copy layer if a copy layer is already present. - return layer.GetType() != LayerType::MemCopy; + // Does not need another compatibility layer if a copy or import layer is already present. 
+ return layer.GetType() != LayerType::MemCopy && + layer.GetType() != LayerType::MemImport; }; - ForEachLayer([this, &backends, ®istry, MayNeedCopyLayer](Layer* srcLayer) + auto IsCompatibilityStrategy = [](EdgeStrategy strategy) + { + return strategy == EdgeStrategy::CopyToTarget || + strategy == EdgeStrategy::ExportToTarget; + }; + + ForEachLayer([this, &backends, ®istry, MayNeedCompatibilityLayer, IsCompatibilityStrategy](Layer* srcLayer) { BOOST_ASSERT(srcLayer); - if (!MayNeedCopyLayer(*srcLayer)) + if (!MayNeedCompatibilityLayer(*srcLayer)) { // The current layer does not need copy layers, move to the next one return; @@ -285,33 +290,43 @@ void Graph::AddCopyLayers(std::map<BackendId, std::unique_ptr<IBackendInternal>> { OutputSlot& srcOutputSlot = srcLayer->GetOutputSlot(srcOutputIndex); const std::vector<InputSlot*> srcConnections = srcOutputSlot.GetConnections(); - const std::vector<MemoryStrategy> srcMemoryStrategies = srcOutputSlot.GetMemoryStrategies(); + const std::vector<EdgeStrategy> srcEdgeStrategies = srcOutputSlot.GetEdgeStrategies(); for (unsigned int srcConnectionIndex = 0; srcConnectionIndex < srcConnections.size(); srcConnectionIndex++) { InputSlot* dstInputSlot = srcConnections[srcConnectionIndex]; BOOST_ASSERT(dstInputSlot); - MemoryStrategy strategy = srcMemoryStrategies[srcConnectionIndex]; - BOOST_ASSERT_MSG(strategy != MemoryStrategy::Undefined, + EdgeStrategy strategy = srcEdgeStrategies[srcConnectionIndex]; + BOOST_ASSERT_MSG(strategy != EdgeStrategy::Undefined, "Undefined memory strategy found while adding copy layers for compatibility"); const Layer& dstLayer = dstInputSlot->GetOwningLayer(); - if (MayNeedCopyLayer(dstLayer) && - strategy == MemoryStrategy::CopyToTarget) + if (MayNeedCompatibilityLayer(dstLayer) && + IsCompatibilityStrategy(strategy)) { // A copy layer is needed in between the source and destination layers. // Record the operation rather than attempting to modify the graph as we go. 
// (invalidating iterators) - const std::string copyLayerName = boost::str(boost::format("[ %1% (%2%) -> %3% (%4%) ]") + const std::string compLayerName = boost::str(boost::format("[ %1% (%2%) -> %3% (%4%) ]") % srcLayer->GetName() % srcOutputIndex % dstLayer.GetName() % dstInputSlot->GetSlotIndex()); - MemCopyLayer* const copyLayer = InsertNewLayer<MemCopyLayer>(*dstInputSlot, copyLayerName.c_str()); - copyLayer->SetBackendId(dstLayer.GetBackendId()); + Layer* compLayer = nullptr; + if (strategy == EdgeStrategy::CopyToTarget) + { + compLayer = InsertNewLayer<MemCopyLayer>(*dstInputSlot, compLayerName.c_str()); + } + else + { + BOOST_ASSERT_MSG(strategy == EdgeStrategy::ExportToTarget, "Invalid edge strategy found."); + compLayer = InsertNewLayer<MemImportLayer>(*dstInputSlot, compLayerName.c_str()); + } + + compLayer->SetBackendId(dstLayer.GetBackendId()); - OutputSlot& copyOutputSlot = copyLayer->GetOutputSlot(0); + OutputSlot& compOutputSlot = compLayer->GetOutputSlot(0); auto backendIt = backends.find(dstLayer.GetBackendId()); if (backendIt != backends.end() && backendIt->second && @@ -325,34 +340,40 @@ void Graph::AddCopyLayers(std::map<BackendId, std::unique_ptr<IBackendInternal>> for (auto preference : tensorHandleFactoryIds) { auto factory = registry.GetFactory(preference); - if (factory && factory->SupportsMapUnmap()) + if (factory) { - copyOutputSlot.SetTensorHandleFactory(preference); - found = true; - break; + auto srcPref = srcOutputSlot.GetTensorHandleFactoryId(); + auto srcFactory = registry.GetFactory(srcPref); + bool canExportImport = (factory->GetImportFlags() & srcFactory->GetExportFlags()) != 0; + if (factory->SupportsMapUnmap() || canExportImport) + { + compOutputSlot.SetTensorHandleFactory(preference); + found = true; + break; + } } } - BOOST_ASSERT_MSG(found, "Could not find a mappable TensorHandle for copy layer"); + BOOST_ASSERT_MSG(found, "Could not find a valid TensorHandle for compatibilty layer"); } else { - 
copyOutputSlot.SetTensorHandleFactory(ITensorHandleFactory::LegacyFactoryId); + compOutputSlot.SetTensorHandleFactory(ITensorHandleFactory::LegacyFactoryId); } - // The output strategy of a copy layer is always DirectCompatibility. - copyOutputSlot.SetMemoryStrategy(0, MemoryStrategy::DirectCompatibility); + // The output strategy of a compatibility layer is always DirectCompatibility. + compOutputSlot.SetEdgeStrategy(0, EdgeStrategy::DirectCompatibility); // Recalculate the connection index on the previous layer as we have just inserted into it. const std::vector<InputSlot*>& newSourceConnections = srcOutputSlot.GetConnections(); long newSrcConnectionIndex = std::distance(newSourceConnections.begin(), std::find(newSourceConnections.begin(), newSourceConnections.end(), - ©Layer->GetInputSlot(0))); + &compLayer->GetInputSlot(0))); - // The input strategy of a copy layer is always DirectCompatibilty. - srcOutputSlot.SetMemoryStrategy(boost::numeric_cast<unsigned int>(newSrcConnectionIndex), - MemoryStrategy::DirectCompatibility); + // The input strategy of a compatibility layer is always DirectCompatibilty. + srcOutputSlot.SetEdgeStrategy(boost::numeric_cast<unsigned int>(newSrcConnectionIndex), + EdgeStrategy::DirectCompatibility); } } } diff --git a/src/armnn/Graph.hpp b/src/armnn/Graph.hpp index f8113375c9..c65f12bbc3 100644 --- a/src/armnn/Graph.hpp +++ b/src/armnn/Graph.hpp @@ -191,8 +191,8 @@ public: /// Modifies the graph in-place, removing edges connecting layers using different compute devices, /// and relinking them via an intermediary copy layers. - void AddCopyLayers(std::map<BackendId, std::unique_ptr<class IBackendInternal>>& backends, - TensorHandleFactoryRegistry& registry); + void AddCompatibilityLayers(std::map<BackendId, std::unique_ptr<class IBackendInternal>>& backends, + TensorHandleFactoryRegistry& registry); /// Substitutes the given sub-graph with either a new layer or a new sub-graph. 
/// In either case, the given layer or all the layers in the given sub-graph must belong to this graph. diff --git a/src/armnn/InternalTypes.hpp b/src/armnn/InternalTypes.hpp index b0fea7c8c2..7a0f9a1cb0 100644 --- a/src/armnn/InternalTypes.hpp +++ b/src/armnn/InternalTypes.hpp @@ -40,6 +40,7 @@ enum class LayerType Maximum, Mean, MemCopy, + MemImport, Merge, Minimum, Multiplication, diff --git a/src/armnn/Layer.cpp b/src/armnn/Layer.cpp index a287220702..528020bab5 100644 --- a/src/armnn/Layer.cpp +++ b/src/armnn/Layer.cpp @@ -31,7 +31,7 @@ void InputSlot::Insert(Layer& layer) // Connects inserted layer to parent. BOOST_ASSERT(layer.GetNumInputSlots() == 1); int idx = prevSlot->Connect(layer.GetInputSlot(0)); - prevSlot->SetMemoryStrategy(boost::numeric_cast<unsigned int>(idx), MemoryStrategy::Undefined); + prevSlot->SetEdgeStrategy(boost::numeric_cast<unsigned int>(idx), EdgeStrategy::Undefined); // Sets tensor info for inserted layer. const TensorInfo& tensorInfo = prevSlot->GetTensorInfo(); @@ -40,7 +40,7 @@ void InputSlot::Insert(Layer& layer) // Connects inserted layer to this. 
layer.GetOutputSlot(0).Connect(*this); - layer.GetOutputSlot(0).SetMemoryStrategy(0, MemoryStrategy::Undefined); + layer.GetOutputSlot(0).SetEdgeStrategy(0, EdgeStrategy::Undefined); } const InputSlot* OutputSlot::GetConnection(unsigned int index) const @@ -80,7 +80,7 @@ int OutputSlot::Connect(InputSlot& destination) { destination.SetConnection(this); m_Connections.push_back(&destination); - m_MemoryStrategies.push_back(MemoryStrategy::Undefined); + m_EdgeStrategies.push_back(EdgeStrategy::Undefined); return boost::numeric_cast<int>(m_Connections.size() - 1); } @@ -97,7 +97,7 @@ void OutputSlot::Disconnect(InputSlot& slot) auto idx = std::distance(m_Connections.begin(), it); m_Connections.erase(std::remove(m_Connections.begin(), m_Connections.end(), &slot), m_Connections.end()); - m_MemoryStrategies.erase(m_MemoryStrategies.begin() + idx); + m_EdgeStrategies.erase(m_EdgeStrategies.begin() + idx); } void OutputSlot::DisconnectAll() @@ -113,7 +113,7 @@ void OutputSlot::MoveAllConnections(OutputSlot& destination) { while (GetNumConnections() > 0) { - BOOST_ASSERT_MSG(m_MemoryStrategies[0] == MemoryStrategy::Undefined, + BOOST_ASSERT_MSG(m_EdgeStrategies[0] == EdgeStrategy::Undefined, "Cannot move connections once memory strategies have be established."); InputSlot& connection = *GetConnection(0); @@ -174,14 +174,14 @@ ITensorHandleFactory::FactoryId OutputSlot::GetTensorHandleFactoryId() const return m_TensorHandleFactoryId; } -void OutputSlot::SetMemoryStrategy(unsigned int connectionIndex, MemoryStrategy strategy) +void OutputSlot::SetEdgeStrategy(unsigned int connectionIndex, EdgeStrategy strategy) { - m_MemoryStrategies[connectionIndex] = strategy; + m_EdgeStrategies[connectionIndex] = strategy; } -MemoryStrategy OutputSlot::GetMemoryStrategyForConnection(unsigned int connectionIdx) const +EdgeStrategy OutputSlot::GetEdgeStrategyForConnection(unsigned int connectionIdx) const { - return m_MemoryStrategies[connectionIdx]; + return m_EdgeStrategies[connectionIdx]; 
} namespace { diff --git a/src/armnn/Layer.hpp b/src/armnn/Layer.hpp index b90d040475..5944ea83ed 100644 --- a/src/armnn/Layer.hpp +++ b/src/armnn/Layer.hpp @@ -123,7 +123,7 @@ public: void Disconnect(InputSlot& slot); const std::vector<InputSlot*>& GetConnections() const { return m_Connections; } - const std::vector<MemoryStrategy>& GetMemoryStrategies() const { return m_MemoryStrategies; } + const std::vector<EdgeStrategy>& GetEdgeStrategies() const { return m_EdgeStrategies; } bool ValidateTensorShape(const TensorShape& shape) const; @@ -160,8 +160,8 @@ public: void SetTensorHandleFactory(const ITensorHandleFactory::FactoryId& id); ITensorHandleFactory::FactoryId GetTensorHandleFactoryId() const; - void SetMemoryStrategy(unsigned int connectionIndex, MemoryStrategy strategy); - MemoryStrategy GetMemoryStrategyForConnection(unsigned int connectionIdx) const; + void SetEdgeStrategy(unsigned int connectionIndex, EdgeStrategy strategy); + EdgeStrategy GetEdgeStrategyForConnection(unsigned int connectionIdx) const; private: void ValidateConnectionIndex(unsigned int index) const; @@ -171,7 +171,7 @@ private: std::vector<InputSlot*> m_Connections; ITensorHandleFactory::FactoryId m_TensorHandleFactoryId; - std::vector<MemoryStrategy> m_MemoryStrategies; + std::vector<EdgeStrategy> m_EdgeStrategies; }; // InputSlot inlines that need OutputSlot declaration. 
diff --git a/src/armnn/LayerSupport.cpp b/src/armnn/LayerSupport.cpp index 047c80a8c4..5a756b9544 100644 --- a/src/armnn/LayerSupport.cpp +++ b/src/armnn/LayerSupport.cpp @@ -371,6 +371,15 @@ bool IsMemCopySupported(const BackendId &backend, FORWARD_LAYER_SUPPORT_FUNC(backend, IsMemCopySupported, input, output); } +bool IsMemImportSupported(const BackendId &backend, + const TensorInfo &input, + const TensorInfo &output, + char *reasonIfUnsupported, + size_t reasonIfUnsupportedMaxLength) +{ + FORWARD_LAYER_SUPPORT_FUNC(backend, IsMemImportSupported, input, output); +} + bool IsMergeSupported(const BackendId& backend, const TensorInfo& input0, const TensorInfo& input1, diff --git a/src/armnn/LayersFwd.hpp b/src/armnn/LayersFwd.hpp index 2c8d5d2e07..cadcd49acb 100644 --- a/src/armnn/LayersFwd.hpp +++ b/src/armnn/LayersFwd.hpp @@ -32,6 +32,7 @@ #include "layers/MaximumLayer.hpp" #include "layers/MeanLayer.hpp" #include "layers/MemCopyLayer.hpp" +#include "layers/MemImportLayer.hpp" #include "layers/MergeLayer.hpp" #include "layers/MinimumLayer.hpp" #include "layers/MultiplicationLayer.hpp" @@ -110,6 +111,7 @@ DECLARE_LAYER(Lstm) DECLARE_LAYER(Maximum) DECLARE_LAYER(Mean) DECLARE_LAYER(MemCopy) +DECLARE_LAYER(MemImport) DECLARE_LAYER(Merge) DECLARE_LAYER(Minimum) DECLARE_LAYER(Multiplication) diff --git a/src/armnn/LoadedNetwork.cpp b/src/armnn/LoadedNetwork.cpp index 7873e48780..a81528aa65 100644 --- a/src/armnn/LoadedNetwork.cpp +++ b/src/armnn/LoadedNetwork.cpp @@ -14,6 +14,8 @@ #include <backendsCommon/CpuTensorHandle.hpp> #include <backendsCommon/BackendRegistry.hpp> #include <backendsCommon/IMemoryManager.hpp> +#include <backendsCommon/MemCopyWorkload.hpp> +#include <backendsCommon/MemSyncWorkload.hpp> #include <boost/polymorphic_cast.hpp> #include <boost/assert.hpp> @@ -389,8 +391,22 @@ void LoadedNetwork::EnqueueInput(const BindableLayer& layer, ITensorHandle* tens inputQueueDescriptor.m_Outputs.push_back(outputTensorHandle); 
info.m_OutputTensorInfos.push_back(outputTensorInfo); - const IWorkloadFactory& workloadFactory = GetWorkloadFactory(layer); - auto inputWorkload = workloadFactory.CreateInput(inputQueueDescriptor, info); + MemorySourceFlags importFlags = outputTensorHandle->GetImportFlags(); + if (CheckFlag(importFlags, MemorySource::Malloc)) // Try import the input tensor + { + // This assumes a CPU Tensor handle + void* mem = tensorHandle->Map(false); + if (outputTensorHandle->Import(mem, MemorySource::Malloc)) + { + tensorHandle->Unmap(); + return; // No need for a workload since the import has been done. + } + tensorHandle->Unmap(); + } + + // Create a mem copy workload for input since we could not import + auto inputWorkload = std::make_unique<CopyMemGenericWorkload>(inputQueueDescriptor, info); + BOOST_ASSERT_MSG(inputWorkload, "No input workload created"); m_InputQueue.push_back(move(inputWorkload)); } @@ -422,11 +438,41 @@ void LoadedNetwork::EnqueueOutput(const BindableLayer& layer, ITensorHandle* ten ITensorHandle* inputTensorHandle = outputHandler.GetData(); BOOST_ASSERT_MSG(inputTensorHandle != nullptr, "Data should have been allocated."); + // Try import the output tensor. + // Note: We can only import the output pointer if all of the following hold true: + // a) The imported pointer is aligned sufficiently + // b) The tensor has zero padding + // c) There is only one connection to the OutputSlot and it is to an OutputLayer. + // d) The output pointer is allocated via malloc. 
(Other types will be supported in a later release) + if (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetNumConnections() == 1) + { + MemorySourceFlags importFlags = inputTensorHandle->GetImportFlags(); + if (CheckFlag(importFlags, MemorySource::Malloc)) + { + void* mem = tensorHandle->Map(false); + bool importOk = inputTensorHandle->Import(mem, MemorySource::Malloc); + tensorHandle->Unmap(); + + if (importOk) + { + // Insert synchronization workload + MemSyncQueueDescriptor syncDesc; + syncDesc.m_Inputs.push_back(inputTensorHandle); + info.m_InputTensorInfos.push_back(inputTensorInfo); + auto syncWorkload = std::make_unique<SyncMemGenericWorkload>(syncDesc, info); + BOOST_ASSERT_MSG(syncWorkload, "No sync workload created"); + m_OutputQueue.push_back(move(syncWorkload)); + + return; //No need to add the output workload below + } + } + } + + // If we got here then we couldn't import the memory, so add an output workload which performs a memcopy. outputQueueDescriptor.m_Inputs.push_back(inputTensorHandle); info.m_InputTensorInfos.push_back(inputTensorInfo); - const IWorkloadFactory& workloadFactory = GetWorkloadFactory(layer); - auto outputWorkload = workloadFactory.CreateOutput(outputQueueDescriptor, info); + auto outputWorkload = std::make_unique<CopyMemGenericWorkload>(outputQueueDescriptor, info); BOOST_ASSERT_MSG(outputWorkload, "No output workload created"); m_OutputQueue.push_back(move(outputWorkload)); } diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp index 2195c71735..b30cd9f3c2 100644 --- a/src/armnn/Network.cpp +++ b/src/armnn/Network.cpp @@ -441,7 +441,7 @@ bool RequiresCopy(ITensorHandleFactory::FactoryId src, ITensorHandleFactory* srcFactory = registry.GetFactory(src); ITensorHandleFactory* dstFactory = registry.GetFactory(dst); - if (srcFactory->SupportsExport() && dstFactory->SupportsImport()) + if ((srcFactory->GetExportFlags() & dstFactory->GetImportFlags()) != 0) { return false; } @@ -493,11 +493,14 @@ 
ITensorHandleFactory::FactoryId CalculateSlotOptionForInput(BackendsMap& backend auto dstPrefs = toBackend->second.get()->GetHandleFactoryPreferences(); for (auto&& dst : dstPrefs) { - // Input layers use the mem copy workload, so the selected factory must support map/unmap API + // Input layers use the mem copy workload or import, so the selected factory must + // support either the map/unmap API or Import API ITensorHandleFactory* factory = registry.GetFactory(dst); - if (!factory->SupportsMapUnmap()) + if (!factory->SupportsMapUnmap() && + !CheckFlag(factory->GetImportFlags(), MemorySource::Malloc)) // Just support cpu mem imports for now { - // The current tensor handle factory does not support the map/unmap strategy, move to the next one + // The current tensor handle factory does not support the map/unmap or import + // strategy, move to the next one continue; } @@ -648,11 +651,11 @@ ITensorHandleFactory::FactoryId CalculateSlotOption(BackendsMap& backends, return ITensorHandleFactory::LegacyFactoryId; } -MemoryStrategy CalculateStrategy(BackendsMap& backends, - ITensorHandleFactory::FactoryId srcFactoryId, - const Layer& layer, - const Layer& connectedLayer, - TensorHandleFactoryRegistry& registry) +EdgeStrategy CalculateEdgeStrategy(BackendsMap& backends, + ITensorHandleFactory::FactoryId srcFactoryId, + const Layer& layer, + const Layer& connectedLayer, + TensorHandleFactoryRegistry& registry) { auto toBackend = backends.find(connectedLayer.GetBackendId()); BOOST_ASSERT_MSG(toBackend != backends.end(), "Backend id not found for the connected layer"); @@ -664,19 +667,19 @@ MemoryStrategy CalculateStrategy(BackendsMap& backends, { if (layer.GetBackendId() != connectedLayer.GetBackendId()) { - return MemoryStrategy::CopyToTarget; + return EdgeStrategy::CopyToTarget; } else { - return MemoryStrategy::DirectCompatibility; + return EdgeStrategy::DirectCompatibility; } } // TensorHandleFactory API present, so perform more sophisticated strategies. 
- // Dst Output layers don't require copy because they use map/unmap + // Dst Output layers don't require copy because they use import or map/unmap if (connectedLayer.GetType() == LayerType::Output) { - return MemoryStrategy::DirectCompatibility; + return EdgeStrategy::DirectCompatibility; } // Search for direct match in prefs @@ -684,20 +687,20 @@ MemoryStrategy CalculateStrategy(BackendsMap& backends, { if (pref == srcFactoryId) { - return MemoryStrategy::DirectCompatibility; + return EdgeStrategy::DirectCompatibility; } } // Search for export/import options ITensorHandleFactory* srcFactory = registry.GetFactory(srcFactoryId); - if (srcFactory->SupportsExport()) + if (srcFactory->GetExportFlags() != 0) { for (auto&& pref : dstPrefs) { ITensorHandleFactory* dstFactory = registry.GetFactory(pref); - if (dstFactory->SupportsImport()) + if ((dstFactory->GetImportFlags() & srcFactory->GetExportFlags()) != 0) { - return MemoryStrategy::ExportToTarget; + return EdgeStrategy::ExportToTarget; } } } @@ -710,12 +713,12 @@ MemoryStrategy CalculateStrategy(BackendsMap& backends, ITensorHandleFactory* dstFactory = registry.GetFactory(pref); if (dstFactory->SupportsMapUnmap()) { - return MemoryStrategy::CopyToTarget; + return EdgeStrategy::CopyToTarget; } } } - return MemoryStrategy::Undefined; + return EdgeStrategy::Undefined; } // Select the TensorHandleFactories and the corresponding memory strategy @@ -756,15 +759,15 @@ OptimizationResult SelectTensorHandleStrategy(Graph& optGraph, } outputSlot.SetTensorHandleFactory(slotOption); - // Now determine the "best" memory strategy for each connection given the slotOption. + // Now determine the "best" edge strategy for each connection given the slotOption. 
unsigned int connectionIdx = 0; for (auto&& connection : outputSlot.GetConnections()) { const Layer& connectedLayer = connection->GetOwningLayer(); - MemoryStrategy strategy = CalculateStrategy(backends, slotOption, *layer, connectedLayer, registry); + EdgeStrategy strategy = CalculateEdgeStrategy(backends, slotOption, *layer, connectedLayer, registry); - if (strategy == MemoryStrategy::Undefined) + if (strategy == EdgeStrategy::Undefined) { result.m_Error = true; if (errMessages) @@ -775,7 +778,7 @@ OptimizationResult SelectTensorHandleStrategy(Graph& optGraph, return; } - outputSlot.SetMemoryStrategy(connectionIdx, strategy); + outputSlot.SetEdgeStrategy(connectionIdx, strategy); connectionIdx++; } @@ -887,7 +890,7 @@ IOptimizedNetworkPtr Optimize(const INetwork& inNetwork, } // Based on the tensor handle strategy determined above, insert copy layers where required. - optGraph.AddCopyLayers(backends, tensorHandleFactoryRegistry); + optGraph.AddCompatibilityLayers(backends, tensorHandleFactoryRegistry); // Convert constants Optimizer::Pass(optGraph, MakeOptimizations(ConvertConstantsFloatToHalf())); diff --git a/src/armnn/layers/MemImportLayer.cpp b/src/armnn/layers/MemImportLayer.cpp new file mode 100644 index 0000000000..7a922f5a7c --- /dev/null +++ b/src/armnn/layers/MemImportLayer.cpp @@ -0,0 +1,54 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// +#include "MemImportLayer.hpp" + +#include "LayerCloneBase.hpp" + +#include <armnn/TypesUtils.hpp> +#include <backendsCommon/WorkloadData.hpp> +#include <backendsCommon/WorkloadFactory.hpp> +#include <backendsCommon/MemImportWorkload.hpp> + +namespace armnn +{ + +MemImportLayer::MemImportLayer(const char* name) + : Layer(1, 1, LayerType::MemImport, name) +{ +} + +MemImportLayer* MemImportLayer::Clone(Graph& graph) const +{ + return CloneBase<MemImportLayer>(graph, GetName()); +} + +std::unique_ptr<IWorkload> MemImportLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const +{ + MemImportQueueDescriptor descriptor; + + //This is different from other workloads. Does not get created by the workload factory. + return std::make_unique<ImportMemGenericWorkload>(descriptor, PrepInfoAndDesc(descriptor, graph)); +} + +void MemImportLayer::ValidateTensorShapesFromInputs() +{ + VerifyLayerConnections(1, CHECK_LOCATION()); + + auto inferredShapes = InferOutputShapes({ GetInputSlot(0).GetConnection()->GetTensorInfo().GetShape() }); + + BOOST_ASSERT(inferredShapes.size() == 1); + + ConditionalThrowIfNotEqual<LayerValidationException>( + "MemImportLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.", + GetOutputSlot(0).GetTensorInfo().GetShape(), + inferredShapes[0]); +} + +void MemImportLayer::Accept(ILayerVisitor& visitor) const +{ + throw armnn::Exception("MemImportLayer should not appear in an input graph"); +} + +} // namespace armnn diff --git a/src/armnn/layers/MemImportLayer.hpp b/src/armnn/layers/MemImportLayer.hpp new file mode 100644 index 0000000000..2d02c1fb41 --- /dev/null +++ b/src/armnn/layers/MemImportLayer.hpp @@ -0,0 +1,42 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#include <Layer.hpp> + +namespace armnn +{ + +/// This layer represents a memory import operation. 
+class MemImportLayer : public Layer +{ +public: + /// Makes a workload for the MemImport type. + /// @param [in] graph The graph where this layer can be found. + /// @param [in] factory The workload factory which will create the workload. + /// @return A pointer to the created workload, or nullptr if not created. + virtual std::unique_ptr<IWorkload>CreateWorkload(const Graph& graph, + const IWorkloadFactory& factory) const override; + + /// Creates a dynamically-allocated copy of this layer. + /// @param [in] graph The graph into which this layer is being cloned. + MemImportLayer* Clone(Graph& graph) const override; + + /// Check if the input tensor shape(s) + /// will lead to a valid configuration of @ref MemImportLayer. + void ValidateTensorShapesFromInputs() override; + + void Accept(ILayerVisitor& visitor) const override; + +protected: + /// Constructor to create a MemImportLayer. + /// @param [in] name Optional name for the layer. + MemImportLayer(const char* name); + + /// Default destructor + ~MemImportLayer() = default; +}; + +} // namespace diff --git a/src/armnn/test/GraphTests.cpp b/src/armnn/test/GraphTests.cpp index 7950ec49f4..7bd6aac98b 100644 --- a/src/armnn/test/GraphTests.cpp +++ b/src/armnn/test/GraphTests.cpp @@ -495,13 +495,13 @@ struct CopyLayersFixture // Set the memory strategies - for this test should be DirectCompatibility for same backends, // and CopyToTarget for different backends - inputLayer->GetOutputSlot(0).SetMemoryStrategy(0, MemoryStrategy::DirectCompatibility); - convLayer1->GetOutputSlot(0).SetMemoryStrategy(0, MemoryStrategy::CopyToTarget); - convLayer1->GetOutputSlot(0).SetMemoryStrategy(1, MemoryStrategy::DirectCompatibility); - convLayer2->GetOutputSlot(0).SetMemoryStrategy(0, MemoryStrategy::CopyToTarget); - concatLayer->GetOutputSlot(0).SetMemoryStrategy(0, MemoryStrategy::DirectCompatibility); - actLayer->GetOutputSlot(0).SetMemoryStrategy(0, MemoryStrategy::DirectCompatibility); - 
softmaxLayer->GetOutputSlot(0).SetMemoryStrategy(0, MemoryStrategy::CopyToTarget); + inputLayer->GetOutputSlot(0).SetEdgeStrategy(0, EdgeStrategy::DirectCompatibility); + convLayer1->GetOutputSlot(0).SetEdgeStrategy(0, EdgeStrategy::CopyToTarget); + convLayer1->GetOutputSlot(0).SetEdgeStrategy(1, EdgeStrategy::DirectCompatibility); + convLayer2->GetOutputSlot(0).SetEdgeStrategy(0, EdgeStrategy::CopyToTarget); + concatLayer->GetOutputSlot(0).SetEdgeStrategy(0, EdgeStrategy::DirectCompatibility); + actLayer->GetOutputSlot(0).SetEdgeStrategy(0, EdgeStrategy::DirectCompatibility); + softmaxLayer->GetOutputSlot(0).SetEdgeStrategy(0, EdgeStrategy::CopyToTarget); } armnn::TensorInfo m_TensorDesc; @@ -529,7 +529,7 @@ BOOST_FIXTURE_TEST_CASE(AddCopyLayers, CopyLayersFixture) { InitialiseTestGraph(); const armnn::Graph origGraph(m_Graph); - m_Graph.AddCopyLayers(m_Backends, m_FactoryRegistry); + m_Graph.AddCompatibilityLayers(m_Backends, m_FactoryRegistry); TestGraphAfterAddingCopyLayers(m_Graph, origGraph); } @@ -537,13 +537,13 @@ BOOST_FIXTURE_TEST_CASE(AddCopyLayers, CopyLayersFixture) BOOST_FIXTURE_TEST_CASE(AddCopyLayersSeveralTimes, CopyLayersFixture) { InitialiseTestGraph(); - m_Graph.AddCopyLayers(m_Backends, m_FactoryRegistry); + m_Graph.AddCompatibilityLayers(m_Backends, m_FactoryRegistry); - // Calling AddCopyLayers() several times should not change the connections. + // Calling AddCompatibilityLayers() several times should not change the connections. 
const std::vector<Edge> edges = GetEdgeList(m_Graph); for (int i = 0; i < 4; ++i) { - m_Graph.AddCopyLayers(m_Backends, m_FactoryRegistry); + m_Graph.AddCompatibilityLayers(m_Backends, m_FactoryRegistry); const std::vector<Edge> otherEdges = GetEdgeList(m_Graph); BOOST_TEST((edges == otherEdges)); } @@ -571,18 +571,18 @@ BOOST_FIXTURE_TEST_CASE(CopyLayersAddedBetweenSameLayersHaveDifferentNames, Copy splitterLayer->GetOutputSlot(1).Connect(additionLayer->GetInputSlot(1)); additionLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0)); - inputLayer->GetOutputSlot(0).SetMemoryStrategy(0, armnn::MemoryStrategy::DirectCompatibility); - splitterLayer->GetOutputSlot(0).SetMemoryStrategy(0, armnn::MemoryStrategy::CopyToTarget); - splitterLayer->GetOutputSlot(1).SetMemoryStrategy(0, armnn::MemoryStrategy::CopyToTarget); - additionLayer->GetOutputSlot(0).SetMemoryStrategy(0, armnn::MemoryStrategy::DirectCompatibility); + inputLayer->GetOutputSlot(0).SetEdgeStrategy(0, armnn::EdgeStrategy::DirectCompatibility); + splitterLayer->GetOutputSlot(0).SetEdgeStrategy(0, armnn::EdgeStrategy::CopyToTarget); + splitterLayer->GetOutputSlot(1).SetEdgeStrategy(0, armnn::EdgeStrategy::CopyToTarget); + additionLayer->GetOutputSlot(0).SetEdgeStrategy(0, armnn::EdgeStrategy::DirectCompatibility); - graph.AddCopyLayers(m_Backends, m_FactoryRegistry); + graph.AddCompatibilityLayers(m_Backends, m_FactoryRegistry); std::vector<Edge> edges = GetEdgeList(graph); BOOST_CHECK(edges.size() == 6u); std::sort(edges.begin(), edges.end()); auto last = std::unique(edges.begin(), edges.end()); - BOOST_CHECK_MESSAGE(last == edges.end(), "Found duplicated edges after AddCopyLayers()"); + BOOST_CHECK_MESSAGE(last == edges.end(), "Found duplicated edges after AddCompatibilityLayers()"); } BOOST_AUTO_TEST_CASE(DuplicateLayerNames) diff --git a/src/armnn/test/TensorHandleStrategyTest.cpp b/src/armnn/test/TensorHandleStrategyTest.cpp index 3bb1c68169..c391b04d97 100644 --- 
a/src/armnn/test/TensorHandleStrategyTest.cpp +++ b/src/armnn/test/TensorHandleStrategyTest.cpp @@ -50,9 +50,11 @@ public: return nullptr; } - virtual const FactoryId GetId() const override { return m_Id; } + const FactoryId GetId() const override { return m_Id; } - virtual bool SupportsSubTensors() const override { return true; } + bool SupportsSubTensors() const override { return true; } + + MemorySourceFlags GetExportFlags() const override { return 1; } private: FactoryId m_Id = "UninitializedId"; @@ -60,6 +62,38 @@ private: std::weak_ptr<IMemoryManager> m_MemMgr; }; +class TestFactoryImport : public ITensorHandleFactory +{ +public: + TestFactoryImport(std::weak_ptr<IMemoryManager> mgr, ITensorHandleFactory::FactoryId id) + : m_Id(id) + , m_MemMgr(mgr) + {} + + std::unique_ptr<ITensorHandle> CreateSubTensorHandle(ITensorHandle& parent, + TensorShape const& subTensorShape, + unsigned int const* subTensorOrigin) const override + { + return nullptr; + } + + std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo) const override + { + return nullptr; + } + + const FactoryId GetId() const override { return m_Id; } + + bool SupportsSubTensors() const override { return true; } + + MemorySourceFlags GetImportFlags() const override { return 1; } + +private: + FactoryId m_Id = "ImporterId"; + + std::weak_ptr<IMemoryManager> m_MemMgr; +}; + class TestBackendA : public IBackendInternal { public: @@ -173,6 +207,42 @@ private: BackendId m_Id = "BackendC"; }; +class TestBackendD : public IBackendInternal +{ +public: + TestBackendD() = default; + + const BackendId& GetId() const override { return m_Id; } + + IWorkloadFactoryPtr CreateWorkloadFactory(const IMemoryManagerSharedPtr& memoryManager = nullptr) const override + { + return IWorkloadFactoryPtr{}; + } + + IBackendInternal::ILayerSupportSharedPtr GetLayerSupport() const override + { + return ILayerSupportSharedPtr{}; + } + + std::vector<ITensorHandleFactory::FactoryId> GetHandleFactoryPreferences() 
const override + { + return std::vector<ITensorHandleFactory::FactoryId>{ + "TestHandleFactoryD1" + }; + } + + void RegisterTensorHandleFactories(TensorHandleFactoryRegistry& registry) override + { + auto mgr = std::make_shared<TestMemMgr>(); + + registry.RegisterMemoryManager(mgr); + registry.RegisterFactory(std::make_unique<TestFactoryImport>(mgr, "TestHandleFactoryD1")); + } + +private: + BackendId m_Id = "BackendD"; +}; + BOOST_AUTO_TEST_SUITE(TensorHandle) @@ -200,16 +270,19 @@ BOOST_AUTO_TEST_CASE(TensorHandleSelectionStrategy) auto backendA = std::make_unique<TestBackendA>(); auto backendB = std::make_unique<TestBackendB>(); auto backendC = std::make_unique<TestBackendC>(); + auto backendD = std::make_unique<TestBackendD>(); TensorHandleFactoryRegistry registry; backendA->RegisterTensorHandleFactories(registry); backendB->RegisterTensorHandleFactories(registry); backendC->RegisterTensorHandleFactories(registry); + backendD->RegisterTensorHandleFactories(registry); BackendsMap backends; backends["BackendA"] = std::move(backendA); backends["BackendB"] = std::move(backendB); backends["BackendC"] = std::move(backendC); + backends["BackendD"] = std::move(backendD); armnn::Graph graph; @@ -226,13 +299,17 @@ BOOST_AUTO_TEST_CASE(TensorHandleSelectionStrategy) armnn::SoftmaxLayer* const softmaxLayer3 = graph.AddLayer<armnn::SoftmaxLayer>(smDesc, "softmax3"); softmaxLayer3->SetBackendId("BackendC"); + armnn::SoftmaxLayer* const softmaxLayer4 = graph.AddLayer<armnn::SoftmaxLayer>(smDesc, "softmax4"); + softmaxLayer4->SetBackendId("BackendD"); + armnn::OutputLayer* const outputLayer = graph.AddLayer<armnn::OutputLayer>(0, "output"); outputLayer->SetBackendId("BackendA"); inputLayer->GetOutputSlot(0).Connect(softmaxLayer1->GetInputSlot(0)); softmaxLayer1->GetOutputSlot(0).Connect(softmaxLayer2->GetInputSlot(0)); softmaxLayer2->GetOutputSlot(0).Connect(softmaxLayer3->GetInputSlot(0)); - softmaxLayer3->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0)); + 
softmaxLayer3->GetOutputSlot(0).Connect(softmaxLayer4->GetInputSlot(0)); + softmaxLayer4->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0)); graph.TopologicalSort(); @@ -246,29 +323,45 @@ BOOST_AUTO_TEST_CASE(TensorHandleSelectionStrategy) OutputSlot& softmaxLayer1Out = softmaxLayer1->GetOutputSlot(0); OutputSlot& softmaxLayer2Out = softmaxLayer2->GetOutputSlot(0); OutputSlot& softmaxLayer3Out = softmaxLayer3->GetOutputSlot(0); + OutputSlot& softmaxLayer4Out = softmaxLayer4->GetOutputSlot(0); // Check that the correct factory was selected BOOST_TEST(inputLayerOut.GetTensorHandleFactoryId() == "TestHandleFactoryA1"); BOOST_TEST(softmaxLayer1Out.GetTensorHandleFactoryId() == "TestHandleFactoryB1"); BOOST_TEST(softmaxLayer2Out.GetTensorHandleFactoryId() == "TestHandleFactoryB1"); BOOST_TEST(softmaxLayer3Out.GetTensorHandleFactoryId() == "TestHandleFactoryC1"); + BOOST_TEST(softmaxLayer4Out.GetTensorHandleFactoryId() == "TestHandleFactoryD1"); // Check that the correct strategy was selected - BOOST_TEST((inputLayerOut.GetMemoryStrategyForConnection(0) == MemoryStrategy::DirectCompatibility)); - BOOST_TEST((softmaxLayer1Out.GetMemoryStrategyForConnection(0) == MemoryStrategy::DirectCompatibility)); - BOOST_TEST((softmaxLayer2Out.GetMemoryStrategyForConnection(0) == MemoryStrategy::CopyToTarget)); - BOOST_TEST((softmaxLayer3Out.GetMemoryStrategyForConnection(0) == MemoryStrategy::DirectCompatibility)); - - graph.AddCopyLayers(backends, registry); - int count= 0; - graph.ForEachLayer([&count](Layer* layer) + BOOST_TEST((inputLayerOut.GetEdgeStrategyForConnection(0) == EdgeStrategy::DirectCompatibility)); + BOOST_TEST((softmaxLayer1Out.GetEdgeStrategyForConnection(0) == EdgeStrategy::DirectCompatibility)); + BOOST_TEST((softmaxLayer2Out.GetEdgeStrategyForConnection(0) == EdgeStrategy::CopyToTarget)); + BOOST_TEST((softmaxLayer3Out.GetEdgeStrategyForConnection(0) == EdgeStrategy::ExportToTarget)); + BOOST_TEST((softmaxLayer4Out.GetEdgeStrategyForConnection(0) == 
EdgeStrategy::DirectCompatibility)); + + graph.AddCompatibilityLayers(backends, registry); + + // Test for copy layers + int copyCount= 0; + graph.ForEachLayer([&copyCount](Layer* layer) { if (layer->GetType() == LayerType::MemCopy) { - count++; + copyCount++; + } + }); + BOOST_TEST(copyCount == 1); + + // Test for import layers + int importCount= 0; + graph.ForEachLayer([&importCount](Layer *layer) + { + if (layer->GetType() == LayerType::MemImport) + { + importCount++; } }); - BOOST_TEST(count == 1); + BOOST_TEST(importCount == 1); } BOOST_AUTO_TEST_SUITE_END() |