From f674aa0fd2809126debdaaeb8067067790d86907 Mon Sep 17 00:00:00 2001
From: Derek Lamberti
Date: Thu, 1 Aug 2019 15:56:25 +0100
Subject: IVGCVSW-3277 Mem export/import support for Tensors

* Rename MemoryStrategy to EdgeStrategy
* Add MemImportLayer
* Import memory rather than copy when possible

Change-Id: I1d3a9414f2cbe517dc2aae9bbd4fdd92712b38ef
Signed-off-by: Derek Lamberti
---
 Android.mk | 1 +
 CMakeLists.txt | 3 +
 include/armnn/ILayerSupport.hpp | 4 +
 include/armnn/MemorySources.hpp | 51 ++++++
 src/armnn/Graph.cpp | 85 ++++++----
 src/armnn/Graph.hpp | 4 +-
 src/armnn/InternalTypes.hpp | 1 +
 src/armnn/Layer.cpp | 18 +-
 src/armnn/Layer.hpp | 8 +-
 src/armnn/LayerSupport.cpp | 9 +
 src/armnn/LayersFwd.hpp | 2 +
 src/armnn/LoadedNetwork.cpp | 54 +++++-
 src/armnn/Network.cpp | 51 +++---
 src/armnn/layers/MemImportLayer.cpp | 54 ++++++
 src/armnn/layers/MemImportLayer.hpp | 42 +++++
 src/armnn/test/GraphTests.cpp | 34 ++--
 src/armnn/test/TensorHandleStrategyTest.cpp | 119 +++++++++++--
 src/backends/backendsCommon/CMakeLists.txt | 5 +
 src/backends/backendsCommon/ITensorHandle.hpp | 11 ++
 .../backendsCommon/ITensorHandleFactory.hpp | 19 +--
 src/backends/backendsCommon/LayerSupportBase.cpp | 15 +-
 src/backends/backendsCommon/LayerSupportBase.hpp | 4 +
 src/backends/backendsCommon/LayerSupportRules.hpp | 185 +++++++++++++++++++++
 src/backends/backendsCommon/MemImportWorkload.cpp | 34 ++++
 src/backends/backendsCommon/MemImportWorkload.hpp | 27 +++
 src/backends/backendsCommon/MemSyncWorkload.cpp | 33 ++++
 src/backends/backendsCommon/MemSyncWorkload.hpp | 26 +++
 src/backends/backendsCommon/WorkloadData.cpp | 103 ++++++++++++
 src/backends/backendsCommon/WorkloadData.hpp | 10 ++
 src/backends/backendsCommon/WorkloadFactory.cpp | 16 ++
 src/backends/backendsCommon/WorkloadFactory.hpp | 3 +
 src/backends/backendsCommon/common.mk | 2 +
 .../test/IsLayerSupportedTestImpl.hpp | 2 +
 src/backends/cl/ClLayerSupport.cpp | 9 -
 src/backends/cl/ClLayerSupport.hpp | 4 -
 src/backends/cl/ClWorkloadFactory.cpp | 12 ++
 src/backends/cl/ClWorkloadFactory.hpp | 3 +
 src/backends/neon/NeonLayerSupport.cpp | 9 -
 src/backends/neon/NeonLayerSupport.hpp | 4 -
 src/backends/neon/NeonWorkloadFactory.cpp | 12 ++
 src/backends/neon/NeonWorkloadFactory.hpp | 3 +
 src/backends/reference/RefLayerSupport.cpp | 150 +----------------
 src/backends/reference/RefWorkloadFactory.cpp | 11 ++
 src/backends/reference/RefWorkloadFactory.hpp | 3 +
 44 files changed, 964 insertions(+), 291 deletions(-)
 create mode 100644 include/armnn/MemorySources.hpp
 create mode 100644 src/armnn/layers/MemImportLayer.cpp
 create mode 100644 src/armnn/layers/MemImportLayer.hpp
 create mode 100644 src/backends/backendsCommon/LayerSupportRules.hpp
 create mode 100644 src/backends/backendsCommon/MemImportWorkload.cpp
 create mode 100644 src/backends/backendsCommon/MemImportWorkload.hpp
 create mode 100644 src/backends/backendsCommon/MemSyncWorkload.cpp
 create mode 100644 src/backends/backendsCommon/MemSyncWorkload.hpp

diff --git a/Android.mk b/Android.mk
index 8a24e7b9eb..bb7ed86283 100644
--- a/Android.mk
+++ b/Android.mk
@@ -114,6 +114,7 @@ LOCAL_SRC_FILES := \
         src/armnn/layers/MaximumLayer.cpp \
         src/armnn/layers/MeanLayer.cpp \
         src/armnn/layers/MemCopyLayer.cpp \
+        src/armnn/layers/MemImportLayer.cpp \
         src/armnn/layers/MergeLayer.cpp \
         src/armnn/layers/MinimumLayer.cpp \
         src/armnn/layers/MultiplicationLayer.cpp \
diff --git a/CMakeLists.txt b/CMakeLists.txt
index c823b817cf..eaaf2d6813 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -212,6 +212,7 @@ list(APPEND armnn_sources
include/armnn/LayerSupport.hpp include/armnn/LayerVisitorBase.hpp include/armnn/LstmParams.hpp + include/armnn/MemorySources.hpp include/armnn/NetworkFwd.hpp include/armnn/Optional.hpp include/armnn/QuantizedLstmParams.hpp @@ -275,6 +276,8 @@ list(APPEND armnn_sources src/armnn/layers/MeanLayer.cpp src/armnn/layers/MemCopyLayer.hpp src/armnn/layers/MemCopyLayer.cpp + src/armnn/layers/MemImportLayer.hpp + src/armnn/layers/MemImportLayer.cpp src/armnn/layers/MergeLayer.hpp src/armnn/layers/MergeLayer.cpp src/armnn/layers/MinimumLayer.cpp diff --git a/include/armnn/ILayerSupport.hpp b/include/armnn/ILayerSupport.hpp index 45360984ff..33f86dea59 100644 --- a/include/armnn/ILayerSupport.hpp +++ b/include/armnn/ILayerSupport.hpp @@ -172,6 +172,10 @@ public: const TensorInfo& output, Optional reasonIfUnsupported = EmptyOptional()) const = 0; + virtual bool IsMemImportSupported(const TensorInfo& input, + const TensorInfo& output, + Optional reasonIfUnsupported = EmptyOptional()) const = 0; + virtual bool IsMergeSupported(const TensorInfo& input0, const TensorInfo& input1, const TensorInfo& output, diff --git a/include/armnn/MemorySources.hpp b/include/armnn/MemorySources.hpp new file mode 100644 index 0000000000..e138f56fd4 --- /dev/null +++ b/include/armnn/MemorySources.hpp @@ -0,0 +1,51 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +namespace armnn +{ + +enum class MemorySource +{ + Malloc = 1, + DmaBuf = 2, + DmaBufProtected = 4 +}; + +using MemorySourceFlags = unsigned int; + +template +struct IsMemorySource +{ + static const bool value = false; +}; + +template<> +struct IsMemorySource +{ + static const bool value = true; +}; + +template ::value>::type* = nullptr> +MemorySourceFlags Combine(Arg sourceA, Arg sourceB) +{ + return static_cast(sourceA) | static_cast(sourceB); +} + +template ::value>::type* = nullptr> +MemorySourceFlags Combine(Arg source, Args... rest) +{ + return static_cast(source) | Combine(rest...); +} + +inline bool CheckFlag(MemorySourceFlags flags, MemorySource source) +{ + return (static_cast(source) & flags) != 0; +} + +} //namespace armnn \ No newline at end of file diff --git a/src/armnn/Graph.cpp b/src/armnn/Graph.cpp index 9e00f5ec01..6212c49eba 100644 --- a/src/armnn/Graph.cpp +++ b/src/armnn/Graph.cpp @@ -255,26 +255,31 @@ const Graph& Graph::TopologicalSort() const return *this; } -void Graph::AddCopyLayers(std::map>& backends, - TensorHandleFactoryRegistry& registry) +void Graph::AddCompatibilityLayers(std::map>& backends, + TensorHandleFactoryRegistry& registry) { - // Returns true if the given layer could potentially need an intermediate copy layer (depending on its - // connections to other layers). At the time of writing, copy layers will be inserted in the following situations: - // CPU -> CL (and viceversa) - // CPU -> Neon (and viceversa) - auto MayNeedCopyLayer = [](const Layer& layer) + // Returns true if the given layer could potentially need an intermediate copy/import layer (depending on its + // connections to other layers). + auto MayNeedCompatibilityLayer = [](const Layer& layer) { // All layers should have been associated with a valid compute device at this point. BOOST_ASSERT(layer.GetBackendId() != Compute::Undefined); - // Does not need another copy layer if a copy layer is already present. - return layer.GetType() != LayerType::MemCopy; + // Does not need another compatibility layer if a copy or import layer is already present. 
+ return layer.GetType() != LayerType::MemCopy && + layer.GetType() != LayerType::MemImport; }; - ForEachLayer([this, &backends, ®istry, MayNeedCopyLayer](Layer* srcLayer) + auto IsCompatibilityStrategy = [](EdgeStrategy strategy) + { + return strategy == EdgeStrategy::CopyToTarget || + strategy == EdgeStrategy::ExportToTarget; + }; + + ForEachLayer([this, &backends, ®istry, MayNeedCompatibilityLayer, IsCompatibilityStrategy](Layer* srcLayer) { BOOST_ASSERT(srcLayer); - if (!MayNeedCopyLayer(*srcLayer)) + if (!MayNeedCompatibilityLayer(*srcLayer)) { // The current layer does not need copy layers, move to the next one return; @@ -285,33 +290,43 @@ void Graph::AddCopyLayers(std::map> { OutputSlot& srcOutputSlot = srcLayer->GetOutputSlot(srcOutputIndex); const std::vector srcConnections = srcOutputSlot.GetConnections(); - const std::vector srcMemoryStrategies = srcOutputSlot.GetMemoryStrategies(); + const std::vector srcEdgeStrategies = srcOutputSlot.GetEdgeStrategies(); for (unsigned int srcConnectionIndex = 0; srcConnectionIndex < srcConnections.size(); srcConnectionIndex++) { InputSlot* dstInputSlot = srcConnections[srcConnectionIndex]; BOOST_ASSERT(dstInputSlot); - MemoryStrategy strategy = srcMemoryStrategies[srcConnectionIndex]; - BOOST_ASSERT_MSG(strategy != MemoryStrategy::Undefined, + EdgeStrategy strategy = srcEdgeStrategies[srcConnectionIndex]; + BOOST_ASSERT_MSG(strategy != EdgeStrategy::Undefined, "Undefined memory strategy found while adding copy layers for compatibility"); const Layer& dstLayer = dstInputSlot->GetOwningLayer(); - if (MayNeedCopyLayer(dstLayer) && - strategy == MemoryStrategy::CopyToTarget) + if (MayNeedCompatibilityLayer(dstLayer) && + IsCompatibilityStrategy(strategy)) { // A copy layer is needed in between the source and destination layers. // Record the operation rather than attempting to modify the graph as we go. 
// (invalidating iterators) - const std::string copyLayerName = boost::str(boost::format("[ %1% (%2%) -> %3% (%4%) ]") + const std::string compLayerName = boost::str(boost::format("[ %1% (%2%) -> %3% (%4%) ]") % srcLayer->GetName() % srcOutputIndex % dstLayer.GetName() % dstInputSlot->GetSlotIndex()); - MemCopyLayer* const copyLayer = InsertNewLayer(*dstInputSlot, copyLayerName.c_str()); - copyLayer->SetBackendId(dstLayer.GetBackendId()); + Layer* compLayer = nullptr; + if (strategy == EdgeStrategy::CopyToTarget) + { + compLayer = InsertNewLayer(*dstInputSlot, compLayerName.c_str()); + } + else + { + BOOST_ASSERT_MSG(strategy == EdgeStrategy::ExportToTarget, "Invalid edge strategy found."); + compLayer = InsertNewLayer(*dstInputSlot, compLayerName.c_str()); + } + + compLayer->SetBackendId(dstLayer.GetBackendId()); - OutputSlot& copyOutputSlot = copyLayer->GetOutputSlot(0); + OutputSlot& compOutputSlot = compLayer->GetOutputSlot(0); auto backendIt = backends.find(dstLayer.GetBackendId()); if (backendIt != backends.end() && backendIt->second && @@ -325,34 +340,40 @@ void Graph::AddCopyLayers(std::map> for (auto preference : tensorHandleFactoryIds) { auto factory = registry.GetFactory(preference); - if (factory && factory->SupportsMapUnmap()) + if (factory) { - copyOutputSlot.SetTensorHandleFactory(preference); - found = true; - break; + auto srcPref = srcOutputSlot.GetTensorHandleFactoryId(); + auto srcFactory = registry.GetFactory(srcPref); + bool canExportImport = (factory->GetImportFlags() & srcFactory->GetExportFlags()) != 0; + if (factory->SupportsMapUnmap() || canExportImport) + { + compOutputSlot.SetTensorHandleFactory(preference); + found = true; + break; + } } } - BOOST_ASSERT_MSG(found, "Could not find a mappable TensorHandle for copy layer"); + BOOST_ASSERT_MSG(found, "Could not find a valid TensorHandle for compatibilty layer"); } else { - copyOutputSlot.SetTensorHandleFactory(ITensorHandleFactory::LegacyFactoryId); + compOutputSlot.SetTensorHandleFactory(ITensorHandleFactory::LegacyFactoryId); } - // The output strategy of a copy layer is always DirectCompatibility. - copyOutputSlot.SetMemoryStrategy(0, MemoryStrategy::DirectCompatibility); + // The output strategy of a compatibility layer is always DirectCompatibility. + compOutputSlot.SetEdgeStrategy(0, EdgeStrategy::DirectCompatibility); // Recalculate the connection index on the previous layer as we have just inserted into it. const std::vector& newSourceConnections = srcOutputSlot.GetConnections(); long newSrcConnectionIndex = std::distance(newSourceConnections.begin(), std::find(newSourceConnections.begin(), newSourceConnections.end(), - ©Layer->GetInputSlot(0))); + &compLayer->GetInputSlot(0))); - // The input strategy of a copy layer is always DirectCompatibilty. - srcOutputSlot.SetMemoryStrategy(boost::numeric_cast(newSrcConnectionIndex), - MemoryStrategy::DirectCompatibility); + // The input strategy of a compatibility layer is always DirectCompatibilty. + srcOutputSlot.SetEdgeStrategy(boost::numeric_cast(newSrcConnectionIndex), + EdgeStrategy::DirectCompatibility); } } } diff --git a/src/armnn/Graph.hpp b/src/armnn/Graph.hpp index f8113375c9..c65f12bbc3 100644 --- a/src/armnn/Graph.hpp +++ b/src/armnn/Graph.hpp @@ -191,8 +191,8 @@ public: /// Modifies the graph in-place, removing edges connecting layers using different compute devices, /// and relinking them via an intermediary copy layers. 
- void AddCopyLayers(std::map>& backends, - TensorHandleFactoryRegistry& registry); + void AddCompatibilityLayers(std::map>& backends, + TensorHandleFactoryRegistry& registry); /// Substitutes the given sub-graph with either a new layer or a new sub-graph. /// In either case, the given layer or all the layers in the given sub-graph must belong to this graph. diff --git a/src/armnn/InternalTypes.hpp b/src/armnn/InternalTypes.hpp index b0fea7c8c2..7a0f9a1cb0 100644 --- a/src/armnn/InternalTypes.hpp +++ b/src/armnn/InternalTypes.hpp @@ -40,6 +40,7 @@ enum class LayerType Maximum, Mean, MemCopy, + MemImport, Merge, Minimum, Multiplication, diff --git a/src/armnn/Layer.cpp b/src/armnn/Layer.cpp index a287220702..528020bab5 100644 --- a/src/armnn/Layer.cpp +++ b/src/armnn/Layer.cpp @@ -31,7 +31,7 @@ void InputSlot::Insert(Layer& layer) // Connects inserted layer to parent. BOOST_ASSERT(layer.GetNumInputSlots() == 1); int idx = prevSlot->Connect(layer.GetInputSlot(0)); - prevSlot->SetMemoryStrategy(boost::numeric_cast(idx), MemoryStrategy::Undefined); + prevSlot->SetEdgeStrategy(boost::numeric_cast(idx), EdgeStrategy::Undefined); // Sets tensor info for inserted layer. const TensorInfo& tensorInfo = prevSlot->GetTensorInfo(); @@ -40,7 +40,7 @@ void InputSlot::Insert(Layer& layer) // Connects inserted layer to this. layer.GetOutputSlot(0).Connect(*this); - layer.GetOutputSlot(0).SetMemoryStrategy(0, MemoryStrategy::Undefined); + layer.GetOutputSlot(0).SetEdgeStrategy(0, EdgeStrategy::Undefined); } const InputSlot* OutputSlot::GetConnection(unsigned int index) const @@ -80,7 +80,7 @@ int OutputSlot::Connect(InputSlot& destination) { destination.SetConnection(this); m_Connections.push_back(&destination); - m_MemoryStrategies.push_back(MemoryStrategy::Undefined); + m_EdgeStrategies.push_back(EdgeStrategy::Undefined); return boost::numeric_cast(m_Connections.size() - 1); } @@ -97,7 +97,7 @@ void OutputSlot::Disconnect(InputSlot& slot) auto idx = std::distance(m_Connections.begin(), it); m_Connections.erase(std::remove(m_Connections.begin(), m_Connections.end(), &slot), m_Connections.end()); - m_MemoryStrategies.erase(m_MemoryStrategies.begin() + idx); + m_EdgeStrategies.erase(m_EdgeStrategies.begin() + idx); } void OutputSlot::DisconnectAll() @@ -113,7 +113,7 @@ void OutputSlot::MoveAllConnections(OutputSlot& destination) { while (GetNumConnections() > 0) { - BOOST_ASSERT_MSG(m_MemoryStrategies[0] == MemoryStrategy::Undefined, + BOOST_ASSERT_MSG(m_EdgeStrategies[0] == EdgeStrategy::Undefined, "Cannot move connections once memory strategies have be established."); InputSlot& connection = *GetConnection(0); @@ -174,14 +174,14 @@ ITensorHandleFactory::FactoryId OutputSlot::GetTensorHandleFactoryId() const return m_TensorHandleFactoryId; } -void OutputSlot::SetMemoryStrategy(unsigned int connectionIndex, MemoryStrategy strategy) +void OutputSlot::SetEdgeStrategy(unsigned int connectionIndex, EdgeStrategy strategy) { - m_MemoryStrategies[connectionIndex] = strategy; + m_EdgeStrategies[connectionIndex] = strategy; } -MemoryStrategy OutputSlot::GetMemoryStrategyForConnection(unsigned int connectionIdx) const +EdgeStrategy OutputSlot::GetEdgeStrategyForConnection(unsigned int connectionIdx) const { - return m_MemoryStrategies[connectionIdx]; + return m_EdgeStrategies[connectionIdx]; } namespace { diff --git a/src/armnn/Layer.hpp b/src/armnn/Layer.hpp index b90d040475..5944ea83ed 100644 --- a/src/armnn/Layer.hpp +++ b/src/armnn/Layer.hpp @@ -123,7 +123,7 @@ public: void Disconnect(InputSlot& slot); const 
std::vector& GetConnections() const { return m_Connections; } - const std::vector& GetMemoryStrategies() const { return m_MemoryStrategies; } + const std::vector& GetEdgeStrategies() const { return m_EdgeStrategies; } bool ValidateTensorShape(const TensorShape& shape) const; @@ -160,8 +160,8 @@ public: void SetTensorHandleFactory(const ITensorHandleFactory::FactoryId& id); ITensorHandleFactory::FactoryId GetTensorHandleFactoryId() const; - void SetMemoryStrategy(unsigned int connectionIndex, MemoryStrategy strategy); - MemoryStrategy GetMemoryStrategyForConnection(unsigned int connectionIdx) const; + void SetEdgeStrategy(unsigned int connectionIndex, EdgeStrategy strategy); + EdgeStrategy GetEdgeStrategyForConnection(unsigned int connectionIdx) const; private: void ValidateConnectionIndex(unsigned int index) const; @@ -171,7 +171,7 @@ private: std::vector m_Connections; ITensorHandleFactory::FactoryId m_TensorHandleFactoryId; - std::vector m_MemoryStrategies; + std::vector m_EdgeStrategies; }; // InputSlot inlines that need OutputSlot declaration. diff --git a/src/armnn/LayerSupport.cpp b/src/armnn/LayerSupport.cpp index 047c80a8c4..5a756b9544 100644 --- a/src/armnn/LayerSupport.cpp +++ b/src/armnn/LayerSupport.cpp @@ -371,6 +371,15 @@ bool IsMemCopySupported(const BackendId &backend, FORWARD_LAYER_SUPPORT_FUNC(backend, IsMemCopySupported, input, output); } +bool IsMemImportSupported(const BackendId &backend, + const TensorInfo &input, + const TensorInfo &output, + char *reasonIfUnsupported, + size_t reasonIfUnsupportedMaxLength) +{ + FORWARD_LAYER_SUPPORT_FUNC(backend, IsMemImportSupported, input, output); +} + bool IsMergeSupported(const BackendId& backend, const TensorInfo& input0, const TensorInfo& input1, diff --git a/src/armnn/LayersFwd.hpp b/src/armnn/LayersFwd.hpp index 2c8d5d2e07..cadcd49acb 100644 --- a/src/armnn/LayersFwd.hpp +++ b/src/armnn/LayersFwd.hpp @@ -32,6 +32,7 @@ #include "layers/MaximumLayer.hpp" #include "layers/MeanLayer.hpp" #include "layers/MemCopyLayer.hpp" +#include "layers/MemImportLayer.hpp" #include "layers/MergeLayer.hpp" #include "layers/MinimumLayer.hpp" #include "layers/MultiplicationLayer.hpp" @@ -110,6 +111,7 @@ DECLARE_LAYER(Lstm) DECLARE_LAYER(Maximum) DECLARE_LAYER(Mean) DECLARE_LAYER(MemCopy) +DECLARE_LAYER(MemImport) DECLARE_LAYER(Merge) DECLARE_LAYER(Minimum) DECLARE_LAYER(Multiplication) diff --git a/src/armnn/LoadedNetwork.cpp b/src/armnn/LoadedNetwork.cpp index 7873e48780..a81528aa65 100644 --- a/src/armnn/LoadedNetwork.cpp +++ b/src/armnn/LoadedNetwork.cpp @@ -14,6 +14,8 @@ #include #include #include +#include +#include #include #include @@ -389,8 +391,22 @@ void LoadedNetwork::EnqueueInput(const BindableLayer& layer, ITensorHandle* tens inputQueueDescriptor.m_Outputs.push_back(outputTensorHandle); info.m_OutputTensorInfos.push_back(outputTensorInfo); - const IWorkloadFactory& workloadFactory = GetWorkloadFactory(layer); - auto inputWorkload = workloadFactory.CreateInput(inputQueueDescriptor, info); + MemorySourceFlags importFlags = outputTensorHandle->GetImportFlags(); + if (CheckFlag(importFlags, MemorySource::Malloc)) // Try import the input tensor + { + // This assumes a CPU Tensor handle + void* mem = tensorHandle->Map(false); + if (outputTensorHandle->Import(mem, MemorySource::Malloc)) + { + tensorHandle->Unmap(); + return; // No need for a workload since the import has been done. 
+ } + tensorHandle->Unmap(); + } + + // Create a mem copy workload for input since we could not import + auto inputWorkload = std::make_unique(inputQueueDescriptor, info); + BOOST_ASSERT_MSG(inputWorkload, "No input workload created"); m_InputQueue.push_back(move(inputWorkload)); } @@ -422,11 +438,41 @@ void LoadedNetwork::EnqueueOutput(const BindableLayer& layer, ITensorHandle* ten ITensorHandle* inputTensorHandle = outputHandler.GetData(); BOOST_ASSERT_MSG(inputTensorHandle != nullptr, "Data should have been allocated."); + // Try import the output tensor. + // Note: We can only import the output pointer if all of the following hold true: + // a) The imported pointer is aligned sufficiently + // b) The tensor has zero padding + // c) There is only one connection to the OutputSlot and it is to an OutputLayer. + // d) The output pointer is allocated via malloc. (Other types will be supported in a later release) + if (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetNumConnections() == 1) + { + MemorySourceFlags importFlags = inputTensorHandle->GetImportFlags(); + if (CheckFlag(importFlags, MemorySource::Malloc)) + { + void* mem = tensorHandle->Map(false); + bool importOk = inputTensorHandle->Import(mem, MemorySource::Malloc); + tensorHandle->Unmap(); + + if (importOk) + { + // Insert synchronization workload + MemSyncQueueDescriptor syncDesc; + syncDesc.m_Inputs.push_back(inputTensorHandle); + info.m_InputTensorInfos.push_back(inputTensorInfo); + auto syncWorkload = std::make_unique(syncDesc, info); + BOOST_ASSERT_MSG(syncWorkload, "No sync workload created"); + m_OutputQueue.push_back(move(syncWorkload)); + + return; //No need to add the output workload below + } + } + } + + // If we got here then we couldn't import the memory, so add an output workload which performs a memcopy. 
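[Editor's illustrative sketch, not part of the patch.] EnqueueInput above and EnqueueOutput here follow the same import-first, copy-as-fallback pattern using the ITensorHandle and MemorySource APIs introduced in this change. A condensed sketch (the helper name is hypothetical):

// Import the caller's malloc-backed buffer into the backend tensor handle if the
// backend advertises Malloc import support; otherwise report that a copy is still needed.
bool TryImportElseCopy(ITensorHandle* srcHandle, ITensorHandle* dstHandle)
{
    MemorySourceFlags importFlags = dstHandle->GetImportFlags();
    if (CheckFlag(importFlags, MemorySource::Malloc))
    {
        void* mem = srcHandle->Map(false);                          // borrow the caller's buffer
        bool imported = dstHandle->Import(mem, MemorySource::Malloc);
        srcHandle->Unmap();
        if (imported)
        {
            return true;                                            // zero-copy: no copy workload needed
        }
    }
    return false;                                                   // caller queues a MemCopy workload instead
}

For outputs, the patch additionally requires a single connection to the OutputSlot before attempting the import, and queues a SyncMemGenericWorkload so the backend flushes its result into the imported memory.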
outputQueueDescriptor.m_Inputs.push_back(inputTensorHandle); info.m_InputTensorInfos.push_back(inputTensorInfo); - const IWorkloadFactory& workloadFactory = GetWorkloadFactory(layer); - auto outputWorkload = workloadFactory.CreateOutput(outputQueueDescriptor, info); + auto outputWorkload = std::make_unique(outputQueueDescriptor, info); BOOST_ASSERT_MSG(outputWorkload, "No output workload created"); m_OutputQueue.push_back(move(outputWorkload)); } diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp index 2195c71735..b30cd9f3c2 100644 --- a/src/armnn/Network.cpp +++ b/src/armnn/Network.cpp @@ -441,7 +441,7 @@ bool RequiresCopy(ITensorHandleFactory::FactoryId src, ITensorHandleFactory* srcFactory = registry.GetFactory(src); ITensorHandleFactory* dstFactory = registry.GetFactory(dst); - if (srcFactory->SupportsExport() && dstFactory->SupportsImport()) + if ((srcFactory->GetExportFlags() & dstFactory->GetImportFlags()) != 0) { return false; } @@ -493,11 +493,14 @@ ITensorHandleFactory::FactoryId CalculateSlotOptionForInput(BackendsMap& backend auto dstPrefs = toBackend->second.get()->GetHandleFactoryPreferences(); for (auto&& dst : dstPrefs) { - // Input layers use the mem copy workload, so the selected factory must support map/unmap API + // Input layers use the mem copy workload or import, so the selected factory must + // support either the map/unmap API or Import API ITensorHandleFactory* factory = registry.GetFactory(dst); - if (!factory->SupportsMapUnmap()) + if (!factory->SupportsMapUnmap() && + !CheckFlag(factory->GetImportFlags(), MemorySource::Malloc)) // Just support cpu mem imports for now { - // The current tensor handle factory does not support the map/unmap strategy, move to the next one + // The current tensor handle factory does not support the map/unmap or import + // strategy, move to the next one continue; } @@ -648,11 +651,11 @@ ITensorHandleFactory::FactoryId CalculateSlotOption(BackendsMap& backends, return ITensorHandleFactory::LegacyFactoryId; } -MemoryStrategy CalculateStrategy(BackendsMap& backends, - ITensorHandleFactory::FactoryId srcFactoryId, - const Layer& layer, - const Layer& connectedLayer, - TensorHandleFactoryRegistry& registry) +EdgeStrategy CalculateEdgeStrategy(BackendsMap& backends, + ITensorHandleFactory::FactoryId srcFactoryId, + const Layer& layer, + const Layer& connectedLayer, + TensorHandleFactoryRegistry& registry) { auto toBackend = backends.find(connectedLayer.GetBackendId()); BOOST_ASSERT_MSG(toBackend != backends.end(), "Backend id not found for the connected layer"); @@ -664,19 +667,19 @@ MemoryStrategy CalculateStrategy(BackendsMap& backends, { if (layer.GetBackendId() != connectedLayer.GetBackendId()) { - return MemoryStrategy::CopyToTarget; + return EdgeStrategy::CopyToTarget; } else { - return MemoryStrategy::DirectCompatibility; + return EdgeStrategy::DirectCompatibility; } } // TensorHandleFactory API present, so perform more sophisticated strategies. 
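[Editor's illustrative sketch, not part of the patch.] RequiresCopy above and CalculateEdgeStrategy below both reduce to the same flag arithmetic on the producing and consuming tensor handle factories. A simplified sketch of the decision order (the helper and its boolean parameter are hypothetical; the real code also walks the destination backend's factory preferences):

// Choose how a graph edge crosses from the producing factory to the consuming one.
EdgeStrategy ChooseEdgeStrategySketch(const ITensorHandleFactory& srcFactory,
                                      const ITensorHandleFactory& dstFactory,
                                      bool dstPrefersSrcFactory)
{
    if (dstPrefersSrcFactory)
    {
        return EdgeStrategy::DirectCompatibility;  // consumer can use the producer's handles directly
    }
    if ((srcFactory.GetExportFlags() & dstFactory.GetImportFlags()) != 0)
    {
        return EdgeStrategy::ExportToTarget;       // zero-copy: exported memory can be imported
    }
    if (dstFactory.SupportsMapUnmap())
    {
        return EdgeStrategy::CopyToTarget;         // insert a MemCopy layer between the backends
    }
    return EdgeStrategy::Undefined;                // the optimizer reports this as an error
}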
- // Dst Output layers don't require copy because they use map/unmap + // Dst Output layers don't require copy because they use import or map/unmap if (connectedLayer.GetType() == LayerType::Output) { - return MemoryStrategy::DirectCompatibility; + return EdgeStrategy::DirectCompatibility; } // Search for direct match in prefs @@ -684,20 +687,20 @@ MemoryStrategy CalculateStrategy(BackendsMap& backends, { if (pref == srcFactoryId) { - return MemoryStrategy::DirectCompatibility; + return EdgeStrategy::DirectCompatibility; } } // Search for export/import options ITensorHandleFactory* srcFactory = registry.GetFactory(srcFactoryId); - if (srcFactory->SupportsExport()) + if (srcFactory->GetExportFlags() != 0) { for (auto&& pref : dstPrefs) { ITensorHandleFactory* dstFactory = registry.GetFactory(pref); - if (dstFactory->SupportsImport()) + if ((dstFactory->GetImportFlags() & srcFactory->GetExportFlags()) != 0) { - return MemoryStrategy::ExportToTarget; + return EdgeStrategy::ExportToTarget; } } } @@ -710,12 +713,12 @@ MemoryStrategy CalculateStrategy(BackendsMap& backends, ITensorHandleFactory* dstFactory = registry.GetFactory(pref); if (dstFactory->SupportsMapUnmap()) { - return MemoryStrategy::CopyToTarget; + return EdgeStrategy::CopyToTarget; } } } - return MemoryStrategy::Undefined; + return EdgeStrategy::Undefined; } // Select the TensorHandleFactories and the corresponding memory strategy @@ -756,15 +759,15 @@ OptimizationResult SelectTensorHandleStrategy(Graph& optGraph, } outputSlot.SetTensorHandleFactory(slotOption); - // Now determine the "best" memory strategy for each connection given the slotOption. + // Now determine the "best" edge strategy for each connection given the slotOption. unsigned int connectionIdx = 0; for (auto&& connection : outputSlot.GetConnections()) { const Layer& connectedLayer = connection->GetOwningLayer(); - MemoryStrategy strategy = CalculateStrategy(backends, slotOption, *layer, connectedLayer, registry); + EdgeStrategy strategy = CalculateEdgeStrategy(backends, slotOption, *layer, connectedLayer, registry); - if (strategy == MemoryStrategy::Undefined) + if (strategy == EdgeStrategy::Undefined) { result.m_Error = true; if (errMessages) @@ -775,7 +778,7 @@ OptimizationResult SelectTensorHandleStrategy(Graph& optGraph, return; } - outputSlot.SetMemoryStrategy(connectionIdx, strategy); + outputSlot.SetEdgeStrategy(connectionIdx, strategy); connectionIdx++; } @@ -887,7 +890,7 @@ IOptimizedNetworkPtr Optimize(const INetwork& inNetwork, } // Based on the tensor handle strategy determined above, insert copy layers where required. - optGraph.AddCopyLayers(backends, tensorHandleFactoryRegistry); + optGraph.AddCompatibilityLayers(backends, tensorHandleFactoryRegistry); // Convert constants Optimizer::Pass(optGraph, MakeOptimizations(ConvertConstantsFloatToHalf())); diff --git a/src/armnn/layers/MemImportLayer.cpp b/src/armnn/layers/MemImportLayer.cpp new file mode 100644 index 0000000000..7a922f5a7c --- /dev/null +++ b/src/armnn/layers/MemImportLayer.cpp @@ -0,0 +1,54 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// +#include "MemImportLayer.hpp" + +#include "LayerCloneBase.hpp" + +#include +#include +#include +#include + +namespace armnn +{ + +MemImportLayer::MemImportLayer(const char* name) + : Layer(1, 1, LayerType::MemImport, name) +{ +} + +MemImportLayer* MemImportLayer::Clone(Graph& graph) const +{ + return CloneBase(graph, GetName()); +} + +std::unique_ptr MemImportLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const +{ + MemImportQueueDescriptor descriptor; + + //This is different from other workloads. Does not get created by the workload factory. + return std::make_unique(descriptor, PrepInfoAndDesc(descriptor, graph)); +} + +void MemImportLayer::ValidateTensorShapesFromInputs() +{ + VerifyLayerConnections(1, CHECK_LOCATION()); + + auto inferredShapes = InferOutputShapes({ GetInputSlot(0).GetConnection()->GetTensorInfo().GetShape() }); + + BOOST_ASSERT(inferredShapes.size() == 1); + + ConditionalThrowIfNotEqual( + "MemImportLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.", + GetOutputSlot(0).GetTensorInfo().GetShape(), + inferredShapes[0]); +} + +void MemImportLayer::Accept(ILayerVisitor& visitor) const +{ + throw armnn::Exception("MemImportLayer should not appear in an input graph"); +} + +} // namespace armnn diff --git a/src/armnn/layers/MemImportLayer.hpp b/src/armnn/layers/MemImportLayer.hpp new file mode 100644 index 0000000000..2d02c1fb41 --- /dev/null +++ b/src/armnn/layers/MemImportLayer.hpp @@ -0,0 +1,42 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#include + +namespace armnn +{ + +/// This layer represents a memory import operation. +class MemImportLayer : public Layer +{ +public: + /// Makes a workload for the MemImport type. + /// @param [in] graph The graph where this layer can be found. + /// @param [in] factory The workload factory which will create the workload. + /// @return A pointer to the created workload, or nullptr if not created. + virtual std::unique_ptrCreateWorkload(const Graph& graph, + const IWorkloadFactory& factory) const override; + + /// Creates a dynamically-allocated copy of this layer. + /// @param [in] graph The graph into which this layer is being cloned. + MemImportLayer* Clone(Graph& graph) const override; + + /// Check if the input tensor shape(s) + /// will lead to a valid configuration of @ref MemImportLayer. + void ValidateTensorShapesFromInputs() override; + + void Accept(ILayerVisitor& visitor) const override; + +protected: + /// Constructor to create a MemImportLayer. + /// @param [in] name Optional name for the layer. 
+ MemImportLayer(const char* name); + + /// Default destructor + ~MemImportLayer() = default; +}; + +} // namespace diff --git a/src/armnn/test/GraphTests.cpp b/src/armnn/test/GraphTests.cpp index 7950ec49f4..7bd6aac98b 100644 --- a/src/armnn/test/GraphTests.cpp +++ b/src/armnn/test/GraphTests.cpp @@ -495,13 +495,13 @@ struct CopyLayersFixture // Set the memory strategies - for this test should be DirectCompatibility for same backends, // and CopyToTarget for different backends - inputLayer->GetOutputSlot(0).SetMemoryStrategy(0, MemoryStrategy::DirectCompatibility); - convLayer1->GetOutputSlot(0).SetMemoryStrategy(0, MemoryStrategy::CopyToTarget); - convLayer1->GetOutputSlot(0).SetMemoryStrategy(1, MemoryStrategy::DirectCompatibility); - convLayer2->GetOutputSlot(0).SetMemoryStrategy(0, MemoryStrategy::CopyToTarget); - concatLayer->GetOutputSlot(0).SetMemoryStrategy(0, MemoryStrategy::DirectCompatibility); - actLayer->GetOutputSlot(0).SetMemoryStrategy(0, MemoryStrategy::DirectCompatibility); - softmaxLayer->GetOutputSlot(0).SetMemoryStrategy(0, MemoryStrategy::CopyToTarget); + inputLayer->GetOutputSlot(0).SetEdgeStrategy(0, EdgeStrategy::DirectCompatibility); + convLayer1->GetOutputSlot(0).SetEdgeStrategy(0, EdgeStrategy::CopyToTarget); + convLayer1->GetOutputSlot(0).SetEdgeStrategy(1, EdgeStrategy::DirectCompatibility); + convLayer2->GetOutputSlot(0).SetEdgeStrategy(0, EdgeStrategy::CopyToTarget); + concatLayer->GetOutputSlot(0).SetEdgeStrategy(0, EdgeStrategy::DirectCompatibility); + actLayer->GetOutputSlot(0).SetEdgeStrategy(0, EdgeStrategy::DirectCompatibility); + softmaxLayer->GetOutputSlot(0).SetEdgeStrategy(0, EdgeStrategy::CopyToTarget); } armnn::TensorInfo m_TensorDesc; @@ -529,7 +529,7 @@ BOOST_FIXTURE_TEST_CASE(AddCopyLayers, CopyLayersFixture) { InitialiseTestGraph(); const armnn::Graph origGraph(m_Graph); - m_Graph.AddCopyLayers(m_Backends, m_FactoryRegistry); + m_Graph.AddCompatibilityLayers(m_Backends, m_FactoryRegistry); TestGraphAfterAddingCopyLayers(m_Graph, origGraph); } @@ -537,13 +537,13 @@ BOOST_FIXTURE_TEST_CASE(AddCopyLayers, CopyLayersFixture) BOOST_FIXTURE_TEST_CASE(AddCopyLayersSeveralTimes, CopyLayersFixture) { InitialiseTestGraph(); - m_Graph.AddCopyLayers(m_Backends, m_FactoryRegistry); + m_Graph.AddCompatibilityLayers(m_Backends, m_FactoryRegistry); - // Calling AddCopyLayers() several times should not change the connections. + // Calling AddCompatibilityLayers() several times should not change the connections. 
const std::vector edges = GetEdgeList(m_Graph); for (int i = 0; i < 4; ++i) { - m_Graph.AddCopyLayers(m_Backends, m_FactoryRegistry); + m_Graph.AddCompatibilityLayers(m_Backends, m_FactoryRegistry); const std::vector otherEdges = GetEdgeList(m_Graph); BOOST_TEST((edges == otherEdges)); } @@ -571,18 +571,18 @@ BOOST_FIXTURE_TEST_CASE(CopyLayersAddedBetweenSameLayersHaveDifferentNames, Copy splitterLayer->GetOutputSlot(1).Connect(additionLayer->GetInputSlot(1)); additionLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0)); - inputLayer->GetOutputSlot(0).SetMemoryStrategy(0, armnn::MemoryStrategy::DirectCompatibility); - splitterLayer->GetOutputSlot(0).SetMemoryStrategy(0, armnn::MemoryStrategy::CopyToTarget); - splitterLayer->GetOutputSlot(1).SetMemoryStrategy(0, armnn::MemoryStrategy::CopyToTarget); - additionLayer->GetOutputSlot(0).SetMemoryStrategy(0, armnn::MemoryStrategy::DirectCompatibility); + inputLayer->GetOutputSlot(0).SetEdgeStrategy(0, armnn::EdgeStrategy::DirectCompatibility); + splitterLayer->GetOutputSlot(0).SetEdgeStrategy(0, armnn::EdgeStrategy::CopyToTarget); + splitterLayer->GetOutputSlot(1).SetEdgeStrategy(0, armnn::EdgeStrategy::CopyToTarget); + additionLayer->GetOutputSlot(0).SetEdgeStrategy(0, armnn::EdgeStrategy::DirectCompatibility); - graph.AddCopyLayers(m_Backends, m_FactoryRegistry); + graph.AddCompatibilityLayers(m_Backends, m_FactoryRegistry); std::vector edges = GetEdgeList(graph); BOOST_CHECK(edges.size() == 6u); std::sort(edges.begin(), edges.end()); auto last = std::unique(edges.begin(), edges.end()); - BOOST_CHECK_MESSAGE(last == edges.end(), "Found duplicated edges after AddCopyLayers()"); + BOOST_CHECK_MESSAGE(last == edges.end(), "Found duplicated edges after AddCompatibilityLayers()"); } BOOST_AUTO_TEST_CASE(DuplicateLayerNames) diff --git a/src/armnn/test/TensorHandleStrategyTest.cpp b/src/armnn/test/TensorHandleStrategyTest.cpp index 3bb1c68169..c391b04d97 100644 --- a/src/armnn/test/TensorHandleStrategyTest.cpp +++ b/src/armnn/test/TensorHandleStrategyTest.cpp @@ -50,9 +50,11 @@ public: return nullptr; } - virtual const FactoryId GetId() const override { return m_Id; } + const FactoryId GetId() const override { return m_Id; } - virtual bool SupportsSubTensors() const override { return true; } + bool SupportsSubTensors() const override { return true; } + + MemorySourceFlags GetExportFlags() const override { return 1; } private: FactoryId m_Id = "UninitializedId"; @@ -60,6 +62,38 @@ private: std::weak_ptr m_MemMgr; }; +class TestFactoryImport : public ITensorHandleFactory +{ +public: + TestFactoryImport(std::weak_ptr mgr, ITensorHandleFactory::FactoryId id) + : m_Id(id) + , m_MemMgr(mgr) + {} + + std::unique_ptr CreateSubTensorHandle(ITensorHandle& parent, + TensorShape const& subTensorShape, + unsigned int const* subTensorOrigin) const override + { + return nullptr; + } + + std::unique_ptr CreateTensorHandle(const TensorInfo& tensorInfo) const override + { + return nullptr; + } + + const FactoryId GetId() const override { return m_Id; } + + bool SupportsSubTensors() const override { return true; } + + MemorySourceFlags GetImportFlags() const override { return 1; } + +private: + FactoryId m_Id = "ImporterId"; + + std::weak_ptr m_MemMgr; +}; + class TestBackendA : public IBackendInternal { public: @@ -173,6 +207,42 @@ private: BackendId m_Id = "BackendC"; }; +class TestBackendD : public IBackendInternal +{ +public: + TestBackendD() = default; + + const BackendId& GetId() const override { return m_Id; } + + IWorkloadFactoryPtr 
CreateWorkloadFactory(const IMemoryManagerSharedPtr& memoryManager = nullptr) const override + { + return IWorkloadFactoryPtr{}; + } + + IBackendInternal::ILayerSupportSharedPtr GetLayerSupport() const override + { + return ILayerSupportSharedPtr{}; + } + + std::vector GetHandleFactoryPreferences() const override + { + return std::vector{ + "TestHandleFactoryD1" + }; + } + + void RegisterTensorHandleFactories(TensorHandleFactoryRegistry& registry) override + { + auto mgr = std::make_shared(); + + registry.RegisterMemoryManager(mgr); + registry.RegisterFactory(std::make_unique(mgr, "TestHandleFactoryD1")); + } + +private: + BackendId m_Id = "BackendD"; +}; + BOOST_AUTO_TEST_SUITE(TensorHandle) @@ -200,16 +270,19 @@ BOOST_AUTO_TEST_CASE(TensorHandleSelectionStrategy) auto backendA = std::make_unique(); auto backendB = std::make_unique(); auto backendC = std::make_unique(); + auto backendD = std::make_unique(); TensorHandleFactoryRegistry registry; backendA->RegisterTensorHandleFactories(registry); backendB->RegisterTensorHandleFactories(registry); backendC->RegisterTensorHandleFactories(registry); + backendD->RegisterTensorHandleFactories(registry); BackendsMap backends; backends["BackendA"] = std::move(backendA); backends["BackendB"] = std::move(backendB); backends["BackendC"] = std::move(backendC); + backends["BackendD"] = std::move(backendD); armnn::Graph graph; @@ -226,13 +299,17 @@ BOOST_AUTO_TEST_CASE(TensorHandleSelectionStrategy) armnn::SoftmaxLayer* const softmaxLayer3 = graph.AddLayer(smDesc, "softmax3"); softmaxLayer3->SetBackendId("BackendC"); + armnn::SoftmaxLayer* const softmaxLayer4 = graph.AddLayer(smDesc, "softmax4"); + softmaxLayer4->SetBackendId("BackendD"); + armnn::OutputLayer* const outputLayer = graph.AddLayer(0, "output"); outputLayer->SetBackendId("BackendA"); inputLayer->GetOutputSlot(0).Connect(softmaxLayer1->GetInputSlot(0)); softmaxLayer1->GetOutputSlot(0).Connect(softmaxLayer2->GetInputSlot(0)); softmaxLayer2->GetOutputSlot(0).Connect(softmaxLayer3->GetInputSlot(0)); - softmaxLayer3->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0)); + softmaxLayer3->GetOutputSlot(0).Connect(softmaxLayer4->GetInputSlot(0)); + softmaxLayer4->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0)); graph.TopologicalSort(); @@ -246,29 +323,45 @@ BOOST_AUTO_TEST_CASE(TensorHandleSelectionStrategy) OutputSlot& softmaxLayer1Out = softmaxLayer1->GetOutputSlot(0); OutputSlot& softmaxLayer2Out = softmaxLayer2->GetOutputSlot(0); OutputSlot& softmaxLayer3Out = softmaxLayer3->GetOutputSlot(0); + OutputSlot& softmaxLayer4Out = softmaxLayer4->GetOutputSlot(0); // Check that the correct factory was selected BOOST_TEST(inputLayerOut.GetTensorHandleFactoryId() == "TestHandleFactoryA1"); BOOST_TEST(softmaxLayer1Out.GetTensorHandleFactoryId() == "TestHandleFactoryB1"); BOOST_TEST(softmaxLayer2Out.GetTensorHandleFactoryId() == "TestHandleFactoryB1"); BOOST_TEST(softmaxLayer3Out.GetTensorHandleFactoryId() == "TestHandleFactoryC1"); + BOOST_TEST(softmaxLayer4Out.GetTensorHandleFactoryId() == "TestHandleFactoryD1"); // Check that the correct strategy was selected - BOOST_TEST((inputLayerOut.GetMemoryStrategyForConnection(0) == MemoryStrategy::DirectCompatibility)); - BOOST_TEST((softmaxLayer1Out.GetMemoryStrategyForConnection(0) == MemoryStrategy::DirectCompatibility)); - BOOST_TEST((softmaxLayer2Out.GetMemoryStrategyForConnection(0) == MemoryStrategy::CopyToTarget)); - BOOST_TEST((softmaxLayer3Out.GetMemoryStrategyForConnection(0) == MemoryStrategy::DirectCompatibility)); - - 
graph.AddCopyLayers(backends, registry); - int count= 0; - graph.ForEachLayer([&count](Layer* layer) + BOOST_TEST((inputLayerOut.GetEdgeStrategyForConnection(0) == EdgeStrategy::DirectCompatibility)); + BOOST_TEST((softmaxLayer1Out.GetEdgeStrategyForConnection(0) == EdgeStrategy::DirectCompatibility)); + BOOST_TEST((softmaxLayer2Out.GetEdgeStrategyForConnection(0) == EdgeStrategy::CopyToTarget)); + BOOST_TEST((softmaxLayer3Out.GetEdgeStrategyForConnection(0) == EdgeStrategy::ExportToTarget)); + BOOST_TEST((softmaxLayer4Out.GetEdgeStrategyForConnection(0) == EdgeStrategy::DirectCompatibility)); + + graph.AddCompatibilityLayers(backends, registry); + + // Test for copy layers + int copyCount= 0; + graph.ForEachLayer([©Count](Layer* layer) { if (layer->GetType() == LayerType::MemCopy) { - count++; + copyCount++; + } + }); + BOOST_TEST(copyCount == 1); + + // Test for import layers + int importCount= 0; + graph.ForEachLayer([&importCount](Layer *layer) + { + if (layer->GetType() == LayerType::MemImport) + { + importCount++; } }); - BOOST_TEST(count == 1); + BOOST_TEST(importCount == 1); } BOOST_AUTO_TEST_SUITE_END() diff --git a/src/backends/backendsCommon/CMakeLists.txt b/src/backends/backendsCommon/CMakeLists.txt index 653f3727ee..44131ea1b5 100644 --- a/src/backends/backendsCommon/CMakeLists.txt +++ b/src/backends/backendsCommon/CMakeLists.txt @@ -20,11 +20,16 @@ list(APPEND armnnBackendsCommon_sources ITensorHandleFactory.hpp LayerSupportBase.cpp LayerSupportBase.hpp + LayerSupportRules.hpp IMemoryManager.hpp ITensorHandle.hpp MakeWorkloadHelper.hpp MemCopyWorkload.cpp MemCopyWorkload.hpp + MemImportWorkload.cpp + MemImportWorkload.hpp + MemSyncWorkload.cpp + MemSyncWorkload.hpp OptimizationViews.cpp OptimizationViews.hpp OutputHandler.cpp diff --git a/src/backends/backendsCommon/ITensorHandle.hpp b/src/backends/backendsCommon/ITensorHandle.hpp index 176b021d76..e1b80b874a 100644 --- a/src/backends/backendsCommon/ITensorHandle.hpp +++ b/src/backends/backendsCommon/ITensorHandle.hpp @@ -4,6 +4,8 @@ // #pragma once +#include + namespace armnn { @@ -61,6 +63,15 @@ public: // Testing support to be able to verify and set tensor data content virtual void CopyOutTo(void* memory) const = 0; virtual void CopyInFrom(const void* memory) = 0; + + /// Get flags describing supported import sources. + virtual unsigned int GetImportFlags() const { return 0; } + + /// Import externally allocated memory + /// \param memory base address of the memory being imported. + /// \param source source of the allocation for the memory being imported. 
+ /// \return true on success or false on failure + virtual bool Import(void* memory, MemorySource source) { return false; }; }; } diff --git a/src/backends/backendsCommon/ITensorHandleFactory.hpp b/src/backends/backendsCommon/ITensorHandleFactory.hpp index 7685061eb3..89a2a7fa3b 100644 --- a/src/backends/backendsCommon/ITensorHandleFactory.hpp +++ b/src/backends/backendsCommon/ITensorHandleFactory.hpp @@ -5,8 +5,9 @@ #pragma once -#include #include +#include +#include namespace armnn { @@ -20,7 +21,6 @@ public: virtual ~ITensorHandleFactory() {} - virtual std::unique_ptr CreateSubTensorHandle(ITensorHandle& parent, TensorShape const& subTensorShape, unsigned int const* subTensorOrigin) const = 0; @@ -33,17 +33,16 @@ public: virtual bool SupportsMapUnmap() const final { return true; } - virtual bool SupportsExport() const final { return false; } - - virtual bool SupportsImport() const final { return false; } + virtual MemorySourceFlags GetExportFlags() const { return 0; } + virtual MemorySourceFlags GetImportFlags() const { return 0; } }; -enum class MemoryStrategy +enum class EdgeStrategy { - Undefined, - DirectCompatibility, // Only allocate the tensorhandle using the assigned factory - CopyToTarget, // Default + Insert MemCopy node before target - ExportToTarget, // Default + Insert Import node + Undefined, /// No strategy has been defined. Used internally to verify integrity of optimizations. + DirectCompatibility, /// Destination backend can work directly with tensors on source backend. + ExportToTarget, /// Source backends tensor data can be exported to destination backend tensor without copy. + CopyToTarget /// Copy contents from source backend tensor to destination backend tensor. }; } //namespace armnn diff --git a/src/backends/backendsCommon/LayerSupportBase.cpp b/src/backends/backendsCommon/LayerSupportBase.cpp index f202fedb4f..ee8dc5f7e9 100644 --- a/src/backends/backendsCommon/LayerSupportBase.cpp +++ b/src/backends/backendsCommon/LayerSupportBase.cpp @@ -7,6 +7,8 @@ #include +#include + namespace { @@ -252,7 +254,18 @@ bool LayerSupportBase::IsMemCopySupported(const armnn::TensorInfo& input, const armnn::TensorInfo& output, armnn::Optional reasonIfUnsupported) const { - return DefaultLayerSupport(__func__, __FILE__, __LINE__, reasonIfUnsupported); + boost::ignore_unused(input); + boost::ignore_unused(output); + return true; +} + +bool LayerSupportBase::IsMemImportSupported(const armnn::TensorInfo& input, + const armnn::TensorInfo& output, + armnn::Optional reasonIfUnsupported) const +{ + boost::ignore_unused(input); + boost::ignore_unused(output); + return true; } bool LayerSupportBase::IsMergeSupported(const TensorInfo& input0, diff --git a/src/backends/backendsCommon/LayerSupportBase.hpp b/src/backends/backendsCommon/LayerSupportBase.hpp index c860e34874..0d5a2af16e 100644 --- a/src/backends/backendsCommon/LayerSupportBase.hpp +++ b/src/backends/backendsCommon/LayerSupportBase.hpp @@ -157,6 +157,10 @@ public: const TensorInfo& output, Optional reasonIfUnsupported = EmptyOptional()) const override; + bool IsMemImportSupported(const TensorInfo& input, + const TensorInfo& output, + Optional reasonIfUnsupported = EmptyOptional()) const override; + bool IsMergeSupported(const TensorInfo& input0, const TensorInfo& input1, const TensorInfo& output, diff --git a/src/backends/backendsCommon/LayerSupportRules.hpp b/src/backends/backendsCommon/LayerSupportRules.hpp new file mode 100644 index 0000000000..db3f38ccbb --- /dev/null +++ b/src/backends/backendsCommon/LayerSupportRules.hpp 
@@ -0,0 +1,185 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include +#include + +namespace armnn +{ + +namespace +{ + +inline armnn::Optional GetBiasTypeFromWeightsType(armnn::Optional weightsType) +{ + if (!weightsType) + { + return weightsType; + } + + switch(weightsType.value()) + { + case armnn::DataType::Float16: + case armnn::DataType::Float32: + return weightsType; + case armnn::DataType::QuantisedAsymm8: + return armnn::DataType::Signed32; + case armnn::DataType::QuantisedSymm16: + return armnn::DataType::Signed32; + default: + BOOST_ASSERT_MSG(false, "GetBiasTypeFromWeightsType(): Unsupported data type."); + } + return armnn::EmptyOptional(); +} + +} //namespace + +template +bool CheckSupportRule(F rule, Optional reasonIfUnsupported, const char* reason) +{ + bool supported = rule(); + if (!supported && reason) + { + reasonIfUnsupported.value() += std::string(reason) + "\n"; // Append the reason on a new line + } + return supported; +} + +struct Rule +{ + bool operator()() const + { + return m_Res; + } + + bool m_Res = true; +}; + +template +bool AllTypesAreEqualImpl(T t) +{ + return true; +} + +template +bool AllTypesAreEqualImpl(T t1, T t2, Rest... rest) +{ + static_assert(std::is_same::value, "Type T must be a TensorInfo"); + + return (t1.GetDataType() == t2.GetDataType()) && AllTypesAreEqualImpl(t2, rest...); +} + +struct TypesAreEqual : public Rule +{ + template + TypesAreEqual(const Ts&... ts) + { + m_Res = AllTypesAreEqualImpl(ts...); + } +}; + +struct QuantizationParametersAreEqual : public Rule +{ + QuantizationParametersAreEqual(const TensorInfo& info0, const TensorInfo& info1) + { + m_Res = info0.GetQuantizationScale() == info1.GetQuantizationScale() && + info0.GetQuantizationOffset() == info1.GetQuantizationOffset(); + } +}; + +struct TypeAnyOf : public Rule +{ + template + TypeAnyOf(const TensorInfo& info, const Container& c) + { + m_Res = std::any_of(c.begin(), c.end(), [&info](DataType dt) + { + return dt == info.GetDataType(); + }); + } +}; + +struct TypeIs : public Rule +{ + TypeIs(const TensorInfo& info, DataType dt) + { + m_Res = dt == info.GetDataType(); + } +}; + +struct BiasAndWeightsTypesMatch : public Rule +{ + BiasAndWeightsTypesMatch(const TensorInfo& biases, const TensorInfo& weights) + { + m_Res = biases.GetDataType() == GetBiasTypeFromWeightsType(weights.GetDataType()).value(); + } +}; + +struct BiasAndWeightsTypesCompatible : public Rule +{ + template + BiasAndWeightsTypesCompatible(const TensorInfo& info, const Container& c) + { + m_Res = std::any_of(c.begin(), c.end(), [&info](DataType dt) + { + return dt == GetBiasTypeFromWeightsType(info.GetDataType()).value(); + }); + } +}; + +struct ShapesAreSameRank : public Rule +{ + ShapesAreSameRank(const TensorInfo& info0, const TensorInfo& info1) + { + m_Res = info0.GetShape().GetNumDimensions() == info1.GetShape().GetNumDimensions(); + } +}; + +struct ShapesAreSameTotalSize : public Rule +{ + ShapesAreSameTotalSize(const TensorInfo& info0, const TensorInfo& info1) + { + m_Res = info0.GetNumElements() == info1.GetNumElements(); + } +}; + +struct ShapesAreBroadcastCompatible : public Rule +{ + unsigned int CalcInputSize(const TensorShape& in, const TensorShape& out, unsigned int idx) + { + unsigned int offset = out.GetNumDimensions() - in.GetNumDimensions(); + unsigned int sizeIn = (idx < offset) ? 
1 : in[idx-offset]; + return sizeIn; + } + + ShapesAreBroadcastCompatible(const TensorInfo& in0, const TensorInfo& in1, const TensorInfo& out) + { + const TensorShape& shape0 = in0.GetShape(); + const TensorShape& shape1 = in1.GetShape(); + const TensorShape& outShape = out.GetShape(); + + for (unsigned int i=0; i < outShape.GetNumDimensions() && m_Res; i++) + { + unsigned int sizeOut = outShape[i]; + unsigned int sizeIn0 = CalcInputSize(shape0, outShape, i); + unsigned int sizeIn1 = CalcInputSize(shape1, outShape, i); + + m_Res &= ((sizeIn0 == sizeOut) || (sizeIn0 == 1)) && + ((sizeIn1 == sizeOut) || (sizeIn1 == 1)); + } + } +}; + +struct TensorNumDimensionsAreCorrect : public Rule +{ + TensorNumDimensionsAreCorrect(const TensorInfo& info, unsigned int expectedNumDimensions) + { + m_Res = info.GetNumDimensions() == expectedNumDimensions; + } +}; + +} //namespace armnn \ No newline at end of file diff --git a/src/backends/backendsCommon/MemImportWorkload.cpp b/src/backends/backendsCommon/MemImportWorkload.cpp new file mode 100644 index 0000000000..ed00241bb6 --- /dev/null +++ b/src/backends/backendsCommon/MemImportWorkload.cpp @@ -0,0 +1,34 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "MemImportWorkload.hpp" + +#include "CpuTensorHandle.hpp" + +#include + +#include + +#include + +namespace armnn +{ + +ImportMemGenericWorkload::ImportMemGenericWorkload(const MemImportQueueDescriptor& descriptor, + const WorkloadInfo& info) + : BaseWorkload(descriptor, info) +{ + m_TensorHandlePairs = std::make_pair(descriptor.m_Inputs[0], descriptor.m_Outputs[0]); +} + +void ImportMemGenericWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ImportMemGeneric_Execute"); + + m_TensorHandlePairs.second->Import(const_cast(m_TensorHandlePairs.first->Map(true)), MemorySource::Malloc); + m_TensorHandlePairs.first->Unmap(); +} + +} //namespace armnn diff --git a/src/backends/backendsCommon/MemImportWorkload.hpp b/src/backends/backendsCommon/MemImportWorkload.hpp new file mode 100644 index 0000000000..e16b99e9e0 --- /dev/null +++ b/src/backends/backendsCommon/MemImportWorkload.hpp @@ -0,0 +1,27 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#include "CpuTensorHandleFwd.hpp" +#include "Workload.hpp" +#include "WorkloadUtils.hpp" + +#include + +namespace armnn +{ + +class ImportMemGenericWorkload : public BaseWorkload +{ +public: + ImportMemGenericWorkload(const MemImportQueueDescriptor& descriptor, const WorkloadInfo& info); + void Execute() const override; + +private: + using TensorHandlePair = std::pair; + TensorHandlePair m_TensorHandlePairs; +}; + +} //namespace armnn diff --git a/src/backends/backendsCommon/MemSyncWorkload.cpp b/src/backends/backendsCommon/MemSyncWorkload.cpp new file mode 100644 index 0000000000..a1d309cefb --- /dev/null +++ b/src/backends/backendsCommon/MemSyncWorkload.cpp @@ -0,0 +1,33 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "MemSyncWorkload.hpp" + +#include "CpuTensorHandle.hpp" + +#include + +#include + +#include + +namespace armnn +{ + +SyncMemGenericWorkload::SyncMemGenericWorkload(const MemSyncQueueDescriptor& descriptor, + const WorkloadInfo& info) + : BaseWorkload(descriptor, info) +{ + m_TensorHandle = descriptor.m_Inputs[0]; +} + +void SyncMemGenericWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "SyncMemGeneric_Execute"); + m_TensorHandle->Map(true); + m_TensorHandle->Unmap(); +} + +} //namespace armnn diff --git a/src/backends/backendsCommon/MemSyncWorkload.hpp b/src/backends/backendsCommon/MemSyncWorkload.hpp new file mode 100644 index 0000000000..3a167d2a00 --- /dev/null +++ b/src/backends/backendsCommon/MemSyncWorkload.hpp @@ -0,0 +1,26 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#include "CpuTensorHandleFwd.hpp" +#include "Workload.hpp" +#include "WorkloadUtils.hpp" + +#include + +namespace armnn +{ + +class SyncMemGenericWorkload : public BaseWorkload +{ +public: + SyncMemGenericWorkload(const MemSyncQueueDescriptor& descriptor, const WorkloadInfo& info); + void Execute() const override; + +private: + ITensorHandle* m_TensorHandle; +}; + +} //namespace armnn diff --git a/src/backends/backendsCommon/WorkloadData.cpp b/src/backends/backendsCommon/WorkloadData.cpp index a4d35827fa..1c607da707 100644 --- a/src/backends/backendsCommon/WorkloadData.cpp +++ b/src/backends/backendsCommon/WorkloadData.cpp @@ -351,6 +351,109 @@ void MemCopyQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const } } +//--------------------------------------------------------------- +void MemImportQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const +{ + ValidateNumInputs(workloadInfo, "MemImportQueueDescriptor", 1); + ValidateNumOutputs(workloadInfo, "MemImportQueueDescriptor" , 1); + + if (workloadInfo.m_InputTensorInfos.size() != 1) + { + throw InvalidArgumentException(boost::str( + boost::format("Number of input infos (%1%) is not 1.") + % workloadInfo.m_InputTensorInfos.size())); + + } + + if (workloadInfo.m_InputTensorInfos.size() != workloadInfo.m_OutputTensorInfos.size()) + { + throw InvalidArgumentException(boost::str( + boost::format("Number of input infos (%1%) does not match the number of output infos (%2%)") + % workloadInfo.m_InputTensorInfos.size() % workloadInfo.m_OutputTensorInfos.size())); + } + + for (std::size_t i = 0; i < workloadInfo.m_InputTensorInfos.size(); ++i) + { + if (workloadInfo.m_InputTensorInfos[i].GetNumElements() != + workloadInfo.m_OutputTensorInfos[i].GetNumElements()) + { + throw InvalidArgumentException(boost::str( + boost::format("Number of elements for tensor input and output %1% does not match") + % i )); + } + } + + if (m_Inputs.size() != 1) + { + throw InvalidArgumentException(boost::str( + boost::format("Number of inputs (%1%) is not 1.") + % m_Inputs.size())); + } + + if (m_Inputs.size() != m_Outputs.size()) + { + throw InvalidArgumentException(boost::str( + boost::format("Number of inputs (%1%) does not match the number of outputs (%2%)") + % m_Inputs.size() % m_Outputs.size())); + } + + for (unsigned int i = 0; i < m_Inputs.size(); ++i) + { + if (!m_Inputs[i]) + { + throw InvalidArgumentException(boost::str(boost::format("Invalid null input %1%") % i)); + } + + if (!m_Outputs[i]) + { + throw InvalidArgumentException(boost::str(boost::format("Invalid null output %1%") % i)); + } + } +} + 
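[Editor's illustrative sketch, not part of the patch.] Returning briefly to the LayerSupportRules.hpp helpers added earlier in this change: the rule objects are intended to be chained through CheckSupportRule inside a backend's IsXxxSupported() queries (the reference backend's RefLayerSupport.cpp is reworked onto them in this commit, per the diffstat). A hypothetical minimal use:

#include <array>
#include <backendsCommon/LayerSupportRules.hpp>

// Hypothetical layer-support check composed from the new rule helpers.
bool IsExampleLayerSupported(const TensorInfo& input,
                             const TensorInfo& output,
                             Optional<std::string&> reasonIfUnsupported)
{
    std::array<DataType, 2> supportedTypes = { DataType::Float32, DataType::QuantisedAsymm8 };

    bool supported = true;
    supported &= CheckSupportRule(TypeAnyOf(input, supportedTypes), reasonIfUnsupported,
                                  "Example layer: input type not supported.");
    supported &= CheckSupportRule(TypesAreEqual(input, output), reasonIfUnsupported,
                                  "Example layer: input and output types must match.");
    supported &= CheckSupportRule(ShapesAreSameTotalSize(input, output), reasonIfUnsupported,
                                  "Example layer: input and output must have the same number of elements.");
    return supported;
}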
+//--------------------------------------------------------------- +void MemSyncQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const +{ + ValidateNumInputs(workloadInfo, "MemSyncQueueDescriptor", 1); + ValidateNumOutputs(workloadInfo, "MemSyncQueueDescriptor" , 1); + + if (workloadInfo.m_InputTensorInfos.size() != 1) + { + throw InvalidArgumentException(boost::str( + boost::format("Number of input infos (%1%) is not 1.") + % workloadInfo.m_InputTensorInfos.size())); + + } + + if (workloadInfo.m_OutputTensorInfos.size() != 0) + { + throw InvalidArgumentException(boost::str( + boost::format("Number of output infos (%1%) is not 0.") + % workloadInfo.m_InputTensorInfos.size())); + + } + + if (m_Inputs.size() != 1) + { + throw InvalidArgumentException(boost::str( + boost::format("Number of inputs (%1%) is not 1.") + % m_Inputs.size())); + } + + if (m_Outputs.size() != 0) + { + throw InvalidArgumentException(boost::str( + boost::format("Number of outputs (%1%) is not 0.") + % m_Inputs.size() % m_Outputs.size())); + } + + if (!m_Inputs[0]) + { + throw InvalidArgumentException(boost::str(boost::format("Invalid null input 0"))); + } +} + +//--------------------------------------------------------------- void ActivationQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const { const std::string descriptorName{"ActivationQueueDescriptor"}; diff --git a/src/backends/backendsCommon/WorkloadData.hpp b/src/backends/backendsCommon/WorkloadData.hpp index d790dafd58..c055beb88d 100644 --- a/src/backends/backendsCommon/WorkloadData.hpp +++ b/src/backends/backendsCommon/WorkloadData.hpp @@ -63,6 +63,16 @@ struct MemCopyQueueDescriptor : QueueDescriptor using InputQueueDescriptor = MemCopyQueueDescriptor; using OutputQueueDescriptor = MemCopyQueueDescriptor; +struct MemImportQueueDescriptor : QueueDescriptor +{ + void Validate(const WorkloadInfo& workloadInfo) const; +}; + +struct MemSyncQueueDescriptor : QueueDescriptor +{ + void Validate(const WorkloadInfo& workloadInfo) const; +}; + // Softmax layer workload data. 
 struct SoftmaxQueueDescriptor : QueueDescriptorWithParameters<SoftmaxDescriptor>
 {
diff --git a/src/backends/backendsCommon/WorkloadFactory.cpp b/src/backends/backendsCommon/WorkloadFactory.cpp
index 1f616f0b18..ffef5b4eb7 100644
--- a/src/backends/backendsCommon/WorkloadFactory.cpp
+++ b/src/backends/backendsCommon/WorkloadFactory.cpp
@@ -515,6 +515,16 @@ bool IWorkloadFactory::IsLayerSupported(const BackendId& backendId,
                                                            reason);
             break;
         }
+        case LayerType::MemImport:
+        {
+            const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
+            const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo();
+
+            result = layerSupportObject->IsMemImportSupported(OverrideDataType(input, dataType),
+                                                              OverrideDataType(output, dataType),
+                                                              reason);
+            break;
+        }
         case LayerType::Merge:
         {
             const TensorInfo& input0 = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
@@ -1092,6 +1102,12 @@ std::unique_ptr IWorkloadFactory::CreateMemCopy(const MemCopyQueueDes
     return std::unique_ptr<IWorkload>();
 }
 
+std::unique_ptr<IWorkload> IWorkloadFactory::CreateMemImport(const MemImportQueueDescriptor& descriptor,
+                                                             const WorkloadInfo& info) const
+{
+    return std::unique_ptr<IWorkload>();
+}
+
 std::unique_ptr<IWorkload> IWorkloadFactory::CreateMerge(const MergeQueueDescriptor& descriptor,
                                                          const WorkloadInfo& info) const
 {
diff --git a/src/backends/backendsCommon/WorkloadFactory.hpp b/src/backends/backendsCommon/WorkloadFactory.hpp
index bd7f1c627b..a9c6049c37 100644
--- a/src/backends/backendsCommon/WorkloadFactory.hpp
+++ b/src/backends/backendsCommon/WorkloadFactory.hpp
@@ -124,6 +124,9 @@ public:
     virtual std::unique_ptr<IWorkload> CreateMemCopy(const MemCopyQueueDescriptor& descriptor,
                                                      const WorkloadInfo& info) const;
 
+    virtual std::unique_ptr<IWorkload> CreateMemImport(const MemImportQueueDescriptor& descriptor,
+                                                       const WorkloadInfo& info) const;
+
     virtual std::unique_ptr<IWorkload> CreateMerge(const MergeQueueDescriptor& descriptor,
                                                    const WorkloadInfo& info) const;
 
diff --git a/src/backends/backendsCommon/common.mk b/src/backends/backendsCommon/common.mk
index 69bde81b0a..eee1dae0ff 100644
--- a/src/backends/backendsCommon/common.mk
+++ b/src/backends/backendsCommon/common.mk
@@ -14,6 +14,8 @@ COMMON_SOURCES := \
     ITensorHandleFactory.cpp \
     LayerSupportBase.cpp \
     MemCopyWorkload.cpp \
+    MemImportWorkload.cpp \
+    MemSyncWorkload.cpp \
     OptimizationViews.cpp \
     OutputHandler.cpp \
     TensorHandleFactoryRegistry.cpp \
diff --git a/src/backends/backendsCommon/test/IsLayerSupportedTestImpl.hpp b/src/backends/backendsCommon/test/IsLayerSupportedTestImpl.hpp
index 451c585adc..1f43c989d6 100644
--- a/src/backends/backendsCommon/test/IsLayerSupportedTestImpl.hpp
+++ b/src/backends/backendsCommon/test/IsLayerSupportedTestImpl.hpp
@@ -405,6 +405,8 @@ DECLARE_LAYER_POLICY_2_PARAM(Convolution2d)
 
 DECLARE_LAYER_POLICY_1_PARAM(MemCopy)
 
+DECLARE_LAYER_POLICY_1_PARAM(MemImport)
+
 DECLARE_LAYER_POLICY_1_PARAM(Debug)
 
 DECLARE_LAYER_POLICY_2_PARAM(DepthwiseConvolution2d)
diff --git a/src/backends/cl/ClLayerSupport.cpp b/src/backends/cl/ClLayerSupport.cpp
index 4ea6f2db3a..cca265f30c 100644
--- a/src/backends/cl/ClLayerSupport.cpp
+++ b/src/backends/cl/ClLayerSupport.cpp
@@ -448,15 +448,6 @@ bool ClLayerSupport::IsMeanSupported(const TensorInfo& input,
                                      descriptor);
 }
 
-bool ClLayerSupport::IsMemCopySupported(const TensorInfo &input,
-                                        const TensorInfo &output,
-                                        Optional<std::string&> reasonIfUnsupported) const
-{
-    ignore_unused(input);
-    ignore_unused(output);
-    return true;
-}
-
 bool ClLayerSupport::IsMergerSupported(const std::vector<const TensorInfo*> inputs,
                                        const TensorInfo& output,
                                        const MergerDescriptor& descriptor,
diff --git a/src/backends/cl/ClLayerSupport.hpp b/src/backends/cl/ClLayerSupport.hpp
index a367085eef..69c2280a59 100644
--- a/src/backends/cl/ClLayerSupport.hpp
+++ b/src/backends/cl/ClLayerSupport.hpp
@@ -127,10 +127,6 @@ public:
                          const MeanDescriptor& descriptor,
                          Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
 
-    bool IsMemCopySupported(const TensorInfo& input,
-                            const TensorInfo& output,
-                            Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
-
     ARMNN_DEPRECATED_MSG("Use IsConcatSupported instead")
     bool IsMergerSupported(const std::vector<const TensorInfo*> inputs,
                            const TensorInfo& output,
diff --git a/src/backends/cl/ClWorkloadFactory.cpp b/src/backends/cl/ClWorkloadFactory.cpp
index d72fa92a30..6e91dd07a5 100644
--- a/src/backends/cl/ClWorkloadFactory.cpp
+++ b/src/backends/cl/ClWorkloadFactory.cpp
@@ -13,6 +13,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 
@@ -257,6 +258,17 @@ std::unique_ptr ClWorkloadFactory::CreateMemCopy(const MemCopy
     return MakeWorkload(descriptor, info);
 }
 
+std::unique_ptr<IWorkload> ClWorkloadFactory::CreateMemImport(const MemImportQueueDescriptor& descriptor,
+                                                              const WorkloadInfo& info) const
+{
+    if (descriptor.m_Inputs.empty() || !descriptor.m_Inputs[0])
+    {
+        throw InvalidArgumentException("ClWorkloadFactory: Invalid null input for MemImport workload");
+    }
+
+    return std::make_unique<ImportMemGenericWorkload>(descriptor, info);
+}
+
 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateResize(const ResizeQueueDescriptor& descriptor,
                                                            const WorkloadInfo& info) const
 {
diff --git a/src/backends/cl/ClWorkloadFactory.hpp b/src/backends/cl/ClWorkloadFactory.hpp
index 01bfb8db9f..3a55ac5a2a 100644
--- a/src/backends/cl/ClWorkloadFactory.hpp
+++ b/src/backends/cl/ClWorkloadFactory.hpp
@@ -94,6 +94,9 @@ public:
     std::unique_ptr<IWorkload> CreateMemCopy(const MemCopyQueueDescriptor& descriptor,
                                              const WorkloadInfo& info) const override;
 
+    std::unique_ptr<IWorkload> CreateMemImport(const MemImportQueueDescriptor& descriptor,
+                                               const WorkloadInfo& info) const override;
+
     std::unique_ptr<IWorkload> CreateResize(const ResizeQueueDescriptor& descriptor,
                                             const WorkloadInfo& info) const override;
 
diff --git a/src/backends/neon/NeonLayerSupport.cpp b/src/backends/neon/NeonLayerSupport.cpp
index fd09265d1e..dac3525f60 100644
--- a/src/backends/neon/NeonLayerSupport.cpp
+++ b/src/backends/neon/NeonLayerSupport.cpp
@@ -384,15 +384,6 @@ bool NeonLayerSupport::IsMeanSupported(const TensorInfo& input,
                                        descriptor);
 }
 
-bool NeonLayerSupport::IsMemCopySupported(const TensorInfo &input,
-                                          const TensorInfo &output,
-                                          Optional<std::string&> reasonIfUnsupported) const
-{
-    ignore_unused(input);
-    ignore_unused(output);
-    return true;
-}
-
 bool NeonLayerSupport::IsMergerSupported(const std::vector<const TensorInfo*> inputs,
                                          const TensorInfo& output,
                                          const MergerDescriptor& descriptor,
diff --git a/src/backends/neon/NeonLayerSupport.hpp b/src/backends/neon/NeonLayerSupport.hpp
index c37ac2a3fc..078d2f619b 100644
--- a/src/backends/neon/NeonLayerSupport.hpp
+++ b/src/backends/neon/NeonLayerSupport.hpp
@@ -117,10 +117,6 @@ public:
                          const MeanDescriptor& descriptor,
                          Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
 
-    bool IsMemCopySupported(const TensorInfo& input,
-                            const TensorInfo& output,
-                            Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
-
     ARMNN_DEPRECATED_MSG("Use IsConcatSupported instead")
     bool IsMergerSupported(const std::vector<const TensorInfo*> inputs,
                            const TensorInfo& output,
diff --git a/src/backends/neon/NeonWorkloadFactory.cpp b/src/backends/neon/NeonWorkloadFactory.cpp
index cdc84ca62e..fd0381c26d 100644
--- a/src/backends/neon/NeonWorkloadFactory.cpp
+++ b/src/backends/neon/NeonWorkloadFactory.cpp
@@ -14,6 +14,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 
@@ -224,6 +225,17 @@ std::unique_ptr NeonWorkloadFactory::CreateMemCopy(const MemCo
     return MakeWorkloadHelper(descriptor, info);
 }
 
+std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateMemImport(const MemImportQueueDescriptor& descriptor,
+                                                                const WorkloadInfo& info) const
+{
+    if (descriptor.m_Inputs.empty() || !descriptor.m_Inputs[0])
+    {
+        throw InvalidArgumentException("NeonWorkloadFactory: Invalid null input for MemImport workload");
+    }
+
+    return std::make_unique<ImportMemGenericWorkload>(descriptor, info);
+}
+
 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateResize(const ResizeQueueDescriptor& descriptor,
                                                              const WorkloadInfo& info) const
 {
diff --git a/src/backends/neon/NeonWorkloadFactory.hpp b/src/backends/neon/NeonWorkloadFactory.hpp
index d6a1b74941..360dc7c61b 100644
--- a/src/backends/neon/NeonWorkloadFactory.hpp
+++ b/src/backends/neon/NeonWorkloadFactory.hpp
@@ -95,6 +95,9 @@ public:
     std::unique_ptr<IWorkload> CreateMemCopy(const MemCopyQueueDescriptor& descriptor,
                                              const WorkloadInfo& info) const override;
 
+    std::unique_ptr<IWorkload> CreateMemImport(const MemImportQueueDescriptor& descriptor,
+                                               const WorkloadInfo& info) const override;
+
     std::unique_ptr<IWorkload> CreateResize(const ResizeQueueDescriptor& descriptor,
                                             const WorkloadInfo& info) const override;
 
diff --git a/src/backends/reference/RefLayerSupport.cpp b/src/backends/reference/RefLayerSupport.cpp
index d42404d25b..187cc01c77 100644
--- a/src/backends/reference/RefLayerSupport.cpp
+++ b/src/backends/reference/RefLayerSupport.cpp
@@ -14,6 +14,7 @@
 #include 
 #include 
+#include 
 #include 
 #include 
 
@@ -65,155 +66,6 @@ std::string CreateIncorrectDimensionsErrorMsg(unsigned int expected,
 
 } // anonymous namespace
 
-namespace
-{
-template<typename F>
-bool CheckSupportRule(F rule, Optional<std::string&> reasonIfUnsupported, const char* reason)
-{
-    bool supported = rule();
-    if (!supported && reason)
-    {
-        reasonIfUnsupported.value() += std::string(reason) + "\n"; // Append the reason on a new line
-    }
-    return supported;
-}
-
-struct Rule
-{
-    bool operator()() const
-    {
-        return m_Res;
-    }
-
-    bool m_Res = true;
-};
-
-template<typename T>
-bool AllTypesAreEqualImpl(T t)
-{
-    return true;
-}
-
-template<typename T, typename... Rest>
-bool AllTypesAreEqualImpl(T t1, T t2, Rest... rest)
-{
-    static_assert(std::is_same<T, TensorInfo>::value, "Type T must be a TensorInfo");
-
-    return (t1.GetDataType() == t2.GetDataType()) && AllTypesAreEqualImpl(t2, rest...);
-}
-
-struct TypesAreEqual : public Rule
-{
-    template<typename ... Ts>
-    TypesAreEqual(const Ts&... ts)
-    {
-        m_Res = AllTypesAreEqualImpl(ts...);
-    }
-};
-
-struct QuantizationParametersAreEqual : public Rule
-{
-    QuantizationParametersAreEqual(const TensorInfo& info0, const TensorInfo& info1)
-    {
-        m_Res = info0.GetQuantizationScale() == info1.GetQuantizationScale() &&
-                info0.GetQuantizationOffset() == info1.GetQuantizationOffset();
-    }
-};
-
-struct TypeAnyOf : public Rule
-{
-    template<typename Container>
-    TypeAnyOf(const TensorInfo& info, const Container& c)
-    {
-        m_Res = std::any_of(c.begin(), c.end(), [&info](DataType dt)
-        {
-            return dt == info.GetDataType();
-        });
-    }
-};
-
-struct TypeIs : public Rule
-{
-    TypeIs(const TensorInfo& info, DataType dt)
-    {
-        m_Res = dt == info.GetDataType();
-    }
-};
-
-struct BiasAndWeightsTypesMatch : public Rule
-{
-    BiasAndWeightsTypesMatch(const TensorInfo& biases, const TensorInfo& weights)
-    {
-        m_Res = biases.GetDataType() == GetBiasTypeFromWeightsType(weights.GetDataType()).value();
-    }
-};
-
-struct BiasAndWeightsTypesCompatible : public Rule
-{
-    template<typename Container>
-    BiasAndWeightsTypesCompatible(const TensorInfo& info, const Container& c)
-    {
-        m_Res = std::any_of(c.begin(), c.end(), [&info](DataType dt)
-        {
-            return dt == GetBiasTypeFromWeightsType(info.GetDataType()).value();
-        });
-    }
-};
-
-struct ShapesAreSameRank : public Rule
-{
-    ShapesAreSameRank(const TensorInfo& info0, const TensorInfo& info1)
-    {
-        m_Res = info0.GetShape().GetNumDimensions() == info1.GetShape().GetNumDimensions();
-    }
-};
-
-struct ShapesAreSameTotalSize : public Rule
-{
-    ShapesAreSameTotalSize(const TensorInfo& info0, const TensorInfo& info1)
-    {
-        m_Res = info0.GetNumElements() == info1.GetNumElements();
-    }
-};
-
-struct ShapesAreBroadcastCompatible : public Rule
-{
-    unsigned int CalcInputSize(const TensorShape& in, const TensorShape& out, unsigned int idx)
-    {
-        unsigned int offset = out.GetNumDimensions() - in.GetNumDimensions();
-        unsigned int sizeIn = (idx < offset) ? 1 : in[idx-offset];
-        return sizeIn;
-    }
-
-    ShapesAreBroadcastCompatible(const TensorInfo& in0, const TensorInfo& in1, const TensorInfo& out)
-    {
-        const TensorShape& shape0 = in0.GetShape();
-        const TensorShape& shape1 = in1.GetShape();
-        const TensorShape& outShape = out.GetShape();
-
-        for (unsigned int i=0; i < outShape.GetNumDimensions() && m_Res; i++)
-        {
-            unsigned int sizeOut = outShape[i];
-            unsigned int sizeIn0 = CalcInputSize(shape0, outShape, i);
-            unsigned int sizeIn1 = CalcInputSize(shape1, outShape, i);
-
-            m_Res &= ((sizeIn0 == sizeOut) || (sizeIn0 == 1)) &&
-                     ((sizeIn1 == sizeOut) || (sizeIn1 == 1));
-        }
-    }
-};
-
-struct TensorNumDimensionsAreCorrect : public Rule
-{
-    TensorNumDimensionsAreCorrect(const TensorInfo& info, unsigned int expectedNumDimensions)
-    {
-        m_Res = info.GetNumDimensions() == expectedNumDimensions;
-    }
-};
-
-} // namespace
-
-
 bool RefLayerSupport::IsActivationSupported(const TensorInfo& input,
                                             const TensorInfo& output,
                                             const ActivationDescriptor& descriptor,
diff --git a/src/backends/reference/RefWorkloadFactory.cpp b/src/backends/reference/RefWorkloadFactory.cpp
index 240acecbad..fff2fd2694 100644
--- a/src/backends/reference/RefWorkloadFactory.cpp
+++ b/src/backends/reference/RefWorkloadFactory.cpp
@@ -5,6 +5,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include "RefWorkloadFactory.hpp"
 #include "RefBackendId.hpp"
@@ -250,6 +251,16 @@ std::unique_ptr RefWorkloadFactory::CreateMemCopy(const MemCop
     return std::make_unique(descriptor, info);
 }
 
+std::unique_ptr<IWorkload> RefWorkloadFactory::CreateMemImport(const MemImportQueueDescriptor& descriptor,
+                                                               const WorkloadInfo& info) const
+{
+    if (descriptor.m_Inputs.empty())
+    {
+        throw InvalidArgumentException("RefWorkloadFactory: CreateMemImport() expected an input tensor.");
+    }
+    return std::make_unique<ImportMemGenericWorkload>(descriptor, info);
+}
+
 std::unique_ptr<IWorkload> RefWorkloadFactory::CreateResize(const ResizeQueueDescriptor& descriptor,
                                                             const WorkloadInfo& info) const
 {
diff --git a/src/backends/reference/RefWorkloadFactory.hpp b/src/backends/reference/RefWorkloadFactory.hpp
index b012fbc6f6..314e11788e 100644
--- a/src/backends/reference/RefWorkloadFactory.hpp
+++ b/src/backends/reference/RefWorkloadFactory.hpp
@@ -110,6 +110,9 @@ public:
     std::unique_ptr<IWorkload> CreateMemCopy(const MemCopyQueueDescriptor& descriptor,
                                              const WorkloadInfo& info) const override;
 
+    std::unique_ptr<IWorkload> CreateMemImport(const MemImportQueueDescriptor& descriptor,
+                                               const WorkloadInfo& info) const override;
+
     std::unique_ptr<IWorkload> CreateResize(const ResizeQueueDescriptor& descriptor,
                                             const WorkloadInfo& info) const override;
 
-- 
cgit v1.2.1