author     Derek Lamberti <derek.lamberti@arm.com>     2019-08-01 15:56:25 +0100
committer  Áron Virginás-Tar <aron.virginas-tar@arm.com>  2019-08-05 13:51:42 +0000
commit     f674aa0fd2809126debdaaeb8067067790d86907 (patch)
tree       d86d0261c7a25149217918986043c76d0823ee44
parent     737d9ff58b348b11234b6c2363390607d576177d (diff)
download   armnn-f674aa0fd2809126debdaaeb8067067790d86907.tar.gz
IVGCVSW-3277 Mem export/import support for Tensors
* Rename MemoryStrategy to EdgeStrategy
* Add MemImportLayer
* Import memory rather than copy when possible

Change-Id: I1d3a9414f2cbe517dc2aae9bbd4fdd92712b38ef
Signed-off-by: Derek Lamberti <derek.lamberti@arm.com>
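A minimal, self-contained sketch of the test the change hinges on (plain bitmasks standing in for the MemorySourceFlags type introduced further down in this patch; the values are illustrative): an edge only uses export/import when the producer's export capabilities overlap the consumer's import capabilities, otherwise it falls back to a copy.

    #include <iostream>

    int main()
    {
        unsigned int srcExportFlags = 0x1;        // e.g. the source handle can export Malloc memory
        unsigned int dstImportFlags = 0x1 | 0x2;  // e.g. the destination can import Malloc or DmaBuf

        // A non-empty intersection of the two masks means the edge can import instead of copy.
        bool canExportImport = (srcExportFlags & dstImportFlags) != 0;
        std::cout << (canExportImport ? "ExportToTarget" : "CopyToTarget") << "\n";
        return 0;
    }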
-rw-r--r--  Android.mk | 1
-rw-r--r--  CMakeLists.txt | 3
-rw-r--r--  include/armnn/ILayerSupport.hpp | 4
-rw-r--r--  include/armnn/MemorySources.hpp | 51
-rw-r--r--  src/armnn/Graph.cpp | 85
-rw-r--r--  src/armnn/Graph.hpp | 4
-rw-r--r--  src/armnn/InternalTypes.hpp | 1
-rw-r--r--  src/armnn/Layer.cpp | 18
-rw-r--r--  src/armnn/Layer.hpp | 8
-rw-r--r--  src/armnn/LayerSupport.cpp | 9
-rw-r--r--  src/armnn/LayersFwd.hpp | 2
-rw-r--r--  src/armnn/LoadedNetwork.cpp | 54
-rw-r--r--  src/armnn/Network.cpp | 51
-rw-r--r--  src/armnn/layers/MemImportLayer.cpp | 54
-rw-r--r--  src/armnn/layers/MemImportLayer.hpp | 42
-rw-r--r--  src/armnn/test/GraphTests.cpp | 34
-rw-r--r--  src/armnn/test/TensorHandleStrategyTest.cpp | 119
-rw-r--r--  src/backends/backendsCommon/CMakeLists.txt | 5
-rw-r--r--  src/backends/backendsCommon/ITensorHandle.hpp | 11
-rw-r--r--  src/backends/backendsCommon/ITensorHandleFactory.hpp | 19
-rw-r--r--  src/backends/backendsCommon/LayerSupportBase.cpp | 15
-rw-r--r--  src/backends/backendsCommon/LayerSupportBase.hpp | 4
-rw-r--r--  src/backends/backendsCommon/LayerSupportRules.hpp | 185
-rw-r--r--  src/backends/backendsCommon/MemImportWorkload.cpp | 34
-rw-r--r--  src/backends/backendsCommon/MemImportWorkload.hpp | 27
-rw-r--r--  src/backends/backendsCommon/MemSyncWorkload.cpp | 33
-rw-r--r--  src/backends/backendsCommon/MemSyncWorkload.hpp | 26
-rw-r--r--  src/backends/backendsCommon/WorkloadData.cpp | 103
-rw-r--r--  src/backends/backendsCommon/WorkloadData.hpp | 10
-rw-r--r--  src/backends/backendsCommon/WorkloadFactory.cpp | 16
-rw-r--r--  src/backends/backendsCommon/WorkloadFactory.hpp | 3
-rw-r--r--  src/backends/backendsCommon/common.mk | 2
-rw-r--r--  src/backends/backendsCommon/test/IsLayerSupportedTestImpl.hpp | 2
-rw-r--r--  src/backends/cl/ClLayerSupport.cpp | 9
-rw-r--r--  src/backends/cl/ClLayerSupport.hpp | 4
-rw-r--r--  src/backends/cl/ClWorkloadFactory.cpp | 12
-rw-r--r--  src/backends/cl/ClWorkloadFactory.hpp | 3
-rw-r--r--  src/backends/neon/NeonLayerSupport.cpp | 9
-rw-r--r--  src/backends/neon/NeonLayerSupport.hpp | 4
-rw-r--r--  src/backends/neon/NeonWorkloadFactory.cpp | 12
-rw-r--r--  src/backends/neon/NeonWorkloadFactory.hpp | 3
-rw-r--r--  src/backends/reference/RefLayerSupport.cpp | 150
-rw-r--r--  src/backends/reference/RefWorkloadFactory.cpp | 11
-rw-r--r--  src/backends/reference/RefWorkloadFactory.hpp | 3
44 files changed, 964 insertions, 291 deletions
diff --git a/Android.mk b/Android.mk
index 8a24e7b9eb..bb7ed86283 100644
--- a/Android.mk
+++ b/Android.mk
@@ -114,6 +114,7 @@ LOCAL_SRC_FILES := \
src/armnn/layers/MaximumLayer.cpp \
src/armnn/layers/MeanLayer.cpp \
src/armnn/layers/MemCopyLayer.cpp \
+ src/armnn/layers/MemImportLayer.cpp \
src/armnn/layers/MergeLayer.cpp \
src/armnn/layers/MinimumLayer.cpp \
src/armnn/layers/MultiplicationLayer.cpp \
diff --git a/CMakeLists.txt b/CMakeLists.txt
index c823b817cf..eaaf2d6813 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -212,6 +212,7 @@ list(APPEND armnn_sources
include/armnn/LayerSupport.hpp
include/armnn/LayerVisitorBase.hpp
include/armnn/LstmParams.hpp
+ include/armnn/MemorySources.hpp
include/armnn/NetworkFwd.hpp
include/armnn/Optional.hpp
include/armnn/QuantizedLstmParams.hpp
@@ -275,6 +276,8 @@ list(APPEND armnn_sources
src/armnn/layers/MeanLayer.cpp
src/armnn/layers/MemCopyLayer.hpp
src/armnn/layers/MemCopyLayer.cpp
+ src/armnn/layers/MemImportLayer.hpp
+ src/armnn/layers/MemImportLayer.cpp
src/armnn/layers/MergeLayer.hpp
src/armnn/layers/MergeLayer.cpp
src/armnn/layers/MinimumLayer.cpp
diff --git a/include/armnn/ILayerSupport.hpp b/include/armnn/ILayerSupport.hpp
index 45360984ff..33f86dea59 100644
--- a/include/armnn/ILayerSupport.hpp
+++ b/include/armnn/ILayerSupport.hpp
@@ -172,6 +172,10 @@ public:
const TensorInfo& output,
Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const = 0;
+ virtual bool IsMemImportSupported(const TensorInfo& input,
+ const TensorInfo& output,
+ Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const = 0;
+
virtual bool IsMergeSupported(const TensorInfo& input0,
const TensorInfo& input1,
const TensorInfo& output,
diff --git a/include/armnn/MemorySources.hpp b/include/armnn/MemorySources.hpp
new file mode 100644
index 0000000000..e138f56fd4
--- /dev/null
+++ b/include/armnn/MemorySources.hpp
@@ -0,0 +1,51 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <type_traits>
+
+namespace armnn
+{
+
+enum class MemorySource
+{
+ Malloc = 1,
+ DmaBuf = 2,
+ DmaBufProtected = 4
+};
+
+using MemorySourceFlags = unsigned int;
+
+template<typename T>
+struct IsMemorySource
+{
+ static const bool value = false;
+};
+
+template<>
+struct IsMemorySource<MemorySource>
+{
+ static const bool value = true;
+};
+
+template <typename Arg, typename std::enable_if<IsMemorySource<Arg>::value>::type* = nullptr>
+MemorySourceFlags Combine(Arg sourceA, Arg sourceB)
+{
+ return static_cast<MemorySourceFlags>(sourceA) | static_cast<MemorySourceFlags>(sourceB);
+}
+
+template <typename Arg, typename ... Args, typename std::enable_if<IsMemorySource<Arg>::value>::type* = nullptr>
+MemorySourceFlags Combine(Arg source, Args... rest)
+{
+ return static_cast<MemorySourceFlags>(source) | Combine(rest...);
+}
+
+inline bool CheckFlag(MemorySourceFlags flags, MemorySource source)
+{
+ return (static_cast<MemorySourceFlags>(source) & flags) != 0;
+}
+
+} //namespace armnn \ No newline at end of file
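For reference, a small usage sketch of the helpers defined above (assuming the armnn/MemorySources.hpp added by this patch is on the include path): Combine() folds any number of MemorySource values into a MemorySourceFlags bitmask, and CheckFlag() tests whether a given source is present in the mask.

    #include <armnn/MemorySources.hpp>
    #include <cassert>

    int main()
    {
        using namespace armnn;

        // Fold several sources into one flags value...
        MemorySourceFlags flags = Combine(MemorySource::Malloc, MemorySource::DmaBuf);

        // ...and query individual sources back out of it.
        assert(CheckFlag(flags, MemorySource::Malloc));
        assert(CheckFlag(flags, MemorySource::DmaBuf));
        assert(!CheckFlag(flags, MemorySource::DmaBufProtected));
        return 0;
    }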
diff --git a/src/armnn/Graph.cpp b/src/armnn/Graph.cpp
index 9e00f5ec01..6212c49eba 100644
--- a/src/armnn/Graph.cpp
+++ b/src/armnn/Graph.cpp
@@ -255,26 +255,31 @@ const Graph& Graph::TopologicalSort() const
return *this;
}
-void Graph::AddCopyLayers(std::map<BackendId, std::unique_ptr<IBackendInternal>>& backends,
- TensorHandleFactoryRegistry& registry)
+void Graph::AddCompatibilityLayers(std::map<BackendId, std::unique_ptr<IBackendInternal>>& backends,
+ TensorHandleFactoryRegistry& registry)
{
- // Returns true if the given layer could potentially need an intermediate copy layer (depending on its
- // connections to other layers). At the time of writing, copy layers will be inserted in the following situations:
- // CPU -> CL (and viceversa)
- // CPU -> Neon (and viceversa)
- auto MayNeedCopyLayer = [](const Layer& layer)
+ // Returns true if the given layer could potentially need an intermediate copy/import layer (depending on its
+ // connections to other layers).
+ auto MayNeedCompatibilityLayer = [](const Layer& layer)
{
// All layers should have been associated with a valid compute device at this point.
BOOST_ASSERT(layer.GetBackendId() != Compute::Undefined);
- // Does not need another copy layer if a copy layer is already present.
- return layer.GetType() != LayerType::MemCopy;
+ // Does not need another compatibility layer if a copy or import layer is already present.
+ return layer.GetType() != LayerType::MemCopy &&
+ layer.GetType() != LayerType::MemImport;
};
- ForEachLayer([this, &backends, &registry, MayNeedCopyLayer](Layer* srcLayer)
+ auto IsCompatibilityStrategy = [](EdgeStrategy strategy)
+ {
+ return strategy == EdgeStrategy::CopyToTarget ||
+ strategy == EdgeStrategy::ExportToTarget;
+ };
+
+ ForEachLayer([this, &backends, &registry, MayNeedCompatibilityLayer, IsCompatibilityStrategy](Layer* srcLayer)
{
BOOST_ASSERT(srcLayer);
- if (!MayNeedCopyLayer(*srcLayer))
+ if (!MayNeedCompatibilityLayer(*srcLayer))
{
// The current layer does not need copy layers, move to the next one
return;
@@ -285,33 +290,43 @@ void Graph::AddCopyLayers(std::map<BackendId, std::unique_ptr<IBackendInternal>>
{
OutputSlot& srcOutputSlot = srcLayer->GetOutputSlot(srcOutputIndex);
const std::vector<InputSlot*> srcConnections = srcOutputSlot.GetConnections();
- const std::vector<MemoryStrategy> srcMemoryStrategies = srcOutputSlot.GetMemoryStrategies();
+ const std::vector<EdgeStrategy> srcEdgeStrategies = srcOutputSlot.GetEdgeStrategies();
for (unsigned int srcConnectionIndex = 0; srcConnectionIndex < srcConnections.size(); srcConnectionIndex++)
{
InputSlot* dstInputSlot = srcConnections[srcConnectionIndex];
BOOST_ASSERT(dstInputSlot);
- MemoryStrategy strategy = srcMemoryStrategies[srcConnectionIndex];
- BOOST_ASSERT_MSG(strategy != MemoryStrategy::Undefined,
+ EdgeStrategy strategy = srcEdgeStrategies[srcConnectionIndex];
+ BOOST_ASSERT_MSG(strategy != EdgeStrategy::Undefined,
"Undefined memory strategy found while adding copy layers for compatibility");
const Layer& dstLayer = dstInputSlot->GetOwningLayer();
- if (MayNeedCopyLayer(dstLayer) &&
- strategy == MemoryStrategy::CopyToTarget)
+ if (MayNeedCompatibilityLayer(dstLayer) &&
+ IsCompatibilityStrategy(strategy))
{
// A copy layer is needed in between the source and destination layers.
// Record the operation rather than attempting to modify the graph as we go.
// (invalidating iterators)
- const std::string copyLayerName = boost::str(boost::format("[ %1% (%2%) -> %3% (%4%) ]")
+ const std::string compLayerName = boost::str(boost::format("[ %1% (%2%) -> %3% (%4%) ]")
% srcLayer->GetName()
% srcOutputIndex
% dstLayer.GetName()
% dstInputSlot->GetSlotIndex());
- MemCopyLayer* const copyLayer = InsertNewLayer<MemCopyLayer>(*dstInputSlot, copyLayerName.c_str());
- copyLayer->SetBackendId(dstLayer.GetBackendId());
+ Layer* compLayer = nullptr;
+ if (strategy == EdgeStrategy::CopyToTarget)
+ {
+ compLayer = InsertNewLayer<MemCopyLayer>(*dstInputSlot, compLayerName.c_str());
+ }
+ else
+ {
+ BOOST_ASSERT_MSG(strategy == EdgeStrategy::ExportToTarget, "Invalid edge strategy found.");
+ compLayer = InsertNewLayer<MemImportLayer>(*dstInputSlot, compLayerName.c_str());
+ }
+
+ compLayer->SetBackendId(dstLayer.GetBackendId());
- OutputSlot& copyOutputSlot = copyLayer->GetOutputSlot(0);
+ OutputSlot& compOutputSlot = compLayer->GetOutputSlot(0);
auto backendIt = backends.find(dstLayer.GetBackendId());
if (backendIt != backends.end() &&
backendIt->second &&
@@ -325,34 +340,40 @@ void Graph::AddCopyLayers(std::map<BackendId, std::unique_ptr<IBackendInternal>>
for (auto preference : tensorHandleFactoryIds)
{
auto factory = registry.GetFactory(preference);
- if (factory && factory->SupportsMapUnmap())
+ if (factory)
{
- copyOutputSlot.SetTensorHandleFactory(preference);
- found = true;
- break;
+ auto srcPref = srcOutputSlot.GetTensorHandleFactoryId();
+ auto srcFactory = registry.GetFactory(srcPref);
+ bool canExportImport = (factory->GetImportFlags() & srcFactory->GetExportFlags()) != 0;
+ if (factory->SupportsMapUnmap() || canExportImport)
+ {
+ compOutputSlot.SetTensorHandleFactory(preference);
+ found = true;
+ break;
+ }
}
}
- BOOST_ASSERT_MSG(found, "Could not find a mappable TensorHandle for copy layer");
+ BOOST_ASSERT_MSG(found, "Could not find a valid TensorHandle for compatibility layer");
}
else
{
- copyOutputSlot.SetTensorHandleFactory(ITensorHandleFactory::LegacyFactoryId);
+ compOutputSlot.SetTensorHandleFactory(ITensorHandleFactory::LegacyFactoryId);
}
- // The output strategy of a copy layer is always DirectCompatibility.
- copyOutputSlot.SetMemoryStrategy(0, MemoryStrategy::DirectCompatibility);
+ // The output strategy of a compatibility layer is always DirectCompatibility.
+ compOutputSlot.SetEdgeStrategy(0, EdgeStrategy::DirectCompatibility);
// Recalculate the connection index on the previous layer as we have just inserted into it.
const std::vector<InputSlot*>& newSourceConnections = srcOutputSlot.GetConnections();
long newSrcConnectionIndex = std::distance(newSourceConnections.begin(),
std::find(newSourceConnections.begin(),
newSourceConnections.end(),
- &copyLayer->GetInputSlot(0)));
+ &compLayer->GetInputSlot(0)));
- // The input strategy of a copy layer is always DirectCompatibilty.
- srcOutputSlot.SetMemoryStrategy(boost::numeric_cast<unsigned int>(newSrcConnectionIndex),
- MemoryStrategy::DirectCompatibility);
+ // The input strategy of a compatibility layer is always DirectCompatibility.
+ srcOutputSlot.SetEdgeStrategy(boost::numeric_cast<unsigned int>(newSrcConnectionIndex),
+ EdgeStrategy::DirectCompatibility);
}
}
}
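In short, AddCompatibilityLayers() now inserts a different layer depending on the edge strategy. A condensed sketch of that mapping (plain enums, not the real Graph/Layer classes):

    enum class EdgeStrategy { Undefined, DirectCompatibility, ExportToTarget, CopyToTarget };
    enum class CompatLayer  { None, MemCopy, MemImport };

    CompatLayer ChooseCompatibilityLayer(EdgeStrategy strategy)
    {
        switch (strategy)
        {
            case EdgeStrategy::CopyToTarget:   return CompatLayer::MemCopy;   // insert a MemCopyLayer
            case EdgeStrategy::ExportToTarget: return CompatLayer::MemImport; // insert a MemImportLayer
            default:                           return CompatLayer::None;      // edge is already compatible
        }
    }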
diff --git a/src/armnn/Graph.hpp b/src/armnn/Graph.hpp
index f8113375c9..c65f12bbc3 100644
--- a/src/armnn/Graph.hpp
+++ b/src/armnn/Graph.hpp
@@ -191,8 +191,8 @@ public:
/// Modifies the graph in-place, removing edges connecting layers using different compute devices,
/// and relinking them via an intermediary copy layers.
- void AddCopyLayers(std::map<BackendId, std::unique_ptr<class IBackendInternal>>& backends,
- TensorHandleFactoryRegistry& registry);
+ void AddCompatibilityLayers(std::map<BackendId, std::unique_ptr<class IBackendInternal>>& backends,
+ TensorHandleFactoryRegistry& registry);
/// Substitutes the given sub-graph with either a new layer or a new sub-graph.
/// In either case, the given layer or all the layers in the given sub-graph must belong to this graph.
diff --git a/src/armnn/InternalTypes.hpp b/src/armnn/InternalTypes.hpp
index b0fea7c8c2..7a0f9a1cb0 100644
--- a/src/armnn/InternalTypes.hpp
+++ b/src/armnn/InternalTypes.hpp
@@ -40,6 +40,7 @@ enum class LayerType
Maximum,
Mean,
MemCopy,
+ MemImport,
Merge,
Minimum,
Multiplication,
diff --git a/src/armnn/Layer.cpp b/src/armnn/Layer.cpp
index a287220702..528020bab5 100644
--- a/src/armnn/Layer.cpp
+++ b/src/armnn/Layer.cpp
@@ -31,7 +31,7 @@ void InputSlot::Insert(Layer& layer)
// Connects inserted layer to parent.
BOOST_ASSERT(layer.GetNumInputSlots() == 1);
int idx = prevSlot->Connect(layer.GetInputSlot(0));
- prevSlot->SetMemoryStrategy(boost::numeric_cast<unsigned int>(idx), MemoryStrategy::Undefined);
+ prevSlot->SetEdgeStrategy(boost::numeric_cast<unsigned int>(idx), EdgeStrategy::Undefined);
// Sets tensor info for inserted layer.
const TensorInfo& tensorInfo = prevSlot->GetTensorInfo();
@@ -40,7 +40,7 @@ void InputSlot::Insert(Layer& layer)
// Connects inserted layer to this.
layer.GetOutputSlot(0).Connect(*this);
- layer.GetOutputSlot(0).SetMemoryStrategy(0, MemoryStrategy::Undefined);
+ layer.GetOutputSlot(0).SetEdgeStrategy(0, EdgeStrategy::Undefined);
}
const InputSlot* OutputSlot::GetConnection(unsigned int index) const
@@ -80,7 +80,7 @@ int OutputSlot::Connect(InputSlot& destination)
{
destination.SetConnection(this);
m_Connections.push_back(&destination);
- m_MemoryStrategies.push_back(MemoryStrategy::Undefined);
+ m_EdgeStrategies.push_back(EdgeStrategy::Undefined);
return boost::numeric_cast<int>(m_Connections.size() - 1);
}
@@ -97,7 +97,7 @@ void OutputSlot::Disconnect(InputSlot& slot)
auto idx = std::distance(m_Connections.begin(), it);
m_Connections.erase(std::remove(m_Connections.begin(), m_Connections.end(), &slot), m_Connections.end());
- m_MemoryStrategies.erase(m_MemoryStrategies.begin() + idx);
+ m_EdgeStrategies.erase(m_EdgeStrategies.begin() + idx);
}
void OutputSlot::DisconnectAll()
@@ -113,7 +113,7 @@ void OutputSlot::MoveAllConnections(OutputSlot& destination)
{
while (GetNumConnections() > 0)
{
- BOOST_ASSERT_MSG(m_MemoryStrategies[0] == MemoryStrategy::Undefined,
+ BOOST_ASSERT_MSG(m_EdgeStrategies[0] == EdgeStrategy::Undefined,
"Cannot move connections once memory strategies have be established.");
InputSlot& connection = *GetConnection(0);
@@ -174,14 +174,14 @@ ITensorHandleFactory::FactoryId OutputSlot::GetTensorHandleFactoryId() const
return m_TensorHandleFactoryId;
}
-void OutputSlot::SetMemoryStrategy(unsigned int connectionIndex, MemoryStrategy strategy)
+void OutputSlot::SetEdgeStrategy(unsigned int connectionIndex, EdgeStrategy strategy)
{
- m_MemoryStrategies[connectionIndex] = strategy;
+ m_EdgeStrategies[connectionIndex] = strategy;
}
-MemoryStrategy OutputSlot::GetMemoryStrategyForConnection(unsigned int connectionIdx) const
+EdgeStrategy OutputSlot::GetEdgeStrategyForConnection(unsigned int connectionIdx) const
{
- return m_MemoryStrategies[connectionIdx];
+ return m_EdgeStrategies[connectionIdx];
}
namespace {
diff --git a/src/armnn/Layer.hpp b/src/armnn/Layer.hpp
index b90d040475..5944ea83ed 100644
--- a/src/armnn/Layer.hpp
+++ b/src/armnn/Layer.hpp
@@ -123,7 +123,7 @@ public:
void Disconnect(InputSlot& slot);
const std::vector<InputSlot*>& GetConnections() const { return m_Connections; }
- const std::vector<MemoryStrategy>& GetMemoryStrategies() const { return m_MemoryStrategies; }
+ const std::vector<EdgeStrategy>& GetEdgeStrategies() const { return m_EdgeStrategies; }
bool ValidateTensorShape(const TensorShape& shape) const;
@@ -160,8 +160,8 @@ public:
void SetTensorHandleFactory(const ITensorHandleFactory::FactoryId& id);
ITensorHandleFactory::FactoryId GetTensorHandleFactoryId() const;
- void SetMemoryStrategy(unsigned int connectionIndex, MemoryStrategy strategy);
- MemoryStrategy GetMemoryStrategyForConnection(unsigned int connectionIdx) const;
+ void SetEdgeStrategy(unsigned int connectionIndex, EdgeStrategy strategy);
+ EdgeStrategy GetEdgeStrategyForConnection(unsigned int connectionIdx) const;
private:
void ValidateConnectionIndex(unsigned int index) const;
@@ -171,7 +171,7 @@ private:
std::vector<InputSlot*> m_Connections;
ITensorHandleFactory::FactoryId m_TensorHandleFactoryId;
- std::vector<MemoryStrategy> m_MemoryStrategies;
+ std::vector<EdgeStrategy> m_EdgeStrategies;
};
// InputSlot inlines that need OutputSlot declaration.
diff --git a/src/armnn/LayerSupport.cpp b/src/armnn/LayerSupport.cpp
index 047c80a8c4..5a756b9544 100644
--- a/src/armnn/LayerSupport.cpp
+++ b/src/armnn/LayerSupport.cpp
@@ -371,6 +371,15 @@ bool IsMemCopySupported(const BackendId &backend,
FORWARD_LAYER_SUPPORT_FUNC(backend, IsMemCopySupported, input, output);
}
+bool IsMemImportSupported(const BackendId &backend,
+ const TensorInfo &input,
+ const TensorInfo &output,
+ char *reasonIfUnsupported,
+ size_t reasonIfUnsupportedMaxLength)
+{
+ FORWARD_LAYER_SUPPORT_FUNC(backend, IsMemImportSupported, input, output);
+}
+
bool IsMergeSupported(const BackendId& backend,
const TensorInfo& input0,
const TensorInfo& input1,
diff --git a/src/armnn/LayersFwd.hpp b/src/armnn/LayersFwd.hpp
index 2c8d5d2e07..cadcd49acb 100644
--- a/src/armnn/LayersFwd.hpp
+++ b/src/armnn/LayersFwd.hpp
@@ -32,6 +32,7 @@
#include "layers/MaximumLayer.hpp"
#include "layers/MeanLayer.hpp"
#include "layers/MemCopyLayer.hpp"
+#include "layers/MemImportLayer.hpp"
#include "layers/MergeLayer.hpp"
#include "layers/MinimumLayer.hpp"
#include "layers/MultiplicationLayer.hpp"
@@ -110,6 +111,7 @@ DECLARE_LAYER(Lstm)
DECLARE_LAYER(Maximum)
DECLARE_LAYER(Mean)
DECLARE_LAYER(MemCopy)
+DECLARE_LAYER(MemImport)
DECLARE_LAYER(Merge)
DECLARE_LAYER(Minimum)
DECLARE_LAYER(Multiplication)
diff --git a/src/armnn/LoadedNetwork.cpp b/src/armnn/LoadedNetwork.cpp
index 7873e48780..a81528aa65 100644
--- a/src/armnn/LoadedNetwork.cpp
+++ b/src/armnn/LoadedNetwork.cpp
@@ -14,6 +14,8 @@
#include <backendsCommon/CpuTensorHandle.hpp>
#include <backendsCommon/BackendRegistry.hpp>
#include <backendsCommon/IMemoryManager.hpp>
+#include <backendsCommon/MemCopyWorkload.hpp>
+#include <backendsCommon/MemSyncWorkload.hpp>
#include <boost/polymorphic_cast.hpp>
#include <boost/assert.hpp>
@@ -389,8 +391,22 @@ void LoadedNetwork::EnqueueInput(const BindableLayer& layer, ITensorHandle* tens
inputQueueDescriptor.m_Outputs.push_back(outputTensorHandle);
info.m_OutputTensorInfos.push_back(outputTensorInfo);
- const IWorkloadFactory& workloadFactory = GetWorkloadFactory(layer);
- auto inputWorkload = workloadFactory.CreateInput(inputQueueDescriptor, info);
+ MemorySourceFlags importFlags = outputTensorHandle->GetImportFlags();
+ if (CheckFlag(importFlags, MemorySource::Malloc)) // Try import the input tensor
+ {
+ // This assumes a CPU Tensor handle
+ void* mem = tensorHandle->Map(false);
+ if (outputTensorHandle->Import(mem, MemorySource::Malloc))
+ {
+ tensorHandle->Unmap();
+ return; // No need for a workload since the import has been done.
+ }
+ tensorHandle->Unmap();
+ }
+
+ // Create a mem copy workload for input since we could not import
+ auto inputWorkload = std::make_unique<CopyMemGenericWorkload>(inputQueueDescriptor, info);
+
BOOST_ASSERT_MSG(inputWorkload, "No input workload created");
m_InputQueue.push_back(move(inputWorkload));
}
@@ -422,11 +438,41 @@ void LoadedNetwork::EnqueueOutput(const BindableLayer& layer, ITensorHandle* ten
ITensorHandle* inputTensorHandle = outputHandler.GetData();
BOOST_ASSERT_MSG(inputTensorHandle != nullptr, "Data should have been allocated.");
+ // Try import the output tensor.
+ // Note: We can only import the output pointer if all of the following hold true:
+ // a) The imported pointer is aligned sufficiently
+ // b) The tensor has zero padding
+ // c) There is only one connection to the OutputSlot and it is to an OutputLayer.
+ // d) The output pointer is allocated via malloc. (Other types will be supported in a later release)
+ if (layer.GetInputSlots()[0].GetConnectedOutputSlot()->GetNumConnections() == 1)
+ {
+ MemorySourceFlags importFlags = inputTensorHandle->GetImportFlags();
+ if (CheckFlag(importFlags, MemorySource::Malloc))
+ {
+ void* mem = tensorHandle->Map(false);
+ bool importOk = inputTensorHandle->Import(mem, MemorySource::Malloc);
+ tensorHandle->Unmap();
+
+ if (importOk)
+ {
+ // Insert synchronization workload
+ MemSyncQueueDescriptor syncDesc;
+ syncDesc.m_Inputs.push_back(inputTensorHandle);
+ info.m_InputTensorInfos.push_back(inputTensorInfo);
+ auto syncWorkload = std::make_unique<SyncMemGenericWorkload>(syncDesc, info);
+ BOOST_ASSERT_MSG(syncWorkload, "No sync workload created");
+ m_OutputQueue.push_back(move(syncWorkload));
+
+ return; //No need to add the output workload below
+ }
+ }
+ }
+
+ // If we got here then we couldn't import the memory, so add an output workload which performs a memcopy.
outputQueueDescriptor.m_Inputs.push_back(inputTensorHandle);
info.m_InputTensorInfos.push_back(inputTensorInfo);
- const IWorkloadFactory& workloadFactory = GetWorkloadFactory(layer);
- auto outputWorkload = workloadFactory.CreateOutput(outputQueueDescriptor, info);
+ auto outputWorkload = std::make_unique<CopyMemGenericWorkload>(outputQueueDescriptor, info);
BOOST_ASSERT_MSG(outputWorkload, "No output workload created");
m_OutputQueue.push_back(move(outputWorkload));
}
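The EnqueueInput()/EnqueueOutput() changes above follow the same pattern: try a zero-copy import of the caller's buffer first, and only fall back to a copy workload if the import is refused. A minimal sketch of that pattern using a hypothetical FakeTensorHandle (not a real backend handle):

    #include <cstddef>
    #include <iostream>

    enum class MemorySource { Malloc = 1, DmaBuf = 2, DmaBufProtected = 4 };

    struct FakeTensorHandle                       // stand-in for an importing ITensorHandle
    {
        void* m_Imported = nullptr;

        unsigned int GetImportFlags() const { return static_cast<unsigned int>(MemorySource::Malloc); }

        bool Import(void* memory, MemorySource source)
        {
            if (source != MemorySource::Malloc) { return false; }
            m_Imported = memory;                  // zero-copy: just keep the caller's pointer
            return true;
        }
    };

    void CopyInto(FakeTensorHandle& /*dst*/, const void* /*src*/, std::size_t /*bytes*/)
    {
        std::cout << "fallback: mem copy workload\n";
    }

    void Enqueue(FakeTensorHandle& backendHandle, void* userBuffer, std::size_t bytes)
    {
        const unsigned int mallocFlag = static_cast<unsigned int>(MemorySource::Malloc);
        if ((backendHandle.GetImportFlags() & mallocFlag) != 0 &&
            backendHandle.Import(userBuffer, MemorySource::Malloc))
        {
            std::cout << "imported user buffer, no copy needed\n";
            return;                               // no copy workload required
        }
        CopyInto(backendHandle, userBuffer, bytes);
    }

    int main()
    {
        FakeTensorHandle handle;
        int userBuffer[16] = {};
        Enqueue(handle, userBuffer, sizeof(userBuffer));
        return 0;
    }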
diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp
index 2195c71735..b30cd9f3c2 100644
--- a/src/armnn/Network.cpp
+++ b/src/armnn/Network.cpp
@@ -441,7 +441,7 @@ bool RequiresCopy(ITensorHandleFactory::FactoryId src,
ITensorHandleFactory* srcFactory = registry.GetFactory(src);
ITensorHandleFactory* dstFactory = registry.GetFactory(dst);
- if (srcFactory->SupportsExport() && dstFactory->SupportsImport())
+ if ((srcFactory->GetExportFlags() & dstFactory->GetImportFlags()) != 0)
{
return false;
}
@@ -493,11 +493,14 @@ ITensorHandleFactory::FactoryId CalculateSlotOptionForInput(BackendsMap& backend
auto dstPrefs = toBackend->second.get()->GetHandleFactoryPreferences();
for (auto&& dst : dstPrefs)
{
- // Input layers use the mem copy workload, so the selected factory must support map/unmap API
+ // Input layers use the mem copy workload or import, so the selected factory must
+ // support either the map/unmap API or Import API
ITensorHandleFactory* factory = registry.GetFactory(dst);
- if (!factory->SupportsMapUnmap())
+ if (!factory->SupportsMapUnmap() &&
+ !CheckFlag(factory->GetImportFlags(), MemorySource::Malloc)) // Just support cpu mem imports for now
{
- // The current tensor handle factory does not support the map/unmap strategy, move to the next one
+ // The current tensor handle factory does not support the map/unmap or import
+ // strategy, move to the next one
continue;
}
@@ -648,11 +651,11 @@ ITensorHandleFactory::FactoryId CalculateSlotOption(BackendsMap& backends,
return ITensorHandleFactory::LegacyFactoryId;
}
-MemoryStrategy CalculateStrategy(BackendsMap& backends,
- ITensorHandleFactory::FactoryId srcFactoryId,
- const Layer& layer,
- const Layer& connectedLayer,
- TensorHandleFactoryRegistry& registry)
+EdgeStrategy CalculateEdgeStrategy(BackendsMap& backends,
+ ITensorHandleFactory::FactoryId srcFactoryId,
+ const Layer& layer,
+ const Layer& connectedLayer,
+ TensorHandleFactoryRegistry& registry)
{
auto toBackend = backends.find(connectedLayer.GetBackendId());
BOOST_ASSERT_MSG(toBackend != backends.end(), "Backend id not found for the connected layer");
@@ -664,19 +667,19 @@ MemoryStrategy CalculateStrategy(BackendsMap& backends,
{
if (layer.GetBackendId() != connectedLayer.GetBackendId())
{
- return MemoryStrategy::CopyToTarget;
+ return EdgeStrategy::CopyToTarget;
}
else
{
- return MemoryStrategy::DirectCompatibility;
+ return EdgeStrategy::DirectCompatibility;
}
}
// TensorHandleFactory API present, so perform more sophisticated strategies.
- // Dst Output layers don't require copy because they use map/unmap
+ // Dst Output layers don't require copy because they use import or map/unmap
if (connectedLayer.GetType() == LayerType::Output)
{
- return MemoryStrategy::DirectCompatibility;
+ return EdgeStrategy::DirectCompatibility;
}
// Search for direct match in prefs
@@ -684,20 +687,20 @@ MemoryStrategy CalculateStrategy(BackendsMap& backends,
{
if (pref == srcFactoryId)
{
- return MemoryStrategy::DirectCompatibility;
+ return EdgeStrategy::DirectCompatibility;
}
}
// Search for export/import options
ITensorHandleFactory* srcFactory = registry.GetFactory(srcFactoryId);
- if (srcFactory->SupportsExport())
+ if (srcFactory->GetExportFlags() != 0)
{
for (auto&& pref : dstPrefs)
{
ITensorHandleFactory* dstFactory = registry.GetFactory(pref);
- if (dstFactory->SupportsImport())
+ if ((dstFactory->GetImportFlags() & srcFactory->GetExportFlags()) != 0)
{
- return MemoryStrategy::ExportToTarget;
+ return EdgeStrategy::ExportToTarget;
}
}
}
@@ -710,12 +713,12 @@ MemoryStrategy CalculateStrategy(BackendsMap& backends,
ITensorHandleFactory* dstFactory = registry.GetFactory(pref);
if (dstFactory->SupportsMapUnmap())
{
- return MemoryStrategy::CopyToTarget;
+ return EdgeStrategy::CopyToTarget;
}
}
}
- return MemoryStrategy::Undefined;
+ return EdgeStrategy::Undefined;
}
// Select the TensorHandleFactories and the corresponding memory strategy
@@ -756,15 +759,15 @@ OptimizationResult SelectTensorHandleStrategy(Graph& optGraph,
}
outputSlot.SetTensorHandleFactory(slotOption);
- // Now determine the "best" memory strategy for each connection given the slotOption.
+ // Now determine the "best" edge strategy for each connection given the slotOption.
unsigned int connectionIdx = 0;
for (auto&& connection : outputSlot.GetConnections())
{
const Layer& connectedLayer = connection->GetOwningLayer();
- MemoryStrategy strategy = CalculateStrategy(backends, slotOption, *layer, connectedLayer, registry);
+ EdgeStrategy strategy = CalculateEdgeStrategy(backends, slotOption, *layer, connectedLayer, registry);
- if (strategy == MemoryStrategy::Undefined)
+ if (strategy == EdgeStrategy::Undefined)
{
result.m_Error = true;
if (errMessages)
@@ -775,7 +778,7 @@ OptimizationResult SelectTensorHandleStrategy(Graph& optGraph,
return;
}
- outputSlot.SetMemoryStrategy(connectionIdx, strategy);
+ outputSlot.SetEdgeStrategy(connectionIdx, strategy);
connectionIdx++;
}
@@ -887,7 +890,7 @@ IOptimizedNetworkPtr Optimize(const INetwork& inNetwork,
}
// Based on the tensor handle strategy determined above, insert copy layers where required.
- optGraph.AddCopyLayers(backends, tensorHandleFactoryRegistry);
+ optGraph.AddCompatibilityLayers(backends, tensorHandleFactoryRegistry);
// Convert constants
Optimizer::Pass(optGraph, MakeOptimizations(ConvertConstantsFloatToHalf()));
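The ordering in CalculateEdgeStrategy() above can be summarised as: prefer DirectCompatibility on a factory match, then ExportToTarget when the export/import flag masks overlap, then CopyToTarget via map/unmap. A simplified, self-contained sketch of that preference order (hypothetical SimpleFactory type, omitting the Output-layer and legacy-factory special cases handled in the real code):

    #include <string>
    #include <vector>

    enum class EdgeStrategy { Undefined, DirectCompatibility, ExportToTarget, CopyToTarget };

    struct SimpleFactory                 // stand-in for ITensorHandleFactory
    {
        std::string  id;                 // factory identifier
        unsigned int exportFlags;        // sources this factory can export
        unsigned int importFlags;        // sources this factory can import
        bool         mapUnmap;           // supports the map/unmap API
    };

    // Mirrors the preference order: direct match first, then export/import when the flag
    // masks overlap, then a copy via map/unmap, otherwise Undefined.
    EdgeStrategy ChooseStrategy(const SimpleFactory& src, const std::vector<SimpleFactory>& dstPrefs)
    {
        for (const auto& dst : dstPrefs)
        {
            if (dst.id == src.id) { return EdgeStrategy::DirectCompatibility; }
        }
        if (src.exportFlags != 0)
        {
            for (const auto& dst : dstPrefs)
            {
                if ((dst.importFlags & src.exportFlags) != 0) { return EdgeStrategy::ExportToTarget; }
            }
        }
        if (src.mapUnmap)
        {
            for (const auto& dst : dstPrefs)
            {
                if (dst.mapUnmap) { return EdgeStrategy::CopyToTarget; }
            }
        }
        return EdgeStrategy::Undefined;
    }

    int main()
    {
        SimpleFactory src{ "SrcFactory", 0x1, 0x0, true };
        std::vector<SimpleFactory> dstPrefs{ { "DstFactory", 0x0, 0x1, true } };
        return ChooseStrategy(src, dstPrefs) == EdgeStrategy::ExportToTarget ? 0 : 1;
    }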
diff --git a/src/armnn/layers/MemImportLayer.cpp b/src/armnn/layers/MemImportLayer.cpp
new file mode 100644
index 0000000000..7a922f5a7c
--- /dev/null
+++ b/src/armnn/layers/MemImportLayer.cpp
@@ -0,0 +1,54 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#include "MemImportLayer.hpp"
+
+#include "LayerCloneBase.hpp"
+
+#include <armnn/TypesUtils.hpp>
+#include <backendsCommon/WorkloadData.hpp>
+#include <backendsCommon/WorkloadFactory.hpp>
+#include <backendsCommon/MemImportWorkload.hpp>
+
+namespace armnn
+{
+
+MemImportLayer::MemImportLayer(const char* name)
+ : Layer(1, 1, LayerType::MemImport, name)
+{
+}
+
+MemImportLayer* MemImportLayer::Clone(Graph& graph) const
+{
+ return CloneBase<MemImportLayer>(graph, GetName());
+}
+
+std::unique_ptr<IWorkload> MemImportLayer::CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const
+{
+ MemImportQueueDescriptor descriptor;
+
+ //This is different from other workloads. Does not get created by the workload factory.
+ return std::make_unique<ImportMemGenericWorkload>(descriptor, PrepInfoAndDesc(descriptor, graph));
+}
+
+void MemImportLayer::ValidateTensorShapesFromInputs()
+{
+ VerifyLayerConnections(1, CHECK_LOCATION());
+
+ auto inferredShapes = InferOutputShapes({ GetInputSlot(0).GetConnection()->GetTensorInfo().GetShape() });
+
+ BOOST_ASSERT(inferredShapes.size() == 1);
+
+ ConditionalThrowIfNotEqual<LayerValidationException>(
+ "MemImportLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.",
+ GetOutputSlot(0).GetTensorInfo().GetShape(),
+ inferredShapes[0]);
+}
+
+void MemImportLayer::Accept(ILayerVisitor& visitor) const
+{
+ throw armnn::Exception("MemImportLayer should not appear in an input graph");
+}
+
+} // namespace armnn
diff --git a/src/armnn/layers/MemImportLayer.hpp b/src/armnn/layers/MemImportLayer.hpp
new file mode 100644
index 0000000000..2d02c1fb41
--- /dev/null
+++ b/src/armnn/layers/MemImportLayer.hpp
@@ -0,0 +1,42 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#pragma once
+
+#include <Layer.hpp>
+
+namespace armnn
+{
+
+/// This layer represents a memory import operation.
+class MemImportLayer : public Layer
+{
+public:
+ /// Makes a workload for the MemImport type.
+ /// @param [in] graph The graph where this layer can be found.
+ /// @param [in] factory The workload factory which will create the workload.
+ /// @return A pointer to the created workload, or nullptr if not created.
+ virtual std::unique_ptr<IWorkload>CreateWorkload(const Graph& graph,
+ const IWorkloadFactory& factory) const override;
+
+ /// Creates a dynamically-allocated copy of this layer.
+ /// @param [in] graph The graph into which this layer is being cloned.
+ MemImportLayer* Clone(Graph& graph) const override;
+
+ /// Check if the input tensor shape(s)
+ /// will lead to a valid configuration of @ref MemImportLayer.
+ void ValidateTensorShapesFromInputs() override;
+
+ void Accept(ILayerVisitor& visitor) const override;
+
+protected:
+ /// Constructor to create a MemImportLayer.
+ /// @param [in] name Optional name for the layer.
+ MemImportLayer(const char* name);
+
+ /// Default destructor
+ ~MemImportLayer() = default;
+};
+
+} // namespace
diff --git a/src/armnn/test/GraphTests.cpp b/src/armnn/test/GraphTests.cpp
index 7950ec49f4..7bd6aac98b 100644
--- a/src/armnn/test/GraphTests.cpp
+++ b/src/armnn/test/GraphTests.cpp
@@ -495,13 +495,13 @@ struct CopyLayersFixture
// Set the memory strategies - for this test should be DirectCompatibility for same backends,
// and CopyToTarget for different backends
- inputLayer->GetOutputSlot(0).SetMemoryStrategy(0, MemoryStrategy::DirectCompatibility);
- convLayer1->GetOutputSlot(0).SetMemoryStrategy(0, MemoryStrategy::CopyToTarget);
- convLayer1->GetOutputSlot(0).SetMemoryStrategy(1, MemoryStrategy::DirectCompatibility);
- convLayer2->GetOutputSlot(0).SetMemoryStrategy(0, MemoryStrategy::CopyToTarget);
- concatLayer->GetOutputSlot(0).SetMemoryStrategy(0, MemoryStrategy::DirectCompatibility);
- actLayer->GetOutputSlot(0).SetMemoryStrategy(0, MemoryStrategy::DirectCompatibility);
- softmaxLayer->GetOutputSlot(0).SetMemoryStrategy(0, MemoryStrategy::CopyToTarget);
+ inputLayer->GetOutputSlot(0).SetEdgeStrategy(0, EdgeStrategy::DirectCompatibility);
+ convLayer1->GetOutputSlot(0).SetEdgeStrategy(0, EdgeStrategy::CopyToTarget);
+ convLayer1->GetOutputSlot(0).SetEdgeStrategy(1, EdgeStrategy::DirectCompatibility);
+ convLayer2->GetOutputSlot(0).SetEdgeStrategy(0, EdgeStrategy::CopyToTarget);
+ concatLayer->GetOutputSlot(0).SetEdgeStrategy(0, EdgeStrategy::DirectCompatibility);
+ actLayer->GetOutputSlot(0).SetEdgeStrategy(0, EdgeStrategy::DirectCompatibility);
+ softmaxLayer->GetOutputSlot(0).SetEdgeStrategy(0, EdgeStrategy::CopyToTarget);
}
armnn::TensorInfo m_TensorDesc;
@@ -529,7 +529,7 @@ BOOST_FIXTURE_TEST_CASE(AddCopyLayers, CopyLayersFixture)
{
InitialiseTestGraph();
const armnn::Graph origGraph(m_Graph);
- m_Graph.AddCopyLayers(m_Backends, m_FactoryRegistry);
+ m_Graph.AddCompatibilityLayers(m_Backends, m_FactoryRegistry);
TestGraphAfterAddingCopyLayers(m_Graph, origGraph);
}
@@ -537,13 +537,13 @@ BOOST_FIXTURE_TEST_CASE(AddCopyLayers, CopyLayersFixture)
BOOST_FIXTURE_TEST_CASE(AddCopyLayersSeveralTimes, CopyLayersFixture)
{
InitialiseTestGraph();
- m_Graph.AddCopyLayers(m_Backends, m_FactoryRegistry);
+ m_Graph.AddCompatibilityLayers(m_Backends, m_FactoryRegistry);
- // Calling AddCopyLayers() several times should not change the connections.
+ // Calling AddCompatibilityLayers() several times should not change the connections.
const std::vector<Edge> edges = GetEdgeList(m_Graph);
for (int i = 0; i < 4; ++i)
{
- m_Graph.AddCopyLayers(m_Backends, m_FactoryRegistry);
+ m_Graph.AddCompatibilityLayers(m_Backends, m_FactoryRegistry);
const std::vector<Edge> otherEdges = GetEdgeList(m_Graph);
BOOST_TEST((edges == otherEdges));
}
@@ -571,18 +571,18 @@ BOOST_FIXTURE_TEST_CASE(CopyLayersAddedBetweenSameLayersHaveDifferentNames, Copy
splitterLayer->GetOutputSlot(1).Connect(additionLayer->GetInputSlot(1));
additionLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
- inputLayer->GetOutputSlot(0).SetMemoryStrategy(0, armnn::MemoryStrategy::DirectCompatibility);
- splitterLayer->GetOutputSlot(0).SetMemoryStrategy(0, armnn::MemoryStrategy::CopyToTarget);
- splitterLayer->GetOutputSlot(1).SetMemoryStrategy(0, armnn::MemoryStrategy::CopyToTarget);
- additionLayer->GetOutputSlot(0).SetMemoryStrategy(0, armnn::MemoryStrategy::DirectCompatibility);
+ inputLayer->GetOutputSlot(0).SetEdgeStrategy(0, armnn::EdgeStrategy::DirectCompatibility);
+ splitterLayer->GetOutputSlot(0).SetEdgeStrategy(0, armnn::EdgeStrategy::CopyToTarget);
+ splitterLayer->GetOutputSlot(1).SetEdgeStrategy(0, armnn::EdgeStrategy::CopyToTarget);
+ additionLayer->GetOutputSlot(0).SetEdgeStrategy(0, armnn::EdgeStrategy::DirectCompatibility);
- graph.AddCopyLayers(m_Backends, m_FactoryRegistry);
+ graph.AddCompatibilityLayers(m_Backends, m_FactoryRegistry);
std::vector<Edge> edges = GetEdgeList(graph);
BOOST_CHECK(edges.size() == 6u);
std::sort(edges.begin(), edges.end());
auto last = std::unique(edges.begin(), edges.end());
- BOOST_CHECK_MESSAGE(last == edges.end(), "Found duplicated edges after AddCopyLayers()");
+ BOOST_CHECK_MESSAGE(last == edges.end(), "Found duplicated edges after AddCompatibilityLayers()");
}
BOOST_AUTO_TEST_CASE(DuplicateLayerNames)
diff --git a/src/armnn/test/TensorHandleStrategyTest.cpp b/src/armnn/test/TensorHandleStrategyTest.cpp
index 3bb1c68169..c391b04d97 100644
--- a/src/armnn/test/TensorHandleStrategyTest.cpp
+++ b/src/armnn/test/TensorHandleStrategyTest.cpp
@@ -50,9 +50,11 @@ public:
return nullptr;
}
- virtual const FactoryId GetId() const override { return m_Id; }
+ const FactoryId GetId() const override { return m_Id; }
- virtual bool SupportsSubTensors() const override { return true; }
+ bool SupportsSubTensors() const override { return true; }
+
+ MemorySourceFlags GetExportFlags() const override { return 1; }
private:
FactoryId m_Id = "UninitializedId";
@@ -60,6 +62,38 @@ private:
std::weak_ptr<IMemoryManager> m_MemMgr;
};
+class TestFactoryImport : public ITensorHandleFactory
+{
+public:
+ TestFactoryImport(std::weak_ptr<IMemoryManager> mgr, ITensorHandleFactory::FactoryId id)
+ : m_Id(id)
+ , m_MemMgr(mgr)
+ {}
+
+ std::unique_ptr<ITensorHandle> CreateSubTensorHandle(ITensorHandle& parent,
+ TensorShape const& subTensorShape,
+ unsigned int const* subTensorOrigin) const override
+ {
+ return nullptr;
+ }
+
+ std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo) const override
+ {
+ return nullptr;
+ }
+
+ const FactoryId GetId() const override { return m_Id; }
+
+ bool SupportsSubTensors() const override { return true; }
+
+ MemorySourceFlags GetImportFlags() const override { return 1; }
+
+private:
+ FactoryId m_Id = "ImporterId";
+
+ std::weak_ptr<IMemoryManager> m_MemMgr;
+};
+
class TestBackendA : public IBackendInternal
{
public:
@@ -173,6 +207,42 @@ private:
BackendId m_Id = "BackendC";
};
+class TestBackendD : public IBackendInternal
+{
+public:
+ TestBackendD() = default;
+
+ const BackendId& GetId() const override { return m_Id; }
+
+ IWorkloadFactoryPtr CreateWorkloadFactory(const IMemoryManagerSharedPtr& memoryManager = nullptr) const override
+ {
+ return IWorkloadFactoryPtr{};
+ }
+
+ IBackendInternal::ILayerSupportSharedPtr GetLayerSupport() const override
+ {
+ return ILayerSupportSharedPtr{};
+ }
+
+ std::vector<ITensorHandleFactory::FactoryId> GetHandleFactoryPreferences() const override
+ {
+ return std::vector<ITensorHandleFactory::FactoryId>{
+ "TestHandleFactoryD1"
+ };
+ }
+
+ void RegisterTensorHandleFactories(TensorHandleFactoryRegistry& registry) override
+ {
+ auto mgr = std::make_shared<TestMemMgr>();
+
+ registry.RegisterMemoryManager(mgr);
+ registry.RegisterFactory(std::make_unique<TestFactoryImport>(mgr, "TestHandleFactoryD1"));
+ }
+
+private:
+ BackendId m_Id = "BackendD";
+};
+
BOOST_AUTO_TEST_SUITE(TensorHandle)
@@ -200,16 +270,19 @@ BOOST_AUTO_TEST_CASE(TensorHandleSelectionStrategy)
auto backendA = std::make_unique<TestBackendA>();
auto backendB = std::make_unique<TestBackendB>();
auto backendC = std::make_unique<TestBackendC>();
+ auto backendD = std::make_unique<TestBackendD>();
TensorHandleFactoryRegistry registry;
backendA->RegisterTensorHandleFactories(registry);
backendB->RegisterTensorHandleFactories(registry);
backendC->RegisterTensorHandleFactories(registry);
+ backendD->RegisterTensorHandleFactories(registry);
BackendsMap backends;
backends["BackendA"] = std::move(backendA);
backends["BackendB"] = std::move(backendB);
backends["BackendC"] = std::move(backendC);
+ backends["BackendD"] = std::move(backendD);
armnn::Graph graph;
@@ -226,13 +299,17 @@ BOOST_AUTO_TEST_CASE(TensorHandleSelectionStrategy)
armnn::SoftmaxLayer* const softmaxLayer3 = graph.AddLayer<armnn::SoftmaxLayer>(smDesc, "softmax3");
softmaxLayer3->SetBackendId("BackendC");
+ armnn::SoftmaxLayer* const softmaxLayer4 = graph.AddLayer<armnn::SoftmaxLayer>(smDesc, "softmax4");
+ softmaxLayer4->SetBackendId("BackendD");
+
armnn::OutputLayer* const outputLayer = graph.AddLayer<armnn::OutputLayer>(0, "output");
outputLayer->SetBackendId("BackendA");
inputLayer->GetOutputSlot(0).Connect(softmaxLayer1->GetInputSlot(0));
softmaxLayer1->GetOutputSlot(0).Connect(softmaxLayer2->GetInputSlot(0));
softmaxLayer2->GetOutputSlot(0).Connect(softmaxLayer3->GetInputSlot(0));
- softmaxLayer3->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
+ softmaxLayer3->GetOutputSlot(0).Connect(softmaxLayer4->GetInputSlot(0));
+ softmaxLayer4->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
graph.TopologicalSort();
@@ -246,29 +323,45 @@ BOOST_AUTO_TEST_CASE(TensorHandleSelectionStrategy)
OutputSlot& softmaxLayer1Out = softmaxLayer1->GetOutputSlot(0);
OutputSlot& softmaxLayer2Out = softmaxLayer2->GetOutputSlot(0);
OutputSlot& softmaxLayer3Out = softmaxLayer3->GetOutputSlot(0);
+ OutputSlot& softmaxLayer4Out = softmaxLayer4->GetOutputSlot(0);
// Check that the correct factory was selected
BOOST_TEST(inputLayerOut.GetTensorHandleFactoryId() == "TestHandleFactoryA1");
BOOST_TEST(softmaxLayer1Out.GetTensorHandleFactoryId() == "TestHandleFactoryB1");
BOOST_TEST(softmaxLayer2Out.GetTensorHandleFactoryId() == "TestHandleFactoryB1");
BOOST_TEST(softmaxLayer3Out.GetTensorHandleFactoryId() == "TestHandleFactoryC1");
+ BOOST_TEST(softmaxLayer4Out.GetTensorHandleFactoryId() == "TestHandleFactoryD1");
// Check that the correct strategy was selected
- BOOST_TEST((inputLayerOut.GetMemoryStrategyForConnection(0) == MemoryStrategy::DirectCompatibility));
- BOOST_TEST((softmaxLayer1Out.GetMemoryStrategyForConnection(0) == MemoryStrategy::DirectCompatibility));
- BOOST_TEST((softmaxLayer2Out.GetMemoryStrategyForConnection(0) == MemoryStrategy::CopyToTarget));
- BOOST_TEST((softmaxLayer3Out.GetMemoryStrategyForConnection(0) == MemoryStrategy::DirectCompatibility));
-
- graph.AddCopyLayers(backends, registry);
- int count= 0;
- graph.ForEachLayer([&count](Layer* layer)
+ BOOST_TEST((inputLayerOut.GetEdgeStrategyForConnection(0) == EdgeStrategy::DirectCompatibility));
+ BOOST_TEST((softmaxLayer1Out.GetEdgeStrategyForConnection(0) == EdgeStrategy::DirectCompatibility));
+ BOOST_TEST((softmaxLayer2Out.GetEdgeStrategyForConnection(0) == EdgeStrategy::CopyToTarget));
+ BOOST_TEST((softmaxLayer3Out.GetEdgeStrategyForConnection(0) == EdgeStrategy::ExportToTarget));
+ BOOST_TEST((softmaxLayer4Out.GetEdgeStrategyForConnection(0) == EdgeStrategy::DirectCompatibility));
+
+ graph.AddCompatibilityLayers(backends, registry);
+
+ // Test for copy layers
+ int copyCount= 0;
+ graph.ForEachLayer([&copyCount](Layer* layer)
{
if (layer->GetType() == LayerType::MemCopy)
{
- count++;
+ copyCount++;
+ }
+ });
+ BOOST_TEST(copyCount == 1);
+
+ // Test for import layers
+ int importCount= 0;
+ graph.ForEachLayer([&importCount](Layer *layer)
+ {
+ if (layer->GetType() == LayerType::MemImport)
+ {
+ importCount++;
}
});
- BOOST_TEST(count == 1);
+ BOOST_TEST(importCount == 1);
}
BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/backends/backendsCommon/CMakeLists.txt b/src/backends/backendsCommon/CMakeLists.txt
index 653f3727ee..44131ea1b5 100644
--- a/src/backends/backendsCommon/CMakeLists.txt
+++ b/src/backends/backendsCommon/CMakeLists.txt
@@ -20,11 +20,16 @@ list(APPEND armnnBackendsCommon_sources
ITensorHandleFactory.hpp
LayerSupportBase.cpp
LayerSupportBase.hpp
+ LayerSupportRules.hpp
IMemoryManager.hpp
ITensorHandle.hpp
MakeWorkloadHelper.hpp
MemCopyWorkload.cpp
MemCopyWorkload.hpp
+ MemImportWorkload.cpp
+ MemImportWorkload.hpp
+ MemSyncWorkload.cpp
+ MemSyncWorkload.hpp
OptimizationViews.cpp
OptimizationViews.hpp
OutputHandler.cpp
diff --git a/src/backends/backendsCommon/ITensorHandle.hpp b/src/backends/backendsCommon/ITensorHandle.hpp
index 176b021d76..e1b80b874a 100644
--- a/src/backends/backendsCommon/ITensorHandle.hpp
+++ b/src/backends/backendsCommon/ITensorHandle.hpp
@@ -4,6 +4,8 @@
//
#pragma once
+#include <armnn/MemorySources.hpp>
+
namespace armnn
{
@@ -61,6 +63,15 @@ public:
// Testing support to be able to verify and set tensor data content
virtual void CopyOutTo(void* memory) const = 0;
virtual void CopyInFrom(const void* memory) = 0;
+
+ /// Get flags describing supported import sources.
+ virtual unsigned int GetImportFlags() const { return 0; }
+
+ /// Import externally allocated memory
+ /// \param memory base address of the memory being imported.
+ /// \param source source of the allocation for the memory being imported.
+ /// \return true on success or false on failure
+ virtual bool Import(void* memory, MemorySource source) { return false; };
};
}
diff --git a/src/backends/backendsCommon/ITensorHandleFactory.hpp b/src/backends/backendsCommon/ITensorHandleFactory.hpp
index 7685061eb3..89a2a7fa3b 100644
--- a/src/backends/backendsCommon/ITensorHandleFactory.hpp
+++ b/src/backends/backendsCommon/ITensorHandleFactory.hpp
@@ -5,8 +5,9 @@
#pragma once
-#include <armnn/Types.hpp>
#include <armnn/IRuntime.hpp>
+#include <armnn/MemorySources.hpp>
+#include <armnn/Types.hpp>
namespace armnn
{
@@ -20,7 +21,6 @@ public:
virtual ~ITensorHandleFactory() {}
-
virtual std::unique_ptr<ITensorHandle> CreateSubTensorHandle(ITensorHandle& parent,
TensorShape const& subTensorShape,
unsigned int const* subTensorOrigin) const = 0;
@@ -33,17 +33,16 @@ public:
virtual bool SupportsMapUnmap() const final { return true; }
- virtual bool SupportsExport() const final { return false; }
-
- virtual bool SupportsImport() const final { return false; }
+ virtual MemorySourceFlags GetExportFlags() const { return 0; }
+ virtual MemorySourceFlags GetImportFlags() const { return 0; }
};
-enum class MemoryStrategy
+enum class EdgeStrategy
{
- Undefined,
- DirectCompatibility, // Only allocate the tensorhandle using the assigned factory
- CopyToTarget, // Default + Insert MemCopy node before target
- ExportToTarget, // Default + Insert Import node
+ Undefined, /// No strategy has been defined. Used internally to verify integrity of optimizations.
+ DirectCompatibility, /// Destination backend can work directly with tensors on source backend.
+ ExportToTarget, /// Source backends tensor data can be exported to destination backend tensor without copy.
+ CopyToTarget /// Copy contents from source backend tensor to destination backend tensor.
};
} //namespace armnn
diff --git a/src/backends/backendsCommon/LayerSupportBase.cpp b/src/backends/backendsCommon/LayerSupportBase.cpp
index f202fedb4f..ee8dc5f7e9 100644
--- a/src/backends/backendsCommon/LayerSupportBase.cpp
+++ b/src/backends/backendsCommon/LayerSupportBase.cpp
@@ -7,6 +7,8 @@
#include <armnn/Exceptions.hpp>
+#include <boost/core/ignore_unused.hpp>
+
namespace
{
@@ -252,7 +254,18 @@ bool LayerSupportBase::IsMemCopySupported(const armnn::TensorInfo& input,
const armnn::TensorInfo& output,
armnn::Optional<std::string &> reasonIfUnsupported) const
{
- return DefaultLayerSupport(__func__, __FILE__, __LINE__, reasonIfUnsupported);
+ boost::ignore_unused(input);
+ boost::ignore_unused(output);
+ return true;
+}
+
+bool LayerSupportBase::IsMemImportSupported(const armnn::TensorInfo& input,
+ const armnn::TensorInfo& output,
+ armnn::Optional<std::string &> reasonIfUnsupported) const
+{
+ boost::ignore_unused(input);
+ boost::ignore_unused(output);
+ return true;
}
bool LayerSupportBase::IsMergeSupported(const TensorInfo& input0,
diff --git a/src/backends/backendsCommon/LayerSupportBase.hpp b/src/backends/backendsCommon/LayerSupportBase.hpp
index c860e34874..0d5a2af16e 100644
--- a/src/backends/backendsCommon/LayerSupportBase.hpp
+++ b/src/backends/backendsCommon/LayerSupportBase.hpp
@@ -157,6 +157,10 @@ public:
const TensorInfo& output,
Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
+ bool IsMemImportSupported(const TensorInfo& input,
+ const TensorInfo& output,
+ Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
+
bool IsMergeSupported(const TensorInfo& input0,
const TensorInfo& input1,
const TensorInfo& output,
diff --git a/src/backends/backendsCommon/LayerSupportRules.hpp b/src/backends/backendsCommon/LayerSupportRules.hpp
new file mode 100644
index 0000000000..db3f38ccbb
--- /dev/null
+++ b/src/backends/backendsCommon/LayerSupportRules.hpp
@@ -0,0 +1,185 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <boost/assert.hpp>
+#include <algorithm>
+
+namespace armnn
+{
+
+namespace
+{
+
+inline armnn::Optional<armnn::DataType> GetBiasTypeFromWeightsType(armnn::Optional<armnn::DataType> weightsType)
+{
+ if (!weightsType)
+ {
+ return weightsType;
+ }
+
+ switch(weightsType.value())
+ {
+ case armnn::DataType::Float16:
+ case armnn::DataType::Float32:
+ return weightsType;
+ case armnn::DataType::QuantisedAsymm8:
+ return armnn::DataType::Signed32;
+ case armnn::DataType::QuantisedSymm16:
+ return armnn::DataType::Signed32;
+ default:
+ BOOST_ASSERT_MSG(false, "GetBiasTypeFromWeightsType(): Unsupported data type.");
+ }
+ return armnn::EmptyOptional();
+}
+
+} //namespace
+
+template<typename F>
+bool CheckSupportRule(F rule, Optional<std::string&> reasonIfUnsupported, const char* reason)
+{
+ bool supported = rule();
+ if (!supported && reason)
+ {
+ reasonIfUnsupported.value() += std::string(reason) + "\n"; // Append the reason on a new line
+ }
+ return supported;
+}
+
+struct Rule
+{
+ bool operator()() const
+ {
+ return m_Res;
+ }
+
+ bool m_Res = true;
+};
+
+template<typename T>
+bool AllTypesAreEqualImpl(T t)
+{
+ return true;
+}
+
+template<typename T, typename... Rest>
+bool AllTypesAreEqualImpl(T t1, T t2, Rest... rest)
+{
+ static_assert(std::is_same<T, TensorInfo>::value, "Type T must be a TensorInfo");
+
+ return (t1.GetDataType() == t2.GetDataType()) && AllTypesAreEqualImpl(t2, rest...);
+}
+
+struct TypesAreEqual : public Rule
+{
+ template<typename ... Ts>
+ TypesAreEqual(const Ts&... ts)
+ {
+ m_Res = AllTypesAreEqualImpl(ts...);
+ }
+};
+
+struct QuantizationParametersAreEqual : public Rule
+{
+ QuantizationParametersAreEqual(const TensorInfo& info0, const TensorInfo& info1)
+ {
+ m_Res = info0.GetQuantizationScale() == info1.GetQuantizationScale() &&
+ info0.GetQuantizationOffset() == info1.GetQuantizationOffset();
+ }
+};
+
+struct TypeAnyOf : public Rule
+{
+ template<typename Container>
+ TypeAnyOf(const TensorInfo& info, const Container& c)
+ {
+ m_Res = std::any_of(c.begin(), c.end(), [&info](DataType dt)
+ {
+ return dt == info.GetDataType();
+ });
+ }
+};
+
+struct TypeIs : public Rule
+{
+ TypeIs(const TensorInfo& info, DataType dt)
+ {
+ m_Res = dt == info.GetDataType();
+ }
+};
+
+struct BiasAndWeightsTypesMatch : public Rule
+{
+ BiasAndWeightsTypesMatch(const TensorInfo& biases, const TensorInfo& weights)
+ {
+ m_Res = biases.GetDataType() == GetBiasTypeFromWeightsType(weights.GetDataType()).value();
+ }
+};
+
+struct BiasAndWeightsTypesCompatible : public Rule
+{
+ template<typename Container>
+ BiasAndWeightsTypesCompatible(const TensorInfo& info, const Container& c)
+ {
+ m_Res = std::any_of(c.begin(), c.end(), [&info](DataType dt)
+ {
+ return dt == GetBiasTypeFromWeightsType(info.GetDataType()).value();
+ });
+ }
+};
+
+struct ShapesAreSameRank : public Rule
+{
+ ShapesAreSameRank(const TensorInfo& info0, const TensorInfo& info1)
+ {
+ m_Res = info0.GetShape().GetNumDimensions() == info1.GetShape().GetNumDimensions();
+ }
+};
+
+struct ShapesAreSameTotalSize : public Rule
+{
+ ShapesAreSameTotalSize(const TensorInfo& info0, const TensorInfo& info1)
+ {
+ m_Res = info0.GetNumElements() == info1.GetNumElements();
+ }
+};
+
+struct ShapesAreBroadcastCompatible : public Rule
+{
+ unsigned int CalcInputSize(const TensorShape& in, const TensorShape& out, unsigned int idx)
+ {
+ unsigned int offset = out.GetNumDimensions() - in.GetNumDimensions();
+ unsigned int sizeIn = (idx < offset) ? 1 : in[idx-offset];
+ return sizeIn;
+ }
+
+ ShapesAreBroadcastCompatible(const TensorInfo& in0, const TensorInfo& in1, const TensorInfo& out)
+ {
+ const TensorShape& shape0 = in0.GetShape();
+ const TensorShape& shape1 = in1.GetShape();
+ const TensorShape& outShape = out.GetShape();
+
+ for (unsigned int i=0; i < outShape.GetNumDimensions() && m_Res; i++)
+ {
+ unsigned int sizeOut = outShape[i];
+ unsigned int sizeIn0 = CalcInputSize(shape0, outShape, i);
+ unsigned int sizeIn1 = CalcInputSize(shape1, outShape, i);
+
+ m_Res &= ((sizeIn0 == sizeOut) || (sizeIn0 == 1)) &&
+ ((sizeIn1 == sizeOut) || (sizeIn1 == 1));
+ }
+ }
+};
+
+struct TensorNumDimensionsAreCorrect : public Rule
+{
+ TensorNumDimensionsAreCorrect(const TensorInfo& info, unsigned int expectedNumDimensions)
+ {
+ m_Res = info.GetNumDimensions() == expectedNumDimensions;
+ }
+};
+
+} //namespace armnn \ No newline at end of file
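For context, the rule objects above are meant to be chained through CheckSupportRule() inside a backend's IsXxxSupported() checks. A hedged sketch of that usage (illustrative function name and type list, assuming armnn/Tensor.hpp, armnn/Optional.hpp and the include paths used elsewhere in the tree; not the reference backend's actual implementation):

    #include <armnn/Tensor.hpp>
    #include <armnn/Optional.hpp>
    #include <backendsCommon/LayerSupportRules.hpp>

    #include <array>
    #include <string>

    namespace armnn
    {

    bool IsMemImportSupportedExample(const TensorInfo& input,
                                     const TensorInfo& output,
                                     Optional<std::string&> reasonIfUnsupported)
    {
        // Illustrative list; each failed rule appends its reason string for the caller.
        std::array<DataType, 2> supportedTypes = { DataType::Float32, DataType::QuantisedAsymm8 };

        bool supported = true;
        supported &= CheckSupportRule(TypeAnyOf(input, supportedTypes), reasonIfUnsupported,
                                      "MemImport: input type not supported.");
        supported &= CheckSupportRule(TypesAreEqual(input, output), reasonIfUnsupported,
                                      "MemImport: input and output types must match.");
        supported &= CheckSupportRule(ShapesAreSameTotalSize(input, output), reasonIfUnsupported,
                                      "MemImport: input and output must have the same number of elements.");
        return supported;
    }

    } // namespace armnn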
diff --git a/src/backends/backendsCommon/MemImportWorkload.cpp b/src/backends/backendsCommon/MemImportWorkload.cpp
new file mode 100644
index 0000000000..ed00241bb6
--- /dev/null
+++ b/src/backends/backendsCommon/MemImportWorkload.cpp
@@ -0,0 +1,34 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "MemImportWorkload.hpp"
+
+#include "CpuTensorHandle.hpp"
+
+#include <ResolveType.hpp>
+
+#include <boost/cast.hpp>
+
+#include <cstring>
+
+namespace armnn
+{
+
+ImportMemGenericWorkload::ImportMemGenericWorkload(const MemImportQueueDescriptor& descriptor,
+ const WorkloadInfo& info)
+ : BaseWorkload<MemImportQueueDescriptor>(descriptor, info)
+{
+ m_TensorHandlePairs = std::make_pair(descriptor.m_Inputs[0], descriptor.m_Outputs[0]);
+}
+
+void ImportMemGenericWorkload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ImportMemGeneric_Execute");
+
+ m_TensorHandlePairs.second->Import(const_cast<void*>(m_TensorHandlePairs.first->Map(true)), MemorySource::Malloc);
+ m_TensorHandlePairs.first->Unmap();
+}
+
+} //namespace armnn
diff --git a/src/backends/backendsCommon/MemImportWorkload.hpp b/src/backends/backendsCommon/MemImportWorkload.hpp
new file mode 100644
index 0000000000..e16b99e9e0
--- /dev/null
+++ b/src/backends/backendsCommon/MemImportWorkload.hpp
@@ -0,0 +1,27 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#pragma once
+
+#include "CpuTensorHandleFwd.hpp"
+#include "Workload.hpp"
+#include "WorkloadUtils.hpp"
+
+#include <utility>
+
+namespace armnn
+{
+
+class ImportMemGenericWorkload : public BaseWorkload<MemImportQueueDescriptor>
+{
+public:
+ ImportMemGenericWorkload(const MemImportQueueDescriptor& descriptor, const WorkloadInfo& info);
+ void Execute() const override;
+
+private:
+ using TensorHandlePair = std::pair<const ITensorHandle*, ITensorHandle*>;
+ TensorHandlePair m_TensorHandlePairs;
+};
+
+} //namespace armnn
diff --git a/src/backends/backendsCommon/MemSyncWorkload.cpp b/src/backends/backendsCommon/MemSyncWorkload.cpp
new file mode 100644
index 0000000000..a1d309cefb
--- /dev/null
+++ b/src/backends/backendsCommon/MemSyncWorkload.cpp
@@ -0,0 +1,33 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "MemSyncWorkload.hpp"
+
+#include "CpuTensorHandle.hpp"
+
+#include <ResolveType.hpp>
+
+#include <boost/cast.hpp>
+
+#include <cstring>
+
+namespace armnn
+{
+
+SyncMemGenericWorkload::SyncMemGenericWorkload(const MemSyncQueueDescriptor& descriptor,
+ const WorkloadInfo& info)
+ : BaseWorkload<MemSyncQueueDescriptor>(descriptor, info)
+{
+ m_TensorHandle = descriptor.m_Inputs[0];
+}
+
+void SyncMemGenericWorkload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "SyncMemGeneric_Execute");
+ m_TensorHandle->Map(true);
+ m_TensorHandle->Unmap();
+}
+
+} //namespace armnn
diff --git a/src/backends/backendsCommon/MemSyncWorkload.hpp b/src/backends/backendsCommon/MemSyncWorkload.hpp
new file mode 100644
index 0000000000..3a167d2a00
--- /dev/null
+++ b/src/backends/backendsCommon/MemSyncWorkload.hpp
@@ -0,0 +1,26 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#pragma once
+
+#include "CpuTensorHandleFwd.hpp"
+#include "Workload.hpp"
+#include "WorkloadUtils.hpp"
+
+#include <utility>
+
+namespace armnn
+{
+
+class SyncMemGenericWorkload : public BaseWorkload<MemSyncQueueDescriptor>
+{
+public:
+ SyncMemGenericWorkload(const MemSyncQueueDescriptor& descriptor, const WorkloadInfo& info);
+ void Execute() const override;
+
+private:
+ ITensorHandle* m_TensorHandle;
+};
+
+} //namespace armnn
diff --git a/src/backends/backendsCommon/WorkloadData.cpp b/src/backends/backendsCommon/WorkloadData.cpp
index a4d35827fa..1c607da707 100644
--- a/src/backends/backendsCommon/WorkloadData.cpp
+++ b/src/backends/backendsCommon/WorkloadData.cpp
@@ -351,6 +351,109 @@ void MemCopyQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const
}
}
+//---------------------------------------------------------------
+void MemImportQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const
+{
+ ValidateNumInputs(workloadInfo, "MemImportQueueDescriptor", 1);
+ ValidateNumOutputs(workloadInfo, "MemImportQueueDescriptor" , 1);
+
+ if (workloadInfo.m_InputTensorInfos.size() != 1)
+ {
+ throw InvalidArgumentException(boost::str(
+ boost::format("Number of input infos (%1%) is not 1.")
+ % workloadInfo.m_InputTensorInfos.size()));
+
+ }
+
+ if (workloadInfo.m_InputTensorInfos.size() != workloadInfo.m_OutputTensorInfos.size())
+ {
+ throw InvalidArgumentException(boost::str(
+ boost::format("Number of input infos (%1%) does not match the number of output infos (%2%)")
+ % workloadInfo.m_InputTensorInfos.size() % workloadInfo.m_OutputTensorInfos.size()));
+ }
+
+ for (std::size_t i = 0; i < workloadInfo.m_InputTensorInfos.size(); ++i)
+ {
+ if (workloadInfo.m_InputTensorInfos[i].GetNumElements() !=
+ workloadInfo.m_OutputTensorInfos[i].GetNumElements())
+ {
+ throw InvalidArgumentException(boost::str(
+ boost::format("Number of elements for tensor input and output %1% does not match")
+ % i ));
+ }
+ }
+
+ if (m_Inputs.size() != 1)
+ {
+ throw InvalidArgumentException(boost::str(
+ boost::format("Number of inputs (%1%) is not 1.")
+ % m_Inputs.size()));
+ }
+
+ if (m_Inputs.size() != m_Outputs.size())
+ {
+ throw InvalidArgumentException(boost::str(
+ boost::format("Number of inputs (%1%) does not match the number of outputs (%2%)")
+ % m_Inputs.size() % m_Outputs.size()));
+ }
+
+ for (unsigned int i = 0; i < m_Inputs.size(); ++i)
+ {
+ if (!m_Inputs[i])
+ {
+ throw InvalidArgumentException(boost::str(boost::format("Invalid null input %1%") % i));
+ }
+
+ if (!m_Outputs[i])
+ {
+ throw InvalidArgumentException(boost::str(boost::format("Invalid null output %1%") % i));
+ }
+ }
+}
+
+//---------------------------------------------------------------
+void MemSyncQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const
+{
+ ValidateNumInputs(workloadInfo, "MemSyncQueueDescriptor", 1);
+ ValidateNumOutputs(workloadInfo, "MemSyncQueueDescriptor", 0);
+
+ if (workloadInfo.m_InputTensorInfos.size() != 1)
+ {
+ throw InvalidArgumentException(boost::str(
+ boost::format("Number of input infos (%1%) is not 1.")
+ % workloadInfo.m_InputTensorInfos.size()));
+
+ }
+
+ if (workloadInfo.m_OutputTensorInfos.size() != 0)
+ {
+ throw InvalidArgumentException(boost::str(
+ boost::format("Number of output infos (%1%) is not 0.")
+ % workloadInfo.m_OutputTensorInfos.size()));
+
+ }
+
+ if (m_Inputs.size() != 1)
+ {
+ throw InvalidArgumentException(boost::str(
+ boost::format("Number of inputs (%1%) is not 1.")
+ % m_Inputs.size()));
+ }
+
+ if (m_Outputs.size() != 0)
+ {
+ throw InvalidArgumentException(boost::str(
+ boost::format("Number of outputs (%1%) is not 0.")
+ % m_Outputs.size()));
+ }
+
+ if (!m_Inputs[0])
+ {
+ throw InvalidArgumentException(boost::str(boost::format("Invalid null input 0")));
+ }
+}
+
+//---------------------------------------------------------------
void ActivationQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const
{
const std::string descriptorName{"ActivationQueueDescriptor"};
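
Editor's sketch of how the new Validate() overloads are meant to be driven; the shape, data type and handle choices below are illustrative only and do not appear in the patch.

#include <armnn/Tensor.hpp>
#include <backendsCommon/CpuTensorHandle.hpp>
#include <backendsCommon/WorkloadData.hpp>
#include <backendsCommon/WorkloadInfo.hpp>

void CheckMemImportDescriptor()
{
    using namespace armnn;

    TensorInfo info({1, 1, 2, 3}, DataType::Float32);
    ScopedCpuTensorHandle input(info);
    ScopedCpuTensorHandle output(info);

    MemImportQueueDescriptor descriptor;
    descriptor.m_Inputs.push_back(&input);
    descriptor.m_Outputs.push_back(&output);

    WorkloadInfo workloadInfo;
    workloadInfo.m_InputTensorInfos.push_back(info);
    workloadInfo.m_OutputTensorInfos.push_back(info);

    descriptor.Validate(workloadInfo); // throws InvalidArgumentException on any mismatch
}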
diff --git a/src/backends/backendsCommon/WorkloadData.hpp b/src/backends/backendsCommon/WorkloadData.hpp
index d790dafd58..c055beb88d 100644
--- a/src/backends/backendsCommon/WorkloadData.hpp
+++ b/src/backends/backendsCommon/WorkloadData.hpp
@@ -63,6 +63,16 @@ struct MemCopyQueueDescriptor : QueueDescriptor
using InputQueueDescriptor = MemCopyQueueDescriptor;
using OutputQueueDescriptor = MemCopyQueueDescriptor;
+struct MemImportQueueDescriptor : QueueDescriptor
+{
+ void Validate(const WorkloadInfo& workloadInfo) const;
+};
+
+struct MemSyncQueueDescriptor : QueueDescriptor
+{
+ void Validate(const WorkloadInfo& workloadInfo) const;
+};
+
// Softmax layer workload data.
struct SoftmaxQueueDescriptor : QueueDescriptorWithParameters<SoftmaxDescriptor>
{
diff --git a/src/backends/backendsCommon/WorkloadFactory.cpp b/src/backends/backendsCommon/WorkloadFactory.cpp
index 1f616f0b18..ffef5b4eb7 100644
--- a/src/backends/backendsCommon/WorkloadFactory.cpp
+++ b/src/backends/backendsCommon/WorkloadFactory.cpp
@@ -515,6 +515,16 @@ bool IWorkloadFactory::IsLayerSupported(const BackendId& backendId,
reason);
break;
}
+ case LayerType::MemImport:
+ {
+ const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
+ const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo();
+
+ result = layerSupportObject->IsMemImportSupported(OverrideDataType(input, dataType),
+ OverrideDataType(output, dataType),
+ reason);
+ break;
+ }
case LayerType::Merge:
{
const TensorInfo& input0 = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
@@ -1092,6 +1102,12 @@ std::unique_ptr<IWorkload> IWorkloadFactory::CreateMemCopy(const MemCopyQueueDes
return std::unique_ptr<IWorkload>();
}
+std::unique_ptr<IWorkload> IWorkloadFactory::CreateMemImport(const MemImportQueueDescriptor& descriptor,
+ const WorkloadInfo& info) const
+{
+ return std::unique_ptr<IWorkload>();
+}
+
std::unique_ptr<IWorkload> IWorkloadFactory::CreateMerge(const MergeQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
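
Editor's sketch: the base-class default above returns an empty pointer, so code that talks to an arbitrary backend is assumed to check the result and fall back to a MemCopy workload when import is not available. TryCreateMemImport is an illustrative helper, not part of the patch.

#include <memory>

#include <backendsCommon/WorkloadData.hpp>
#include <backendsCommon/WorkloadFactory.hpp>

bool TryCreateMemImport(armnn::IWorkloadFactory& factory,
                        const armnn::MemImportQueueDescriptor& descriptor,
                        const armnn::WorkloadInfo& info,
                        std::unique_ptr<armnn::IWorkload>& outWorkload)
{
    outWorkload = factory.CreateMemImport(descriptor, info);
    return outWorkload != nullptr; // false => schedule a MemCopy instead
}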
diff --git a/src/backends/backendsCommon/WorkloadFactory.hpp b/src/backends/backendsCommon/WorkloadFactory.hpp
index bd7f1c627b..a9c6049c37 100644
--- a/src/backends/backendsCommon/WorkloadFactory.hpp
+++ b/src/backends/backendsCommon/WorkloadFactory.hpp
@@ -124,6 +124,9 @@ public:
virtual std::unique_ptr<IWorkload> CreateMemCopy(const MemCopyQueueDescriptor& descriptor,
const WorkloadInfo& info) const;
+ virtual std::unique_ptr<IWorkload> CreateMemImport(const MemImportQueueDescriptor& descriptor,
+ const WorkloadInfo& info) const;
+
virtual std::unique_ptr<IWorkload> CreateMerge(const MergeQueueDescriptor& descriptor,
const WorkloadInfo& info) const;
diff --git a/src/backends/backendsCommon/common.mk b/src/backends/backendsCommon/common.mk
index 69bde81b0a..eee1dae0ff 100644
--- a/src/backends/backendsCommon/common.mk
+++ b/src/backends/backendsCommon/common.mk
@@ -14,6 +14,8 @@ COMMON_SOURCES := \
ITensorHandleFactory.cpp \
LayerSupportBase.cpp \
MemCopyWorkload.cpp \
+ MemImportWorkload.cpp \
+ MemSyncWorkload.cpp \
OptimizationViews.cpp \
OutputHandler.cpp \
TensorHandleFactoryRegistry.cpp \
diff --git a/src/backends/backendsCommon/test/IsLayerSupportedTestImpl.hpp b/src/backends/backendsCommon/test/IsLayerSupportedTestImpl.hpp
index 451c585adc..1f43c989d6 100644
--- a/src/backends/backendsCommon/test/IsLayerSupportedTestImpl.hpp
+++ b/src/backends/backendsCommon/test/IsLayerSupportedTestImpl.hpp
@@ -405,6 +405,8 @@ DECLARE_LAYER_POLICY_2_PARAM(Convolution2d)
DECLARE_LAYER_POLICY_1_PARAM(MemCopy)
+DECLARE_LAYER_POLICY_1_PARAM(MemImport)
+
DECLARE_LAYER_POLICY_1_PARAM(Debug)
DECLARE_LAYER_POLICY_2_PARAM(DepthwiseConvolution2d)
diff --git a/src/backends/cl/ClLayerSupport.cpp b/src/backends/cl/ClLayerSupport.cpp
index 4ea6f2db3a..cca265f30c 100644
--- a/src/backends/cl/ClLayerSupport.cpp
+++ b/src/backends/cl/ClLayerSupport.cpp
@@ -448,15 +448,6 @@ bool ClLayerSupport::IsMeanSupported(const TensorInfo& input,
descriptor);
}
-bool ClLayerSupport::IsMemCopySupported(const TensorInfo &input,
- const TensorInfo &output,
- Optional<std::string &> reasonIfUnsupported) const
-{
- ignore_unused(input);
- ignore_unused(output);
- return true;
-}
-
bool ClLayerSupport::IsMergerSupported(const std::vector<const TensorInfo*> inputs,
const TensorInfo& output,
const MergerDescriptor& descriptor,
diff --git a/src/backends/cl/ClLayerSupport.hpp b/src/backends/cl/ClLayerSupport.hpp
index a367085eef..69c2280a59 100644
--- a/src/backends/cl/ClLayerSupport.hpp
+++ b/src/backends/cl/ClLayerSupport.hpp
@@ -127,10 +127,6 @@ public:
const MeanDescriptor& descriptor,
Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
- bool IsMemCopySupported(const TensorInfo& input,
- const TensorInfo& output,
- Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
-
ARMNN_DEPRECATED_MSG("Use IsConcatSupported instead")
bool IsMergerSupported(const std::vector<const TensorInfo*> inputs,
const TensorInfo& output,
diff --git a/src/backends/cl/ClWorkloadFactory.cpp b/src/backends/cl/ClWorkloadFactory.cpp
index d72fa92a30..6e91dd07a5 100644
--- a/src/backends/cl/ClWorkloadFactory.cpp
+++ b/src/backends/cl/ClWorkloadFactory.cpp
@@ -13,6 +13,7 @@
#include <backendsCommon/CpuTensorHandle.hpp>
#include <backendsCommon/MakeWorkloadHelper.hpp>
#include <backendsCommon/MemCopyWorkload.hpp>
+#include <backendsCommon/MemImportWorkload.hpp>
#include <cl/ClTensorHandle.hpp>
#include <cl/workloads/ClWorkloads.hpp>
@@ -257,6 +258,17 @@ std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreateMemCopy(const MemCopy
return MakeWorkload<CopyMemGenericWorkload, CopyMemGenericWorkload>(descriptor, info);
}
+std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreateMemImport(const MemImportQueueDescriptor& descriptor,
+ const WorkloadInfo& info) const
+{
+ if (descriptor.m_Inputs.empty() || !descriptor.m_Inputs[0])
+ {
+ throw InvalidArgumentException("ClWorkloadFactory: Invalid null input for MemImport workload");
+ }
+
+ return std::make_unique<ImportMemGenericWorkload>(descriptor, info);
+}
+
std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreateResize(const ResizeQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
diff --git a/src/backends/cl/ClWorkloadFactory.hpp b/src/backends/cl/ClWorkloadFactory.hpp
index 01bfb8db9f..3a55ac5a2a 100644
--- a/src/backends/cl/ClWorkloadFactory.hpp
+++ b/src/backends/cl/ClWorkloadFactory.hpp
@@ -94,6 +94,9 @@ public:
std::unique_ptr<IWorkload> CreateMemCopy(const MemCopyQueueDescriptor& descriptor,
const WorkloadInfo& info) const override;
+ std::unique_ptr<IWorkload> CreateMemImport(const MemImportQueueDescriptor& descriptor,
+ const WorkloadInfo& info) const override;
+
std::unique_ptr<IWorkload> CreateResize(const ResizeQueueDescriptor& descriptor,
const WorkloadInfo& info) const override;
diff --git a/src/backends/neon/NeonLayerSupport.cpp b/src/backends/neon/NeonLayerSupport.cpp
index fd09265d1e..dac3525f60 100644
--- a/src/backends/neon/NeonLayerSupport.cpp
+++ b/src/backends/neon/NeonLayerSupport.cpp
@@ -384,15 +384,6 @@ bool NeonLayerSupport::IsMeanSupported(const TensorInfo& input,
descriptor);
}
-bool NeonLayerSupport::IsMemCopySupported(const TensorInfo &input,
- const TensorInfo &output,
- Optional<std::string &> reasonIfUnsupported) const
-{
- ignore_unused(input);
- ignore_unused(output);
- return true;
-}
-
bool NeonLayerSupport::IsMergerSupported(const std::vector<const TensorInfo*> inputs,
const TensorInfo& output,
const MergerDescriptor& descriptor,
diff --git a/src/backends/neon/NeonLayerSupport.hpp b/src/backends/neon/NeonLayerSupport.hpp
index c37ac2a3fc..078d2f619b 100644
--- a/src/backends/neon/NeonLayerSupport.hpp
+++ b/src/backends/neon/NeonLayerSupport.hpp
@@ -117,10 +117,6 @@ public:
const MeanDescriptor& descriptor,
Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
- bool IsMemCopySupported(const TensorInfo& input,
- const TensorInfo& output,
- Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
-
ARMNN_DEPRECATED_MSG("Use IsConcatSupported instead")
bool IsMergerSupported(const std::vector<const TensorInfo*> inputs,
const TensorInfo& output,
diff --git a/src/backends/neon/NeonWorkloadFactory.cpp b/src/backends/neon/NeonWorkloadFactory.cpp
index cdc84ca62e..fd0381c26d 100644
--- a/src/backends/neon/NeonWorkloadFactory.cpp
+++ b/src/backends/neon/NeonWorkloadFactory.cpp
@@ -14,6 +14,7 @@
#include <backendsCommon/CpuTensorHandle.hpp>
#include <backendsCommon/MakeWorkloadHelper.hpp>
#include <backendsCommon/MemCopyWorkload.hpp>
+#include <backendsCommon/MemImportWorkload.hpp>
#include <neon/workloads/NeonWorkloadUtils.hpp>
#include <neon/workloads/NeonWorkloads.hpp>
@@ -224,6 +225,17 @@ std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateMemCopy(const MemCo
return MakeWorkloadHelper<CopyMemGenericWorkload, CopyMemGenericWorkload>(descriptor, info);
}
+std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateMemImport(const MemImportQueueDescriptor& descriptor,
+ const WorkloadInfo& info) const
+{
+ if (descriptor.m_Inputs.empty() || !descriptor.m_Inputs[0])
+ {
+ throw InvalidArgumentException("NeonWorkloadFactory: Invalid null input for MemImport workload");
+ }
+
+ return std::make_unique<ImportMemGenericWorkload>(descriptor, info);
+}
+
std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateResize(const ResizeQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
diff --git a/src/backends/neon/NeonWorkloadFactory.hpp b/src/backends/neon/NeonWorkloadFactory.hpp
index d6a1b74941..360dc7c61b 100644
--- a/src/backends/neon/NeonWorkloadFactory.hpp
+++ b/src/backends/neon/NeonWorkloadFactory.hpp
@@ -95,6 +95,9 @@ public:
std::unique_ptr<IWorkload> CreateMemCopy(const MemCopyQueueDescriptor& descriptor,
const WorkloadInfo& info) const override;
+ std::unique_ptr<IWorkload> CreateMemImport(const MemImportQueueDescriptor& descriptor,
+ const WorkloadInfo& info) const override;
+
std::unique_ptr<IWorkload> CreateResize(const ResizeQueueDescriptor& descriptor,
const WorkloadInfo& info) const override;
diff --git a/src/backends/reference/RefLayerSupport.cpp b/src/backends/reference/RefLayerSupport.cpp
index d42404d25b..187cc01c77 100644
--- a/src/backends/reference/RefLayerSupport.cpp
+++ b/src/backends/reference/RefLayerSupport.cpp
@@ -14,6 +14,7 @@
#include <armnn/Descriptors.hpp>
#include <backendsCommon/BackendRegistry.hpp>
+#include <backendsCommon/LayerSupportRules.hpp>
#include <backendsCommon/test/WorkloadTestUtils.hpp>
#include <boost/core/ignore_unused.hpp>
@@ -65,155 +66,6 @@ std::string CreateIncorrectDimensionsErrorMsg(unsigned int expected,
} // anonymous namespace
-namespace
-{
-template<typename F>
-bool CheckSupportRule(F rule, Optional<std::string&> reasonIfUnsupported, const char* reason)
-{
- bool supported = rule();
- if (!supported && reason)
- {
- reasonIfUnsupported.value() += std::string(reason) + "\n"; // Append the reason on a new line
- }
- return supported;
-}
-
-struct Rule
-{
- bool operator()() const
- {
- return m_Res;
- }
-
- bool m_Res = true;
-};
-
-template<typename T>
-bool AllTypesAreEqualImpl(T t)
-{
- return true;
-}
-
-template<typename T, typename... Rest>
-bool AllTypesAreEqualImpl(T t1, T t2, Rest... rest)
-{
- static_assert(std::is_same<T, TensorInfo>::value, "Type T must be a TensorInfo");
-
- return (t1.GetDataType() == t2.GetDataType()) && AllTypesAreEqualImpl(t2, rest...);
-}
-
-struct TypesAreEqual : public Rule
-{
- template<typename ... Ts>
- TypesAreEqual(const Ts&... ts)
- {
- m_Res = AllTypesAreEqualImpl(ts...);
- }
-};
-
-struct QuantizationParametersAreEqual : public Rule
-{
- QuantizationParametersAreEqual(const TensorInfo& info0, const TensorInfo& info1)
- {
- m_Res = info0.GetQuantizationScale() == info1.GetQuantizationScale() &&
- info0.GetQuantizationOffset() == info1.GetQuantizationOffset();
- }
-};
-
-struct TypeAnyOf : public Rule
-{
- template<typename Container>
- TypeAnyOf(const TensorInfo& info, const Container& c)
- {
- m_Res = std::any_of(c.begin(), c.end(), [&info](DataType dt)
- {
- return dt == info.GetDataType();
- });
- }
-};
-
-struct TypeIs : public Rule
-{
- TypeIs(const TensorInfo& info, DataType dt)
- {
- m_Res = dt == info.GetDataType();
- }
-};
-
-struct BiasAndWeightsTypesMatch : public Rule
-{
- BiasAndWeightsTypesMatch(const TensorInfo& biases, const TensorInfo& weights)
- {
- m_Res = biases.GetDataType() == GetBiasTypeFromWeightsType(weights.GetDataType()).value();
- }
-};
-
-struct BiasAndWeightsTypesCompatible : public Rule
-{
- template<typename Container>
- BiasAndWeightsTypesCompatible(const TensorInfo& info, const Container& c)
- {
- m_Res = std::any_of(c.begin(), c.end(), [&info](DataType dt)
- {
- return dt == GetBiasTypeFromWeightsType(info.GetDataType()).value();
- });
- }
-};
-
-struct ShapesAreSameRank : public Rule
-{
- ShapesAreSameRank(const TensorInfo& info0, const TensorInfo& info1)
- {
- m_Res = info0.GetShape().GetNumDimensions() == info1.GetShape().GetNumDimensions();
- }
-};
-
-struct ShapesAreSameTotalSize : public Rule
-{
- ShapesAreSameTotalSize(const TensorInfo& info0, const TensorInfo& info1)
- {
- m_Res = info0.GetNumElements() == info1.GetNumElements();
- }
-};
-
-struct ShapesAreBroadcastCompatible : public Rule
-{
- unsigned int CalcInputSize(const TensorShape& in, const TensorShape& out, unsigned int idx)
- {
- unsigned int offset = out.GetNumDimensions() - in.GetNumDimensions();
- unsigned int sizeIn = (idx < offset) ? 1 : in[idx-offset];
- return sizeIn;
- }
-
- ShapesAreBroadcastCompatible(const TensorInfo& in0, const TensorInfo& in1, const TensorInfo& out)
- {
- const TensorShape& shape0 = in0.GetShape();
- const TensorShape& shape1 = in1.GetShape();
- const TensorShape& outShape = out.GetShape();
-
- for (unsigned int i=0; i < outShape.GetNumDimensions() && m_Res; i++)
- {
- unsigned int sizeOut = outShape[i];
- unsigned int sizeIn0 = CalcInputSize(shape0, outShape, i);
- unsigned int sizeIn1 = CalcInputSize(shape1, outShape, i);
-
- m_Res &= ((sizeIn0 == sizeOut) || (sizeIn0 == 1)) &&
- ((sizeIn1 == sizeOut) || (sizeIn1 == 1));
- }
- }
-};
-
-struct TensorNumDimensionsAreCorrect : public Rule
-{
- TensorNumDimensionsAreCorrect(const TensorInfo& info, unsigned int expectedNumDimensions)
- {
- m_Res = info.GetNumDimensions() == expectedNumDimensions;
- }
-};
-
-} // namespace
-
-
bool RefLayerSupport::IsActivationSupported(const TensorInfo& input,
const TensorInfo& output,
const ActivationDescriptor& descriptor,
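
Editor's note: the rule helpers deleted above are not lost; they move into backendsCommon/LayerSupportRules.hpp (added by this patch and now included at the top of this file) so that other backends can share them. The way they are intended to be composed is sketched below with an invented layer name and an example type list, neither of which appears in the diff.

#include <armnn/Optional.hpp>
#include <armnn/Tensor.hpp>
#include <backendsCommon/LayerSupportRules.hpp>

#include <array>
#include <string>

bool IsExampleLayerSupported(const armnn::TensorInfo& input,
                             const armnn::TensorInfo& output,
                             armnn::Optional<std::string&> reasonIfUnsupported)
{
    using namespace armnn;

    std::array<DataType, 2> supportedTypes = { DataType::Float32, DataType::QuantisedAsymm8 };

    bool supported = true;
    supported &= CheckSupportRule(TypeAnyOf(input, supportedTypes), reasonIfUnsupported,
                                  "Example: input type not supported");
    supported &= CheckSupportRule(TypesAreEqual(input, output), reasonIfUnsupported,
                                  "Example: input and output types are mismatched");
    supported &= CheckSupportRule(ShapesAreSameTotalSize(input, output), reasonIfUnsupported,
                                  "Example: input and output shapes have different total sizes");
    return supported;
}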
diff --git a/src/backends/reference/RefWorkloadFactory.cpp b/src/backends/reference/RefWorkloadFactory.cpp
index 240acecbad..fff2fd2694 100644
--- a/src/backends/reference/RefWorkloadFactory.cpp
+++ b/src/backends/reference/RefWorkloadFactory.cpp
@@ -5,6 +5,7 @@
#include <Layer.hpp>
#include <backendsCommon/CpuTensorHandle.hpp>
#include <backendsCommon/MemCopyWorkload.hpp>
+#include <backendsCommon/MemImportWorkload.hpp>
#include <backendsCommon/MakeWorkloadHelper.hpp>
#include "RefWorkloadFactory.hpp"
#include "RefBackendId.hpp"
@@ -250,6 +251,16 @@ std::unique_ptr<armnn::IWorkload> RefWorkloadFactory::CreateMemCopy(const MemCop
return std::make_unique<CopyMemGenericWorkload>(descriptor, info);
}
+std::unique_ptr<armnn::IWorkload> RefWorkloadFactory::CreateMemImport(const MemImportQueueDescriptor& descriptor,
+ const WorkloadInfo& info) const
+{
+ if (descriptor.m_Inputs.empty())
+ {
+ throw InvalidArgumentException("RefWorkloadFactory: CreateMemImport() expected an input tensor.");
+ }
+ return std::make_unique<ImportMemGenericWorkload>(descriptor, info);
+}
+
std::unique_ptr<IWorkload> RefWorkloadFactory::CreateResize(const ResizeQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
diff --git a/src/backends/reference/RefWorkloadFactory.hpp b/src/backends/reference/RefWorkloadFactory.hpp
index b012fbc6f6..314e11788e 100644
--- a/src/backends/reference/RefWorkloadFactory.hpp
+++ b/src/backends/reference/RefWorkloadFactory.hpp
@@ -110,6 +110,9 @@ public:
std::unique_ptr<IWorkload> CreateMemCopy(const MemCopyQueueDescriptor& descriptor,
const WorkloadInfo& info) const override;
+ std::unique_ptr<IWorkload> CreateMemImport(const MemImportQueueDescriptor& descriptor,
+ const WorkloadInfo& info) const override;
+
std::unique_ptr<IWorkload> CreateResize(const ResizeQueueDescriptor& descriptor,
const WorkloadInfo& info) const override;