author    David Monahan <david.monahan@arm.com>  2019-08-20 11:25:29 +0100
committer David Monahan <david.monahan@arm.com>  2019-09-24 10:50:30 +0000
commit    3fb7e105ae62cbfb3ebf1edebb90e2b6672b22aa (patch)
tree      47793a736e1fb53d51b4c0fd755f4e24f7f93d98
parent    93667b1d7c361df68bdb1d733f17aba3ba34e046 (diff)
download  armnn-3fb7e105ae62cbfb3ebf1edebb90e2b6672b22aa.tar.gz
IVGCVSW-3623 Implement NeonTensorHandle::Import

Signed-off-by: David Monahan <david.monahan@arm.com>
Change-Id: I7213788725fd4e4cf1176998604e999d0b7ed6cc
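
This patch threads an IsMemoryManaged flag from LoadedNetwork through
Layer::CreateTensorHandles, OutputHandler and the tensor handle
factories. When a network is loaded with importing enabled, Input
layers create their handles with IsMemoryManaged == false; on the Neon
backend such handles skip Manage()/Allocate() and instead accept
client memory through NeonTensorHandle::Import (MemorySource::Malloc,
subject to an alignment check).

Caller-side usage, condensed from the NeonImportOnlyWorkload test
added in this patch:

    using namespace armnn;
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    INetworkPtr net(INetwork::Create());
    IConnectableLayer* input = net->AddInputLayer(0);
    ActivationDescriptor descriptor;
    descriptor.m_Function = ActivationFunction::Square;
    IConnectableLayer* activation = net->AddActivationLayer(descriptor);
    IConnectableLayer* output = net->AddOutputLayer(0);
    input->GetOutputSlot(0).Connect(activation->GetInputSlot(0));
    activation->GetOutputSlot(0).Connect(output->GetInputSlot(0));
    input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
    activation->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));

    std::vector<BackendId> backends = { Compute::CpuAcc };
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());

    NetworkId netId;
    std::string errorMessage;
    // importEnabled == true, exportEnabled == false
    INetworkProperties networkProperties(true, false);
    runtime->LoadNetwork(netId, std::move(optNet), errorMessage, networkProperties);

    std::vector<float> inputData { 1.0f, 2.0f, 3.0f, 4.0f };
    std::vector<float> outputData(4);
    InputTensors inputTensors
    {
        { 0, ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data()) }
    };
    OutputTensors outputTensors
    {
        { 0, Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
    };
    // The aligned input buffer is imported rather than copied into
    // backend-managed memory.
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);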
-rw-r--r--  src/armnn/Layer.cpp                                    |   9
-rw-r--r--  src/armnn/Layer.hpp                                    |   4
-rw-r--r--  src/armnn/LoadedNetwork.cpp                            |  17
-rw-r--r--  src/armnn/layers/ConcatLayer.cpp                       |   4
-rw-r--r--  src/armnn/layers/ConcatLayer.hpp                       |   4
-rw-r--r--  src/armnn/layers/OutputLayer.hpp                       |   6
-rw-r--r--  src/armnn/layers/SplitterLayer.cpp                     |   4
-rw-r--r--  src/armnn/layers/SplitterLayer.hpp                     |   4
-rw-r--r--  src/armnn/test/TensorHandleStrategyTest.cpp            |  12
-rw-r--r--  src/backends/backendsCommon/ITensorHandleFactory.hpp   |   6
-rw-r--r--  src/backends/backendsCommon/OutputHandler.cpp          |   8
-rw-r--r--  src/backends/backendsCommon/OutputHandler.hpp          |   4
-rw-r--r--  src/backends/backendsCommon/WorkloadFactory.hpp        |   6
-rw-r--r--  src/backends/backendsCommon/test/EndToEndTestImpl.hpp  |  64
-rw-r--r--  src/backends/cl/ClTensorHandleFactory.cpp              |  18
-rw-r--r--  src/backends/cl/ClTensorHandleFactory.hpp              |   6
-rw-r--r--  src/backends/cl/ClWorkloadFactory.cpp                  |   6
-rw-r--r--  src/backends/cl/ClWorkloadFactory.hpp                  |   6
-rw-r--r--  src/backends/neon/NeonTensorHandle.hpp                 |  97
-rw-r--r--  src/backends/neon/NeonTensorHandleFactory.cpp          |  22
-rw-r--r--  src/backends/neon/NeonTensorHandleFactory.hpp          |  10
-rw-r--r--  src/backends/neon/NeonWorkloadFactory.cpp              |  18
-rw-r--r--  src/backends/neon/NeonWorkloadFactory.hpp              |   6
-rw-r--r--  src/backends/neon/test/NeonEndToEndTests.cpp           | 109
-rw-r--r--  src/backends/reference/RefTensorHandleFactory.cpp      |   9
-rw-r--r--  src/backends/reference/RefTensorHandleFactory.hpp      |   6
-rw-r--r--  src/backends/reference/RefWorkloadFactory.cpp          |  10
-rw-r--r--  src/backends/reference/RefWorkloadFactory.hpp          |   6
28 files changed, 380 insertions(+), 101 deletions(-)
diff --git a/src/armnn/Layer.cpp b/src/armnn/Layer.cpp
index 1e384336c9..dbeda22ca0 100644
--- a/src/armnn/Layer.cpp
+++ b/src/armnn/Layer.cpp
@@ -244,7 +244,9 @@ void Layer::CollectWorkloadOutputs(WorkloadDataCollector& dataCollector, const G
}
}
-void Layer::CreateTensorHandles(const TensorHandleFactoryRegistry& registry, const IWorkloadFactory& workloadFactory)
+void Layer::CreateTensorHandles(const TensorHandleFactoryRegistry& registry,
+ const IWorkloadFactory& workloadFactory,
+ const bool IsMemoryManaged)
{
for (unsigned int idx=0; idx < GetNumOutputSlots(); idx++)
{
@@ -255,14 +257,13 @@ void Layer::CreateTensorHandles(const TensorHandleFactoryRegistry& registry, con
OutputHandler& handler = GetOutputHandler(idx);
if (factoryId == ITensorHandleFactory::LegacyFactoryId)
{
- handler.CreateTensorHandles(workloadFactory);
+ handler.CreateTensorHandles(workloadFactory, IsMemoryManaged);
}
else
{
ITensorHandleFactory* handleFactory = registry.GetFactory(factoryId);
BOOST_ASSERT(handleFactory);
-
- handler.CreateTensorHandles(*handleFactory);
+ handler.CreateTensorHandles(*handleFactory, IsMemoryManaged);
}
}
}
diff --git a/src/armnn/Layer.hpp b/src/armnn/Layer.hpp
index c571e50a95..5f2c070681 100644
--- a/src/armnn/Layer.hpp
+++ b/src/armnn/Layer.hpp
@@ -267,7 +267,9 @@ public:
virtual std::unique_ptr<IWorkload> CreateWorkload(const Graph& graph, const IWorkloadFactory& factory) const = 0;
- virtual void CreateTensorHandles(const TensorHandleFactoryRegistry& registry, const IWorkloadFactory& factory);
+ virtual void CreateTensorHandles(const TensorHandleFactoryRegistry& registry,
+ const IWorkloadFactory& factory,
+ const bool IsMemoryManaged = true);
/// Creates a dynamically-allocated copy of this layer.
/// @param graph - The Graph into which this Layer is being cloned.
diff --git a/src/armnn/LoadedNetwork.cpp b/src/armnn/LoadedNetwork.cpp
index 1000eceda0..7ee4e612e0 100644
--- a/src/armnn/LoadedNetwork.cpp
+++ b/src/armnn/LoadedNetwork.cpp
@@ -120,8 +120,21 @@ LoadedNetwork::LoadedNetwork(std::unique_ptr<OptimizedNetwork> net,
for (auto&& layer : order)
{
- auto& workloadFacory = GetWorkloadFactory(*layer);
- layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFacory);
+ auto& workloadFactory = GetWorkloadFactory(*layer);
+
+ switch (layer->GetType())
+ {
+ case LayerType::Input:
+ {
+ // If IsImportEnabled is true then we need to set IsMemoryManaged to false when creating TensorHandles
+ layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory, !m_IsImportEnabled);
+ break;
+ }
+ default:
+ {
+ layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory);
+ }
+ }
}
//Then create workloads.
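
In effect (a simplified equivalent of the dispatch above, not the
verbatim patch): only Input layers are affected, and only when the
network was loaded with importing enabled.

    bool isMemoryManaged = (layer->GetType() == LayerType::Input)
                               ? !m_IsImportEnabled
                               : true;
    layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory, isMemoryManaged);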
diff --git a/src/armnn/layers/ConcatLayer.cpp b/src/armnn/layers/ConcatLayer.cpp
index 24051a24d2..9b1785850a 100644
--- a/src/armnn/layers/ConcatLayer.cpp
+++ b/src/armnn/layers/ConcatLayer.cpp
@@ -119,8 +119,10 @@ void ConcatLayer::CreateTensors(const FactoryType& factory)
}
void ConcatLayer::CreateTensorHandles(const TensorHandleFactoryRegistry& registry,
- const IWorkloadFactory& workloadFactory)
+ const IWorkloadFactory& workloadFactory,
+ const bool IsMemoryManaged)
{
+ boost::ignore_unused(IsMemoryManaged);
OutputSlot& slot = GetOutputSlot(0);
ITensorHandleFactory::FactoryId factoryId = slot.GetTensorHandleFactoryId();
diff --git a/src/armnn/layers/ConcatLayer.hpp b/src/armnn/layers/ConcatLayer.hpp
index e8ff2e452b..10a7fd8e74 100644
--- a/src/armnn/layers/ConcatLayer.hpp
+++ b/src/armnn/layers/ConcatLayer.hpp
@@ -24,8 +24,10 @@ public:
/// otherwise creates tensor handlers.
/// @param [in] registry Contains all the registered tensor handle factories available for use.
/// @param [in] factory The workload factory which will create the workload.
+ /// @param [in] IsMemoryManaged Determine whether or not to assign a memory manager during creation
virtual void CreateTensorHandles(const TensorHandleFactoryRegistry& registry,
- const IWorkloadFactory& factory) override;
+ const IWorkloadFactory& factory,
+ const bool IsMemoryManaged = true) override;
/// Creates a dynamically-allocated copy of this layer.
/// @param [in] graph The graph into which this layer is being cloned.
diff --git a/src/armnn/layers/OutputLayer.hpp b/src/armnn/layers/OutputLayer.hpp
index 2aa2dbd6c9..c9615cca66 100644
--- a/src/armnn/layers/OutputLayer.hpp
+++ b/src/armnn/layers/OutputLayer.hpp
@@ -24,10 +24,12 @@ public:
/// otherwise creates tensor handlers by default. Ignores parameters for Output type.
/// @param [in] registry Contains all the registered tensor handle factories available for use.
/// @param [in] factory The workload factory which will create the workload.
+ /// @param [in] IsMemoryManaged Determine whether or not to assign a memory manager during creation
virtual void CreateTensorHandles(const TensorHandleFactoryRegistry& registry,
- const IWorkloadFactory& factory) override
+ const IWorkloadFactory& factory,
+ const bool IsMemoryManaged = true) override
{
- boost::ignore_unused(registry, factory);
+ boost::ignore_unused(registry, factory, IsMemoryManaged);
}
/// Creates a dynamically-allocated copy of this layer.
diff --git a/src/armnn/layers/SplitterLayer.cpp b/src/armnn/layers/SplitterLayer.cpp
index dc04b3fd15..e8452462f3 100644
--- a/src/armnn/layers/SplitterLayer.cpp
+++ b/src/armnn/layers/SplitterLayer.cpp
@@ -101,8 +101,10 @@ void SplitterLayer::CreateTensors(const FactoryType& factory)
}
void SplitterLayer::CreateTensorHandles(const TensorHandleFactoryRegistry& registry,
- const IWorkloadFactory& workloadFactory)
+ const IWorkloadFactory& workloadFactory,
+ const bool IsMemoryManaged)
{
+ boost::ignore_unused(IsMemoryManaged);
OutputSlot& slot = GetOutputSlot(0);
ITensorHandleFactory::FactoryId factoryId = slot.GetTensorHandleFactoryId();
diff --git a/src/armnn/layers/SplitterLayer.hpp b/src/armnn/layers/SplitterLayer.hpp
index 9c684d479f..26d5b76a2d 100644
--- a/src/armnn/layers/SplitterLayer.hpp
+++ b/src/armnn/layers/SplitterLayer.hpp
@@ -24,9 +24,11 @@ public:
/// otherwise creates tensor handlers.
/// @param [in] registry Contains all the registered tensor handle factories available for use.
/// @param [in] factory The workload factory which will create the workload.
+ /// @param [in] IsMemoryManaged Determine whether or not to assign a memory manager during creation
//virtual void CreateTensorHandles(Graph& graph, const IWorkloadFactory& factory) override;
virtual void CreateTensorHandles(const TensorHandleFactoryRegistry& registry,
- const IWorkloadFactory& factory) override;
+ const IWorkloadFactory& factory,
+ const bool IsMemoryManaged = true) override;
/// Creates a dynamically-allocated copy of this layer.
/// @param [in] graph The graph into which this layer is being cloned.
diff --git a/src/armnn/test/TensorHandleStrategyTest.cpp b/src/armnn/test/TensorHandleStrategyTest.cpp
index 3c53b13e1a..ceb6e4dbc2 100644
--- a/src/armnn/test/TensorHandleStrategyTest.cpp
+++ b/src/armnn/test/TensorHandleStrategyTest.cpp
@@ -45,13 +45,15 @@ public:
return nullptr;
}
- std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo) const override
+ std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo,
+ const bool IsMemoryManaged) const override
{
return nullptr;
}
std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo,
- DataLayout dataLayout) const override
+ DataLayout dataLayout,
+ const bool IsMemoryManaged) const override
{
return nullptr;
}
@@ -83,13 +85,15 @@ public:
return nullptr;
}
- std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo) const override
+ std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo,
+ const bool IsMemoryManaged) const override
{
return nullptr;
}
std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo,
- DataLayout dataLayout) const override
+ DataLayout dataLayout,
+ const bool IsMemoryManaged) const override
{
return nullptr;
}
diff --git a/src/backends/backendsCommon/ITensorHandleFactory.hpp b/src/backends/backendsCommon/ITensorHandleFactory.hpp
index 26f6c9b9c0..c6deaef6bb 100644
--- a/src/backends/backendsCommon/ITensorHandleFactory.hpp
+++ b/src/backends/backendsCommon/ITensorHandleFactory.hpp
@@ -25,10 +25,12 @@ public:
TensorShape const& subTensorShape,
unsigned int const* subTensorOrigin) const = 0;
- virtual std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo) const = 0;
+ virtual std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo,
+ const bool IsMemoryManaged = true) const = 0;
virtual std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo,
- DataLayout dataLayout) const = 0;
+ DataLayout dataLayout,
+ const bool IsMemoryManaged = true) const = 0;
virtual const FactoryId& GetId() const = 0;
diff --git a/src/backends/backendsCommon/OutputHandler.cpp b/src/backends/backendsCommon/OutputHandler.cpp
index 8f4942d8ba..e3a1b276ea 100644
--- a/src/backends/backendsCommon/OutputHandler.cpp
+++ b/src/backends/backendsCommon/OutputHandler.cpp
@@ -22,14 +22,14 @@ void OutputHandler::SetTensorInfo(const TensorInfo& tensorInfo)
m_bTensorInfoSet = true;
}
-void OutputHandler::CreateTensorHandles(const IWorkloadFactory& factory)
+void OutputHandler::CreateTensorHandles(const IWorkloadFactory& factory, const bool IsMemoryManaged)
{
- m_TensorHandle = factory.CreateTensorHandle(m_TensorInfo);
+ m_TensorHandle = factory.CreateTensorHandle(m_TensorInfo, IsMemoryManaged);
}
-void OutputHandler::CreateTensorHandles(const ITensorHandleFactory& factory)
+void OutputHandler::CreateTensorHandles(const ITensorHandleFactory& factory, const bool IsMemoryManaged)
{
- m_TensorHandle = factory.CreateTensorHandle(m_TensorInfo);
+ m_TensorHandle = factory.CreateTensorHandle(m_TensorInfo, IsMemoryManaged);
}
void OutputHandler::CollectWorkloadOutputs(WorkloadDataCollector& dataCollector) const
diff --git a/src/backends/backendsCommon/OutputHandler.hpp b/src/backends/backendsCommon/OutputHandler.hpp
index 87ced20706..81768c6f5e 100644
--- a/src/backends/backendsCommon/OutputHandler.hpp
+++ b/src/backends/backendsCommon/OutputHandler.hpp
@@ -36,8 +36,8 @@ public:
/// @brief - Creates tensor handles used by the intermediate tensors. Does not allocate memory.
/// @param factory - Factory to be used for handler creation.
- void CreateTensorHandles(const IWorkloadFactory& factory);
- void CreateTensorHandles(const ITensorHandleFactory& factory);
+ void CreateTensorHandles(const IWorkloadFactory& factory, const bool IsMemoryManaged = true);
+ void CreateTensorHandles(const ITensorHandleFactory& factory, const bool IsMemoryManaged = true);
/// @brief - Gets the matching TensorInfo for the output.
/// @return - References to the output TensorInfo.
diff --git a/src/backends/backendsCommon/WorkloadFactory.hpp b/src/backends/backendsCommon/WorkloadFactory.hpp
index 29ebe2af48..2809e2f9e8 100644
--- a/src/backends/backendsCommon/WorkloadFactory.hpp
+++ b/src/backends/backendsCommon/WorkloadFactory.hpp
@@ -44,10 +44,12 @@ public:
virtual std::unique_ptr<IWorkload> CreateInput(const InputQueueDescriptor& descriptor,
const WorkloadInfo& info) const = 0;
- virtual std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo) const = 0;
+ virtual std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo,
+ const bool IsMemoryManaged = true) const = 0;
virtual std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo,
- DataLayout dataLayout) const = 0;
+ DataLayout dataLayout,
+ const bool IsMemoryManaged = true) const = 0;
virtual std::unique_ptr<IWorkload> CreateAbs(const AbsQueueDescriptor& descriptor,
const WorkloadInfo& info) const;
diff --git a/src/backends/backendsCommon/test/EndToEndTestImpl.hpp b/src/backends/backendsCommon/test/EndToEndTestImpl.hpp
index ecc8806626..3bdd48bcfa 100644
--- a/src/backends/backendsCommon/test/EndToEndTestImpl.hpp
+++ b/src/backends/backendsCommon/test/EndToEndTestImpl.hpp
@@ -185,40 +185,42 @@ inline void ImportNonAlignedInputPointerTest(std::vector<BackendId> backends)
IConnectableLayer* input = net->AddInputLayer(0);
- NormalizationDescriptor descriptor;
- IConnectableLayer* norm = net->AddNormalizationLayer(descriptor);
+ ActivationDescriptor descriptor;
+ descriptor.m_Function = ActivationFunction::Square;
+ IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
IConnectableLayer* output = net->AddOutputLayer(0);
- input->GetOutputSlot(0).Connect(norm->GetInputSlot(0));
- norm->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+ input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
+ pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
- input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 1 }, DataType::Float32));
- norm->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 1 }, DataType::Float32));
+ input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
+ pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
// Optimize the network
IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
+ BOOST_CHECK(optNet);
// Loads it into the runtime.
NetworkId netId;
std::string ignoredErrorMessage;
// Enable Importing
- INetworkProperties networkProperties(true, true);
+ INetworkProperties networkProperties(true, false);
runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
// Creates structures for input & output
std::vector<float> inputData
{
- 1.0f, 2.0f, 3.0f, 4.0f, 5.0f
+ 1.0f, 2.0f, 3.0f, 4.0f
};
// Misaligned input
float* misalignedInputData = reinterpret_cast<float*>(reinterpret_cast<char*>(inputData.data()) + 1);
- std::vector<float> outputData(5);
+ std::vector<float> outputData(4);
// Aligned output
- float * alignedOutputData = outputData.data();
+ float* alignedOutputData = outputData.data();
InputTensors inputTensors
{
@@ -229,8 +231,6 @@ inline void ImportNonAlignedInputPointerTest(std::vector<BackendId> backends)
{0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), alignedOutputData)}
};
- // The result of the inference is not important, just the fact that there
- // should not be CopyMemGeneric workloads.
runtime->GetProfiler(netId)->EnableProfiling(true);
// Do the inference and expect it to fail with a ImportMemoryException
@@ -250,24 +250,26 @@ inline void ImportNonAlignedOutputPointerTest(std::vector<BackendId> backends)
IConnectableLayer* input = net->AddInputLayer(0);
- NormalizationDescriptor descriptor;
- IConnectableLayer* norm = net->AddNormalizationLayer(descriptor);
+ ActivationDescriptor descriptor;
+ descriptor.m_Function = ActivationFunction::Square;
+ IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
IConnectableLayer* output = net->AddOutputLayer(0);
- input->GetOutputSlot(0).Connect(norm->GetInputSlot(0));
- norm->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+ input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
+ pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
- input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 1 }, DataType::Float32));
- norm->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 1 }, DataType::Float32));
+ input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
+ pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
// Optimize the network
IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
+ BOOST_CHECK(optNet);
// Loads it into the runtime.
NetworkId netId;
std::string ignoredErrorMessage;
- // Enable Importing
+ // Enable Importing and Exporting
INetworkProperties networkProperties(true, true);
runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
@@ -278,7 +280,7 @@ inline void ImportNonAlignedOutputPointerTest(std::vector<BackendId> backends)
};
// Aligned input
- float * alignedInputData = inputData.data();
+ float* alignedInputData = inputData.data();
std::vector<float> outputData(5);
@@ -294,10 +296,6 @@ inline void ImportNonAlignedOutputPointerTest(std::vector<BackendId> backends)
{0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), misalignedOutputData)}
};
- // The result of the inference is not important, just the fact that there
- // should not be CopyMemGeneric workloads.
- runtime->GetProfiler(netId)->EnableProfiling(true);
-
// Do the inference and expect it to fail with a ImportMemoryException
BOOST_CHECK_THROW(runtime->EnqueueWorkload(netId, inputTensors, outputTensors), MemoryExportException);
}
@@ -315,19 +313,21 @@ inline void ImportAlignedPointerTest(std::vector<BackendId> backends)
IConnectableLayer* input = net->AddInputLayer(0);
- NormalizationDescriptor descriptor;
- IConnectableLayer* norm = net->AddNormalizationLayer(descriptor);
+ ActivationDescriptor descriptor;
+ descriptor.m_Function = ActivationFunction::Square;
+ IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
IConnectableLayer* output = net->AddOutputLayer(0);
- input->GetOutputSlot(0).Connect(norm->GetInputSlot(0));
- norm->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+ input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
+ pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
- input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 1 }, DataType::Float32));
- norm->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 1 }, DataType::Float32));
+ input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
+ pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
// Optimize the network
IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
+ BOOST_CHECK(optNet);
// Loads it into the runtime.
NetworkId netId;
@@ -366,8 +366,8 @@ inline void ImportAlignedPointerTest(std::vector<BackendId> backends)
profilerManager.GetProfiler()->Print(ss);;
std::string dump = ss.str();
- // Contains RefNormalizationWorkload
- std::size_t found = dump.find("RefNormalizationWorkload");
+ // Contains ActivationWorkload
+ std::size_t found = dump.find("ActivationWorkload");
BOOST_TEST(found != std::string::npos);
// Contains SyncMemGeneric
found = dump.find("SyncMemGeneric");
diff --git a/src/backends/cl/ClTensorHandleFactory.cpp b/src/backends/cl/ClTensorHandleFactory.cpp
index 87ecdfe1ba..3d9908a1ac 100644
--- a/src/backends/cl/ClTensorHandleFactory.cpp
+++ b/src/backends/cl/ClTensorHandleFactory.cpp
@@ -45,20 +45,26 @@ std::unique_ptr<ITensorHandle> ClTensorHandleFactory::CreateSubTensorHandle(ITen
boost::polymorphic_downcast<IClTensorHandle *>(&parent), shape, coords);
}
-std::unique_ptr<ITensorHandle> ClTensorHandleFactory::CreateTensorHandle(const TensorInfo& tensorInfo) const
+std::unique_ptr<ITensorHandle> ClTensorHandleFactory::CreateTensorHandle(const TensorInfo& tensorInfo,
+ const bool IsMemoryManaged) const
{
std::unique_ptr<ClTensorHandle> tensorHandle = std::make_unique<ClTensorHandle>(tensorInfo);
- tensorHandle->SetMemoryGroup(m_MemoryManager->GetInterLayerMemoryGroup());
-
+ if (IsMemoryManaged)
+ {
+ tensorHandle->SetMemoryGroup(m_MemoryManager->GetInterLayerMemoryGroup());
+ }
return tensorHandle;
}
std::unique_ptr<ITensorHandle> ClTensorHandleFactory::CreateTensorHandle(const TensorInfo& tensorInfo,
- DataLayout dataLayout) const
+ DataLayout dataLayout,
+ const bool IsMemoryManaged) const
{
std::unique_ptr<ClTensorHandle> tensorHandle = std::make_unique<ClTensorHandle>(tensorInfo, dataLayout);
- tensorHandle->SetMemoryGroup(m_MemoryManager->GetInterLayerMemoryGroup());
-
+ if (IsMemoryManaged)
+ {
+ tensorHandle->SetMemoryGroup(m_MemoryManager->GetInterLayerMemoryGroup());
+ }
return tensorHandle;
}
diff --git a/src/backends/cl/ClTensorHandleFactory.hpp b/src/backends/cl/ClTensorHandleFactory.hpp
index 7c3b49bee5..ea3728f7f7 100644
--- a/src/backends/cl/ClTensorHandleFactory.hpp
+++ b/src/backends/cl/ClTensorHandleFactory.hpp
@@ -28,10 +28,12 @@ public:
const TensorShape& subTensorShape,
const unsigned int* subTensorOrigin) const override;
- std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo) const override;
+ std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo,
+ const bool IsMemoryManaged = true) const override;
std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo,
- DataLayout dataLayout) const override;
+ DataLayout dataLayout,
+ const bool IsMemoryManaged = true) const override;
static const FactoryId& GetIdStatic();
diff --git a/src/backends/cl/ClWorkloadFactory.cpp b/src/backends/cl/ClWorkloadFactory.cpp
index 8210be251c..536d4dd058 100644
--- a/src/backends/cl/ClWorkloadFactory.cpp
+++ b/src/backends/cl/ClWorkloadFactory.cpp
@@ -82,7 +82,8 @@ ClWorkloadFactory::ClWorkloadFactory(const std::shared_ptr<ClMemoryManager>& mem
{
}
-std::unique_ptr<ITensorHandle> ClWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo) const
+std::unique_ptr<ITensorHandle> ClWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo,
+ const bool IsMemoryManaged) const
{
std::unique_ptr<ClTensorHandle> tensorHandle = std::make_unique<ClTensorHandle>(tensorInfo);
tensorHandle->SetMemoryGroup(m_MemoryManager->GetInterLayerMemoryGroup());
@@ -91,7 +92,8 @@ std::unique_ptr<ITensorHandle> ClWorkloadFactory::CreateTensorHandle(const Tenso
}
std::unique_ptr<ITensorHandle> ClWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo,
- DataLayout dataLayout) const
+ DataLayout dataLayout,
+ const bool IsMemoryManaged) const
{
std::unique_ptr<ClTensorHandle> tensorHandle = std::make_unique<ClTensorHandle>(tensorInfo, dataLayout);
tensorHandle->SetMemoryGroup(m_MemoryManager->GetInterLayerMemoryGroup());
diff --git a/src/backends/cl/ClWorkloadFactory.hpp b/src/backends/cl/ClWorkloadFactory.hpp
index 493f659eab..c8d58dbb5c 100644
--- a/src/backends/cl/ClWorkloadFactory.hpp
+++ b/src/backends/cl/ClWorkloadFactory.hpp
@@ -31,10 +31,12 @@ public:
TensorShape const& subTensorShape,
unsigned int const* subTensorOrigin) const override;
- std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo) const override;
+ std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo,
+ const bool IsMemoryManaged = true) const override;
std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo,
- DataLayout dataLayout) const override;
+ DataLayout dataLayout,
+ const bool IsMemoryManaged = true) const override;
std::unique_ptr<IWorkload> CreateInput(const InputQueueDescriptor& descriptor,
const WorkloadInfo& info) const override;
diff --git a/src/backends/neon/NeonTensorHandle.hpp b/src/backends/neon/NeonTensorHandle.hpp
index 9077f34888..c3662c1211 100644
--- a/src/backends/neon/NeonTensorHandle.hpp
+++ b/src/backends/neon/NeonTensorHandle.hpp
@@ -24,11 +24,20 @@ class NeonTensorHandle : public IAclTensorHandle
{
public:
NeonTensorHandle(const TensorInfo& tensorInfo)
+ : m_ImportFlags(static_cast<MemorySourceFlags>(MemorySource::Malloc)),
+ m_Imported(false),
+ m_IsImportEnabled(false)
{
armnn::armcomputetensorutils::BuildArmComputeTensor(m_Tensor, tensorInfo);
}
- NeonTensorHandle(const TensorInfo& tensorInfo, DataLayout dataLayout)
+ NeonTensorHandle(const TensorInfo& tensorInfo,
+ DataLayout dataLayout,
+ MemorySourceFlags importFlags = static_cast<MemorySourceFlags>(MemorySource::Malloc))
+ : m_ImportFlags(importFlags),
+ m_Imported(false),
+ m_IsImportEnabled(false)
+
{
armnn::armcomputetensorutils::BuildArmComputeTensor(m_Tensor, tensorInfo, dataLayout);
}
@@ -38,13 +47,21 @@ public:
virtual void Allocate() override
{
- armnn::armcomputetensorutils::InitialiseArmComputeTensorEmpty(m_Tensor);
+ // If we have enabled Importing, don't Allocate the tensor
+ if (!m_IsImportEnabled)
+ {
+ armnn::armcomputetensorutils::InitialiseArmComputeTensorEmpty(m_Tensor);
+ }
};
virtual void Manage() override
{
- BOOST_ASSERT(m_MemoryGroup != nullptr);
- m_MemoryGroup->manage(&m_Tensor);
+ // If we have enabled Importing, don't manage the tensor
+ if (!m_IsImportEnabled)
+ {
+ BOOST_ASSERT(m_MemoryGroup != nullptr);
+ m_MemoryGroup->manage(&m_Tensor);
+ }
}
virtual ITensorHandle* GetParent() const override { return nullptr; }
@@ -63,8 +80,8 @@ public:
{
return static_cast<const void*>(m_Tensor.buffer() + m_Tensor.info()->offset_first_element_in_bytes());
}
- virtual void Unmap() const override {}
+ virtual void Unmap() const override {}
TensorShape GetStrides() const override
{
@@ -76,6 +93,73 @@ public:
return armcomputetensorutils::GetShape(m_Tensor.info()->tensor_shape());
}
+ void SetImportFlags(MemorySourceFlags importFlags)
+ {
+ m_ImportFlags = importFlags;
+ }
+
+ MemorySourceFlags GetImportFlags() const override
+ {
+ return m_ImportFlags;
+ }
+
+ void SetImportEnabledFlag(bool importEnabledFlag)
+ {
+ m_IsImportEnabled = importEnabledFlag;
+ }
+
+ virtual bool Import(void* memory, MemorySource source) override
+ {
+ if (m_ImportFlags & static_cast<MemorySourceFlags>(source))
+ {
+ if (source == MemorySource::Malloc && m_IsImportEnabled)
+ {
+ // Check that the imported memory is aligned to sizeof(size_t) bytes
+ constexpr uintptr_t alignment = sizeof(size_t);
+ if (reinterpret_cast<uintptr_t>(memory) % alignment)
+ {
+ throw MemoryImportException("NeonTensorHandle::Import Attempting to import unaligned memory");
+ }
+
+ // m_Tensor not yet Allocated
+ if (!m_Imported && !m_Tensor.buffer())
+ {
+ arm_compute::Status status = m_Tensor.allocator()->import_memory(memory);
+ // Use the overloaded bool operator of Status to check if it worked, if not throw an exception
+ // with the Status error message
+ m_Imported = bool(status);
+ if (!m_Imported)
+ {
+ throw MemoryImportException(status.error_description());
+ }
+ return m_Imported;
+ }
+
+ // m_Tensor.buffer() initially allocated with Allocate().
+ if (!m_Imported && m_Tensor.buffer())
+ {
+ throw MemoryImportException(
+ "NeonTensorHandle::Import Attempting to import on an already allocated tensor");
+ }
+
+ // m_Tensor.buffer() previously imported.
+ if (m_Imported)
+ {
+ arm_compute::Status status = m_Tensor.allocator()->import_memory(memory);
+ // Use the overloaded bool operator of Status to check if it worked, if not throw an exception
+ // with the Status error message
+ m_Imported = bool(status);
+ if (!m_Imported)
+ {
+ throw MemoryImportException(status.error_description());
+ }
+ return m_Imported;
+ }
+ }
+ }
+ return false;
+ }
+
private:
// Only used for testing
void CopyOutTo(void* memory) const override
@@ -131,6 +215,9 @@ private:
arm_compute::Tensor m_Tensor;
std::shared_ptr<arm_compute::MemoryGroup> m_MemoryGroup;
+ MemorySourceFlags m_ImportFlags;
+ bool m_Imported;
+ bool m_IsImportEnabled;
};
class NeonSubTensorHandle : public IAclTensorHandle
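
A minimal sketch of exercising the new import path on a handle
directly; "handle" is assumed to be a NeonTensorHandle created with
importing enabled (IsMemoryManaged == false) for a four-element
Float32 tensor.

    std::vector<float> buffer(4);         // client-owned, malloc-backed memory
    void* clientMemory = buffer.data();   // must satisfy the alignment check above
    if (handle->GetImportFlags() & static_cast<MemorySourceFlags>(MemorySource::Malloc))
    {
        // Throws MemoryImportException for misaligned memory or an
        // already-allocated tensor; returns true on success.
        handle->Import(clientMemory, MemorySource::Malloc);
    }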
diff --git a/src/backends/neon/NeonTensorHandleFactory.cpp b/src/backends/neon/NeonTensorHandleFactory.cpp
index ff4e238cd8..8296b8315c 100644
--- a/src/backends/neon/NeonTensorHandleFactory.cpp
+++ b/src/backends/neon/NeonTensorHandleFactory.cpp
@@ -39,19 +39,33 @@ std::unique_ptr<ITensorHandle> NeonTensorHandleFactory::CreateSubTensorHandle(IT
boost::polymorphic_downcast<IAclTensorHandle*>(&parent), shape, coords);
}
-std::unique_ptr<ITensorHandle> NeonTensorHandleFactory::CreateTensorHandle(const TensorInfo& tensorInfo) const
+std::unique_ptr<ITensorHandle> NeonTensorHandleFactory::CreateTensorHandle(const TensorInfo& tensorInfo,
+ const bool IsMemoryManaged) const
{
auto tensorHandle = std::make_unique<NeonTensorHandle>(tensorInfo);
- tensorHandle->SetMemoryGroup(m_MemoryManager->GetInterLayerMemoryGroup());
+ if (IsMemoryManaged)
+ {
+ tensorHandle->SetMemoryGroup(m_MemoryManager->GetInterLayerMemoryGroup());
+ }
+ // If we are not Managing the Memory then we must be importing
+ tensorHandle->SetImportEnabledFlag(!IsMemoryManaged);
+ tensorHandle->SetImportFlags(m_ImportFlags);
return tensorHandle;
}
std::unique_ptr<ITensorHandle> NeonTensorHandleFactory::CreateTensorHandle(const TensorInfo& tensorInfo,
- DataLayout dataLayout) const
+ DataLayout dataLayout,
+ const bool IsMemoryManaged) const
{
auto tensorHandle = std::make_unique<NeonTensorHandle>(tensorInfo, dataLayout);
- tensorHandle->SetMemoryGroup(m_MemoryManager->GetInterLayerMemoryGroup());
+ if (IsMemoryManaged)
+ {
+ tensorHandle->SetMemoryGroup(m_MemoryManager->GetInterLayerMemoryGroup());
+ }
+ // If we are not Managing the Memory then we must be importing
+ tensorHandle->SetImportEnabledFlag(!IsMemoryManaged);
+ tensorHandle->SetImportFlags(m_ImportFlags);
return tensorHandle;
}
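
At the factory level the new flag selects between the two modes (an
illustrative sketch; "factory" and "tensorInfo" are assumed to be an
existing NeonTensorHandleFactory and TensorInfo):

    // Managed: the handle joins the inter-layer memory group as before.
    auto managedHandle = factory.CreateTensorHandle(tensorInfo, /*IsMemoryManaged=*/true);

    // Unmanaged: no memory group is set; the handle is flagged as
    // import-enabled and carries the factory's Malloc import flags.
    auto importHandle = factory.CreateTensorHandle(tensorInfo, /*IsMemoryManaged=*/false);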
diff --git a/src/backends/neon/NeonTensorHandleFactory.hpp b/src/backends/neon/NeonTensorHandleFactory.hpp
index 82e388e629..b03433352e 100644
--- a/src/backends/neon/NeonTensorHandleFactory.hpp
+++ b/src/backends/neon/NeonTensorHandleFactory.hpp
@@ -18,18 +18,20 @@ class NeonTensorHandleFactory : public ITensorHandleFactory
public:
NeonTensorHandleFactory(std::weak_ptr<NeonMemoryManager> mgr)
: m_MemoryManager(mgr),
- m_ImportFlags(static_cast<MemorySourceFlags>(MemorySource::Undefined)),
- m_ExportFlags(static_cast<MemorySourceFlags>(MemorySource::Undefined))
+ m_ImportFlags(static_cast<MemorySourceFlags>(MemorySource::Malloc)),
+ m_ExportFlags(static_cast<MemorySourceFlags>(MemorySource::Malloc))
{}
std::unique_ptr<ITensorHandle> CreateSubTensorHandle(ITensorHandle& parent,
const TensorShape& subTensorShape,
const unsigned int* subTensorOrigin) const override;
- std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo) const override;
+ std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo,
+ const bool IsMemoryManaged = true) const override;
std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo,
- DataLayout dataLayout) const override;
+ DataLayout dataLayout,
+ const bool IsMemoryManaged = true) const override;
static const FactoryId& GetIdStatic();
diff --git a/src/backends/neon/NeonWorkloadFactory.cpp b/src/backends/neon/NeonWorkloadFactory.cpp
index 441e27f5cf..fb81008644 100644
--- a/src/backends/neon/NeonWorkloadFactory.cpp
+++ b/src/backends/neon/NeonWorkloadFactory.cpp
@@ -72,20 +72,26 @@ std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateSubTensorHandle(ITenso
boost::polymorphic_downcast<IAclTensorHandle*>(&parent), shape, coords);
}
-std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo) const
+std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo,
+ const bool IsMemoryManaged) const
{
auto tensorHandle = std::make_unique<NeonTensorHandle>(tensorInfo);
- tensorHandle->SetMemoryGroup(m_MemoryManager->GetInterLayerMemoryGroup());
-
+ if (IsMemoryManaged)
+ {
+ tensorHandle->SetMemoryGroup(m_MemoryManager->GetInterLayerMemoryGroup());
+ }
return tensorHandle;
}
std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo,
- DataLayout dataLayout) const
+ DataLayout dataLayout,
+ const bool IsMemoryManaged) const
{
auto tensorHandle = std::make_unique<NeonTensorHandle>(tensorInfo, dataLayout);
- tensorHandle->SetMemoryGroup(m_MemoryManager->GetInterLayerMemoryGroup());
-
+ if (IsMemoryManaged)
+ {
+ tensorHandle->SetMemoryGroup(m_MemoryManager->GetInterLayerMemoryGroup());
+ }
return tensorHandle;
}
diff --git a/src/backends/neon/NeonWorkloadFactory.hpp b/src/backends/neon/NeonWorkloadFactory.hpp
index 935f9e4b4c..0bee365d8a 100644
--- a/src/backends/neon/NeonWorkloadFactory.hpp
+++ b/src/backends/neon/NeonWorkloadFactory.hpp
@@ -32,10 +32,12 @@ public:
TensorShape const& subTensorShape,
unsigned int const* subTensorOrigin) const override;
- std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo) const override;
+ std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo,
+ const bool IsMemoryManaged = true) const override;
std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo,
- DataLayout dataLayout) const override;
+ DataLayout dataLayout,
+ const bool IsMemoryManaged = true) const override;
std::unique_ptr<IWorkload> CreateInput(const InputQueueDescriptor& descriptor,
const WorkloadInfo& info) const override;
diff --git a/src/backends/neon/test/NeonEndToEndTests.cpp b/src/backends/neon/test/NeonEndToEndTests.cpp
index eb41e949b3..a09b95ed0e 100644
--- a/src/backends/neon/test/NeonEndToEndTests.cpp
+++ b/src/backends/neon/test/NeonEndToEndTests.cpp
@@ -312,4 +312,113 @@ BOOST_AUTO_TEST_CASE(NeonTransposeConvolution2dEndToEndUint8NhwcTest)
defaultBackends, armnn::DataLayout::NHWC);
}
+BOOST_AUTO_TEST_CASE(NeonImportNonAlignedInputPointerTest)
+{
+ ImportNonAlignedInputPointerTest(defaultBackends);
+}
+
+// Utility function to find the number of instances of a substring within a string.
+int SubStringCounter(std::string& string, std::string&& substring)
+{
+ std::size_t found = 0;
+ int count = 0;
+ // Look for the substring starting from where we last found the substring
+ while((found = string.find(substring, found)) != std::string::npos)
+ {
+ count++;
+ // Offset by substring length to avoid finding the same substring twice
+ found += substring.length();
+ }
+ return count;
+}
+
+BOOST_AUTO_TEST_CASE(NeonImportOnlyWorkload)
+{
+ using namespace armnn;
+
+ IRuntime::CreationOptions options;
+ IRuntimePtr runtime(IRuntime::Create(options));
+
+ // Builds up the structure of the network.
+ INetworkPtr net(INetwork::Create());
+
+ IConnectableLayer* input = net->AddInputLayer(0);
+
+ ActivationDescriptor descriptor;
+ descriptor.m_Function = ActivationFunction::Square;
+ IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
+
+ IConnectableLayer* output = net->AddOutputLayer(0);
+
+ input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
+ pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+ input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
+ pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
+
+ // optimize the network
+ std::vector<BackendId> backends = {Compute::CpuAcc};
+ IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
+
+ BOOST_TEST_CHECKPOINT("Load Network");
+ // Load it into the runtime. It should pass.
+ NetworkId netId;
+ std::string ignoredErrorMessage;
+ INetworkProperties networkProperties(true, false);
+ BOOST_TEST(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
+ == Status::Success);
+
+ BOOST_TEST_CHECKPOINT("Generate Data");
+ // Creates structures for input & output
+ std::vector<float> inputData
+ {
+ 1.0f, 2.0f, 3.0f, 4.0f
+ };
+
+ std::vector<float> outputData(4);
+
+ std::vector<float> expectedOutput
+ {
+ 1.0f, 4.0f, 9.0f, 16.0f
+ };
+
+ BOOST_TEST_CHECKPOINT("Create Network");
+ InputTensors inputTensors
+ {
+ {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
+ };
+ OutputTensors outputTensors
+ {
+ {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
+ };
+
+ BOOST_TEST_CHECKPOINT("Get Profiler");
+
+ runtime->GetProfiler(netId)->EnableProfiling(true);
+
+ BOOST_TEST_CHECKPOINT("Run Inference");
+ // Do the inference
+ runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
+
+ BOOST_TEST_CHECKPOINT("Print Profiler");
+ // Retrieve the Profiler.Print() output to get the workload execution
+ ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
+ std::stringstream ss;
+ profilerManager.GetProfiler()->Print(ss);;
+ std::string dump = ss.str();
+
+ // Check there are no SyncMemGeneric workloads as we didn't export
+ BOOST_TEST_CHECKPOINT("Find SyncMemGeneric");
+ int count = SubStringCounter(dump, "SyncMemGeneric");
+ BOOST_TEST(count == 0);
+
+ // Should only be 1 CopyMemGeneric for the output as we imported
+ BOOST_TEST_CHECKPOINT("Find CopyMemGeneric");
+ count = SubStringCounter(dump, "CopyMemGeneric");
+ BOOST_TEST(count == 1);
+
+ // Check the output is correct
+ BOOST_CHECK_EQUAL_COLLECTIONS(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end());
+}
+
BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/backends/reference/RefTensorHandleFactory.cpp b/src/backends/reference/RefTensorHandleFactory.cpp
index c97a779cb3..089f5e3325 100644
--- a/src/backends/reference/RefTensorHandleFactory.cpp
+++ b/src/backends/reference/RefTensorHandleFactory.cpp
@@ -27,15 +27,18 @@ std::unique_ptr<ITensorHandle> RefTensorHandleFactory::CreateSubTensorHandle(ITe
return nullptr;
}
-std::unique_ptr<ITensorHandle> RefTensorHandleFactory::CreateTensorHandle(const TensorInfo& tensorInfo) const
+std::unique_ptr<ITensorHandle> RefTensorHandleFactory::CreateTensorHandle(const TensorInfo& tensorInfo,
+ const bool IsMemoryManaged) const
{
+ boost::ignore_unused(IsMemoryManaged);
return std::make_unique<RefTensorHandle>(tensorInfo, m_MemoryManager, m_ImportFlags);
}
std::unique_ptr<ITensorHandle> RefTensorHandleFactory::CreateTensorHandle(const TensorInfo& tensorInfo,
- DataLayout dataLayout) const
+ DataLayout dataLayout,
+ const bool IsMemoryManaged) const
{
- boost::ignore_unused(dataLayout);
+ boost::ignore_unused(dataLayout, IsMemoryManaged);
return std::make_unique<RefTensorHandle>(tensorInfo, m_MemoryManager, m_ImportFlags);
}
diff --git a/src/backends/reference/RefTensorHandleFactory.hpp b/src/backends/reference/RefTensorHandleFactory.hpp
index 220e6fd0de..ca6af72f71 100644
--- a/src/backends/reference/RefTensorHandleFactory.hpp
+++ b/src/backends/reference/RefTensorHandleFactory.hpp
@@ -28,10 +28,12 @@ public:
TensorShape const& subTensorShape,
unsigned int const* subTensorOrigin) const override;
- std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo) const override;
+ std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo,
+ const bool IsMemoryManaged = true) const override;
std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo,
- DataLayout dataLayout) const override;
+ DataLayout dataLayout,
+ const bool IsMemoryManaged = true) const override;
static const FactoryId& GetIdStatic();
diff --git a/src/backends/reference/RefWorkloadFactory.cpp b/src/backends/reference/RefWorkloadFactory.cpp
index 346fd691f2..480b7e24ac 100644
--- a/src/backends/reference/RefWorkloadFactory.cpp
+++ b/src/backends/reference/RefWorkloadFactory.cpp
@@ -78,14 +78,20 @@ bool RefWorkloadFactory::IsLayerSupported(const Layer& layer,
return IWorkloadFactory::IsLayerSupported(s_Id, layer, dataType, outReasonIfUnsupported);
}
-std::unique_ptr<ITensorHandle> RefWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo) const
+std::unique_ptr<ITensorHandle> RefWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo,
+ const bool IsMemoryManaged) const
{
+ // For Ref it is okay to make the TensorHandle memory managed as it can also store a pointer
+ // to unmanaged memory. This also ensures memory alignment.
return std::make_unique<RefTensorHandle>(tensorInfo, m_MemoryManager);
}
std::unique_ptr<ITensorHandle> RefWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo,
- DataLayout dataLayout) const
+ DataLayout dataLayout,
+ const bool IsMemoryManaged) const
{
+ // For Ref it is okay to make the TensorHandle memory managed as it can also store a pointer
+ // to unmanaged memory. This also ensures memory alignment.
return std::make_unique<RefTensorHandle>(tensorInfo, m_MemoryManager);
}
diff --git a/src/backends/reference/RefWorkloadFactory.hpp b/src/backends/reference/RefWorkloadFactory.hpp
index 606da82d32..033f81779d 100644
--- a/src/backends/reference/RefWorkloadFactory.hpp
+++ b/src/backends/reference/RefWorkloadFactory.hpp
@@ -53,10 +53,12 @@ public:
return nullptr;
}
- std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo) const override;
+ std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo,
+ const bool IsMemoryManaged = true) const override;
std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo,
- DataLayout dataLayout) const override;
+ DataLayout dataLayout,
+ const bool IsMemoryManaged = true) const override;
std::unique_ptr<IWorkload> CreateInput(const InputQueueDescriptor& descriptor,
const WorkloadInfo& info) const override;