diff options
author | David Monahan <david.monahan@arm.com> | 2019-08-20 11:25:29 +0100 |
---|---|---|
committer | David Monahan <david.monahan@arm.com> | 2019-09-24 10:50:30 +0000 |
commit | 3fb7e105ae62cbfb3ebf1edebb90e2b6672b22aa (patch) | |
tree | 47793a736e1fb53d51b4c0fd755f4e24f7f93d98 /src/backends | |
parent | 93667b1d7c361df68bdb1d733f17aba3ba34e046 (diff) | |
download | armnn-3fb7e105ae62cbfb3ebf1edebb90e2b6672b22aa.tar.gz |
IVGCVSW-3623 Implement NeonTensorHandle::Import
Signed-off-by: David Monahan <david.monahan@arm.com>
Change-Id: I7213788725fd4e4cf1176998604e999d0b7ed6cc
Diffstat (limited to 'src/backends')
19 files changed, 333 insertions, 84 deletions
diff --git a/src/backends/backendsCommon/ITensorHandleFactory.hpp b/src/backends/backendsCommon/ITensorHandleFactory.hpp index 26f6c9b9c0..c6deaef6bb 100644 --- a/src/backends/backendsCommon/ITensorHandleFactory.hpp +++ b/src/backends/backendsCommon/ITensorHandleFactory.hpp @@ -25,10 +25,12 @@ public: TensorShape const& subTensorShape, unsigned int const* subTensorOrigin) const = 0; - virtual std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo) const = 0; + virtual std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo, + const bool IsMemoryManaged = true) const = 0; virtual std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo, - DataLayout dataLayout) const = 0; + DataLayout dataLayout, + const bool IsMemoryManaged = true) const = 0; virtual const FactoryId& GetId() const = 0; diff --git a/src/backends/backendsCommon/OutputHandler.cpp b/src/backends/backendsCommon/OutputHandler.cpp index 8f4942d8ba..e3a1b276ea 100644 --- a/src/backends/backendsCommon/OutputHandler.cpp +++ b/src/backends/backendsCommon/OutputHandler.cpp @@ -22,14 +22,14 @@ void OutputHandler::SetTensorInfo(const TensorInfo& tensorInfo) m_bTensorInfoSet = true; } -void OutputHandler::CreateTensorHandles(const IWorkloadFactory& factory) +void OutputHandler::CreateTensorHandles(const IWorkloadFactory& factory, const bool IsMemoryManaged) { - m_TensorHandle = factory.CreateTensorHandle(m_TensorInfo); + m_TensorHandle = factory.CreateTensorHandle(m_TensorInfo, IsMemoryManaged); } -void OutputHandler::CreateTensorHandles(const ITensorHandleFactory& factory) +void OutputHandler::CreateTensorHandles(const ITensorHandleFactory& factory, const bool IsMemoryManaged) { - m_TensorHandle = factory.CreateTensorHandle(m_TensorInfo); + m_TensorHandle = factory.CreateTensorHandle(m_TensorInfo, IsMemoryManaged); } void OutputHandler::CollectWorkloadOutputs(WorkloadDataCollector& dataCollector) const diff --git a/src/backends/backendsCommon/OutputHandler.hpp b/src/backends/backendsCommon/OutputHandler.hpp index 87ced20706..81768c6f5e 100644 --- a/src/backends/backendsCommon/OutputHandler.hpp +++ b/src/backends/backendsCommon/OutputHandler.hpp @@ -36,8 +36,8 @@ public: /// @brief - Creates tensor handles used by the intermediate tensors. Does not allocate memory. /// @param factory - Factory to be used for handler creation. - void CreateTensorHandles(const IWorkloadFactory& factory); - void CreateTensorHandles(const ITensorHandleFactory& factory); + void CreateTensorHandles(const IWorkloadFactory& factory, const bool IsMemoryManaged = true); + void CreateTensorHandles(const ITensorHandleFactory& factory, const bool IsMemoryManaged = true); /// @brief - Gets the matching TensorInfo for the output. /// @return - References to the output TensorInfo. diff --git a/src/backends/backendsCommon/WorkloadFactory.hpp b/src/backends/backendsCommon/WorkloadFactory.hpp index 29ebe2af48..2809e2f9e8 100644 --- a/src/backends/backendsCommon/WorkloadFactory.hpp +++ b/src/backends/backendsCommon/WorkloadFactory.hpp @@ -44,10 +44,12 @@ public: virtual std::unique_ptr<IWorkload> CreateInput(const InputQueueDescriptor& descriptor, const WorkloadInfo& info) const = 0; - virtual std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo) const = 0; + virtual std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo, + const bool IsMemoryManaged = true) const = 0; virtual std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo, - DataLayout dataLayout) const = 0; + DataLayout dataLayout, + const bool IsMemoryManaged = true) const = 0; virtual std::unique_ptr<IWorkload> CreateAbs(const AbsQueueDescriptor& descriptor, const WorkloadInfo& info) const; diff --git a/src/backends/backendsCommon/test/EndToEndTestImpl.hpp b/src/backends/backendsCommon/test/EndToEndTestImpl.hpp index ecc8806626..3bdd48bcfa 100644 --- a/src/backends/backendsCommon/test/EndToEndTestImpl.hpp +++ b/src/backends/backendsCommon/test/EndToEndTestImpl.hpp @@ -185,40 +185,42 @@ inline void ImportNonAlignedInputPointerTest(std::vector<BackendId> backends) IConnectableLayer* input = net->AddInputLayer(0); - NormalizationDescriptor descriptor; - IConnectableLayer* norm = net->AddNormalizationLayer(descriptor); + ActivationDescriptor descriptor; + descriptor.m_Function = ActivationFunction::Square; + IConnectableLayer* pooling = net->AddActivationLayer(descriptor); IConnectableLayer* output = net->AddOutputLayer(0); - input->GetOutputSlot(0).Connect(norm->GetInputSlot(0)); - norm->GetOutputSlot(0).Connect(output->GetInputSlot(0)); + input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0)); + pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0)); - input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 1 }, DataType::Float32)); - norm->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 1 }, DataType::Float32)); + input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32)); + pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32)); // Optimize the network IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec()); + BOOST_CHECK(optNet); // Loads it into the runtime. NetworkId netId; std::string ignoredErrorMessage; // Enable Importing - INetworkProperties networkProperties(true, true); + INetworkProperties networkProperties(true, false); runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties); // Creates structures for input & output std::vector<float> inputData { - 1.0f, 2.0f, 3.0f, 4.0f, 5.0f + 1.0f, 2.0f, 3.0f, 4.0f }; // Misaligned input float* misalignedInputData = reinterpret_cast<float*>(reinterpret_cast<char*>(inputData.data()) + 1); - std::vector<float> outputData(5); + std::vector<float> outputData(4); // Aligned output - float * alignedOutputData = outputData.data(); + float* alignedOutputData = outputData.data(); InputTensors inputTensors { @@ -229,8 +231,6 @@ inline void ImportNonAlignedInputPointerTest(std::vector<BackendId> backends) {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), alignedOutputData)} }; - // The result of the inference is not important, just the fact that there - // should not be CopyMemGeneric workloads. runtime->GetProfiler(netId)->EnableProfiling(true); // Do the inference and expect it to fail with a ImportMemoryException @@ -250,24 +250,26 @@ inline void ImportNonAlignedOutputPointerTest(std::vector<BackendId> backends) IConnectableLayer* input = net->AddInputLayer(0); - NormalizationDescriptor descriptor; - IConnectableLayer* norm = net->AddNormalizationLayer(descriptor); + ActivationDescriptor descriptor; + descriptor.m_Function = ActivationFunction::Square; + IConnectableLayer* pooling = net->AddActivationLayer(descriptor); IConnectableLayer* output = net->AddOutputLayer(0); - input->GetOutputSlot(0).Connect(norm->GetInputSlot(0)); - norm->GetOutputSlot(0).Connect(output->GetInputSlot(0)); + input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0)); + pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0)); - input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 1 }, DataType::Float32)); - norm->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 1 }, DataType::Float32)); + input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32)); + pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32)); // Optimize the network IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec()); + BOOST_CHECK(optNet); // Loads it into the runtime. NetworkId netId; std::string ignoredErrorMessage; - // Enable Importing + // Enable Importing and Exporting INetworkProperties networkProperties(true, true); runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties); @@ -278,7 +280,7 @@ inline void ImportNonAlignedOutputPointerTest(std::vector<BackendId> backends) }; // Aligned input - float * alignedInputData = inputData.data(); + float* alignedInputData = inputData.data(); std::vector<float> outputData(5); @@ -294,10 +296,6 @@ inline void ImportNonAlignedOutputPointerTest(std::vector<BackendId> backends) {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), misalignedOutputData)} }; - // The result of the inference is not important, just the fact that there - // should not be CopyMemGeneric workloads. - runtime->GetProfiler(netId)->EnableProfiling(true); - // Do the inference and expect it to fail with a ImportMemoryException BOOST_CHECK_THROW(runtime->EnqueueWorkload(netId, inputTensors, outputTensors), MemoryExportException); } @@ -315,19 +313,21 @@ inline void ImportAlignedPointerTest(std::vector<BackendId> backends) IConnectableLayer* input = net->AddInputLayer(0); - NormalizationDescriptor descriptor; - IConnectableLayer* norm = net->AddNormalizationLayer(descriptor); + ActivationDescriptor descriptor; + descriptor.m_Function = ActivationFunction::Square; + IConnectableLayer* pooling = net->AddActivationLayer(descriptor); IConnectableLayer* output = net->AddOutputLayer(0); - input->GetOutputSlot(0).Connect(norm->GetInputSlot(0)); - norm->GetOutputSlot(0).Connect(output->GetInputSlot(0)); + input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0)); + pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0)); - input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 1 }, DataType::Float32)); - norm->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 1 }, DataType::Float32)); + input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32)); + pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32)); // Optimize the network IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec()); + BOOST_CHECK(optNet); // Loads it into the runtime. NetworkId netId; @@ -366,8 +366,8 @@ inline void ImportAlignedPointerTest(std::vector<BackendId> backends) profilerManager.GetProfiler()->Print(ss);; std::string dump = ss.str(); - // Contains RefNormalizationWorkload - std::size_t found = dump.find("RefNormalizationWorkload"); + // Contains ActivationWorkload + std::size_t found = dump.find("ActivationWorkload"); BOOST_TEST(found != std::string::npos); // Contains SyncMemGeneric found = dump.find("SyncMemGeneric"); diff --git a/src/backends/cl/ClTensorHandleFactory.cpp b/src/backends/cl/ClTensorHandleFactory.cpp index 87ecdfe1ba..3d9908a1ac 100644 --- a/src/backends/cl/ClTensorHandleFactory.cpp +++ b/src/backends/cl/ClTensorHandleFactory.cpp @@ -45,20 +45,26 @@ std::unique_ptr<ITensorHandle> ClTensorHandleFactory::CreateSubTensorHandle(ITen boost::polymorphic_downcast<IClTensorHandle *>(&parent), shape, coords); } -std::unique_ptr<ITensorHandle> ClTensorHandleFactory::CreateTensorHandle(const TensorInfo& tensorInfo) const +std::unique_ptr<ITensorHandle> ClTensorHandleFactory::CreateTensorHandle(const TensorInfo& tensorInfo, + const bool IsMemoryManaged) const { std::unique_ptr<ClTensorHandle> tensorHandle = std::make_unique<ClTensorHandle>(tensorInfo); - tensorHandle->SetMemoryGroup(m_MemoryManager->GetInterLayerMemoryGroup()); - + if (IsMemoryManaged) + { + tensorHandle->SetMemoryGroup(m_MemoryManager->GetInterLayerMemoryGroup()); + } return tensorHandle; } std::unique_ptr<ITensorHandle> ClTensorHandleFactory::CreateTensorHandle(const TensorInfo& tensorInfo, - DataLayout dataLayout) const + DataLayout dataLayout, + const bool IsMemoryManaged) const { std::unique_ptr<ClTensorHandle> tensorHandle = std::make_unique<ClTensorHandle>(tensorInfo, dataLayout); - tensorHandle->SetMemoryGroup(m_MemoryManager->GetInterLayerMemoryGroup()); - + if (IsMemoryManaged) + { + tensorHandle->SetMemoryGroup(m_MemoryManager->GetInterLayerMemoryGroup()); + } return tensorHandle; } diff --git a/src/backends/cl/ClTensorHandleFactory.hpp b/src/backends/cl/ClTensorHandleFactory.hpp index 7c3b49bee5..ea3728f7f7 100644 --- a/src/backends/cl/ClTensorHandleFactory.hpp +++ b/src/backends/cl/ClTensorHandleFactory.hpp @@ -28,10 +28,12 @@ public: const TensorShape& subTensorShape, const unsigned int* subTensorOrigin) const override; - std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo) const override; + std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo, + const bool IsMemoryManaged = true) const override; std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo, - DataLayout dataLayout) const override; + DataLayout dataLayout, + const bool IsMemoryManaged = true) const override; static const FactoryId& GetIdStatic(); diff --git a/src/backends/cl/ClWorkloadFactory.cpp b/src/backends/cl/ClWorkloadFactory.cpp index 8210be251c..536d4dd058 100644 --- a/src/backends/cl/ClWorkloadFactory.cpp +++ b/src/backends/cl/ClWorkloadFactory.cpp @@ -82,7 +82,8 @@ ClWorkloadFactory::ClWorkloadFactory(const std::shared_ptr<ClMemoryManager>& mem { } -std::unique_ptr<ITensorHandle> ClWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo) const +std::unique_ptr<ITensorHandle> ClWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo, + const bool IsMemoryManaged) const { std::unique_ptr<ClTensorHandle> tensorHandle = std::make_unique<ClTensorHandle>(tensorInfo); tensorHandle->SetMemoryGroup(m_MemoryManager->GetInterLayerMemoryGroup()); @@ -91,7 +92,8 @@ std::unique_ptr<ITensorHandle> ClWorkloadFactory::CreateTensorHandle(const Tenso } std::unique_ptr<ITensorHandle> ClWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo, - DataLayout dataLayout) const + DataLayout dataLayout, + const bool IsMemoryManaged) const { std::unique_ptr<ClTensorHandle> tensorHandle = std::make_unique<ClTensorHandle>(tensorInfo, dataLayout); tensorHandle->SetMemoryGroup(m_MemoryManager->GetInterLayerMemoryGroup()); diff --git a/src/backends/cl/ClWorkloadFactory.hpp b/src/backends/cl/ClWorkloadFactory.hpp index 493f659eab..c8d58dbb5c 100644 --- a/src/backends/cl/ClWorkloadFactory.hpp +++ b/src/backends/cl/ClWorkloadFactory.hpp @@ -31,10 +31,12 @@ public: TensorShape const& subTensorShape, unsigned int const* subTensorOrigin) const override; - std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo) const override; + std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo, + const bool IsMemoryManaged = true) const override; std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo, - DataLayout dataLayout) const override; + DataLayout dataLayout, + const bool IsMemoryManaged = true) const override; std::unique_ptr<IWorkload> CreateInput(const InputQueueDescriptor& descriptor, const WorkloadInfo& info) const override; diff --git a/src/backends/neon/NeonTensorHandle.hpp b/src/backends/neon/NeonTensorHandle.hpp index 9077f34888..c3662c1211 100644 --- a/src/backends/neon/NeonTensorHandle.hpp +++ b/src/backends/neon/NeonTensorHandle.hpp @@ -24,11 +24,20 @@ class NeonTensorHandle : public IAclTensorHandle { public: NeonTensorHandle(const TensorInfo& tensorInfo) + : m_ImportFlags(static_cast<MemorySourceFlags>(MemorySource::Malloc)), + m_Imported(false), + m_IsImportEnabled(false) { armnn::armcomputetensorutils::BuildArmComputeTensor(m_Tensor, tensorInfo); } - NeonTensorHandle(const TensorInfo& tensorInfo, DataLayout dataLayout) + NeonTensorHandle(const TensorInfo& tensorInfo, + DataLayout dataLayout, + MemorySourceFlags importFlags = static_cast<MemorySourceFlags>(MemorySource::Malloc)) + : m_ImportFlags(importFlags), + m_Imported(false), + m_IsImportEnabled(false) + { armnn::armcomputetensorutils::BuildArmComputeTensor(m_Tensor, tensorInfo, dataLayout); } @@ -38,13 +47,21 @@ public: virtual void Allocate() override { - armnn::armcomputetensorutils::InitialiseArmComputeTensorEmpty(m_Tensor); + // If we have enabled Importing, don't Allocate the tensor + if (!m_IsImportEnabled) + { + armnn::armcomputetensorutils::InitialiseArmComputeTensorEmpty(m_Tensor); + } }; virtual void Manage() override { - BOOST_ASSERT(m_MemoryGroup != nullptr); - m_MemoryGroup->manage(&m_Tensor); + // If we have enabled Importing, don't manage the tensor + if (!m_IsImportEnabled) + { + BOOST_ASSERT(m_MemoryGroup != nullptr); + m_MemoryGroup->manage(&m_Tensor); + } } virtual ITensorHandle* GetParent() const override { return nullptr; } @@ -63,8 +80,8 @@ public: { return static_cast<const void*>(m_Tensor.buffer() + m_Tensor.info()->offset_first_element_in_bytes()); } - virtual void Unmap() const override {} + virtual void Unmap() const override {} TensorShape GetStrides() const override { @@ -76,6 +93,73 @@ public: return armcomputetensorutils::GetShape(m_Tensor.info()->tensor_shape()); } + void SetImportFlags(MemorySourceFlags importFlags) + { + m_ImportFlags = importFlags; + } + + MemorySourceFlags GetImportFlags() const override + { + return m_ImportFlags; + } + + void SetImportEnabledFlag(bool importEnabledFlag) + { + m_IsImportEnabled = importEnabledFlag; + } + + virtual bool Import(void* memory, MemorySource source) override + { + if (m_ImportFlags & static_cast<MemorySourceFlags>(source)) + { + if (source == MemorySource::Malloc && m_IsImportEnabled) + { + // Checks the 16 byte memory alignment + constexpr uintptr_t alignment = sizeof(size_t); + if (reinterpret_cast<uintptr_t>(memory) % alignment) + { + throw MemoryImportException("NeonTensorHandle::Import Attempting to import unaligned memory"); + } + + // m_Tensor not yet Allocated + if (!m_Imported && !m_Tensor.buffer()) + { + arm_compute::Status status = m_Tensor.allocator()->import_memory(memory); + // Use the overloaded bool operator of Status to check if it worked, if not throw an exception + // with the Status error message + m_Imported = bool(status); + if (!m_Imported) + { + throw MemoryImportException(status.error_description()); + } + return m_Imported; + } + + // m_Tensor.buffer() initially allocated with Allocate(). + if (!m_Imported && m_Tensor.buffer()) + { + throw MemoryImportException( + "NeonTensorHandle::Import Attempting to import on an already allocated tensor"); + } + + // m_Tensor.buffer() previously imported. + if (m_Imported) + { + arm_compute::Status status = m_Tensor.allocator()->import_memory(memory); + // Use the overloaded bool operator of Status to check if it worked, if not throw an exception + // with the Status error message + m_Imported = bool(status); + if (!m_Imported) + { + throw MemoryImportException(status.error_description()); + } + return m_Imported; + } + } + } + return false; + } + private: // Only used for testing void CopyOutTo(void* memory) const override @@ -131,6 +215,9 @@ private: arm_compute::Tensor m_Tensor; std::shared_ptr<arm_compute::MemoryGroup> m_MemoryGroup; + MemorySourceFlags m_ImportFlags; + bool m_Imported; + bool m_IsImportEnabled; }; class NeonSubTensorHandle : public IAclTensorHandle diff --git a/src/backends/neon/NeonTensorHandleFactory.cpp b/src/backends/neon/NeonTensorHandleFactory.cpp index ff4e238cd8..8296b8315c 100644 --- a/src/backends/neon/NeonTensorHandleFactory.cpp +++ b/src/backends/neon/NeonTensorHandleFactory.cpp @@ -39,19 +39,33 @@ std::unique_ptr<ITensorHandle> NeonTensorHandleFactory::CreateSubTensorHandle(IT boost::polymorphic_downcast<IAclTensorHandle*>(&parent), shape, coords); } -std::unique_ptr<ITensorHandle> NeonTensorHandleFactory::CreateTensorHandle(const TensorInfo& tensorInfo) const +std::unique_ptr<ITensorHandle> NeonTensorHandleFactory::CreateTensorHandle(const TensorInfo& tensorInfo, + const bool IsMemoryManaged) const { auto tensorHandle = std::make_unique<NeonTensorHandle>(tensorInfo); - tensorHandle->SetMemoryGroup(m_MemoryManager->GetInterLayerMemoryGroup()); + if (IsMemoryManaged) + { + tensorHandle->SetMemoryGroup(m_MemoryManager->GetInterLayerMemoryGroup()); + } + // If we are not Managing the Memory then we must be importing + tensorHandle->SetImportEnabledFlag(!IsMemoryManaged); + tensorHandle->SetImportFlags(m_ImportFlags); return tensorHandle; } std::unique_ptr<ITensorHandle> NeonTensorHandleFactory::CreateTensorHandle(const TensorInfo& tensorInfo, - DataLayout dataLayout) const + DataLayout dataLayout, + const bool IsMemoryManaged) const { auto tensorHandle = std::make_unique<NeonTensorHandle>(tensorInfo, dataLayout); - tensorHandle->SetMemoryGroup(m_MemoryManager->GetInterLayerMemoryGroup()); + if (IsMemoryManaged) + { + tensorHandle->SetMemoryGroup(m_MemoryManager->GetInterLayerMemoryGroup()); + } + // If we are not Managing the Memory then we must be importing + tensorHandle->SetImportEnabledFlag(!IsMemoryManaged); + tensorHandle->SetImportFlags(m_ImportFlags); return tensorHandle; } diff --git a/src/backends/neon/NeonTensorHandleFactory.hpp b/src/backends/neon/NeonTensorHandleFactory.hpp index 82e388e629..b03433352e 100644 --- a/src/backends/neon/NeonTensorHandleFactory.hpp +++ b/src/backends/neon/NeonTensorHandleFactory.hpp @@ -18,18 +18,20 @@ class NeonTensorHandleFactory : public ITensorHandleFactory public: NeonTensorHandleFactory(std::weak_ptr<NeonMemoryManager> mgr) : m_MemoryManager(mgr), - m_ImportFlags(static_cast<MemorySourceFlags>(MemorySource::Undefined)), - m_ExportFlags(static_cast<MemorySourceFlags>(MemorySource::Undefined)) + m_ImportFlags(static_cast<MemorySourceFlags>(MemorySource::Malloc)), + m_ExportFlags(static_cast<MemorySourceFlags>(MemorySource::Malloc)) {} std::unique_ptr<ITensorHandle> CreateSubTensorHandle(ITensorHandle& parent, const TensorShape& subTensorShape, const unsigned int* subTensorOrigin) const override; - std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo) const override; + std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo, + const bool IsMemoryManaged = true) const override; std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo, - DataLayout dataLayout) const override; + DataLayout dataLayout, + const bool IsMemoryManaged = true) const override; static const FactoryId& GetIdStatic(); diff --git a/src/backends/neon/NeonWorkloadFactory.cpp b/src/backends/neon/NeonWorkloadFactory.cpp index 441e27f5cf..fb81008644 100644 --- a/src/backends/neon/NeonWorkloadFactory.cpp +++ b/src/backends/neon/NeonWorkloadFactory.cpp @@ -72,20 +72,26 @@ std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateSubTensorHandle(ITenso boost::polymorphic_downcast<IAclTensorHandle*>(&parent), shape, coords); } -std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo) const +std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo, + const bool IsMemoryManaged) const { auto tensorHandle = std::make_unique<NeonTensorHandle>(tensorInfo); - tensorHandle->SetMemoryGroup(m_MemoryManager->GetInterLayerMemoryGroup()); - + if (IsMemoryManaged) + { + tensorHandle->SetMemoryGroup(m_MemoryManager->GetInterLayerMemoryGroup()); + } return tensorHandle; } std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo, - DataLayout dataLayout) const + DataLayout dataLayout, + const bool IsMemoryManaged) const { auto tensorHandle = std::make_unique<NeonTensorHandle>(tensorInfo, dataLayout); - tensorHandle->SetMemoryGroup(m_MemoryManager->GetInterLayerMemoryGroup()); - + if (IsMemoryManaged) + { + tensorHandle->SetMemoryGroup(m_MemoryManager->GetInterLayerMemoryGroup()); + } return tensorHandle; } diff --git a/src/backends/neon/NeonWorkloadFactory.hpp b/src/backends/neon/NeonWorkloadFactory.hpp index 935f9e4b4c..0bee365d8a 100644 --- a/src/backends/neon/NeonWorkloadFactory.hpp +++ b/src/backends/neon/NeonWorkloadFactory.hpp @@ -32,10 +32,12 @@ public: TensorShape const& subTensorShape, unsigned int const* subTensorOrigin) const override; - std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo) const override; + std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo, + const bool IsMemoryManaged = true) const override; std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo, - DataLayout dataLayout) const override; + DataLayout dataLayout, + const bool IsMemoryManaged = true) const override; std::unique_ptr<IWorkload> CreateInput(const InputQueueDescriptor& descriptor, const WorkloadInfo& info) const override; diff --git a/src/backends/neon/test/NeonEndToEndTests.cpp b/src/backends/neon/test/NeonEndToEndTests.cpp index eb41e949b3..a09b95ed0e 100644 --- a/src/backends/neon/test/NeonEndToEndTests.cpp +++ b/src/backends/neon/test/NeonEndToEndTests.cpp @@ -312,4 +312,113 @@ BOOST_AUTO_TEST_CASE(NeonTransposeConvolution2dEndToEndUint8NhwcTest) defaultBackends, armnn::DataLayout::NHWC); } +BOOST_AUTO_TEST_CASE(NeonImportNonAlignedInputPointerTest) +{ + ImportNonAlignedInputPointerTest(defaultBackends); +} + +// Utility function to find the number of instances of a substring within a string. +int SubStringCounter(std::string& string, std::string&& substring) +{ + std::size_t found = 0; + int count = 0; + // Look for the substring starting from where we last found the substring + while((found = string.find(substring, found)) != std::string::npos) + { + count++; + // Offset by substring length to avoid finding the same substring twice + found += substring.length(); + } + return count; +} + +BOOST_AUTO_TEST_CASE(NeonImportOnlyWorkload) +{ + using namespace armnn; + + IRuntime::CreationOptions options; + IRuntimePtr runtime(IRuntime::Create(options)); + + // Builds up the structure of the network. + INetworkPtr net(INetwork::Create()); + + IConnectableLayer* input = net->AddInputLayer(0); + + ActivationDescriptor descriptor; + descriptor.m_Function = ActivationFunction::Square; + IConnectableLayer* pooling = net->AddActivationLayer(descriptor); + + IConnectableLayer* output = net->AddOutputLayer(0); + + input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0)); + pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0)); + + input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32)); + pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32)); + + // optimize the network + std::vector<BackendId> backends = {Compute::CpuAcc}; + IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec()); + + BOOST_TEST_CHECKPOINT("Load Network"); + // Load it into the runtime. It should pass. + NetworkId netId; + std::string ignoredErrorMessage; + INetworkProperties networkProperties(true, false); + BOOST_TEST(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties) + == Status::Success); + + BOOST_TEST_CHECKPOINT("Generate Data"); + // Creates structures for input & output + std::vector<float> inputData + { + 1.0f, 2.0f, 3.0f, 4.0f + }; + + std::vector<float> outputData(4); + + std::vector<float> expectedOutput + { + 1.0f, 4.0f, 9.0f, 16.0f + }; + + BOOST_TEST_CHECKPOINT("Create Network"); + InputTensors inputTensors + { + {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())}, + }; + OutputTensors outputTensors + { + {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())} + }; + + BOOST_TEST_CHECKPOINT("Get Profiler"); + + runtime->GetProfiler(netId)->EnableProfiling(true); + + BOOST_TEST_CHECKPOINT("Run Inference"); + // Do the inference + runtime->EnqueueWorkload(netId, inputTensors, outputTensors); + + BOOST_TEST_CHECKPOINT("Print Profiler"); + // Retrieve the Profiler.Print() output to get the workload execution + ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance(); + std::stringstream ss; + profilerManager.GetProfiler()->Print(ss);; + std::string dump = ss.str(); + + // Check there are no SyncMemGeneric workloads as we didn't export + BOOST_TEST_CHECKPOINT("Find SyncMemGeneric"); + int count = SubStringCounter(dump, "SyncMemGeneric"); + BOOST_TEST(count == 0); + + // Should only be 1 CopyMemGeneric for the output as we imported + BOOST_TEST_CHECKPOINT("Find CopyMemGeneric"); + count = SubStringCounter(dump, "CopyMemGeneric"); + BOOST_TEST(count == 1); + + // Check the output is correct + BOOST_CHECK_EQUAL_COLLECTIONS(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()); +} + BOOST_AUTO_TEST_SUITE_END() diff --git a/src/backends/reference/RefTensorHandleFactory.cpp b/src/backends/reference/RefTensorHandleFactory.cpp index c97a779cb3..089f5e3325 100644 --- a/src/backends/reference/RefTensorHandleFactory.cpp +++ b/src/backends/reference/RefTensorHandleFactory.cpp @@ -27,15 +27,18 @@ std::unique_ptr<ITensorHandle> RefTensorHandleFactory::CreateSubTensorHandle(ITe return nullptr; } -std::unique_ptr<ITensorHandle> RefTensorHandleFactory::CreateTensorHandle(const TensorInfo& tensorInfo) const +std::unique_ptr<ITensorHandle> RefTensorHandleFactory::CreateTensorHandle(const TensorInfo& tensorInfo, + const bool IsMemoryManaged) const { + boost::ignore_unused(IsMemoryManaged); return std::make_unique<RefTensorHandle>(tensorInfo, m_MemoryManager, m_ImportFlags); } std::unique_ptr<ITensorHandle> RefTensorHandleFactory::CreateTensorHandle(const TensorInfo& tensorInfo, - DataLayout dataLayout) const + DataLayout dataLayout, + const bool IsMemoryManaged) const { - boost::ignore_unused(dataLayout); + boost::ignore_unused(dataLayout, IsMemoryManaged); return std::make_unique<RefTensorHandle>(tensorInfo, m_MemoryManager, m_ImportFlags); } diff --git a/src/backends/reference/RefTensorHandleFactory.hpp b/src/backends/reference/RefTensorHandleFactory.hpp index 220e6fd0de..ca6af72f71 100644 --- a/src/backends/reference/RefTensorHandleFactory.hpp +++ b/src/backends/reference/RefTensorHandleFactory.hpp @@ -28,10 +28,12 @@ public: TensorShape const& subTensorShape, unsigned int const* subTensorOrigin) const override; - std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo) const override; + std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo, + const bool IsMemoryManaged = true) const override; std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo, - DataLayout dataLayout) const override; + DataLayout dataLayout, + const bool IsMemoryManaged = true) const override; static const FactoryId& GetIdStatic(); diff --git a/src/backends/reference/RefWorkloadFactory.cpp b/src/backends/reference/RefWorkloadFactory.cpp index 346fd691f2..480b7e24ac 100644 --- a/src/backends/reference/RefWorkloadFactory.cpp +++ b/src/backends/reference/RefWorkloadFactory.cpp @@ -78,14 +78,20 @@ bool RefWorkloadFactory::IsLayerSupported(const Layer& layer, return IWorkloadFactory::IsLayerSupported(s_Id, layer, dataType, outReasonIfUnsupported); } -std::unique_ptr<ITensorHandle> RefWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo) const +std::unique_ptr<ITensorHandle> RefWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo, + const bool IsMemoryManaged) const { + // For Ref it is okay to make the TensorHandle memory managed as it can also store a pointer + // to unmanaged memory. This also ensures memory alignment. return std::make_unique<RefTensorHandle>(tensorInfo, m_MemoryManager); } std::unique_ptr<ITensorHandle> RefWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo, - DataLayout dataLayout) const + DataLayout dataLayout, + const bool IsMemoryManaged) const { + // For Ref it is okay to make the TensorHandle memory managed as it can also store a pointer + // to unmanaged memory. This also ensures memory alignment. return std::make_unique<RefTensorHandle>(tensorInfo, m_MemoryManager); } diff --git a/src/backends/reference/RefWorkloadFactory.hpp b/src/backends/reference/RefWorkloadFactory.hpp index 606da82d32..033f81779d 100644 --- a/src/backends/reference/RefWorkloadFactory.hpp +++ b/src/backends/reference/RefWorkloadFactory.hpp @@ -53,10 +53,12 @@ public: return nullptr; } - std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo) const override; + std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo, + const bool IsMemoryManaged = true) const override; std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo, - DataLayout dataLayout) const override; + DataLayout dataLayout, + const bool IsMemoryManaged = true) const override; std::unique_ptr<IWorkload> CreateInput(const InputQueueDescriptor& descriptor, const WorkloadInfo& info) const override; |