From b8d771ac2e6f847a64b3b203591c5b1c3e198d3a Mon Sep 17 00:00:00 2001 From: Narumol Prangnawarat Date: Fri, 14 Aug 2020 11:51:12 +0100 Subject: IVGCVSW-5012 Enable zero copy for Neon * Allow memory import if padding is not required in Neon * AddMockImportBackend for fallback tests * Refactor GraphUtils * Memory import unit tests * Fallback unit tests Signed-off-by: Narumol Prangnawarat Change-Id: Ic2e141e12774bf6d915e77745b6f6d2d83d9b82d --- src/armnn/LoadedNetwork.cpp | 1 + src/armnn/Network.cpp | 10 +- src/armnn/test/GraphTests.cpp | 13 - src/armnn/test/GraphUtils.cpp | 13 + src/armnn/test/GraphUtils.hpp | 2 + src/backends/backendsCommon/test/CMakeLists.txt | 3 + .../test/mockBackend/MockImportBackend.cpp | 115 +++++ .../test/mockBackend/MockImportBackend.hpp | 53 ++ .../test/mockBackend/MockImportLayerSupport.hpp | 46 ++ src/backends/neon/NeonTensorHandleFactory.cpp | 4 +- src/backends/neon/NeonTensorHandleFactory.hpp | 6 +- src/backends/neon/test/CMakeLists.txt | 1 + src/backends/neon/test/NeonEndToEndTests.cpp | 10 +- src/backends/neon/test/NeonFallbackTests.cpp | 547 +++++++++++++++++++++ src/backends/neon/test/NeonTensorHandleTests.cpp | 74 +++ 15 files changed, 876 insertions(+), 22 deletions(-) create mode 100644 src/backends/backendsCommon/test/mockBackend/MockImportBackend.cpp create mode 100644 src/backends/backendsCommon/test/mockBackend/MockImportBackend.hpp create mode 100644 src/backends/backendsCommon/test/mockBackend/MockImportLayerSupport.hpp create mode 100644 src/backends/neon/test/NeonFallbackTests.cpp diff --git a/src/armnn/LoadedNetwork.cpp b/src/armnn/LoadedNetwork.cpp index 593539d3ee..4a293b92d9 100644 --- a/src/armnn/LoadedNetwork.cpp +++ b/src/armnn/LoadedNetwork.cpp @@ -168,6 +168,7 @@ LoadedNetwork::LoadedNetwork(std::unique_ptr net, switch (layer->GetType()) { case LayerType::Input: + case LayerType::MemImport: { // If IsImportEnabled is true then we need to set IsMemoryManaged to false when creating TensorHandles 
layer->CreateTensorHandles(m_TensorHandleFactoryRegistry, workloadFactory, !m_IsImportEnabled); diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp index 132924a19a..94a9961a81 100644 --- a/src/armnn/Network.cpp +++ b/src/armnn/Network.cpp @@ -912,7 +912,15 @@ EdgeStrategy CalculateEdgeStrategy(BackendsMap& backends, if ((dstFactory->GetImportFlags() & srcFactory->GetExportFlags()) != 0) { - return EdgeStrategy::ExportToTarget; + auto srcCapability = srcFactory->GetCapabilities(&layer, &layer, CapabilityClass::PaddingRequired); + auto dstCapability = dstFactory->GetCapabilities(&connectedLayer, + &connectedLayer, + CapabilityClass::PaddingRequired); + // Do not require memory copy if the source and destination do not require padding. + if (srcCapability.empty() && dstCapability.empty()) + { + return EdgeStrategy::ExportToTarget; + } } } } diff --git a/src/armnn/test/GraphTests.cpp b/src/armnn/test/GraphTests.cpp index a3c42b6ce7..5a17c1c227 100644 --- a/src/armnn/test/GraphTests.cpp +++ b/src/armnn/test/GraphTests.cpp @@ -19,19 +19,6 @@ #include #include -/// Checks that first comes before second in the order. -bool CheckOrder(const armnn::Graph& graph, const armnn::Layer* first, const armnn::Layer* second) -{ - graph.Print(); - - const auto& order = graph.TopologicalSort(); - - auto firstPos = std::find(order.begin(), order.end(), first); - auto secondPos = std::find(firstPos, order.end(), second); - - return (secondPos != order.end()); -} - BOOST_AUTO_TEST_SUITE(Graph) BOOST_AUTO_TEST_CASE(ClassGraph) diff --git a/src/armnn/test/GraphUtils.cpp b/src/armnn/test/GraphUtils.cpp index 36db900a2d..bc6b562c9d 100644 --- a/src/armnn/test/GraphUtils.cpp +++ b/src/armnn/test/GraphUtils.cpp @@ -63,3 +63,16 @@ bool IsConnected(armnn::Layer* srcLayer, armnn::Layer* destLayer, } return false; } + +/// Checks that first comes before second in the order. 
+bool CheckOrder(const armnn::Graph& graph, const armnn::Layer* first, const armnn::Layer* second) +{ + graph.Print(); + + const auto& order = graph.TopologicalSort(); + + auto firstPos = std::find(order.begin(), order.end(), first); + auto secondPos = std::find(firstPos, order.end(), second); + + return (secondPos != order.end()); +} diff --git a/src/armnn/test/GraphUtils.hpp b/src/armnn/test/GraphUtils.hpp index b51e4d179e..60d03dca23 100644 --- a/src/armnn/test/GraphUtils.hpp +++ b/src/armnn/test/GraphUtils.hpp @@ -21,3 +21,5 @@ bool IsConnected(armnn::Layer* srcLayer, armnn::Layer* destLayer, unsigned int srcSlot, unsigned int destSlot, const armnn::TensorInfo& expectedTensorInfo); +bool CheckOrder(const armnn::Graph& graph, const armnn::Layer* first, const armnn::Layer* second); + diff --git a/src/backends/backendsCommon/test/CMakeLists.txt b/src/backends/backendsCommon/test/CMakeLists.txt index f87a69bc34..ccbfdc6201 100644 --- a/src/backends/backendsCommon/test/CMakeLists.txt +++ b/src/backends/backendsCommon/test/CMakeLists.txt @@ -159,6 +159,9 @@ list(APPEND armnnBackendsCommonUnitTests_sources layerTests/SubtractionTestImpl.hpp layerTests/TransposeConvolution2dTestImpl.cpp layerTests/TransposeConvolution2dTestImpl.hpp + mockBackend/MockImportBackend.cpp + mockBackend/MockImportBackend.hpp + mockBackend/MockImportLayerSupport.hpp ) if (ARMNNREF) diff --git a/src/backends/backendsCommon/test/mockBackend/MockImportBackend.cpp b/src/backends/backendsCommon/test/mockBackend/MockImportBackend.cpp new file mode 100644 index 0000000000..ebe94348fc --- /dev/null +++ b/src/backends/backendsCommon/test/mockBackend/MockImportBackend.cpp @@ -0,0 +1,115 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "MockImportBackend.hpp" +#include "MockImportLayerSupport.hpp" + +#include +#include +#include +#include + +#include +#include + +#include + +namespace armnn +{ + +MockImportBackendInitialiser::MockImportBackendInitialiser() +{ + BackendRegistryInstance().Register(MockImportBackend::GetIdStatic(), + []() + { + return IBackendInternalUniquePtr(new MockImportBackend); + }); +} + +MockImportBackendInitialiser::~MockImportBackendInitialiser() +{ + try + { + BackendRegistryInstance().Deregister(MockImportBackend::GetIdStatic()); + } + catch (...) + { + std::cerr << "could not deregister mock import backend" << std::endl; + } +} + +const BackendId& MockImportBackend::GetIdStatic() +{ + static const BackendId s_Id{ MockImportBackendId() }; + return s_Id; +} + +IBackendInternal::IWorkloadFactoryPtr MockImportBackend::CreateWorkloadFactory( + const IBackendInternal::IMemoryManagerSharedPtr& memoryManager) const +{ + return std::make_unique(PolymorphicPointerDowncast(memoryManager)); +} + +IBackendInternal::IWorkloadFactoryPtr MockImportBackend::CreateWorkloadFactory( + class TensorHandleFactoryRegistry& tensorHandleFactoryRegistry) const +{ + auto memoryManager = std::make_shared(); + + tensorHandleFactoryRegistry.RegisterMemoryManager(memoryManager); + tensorHandleFactoryRegistry.RegisterFactory(std::make_unique(memoryManager)); + + return std::make_unique(PolymorphicPointerDowncast(memoryManager)); +} + +IBackendInternal::IBackendContextPtr MockImportBackend::CreateBackendContext(const IRuntime::CreationOptions&) const +{ + return IBackendContextPtr{}; +} + +IBackendInternal::IBackendProfilingContextPtr MockImportBackend::CreateBackendProfilingContext( + const IRuntime::CreationOptions&, IBackendProfilingPtr&) +{ + return IBackendProfilingContextPtr{}; +} + +IBackendInternal::IMemoryManagerUniquePtr MockImportBackend::CreateMemoryManager() const +{ + return std::make_unique(); +} + +IBackendInternal::Optimizations 
MockImportBackend::GetOptimizations() const +{ + return Optimizations{}; +} + +IBackendInternal::ILayerSupportSharedPtr MockImportBackend::GetLayerSupport() const +{ + static ILayerSupportSharedPtr layerSupport{new MockImportLayerSupport}; + return layerSupport; +} + +OptimizationViews MockImportBackend::OptimizeSubgraphView(const SubgraphView& subgraph) const +{ + OptimizationViews optimizationViews; + + optimizationViews.AddUntouchedSubgraph(SubgraphView(subgraph)); + + return optimizationViews; +} + +std::vector MockImportBackend::GetHandleFactoryPreferences() const +{ + return std::vector { RefTensorHandleFactory::GetIdStatic() }; +} + +void MockImportBackend::RegisterTensorHandleFactories(class TensorHandleFactoryRegistry& registry) +{ + auto memoryManager = std::make_shared(); + + registry.RegisterMemoryManager(memoryManager); + registry.RegisterFactory(std::make_unique(memoryManager)); +} + +} // namespace armnn diff --git a/src/backends/backendsCommon/test/mockBackend/MockImportBackend.hpp b/src/backends/backendsCommon/test/mockBackend/MockImportBackend.hpp new file mode 100644 index 0000000000..ecc661f43b --- /dev/null +++ b/src/backends/backendsCommon/test/mockBackend/MockImportBackend.hpp @@ -0,0 +1,53 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// +#pragma once + +#include + +namespace armnn +{ + +constexpr const char* MockImportBackendId() { return "MockRef"; } + +class MockImportBackendInitialiser +{ +public: + MockImportBackendInitialiser(); + ~MockImportBackendInitialiser(); +}; + +class MockImportBackend : public IBackendInternal +{ +public: + MockImportBackend() = default; + ~MockImportBackend() = default; + + static const BackendId& GetIdStatic(); + const BackendId& GetId() const override { return GetIdStatic(); } + + IBackendInternal::IMemoryManagerUniquePtr CreateMemoryManager() const override; + + IBackendInternal::IWorkloadFactoryPtr CreateWorkloadFactory( + const IBackendInternal::IMemoryManagerSharedPtr& memoryManager = nullptr) const override; + + IBackendInternal::IWorkloadFactoryPtr CreateWorkloadFactory( + class TensorHandleFactoryRegistry& tensorHandleFactoryRegistry) const override; + + IBackendInternal::IBackendContextPtr CreateBackendContext(const IRuntime::CreationOptions&) const override; + + IBackendInternal::IBackendProfilingContextPtr CreateBackendProfilingContext( + const IRuntime::CreationOptions& creationOptions, IBackendProfilingPtr& backendProfiling) override; + + IBackendInternal::Optimizations GetOptimizations() const override; + IBackendInternal::ILayerSupportSharedPtr GetLayerSupport() const override; + + OptimizationViews OptimizeSubgraphView(const SubgraphView& subgraph) const override; + + std::vector GetHandleFactoryPreferences() const override; + + void RegisterTensorHandleFactories(class TensorHandleFactoryRegistry& registry) override; +}; + +} // namespace armnn diff --git a/src/backends/backendsCommon/test/mockBackend/MockImportLayerSupport.hpp b/src/backends/backendsCommon/test/mockBackend/MockImportLayerSupport.hpp new file mode 100644 index 0000000000..75e637efdf --- /dev/null +++ b/src/backends/backendsCommon/test/mockBackend/MockImportLayerSupport.hpp @@ -0,0 +1,46 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. 
All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#include + +#include + +namespace armnn +{ + +class MockImportLayerSupport : public LayerSupportBase +{ +public: + bool IsAdditionSupported(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + Optional reasonIfUnsupported = EmptyOptional()) const override + { + IgnoreUnused(input0); + IgnoreUnused(input1); + IgnoreUnused(output); + IgnoreUnused(reasonIfUnsupported); + return true; + } + + bool IsInputSupported(const TensorInfo& input, + Optional reasonIfUnsupported) const override + { + IgnoreUnused(input); + IgnoreUnused(reasonIfUnsupported); + return true; + } + + bool IsOutputSupported(const TensorInfo& output, + Optional reasonIfUnsupported) const override + { + IgnoreUnused(output); + IgnoreUnused(reasonIfUnsupported); + return true; + } +}; + +} // namespace armnn diff --git a/src/backends/neon/NeonTensorHandleFactory.cpp b/src/backends/neon/NeonTensorHandleFactory.cpp index 53d5a04b32..ae6ab59efd 100644 --- a/src/backends/neon/NeonTensorHandleFactory.cpp +++ b/src/backends/neon/NeonTensorHandleFactory.cpp @@ -103,12 +103,12 @@ bool NeonTensorHandleFactory::SupportsSubTensors() const MemorySourceFlags NeonTensorHandleFactory::GetExportFlags() const { - return 0; + return m_ExportFlags; } MemorySourceFlags NeonTensorHandleFactory::GetImportFlags() const { - return 0; + return m_ImportFlags; } std::vector NeonTensorHandleFactory::GetCapabilities(const IConnectableLayer* layer, diff --git a/src/backends/neon/NeonTensorHandleFactory.hpp b/src/backends/neon/NeonTensorHandleFactory.hpp index ae45aadb7c..2ca67c9d6e 100644 --- a/src/backends/neon/NeonTensorHandleFactory.hpp +++ b/src/backends/neon/NeonTensorHandleFactory.hpp @@ -41,7 +41,9 @@ class NeonTensorHandleFactory : public ITensorHandleFactory { public: NeonTensorHandleFactory(std::weak_ptr mgr) - : m_MemoryManager(mgr) + : m_MemoryManager(mgr), + m_ImportFlags(static_cast(MemorySource::Malloc)), + 
m_ExportFlags(static_cast(MemorySource::Malloc)) {} std::unique_ptr CreateSubTensorHandle(ITensorHandle& parent, @@ -76,6 +78,8 @@ public: private: mutable std::shared_ptr m_MemoryManager; + MemorySourceFlags m_ImportFlags; + MemorySourceFlags m_ExportFlags; }; } // namespace armnn diff --git a/src/backends/neon/test/CMakeLists.txt b/src/backends/neon/test/CMakeLists.txt index 16c066bcbd..dd13b6348c 100644 --- a/src/backends/neon/test/CMakeLists.txt +++ b/src/backends/neon/test/CMakeLists.txt @@ -6,6 +6,7 @@ list(APPEND armnnNeonBackendUnitTests_sources NeonCreateWorkloadTests.cpp NeonEndToEndTests.cpp + NeonFallbackTests.cpp NeonJsonPrinterTests.cpp NeonLayerSupportTests.cpp NeonLayerTests.cpp diff --git a/src/backends/neon/test/NeonEndToEndTests.cpp b/src/backends/neon/test/NeonEndToEndTests.cpp index ffbae51df6..dc0a609ff7 100644 --- a/src/backends/neon/test/NeonEndToEndTests.cpp +++ b/src/backends/neon/test/NeonEndToEndTests.cpp @@ -410,27 +410,27 @@ BOOST_AUTO_TEST_CASE(NeonExportNonAlignedOutputPointerTest) ExportNonAlignedOutputPointerTest(defaultBackends); } -BOOST_AUTO_TEST_CASE(NeonImportAlignedPointerTest, * boost::unit_test::disabled()) +BOOST_AUTO_TEST_CASE(NeonImportAlignedPointerTest) { ImportAlignedPointerTest(defaultBackends); } -BOOST_AUTO_TEST_CASE(NeonImportOnlyWorkload, * boost::unit_test::disabled()) +BOOST_AUTO_TEST_CASE(NeonImportOnlyWorkload) { ImportOnlyWorkload(defaultBackends); } -BOOST_AUTO_TEST_CASE(NeonExportOnlyWorkload, * boost::unit_test::disabled()) +BOOST_AUTO_TEST_CASE(NeonExportOnlyWorkload) { ExportOnlyWorkload(defaultBackends); } -BOOST_AUTO_TEST_CASE(NeonImportAndExportWorkload, * boost::unit_test::disabled()) +BOOST_AUTO_TEST_CASE(NeonImportAndExportWorkload) { ImportAndExportWorkload(defaultBackends); } -BOOST_AUTO_TEST_CASE(NeonExportOutputWithSeveralOutputSlotConnectionsTest, * boost::unit_test::disabled()) +BOOST_AUTO_TEST_CASE(NeonExportOutputWithSeveralOutputSlotConnectionsTest) { 
ExportOutputWithSeveralOutputSlotConnectionsTest(defaultBackends); } diff --git a/src/backends/neon/test/NeonFallbackTests.cpp b/src/backends/neon/test/NeonFallbackTests.cpp new file mode 100644 index 0000000000..cf4d91b119 --- /dev/null +++ b/src/backends/neon/test/NeonFallbackTests.cpp @@ -0,0 +1,547 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include +#include + +#include + +#include + +BOOST_AUTO_TEST_SUITE(NeonFallback) + +std::vector defaultBackends = { armnn::Compute::CpuAcc }; + +BOOST_AUTO_TEST_CASE(FallbackImportToCpuAcc) +{ + using namespace armnn; + + // Create a mock backend object + MockImportBackendInitialiser initialiser; // Register the Mock Backend + auto backendObjPtr = CreateBackendObject(MockImportBackendId()); + BOOST_TEST((backendObjPtr != nullptr)); + + BackendIdSet backendIds = BackendRegistryInstance().GetBackendIds(); + if (backendIds.find("MockRef") == backendIds.end()) + { + std::string message = "Cannot load MockRef"; + BOOST_FAIL(message); + } + + // Create runtime in which test will run and allow fallback to CpuAcc. + IRuntime::CreationOptions options; + IRuntimePtr runtime(IRuntime::Create(options)); + + // Builds up the structure of the network. 
+ INetworkPtr net(INetwork::Create()); + + IConnectableLayer* input0 = net->AddInputLayer(0, "input0"); + IConnectableLayer* input1 = net->AddInputLayer(1, "input1"); + IConnectableLayer* input2 = net->AddInputLayer(2, "input2"); + IConnectableLayer* add = net->AddAdditionLayer("add"); + IConnectableLayer* sub = net->AddSubtractionLayer("sub"); + IConnectableLayer* output = net->AddOutputLayer(0, "output"); + + input0->GetOutputSlot(0).Connect(add->GetInputSlot(0)); + input1->GetOutputSlot(0).Connect(add->GetInputSlot(1)); + input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0)); + add->GetOutputSlot(0).Connect(sub->GetInputSlot(1)); + sub->GetOutputSlot(0).Connect(output->GetInputSlot(0)); + + TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32); + + input0->GetOutputSlot(0).SetTensorInfo(info); + input1->GetOutputSlot(0).SetTensorInfo(info); + input2->GetOutputSlot(0).SetTensorInfo(info); + add->GetOutputSlot(0).SetTensorInfo(info); + sub->GetOutputSlot(0).SetTensorInfo(info); + + // optimize the network + std::vector backends = { "MockRef", Compute::CpuAcc }; + IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec()); + + OptimizedNetwork* optNetObjPtr = PolymorphicDowncast(optNet.get()); + Graph& graph = optNetObjPtr->GetGraph(); + + armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0"); + armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1"); + armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2"); + armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add"); + armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]"); + armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub"); + armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "output"); + + // Checks order is valid. 
+ BOOST_TEST(CheckOrder(graph, layer0, layer1)); + BOOST_TEST(CheckOrder(graph, layer1, layer2)); + BOOST_TEST(CheckOrder(graph, layer2, layer3)); + BOOST_TEST(CheckOrder(graph, layer3, layer4)); + BOOST_TEST(CheckOrder(graph, layer4, layer5)); + BOOST_TEST(CheckOrder(graph, layer5, layer6)); + + // Load it into the runtime. It should pass. + NetworkId netId; + std::string ignoredErrorMessage; + INetworkProperties networkProperties(true, true); + + runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties); + + // Creates structures for input & output + std::vector inputData0 + { + 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f + }; + std::vector inputData1 + { + 0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f + }; + std::vector inputData2 + { + 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f + }; + + std::vector outputData(12); + + std::vector expectedOutput + { + 11.0f, 9.0f, 7.0f, 5.0f, 3.0f, 1.0f, -1.0f, -3.0f, -5.0f, -7.0f, -9.0f, -11.0f + }; + + InputTensors inputTensors + { + { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) }, + { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) }, + { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) } + }; + OutputTensors outputTensors + { + { 0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) } + }; + + runtime->GetProfiler(netId)->EnableProfiling(true); + + // Do the inference + runtime->EnqueueWorkload(netId, inputTensors, outputTensors); + + // Retrieve the Profiler.Print() output to get the workload execution + ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance(); + std::stringstream ss; + profilerManager.GetProfiler()->Print(ss);; + std::string dump = ss.str(); + + // Contains ImportMemGeneric + std::size_t found = dump.find("ImportMemGeneric"); + BOOST_TEST(found != 
std::string::npos); + + // Contains SyncMemGeneric + found = dump.find("SyncMemGeneric"); + BOOST_TEST(found != std::string::npos); + + // Does not contain CopyMemGeneric + found = dump.find("CopyMemGeneric"); + BOOST_TEST(found == std::string::npos); + + // Use memory import between backends + BOOST_TEST((layer4->GetType() == LayerType::MemImport)); + + // Check output is as expected + BOOST_TEST(outputData == expectedOutput); +} + +BOOST_AUTO_TEST_CASE(FallbackPaddingCopyToCpuAcc) +{ + using namespace armnn; + + // Create a mock backend object + MockImportBackendInitialiser initialiser; // Register the Mock Backend + auto backendObjPtr = CreateBackendObject(MockImportBackendId()); + BOOST_TEST((backendObjPtr != nullptr)); + + BackendIdSet backendIds = BackendRegistryInstance().GetBackendIds(); + if (backendIds.find("MockRef") == backendIds.end()) + { + std::string message = "Cannot load MockRef"; + BOOST_FAIL(message); + } + + // Create runtime in which test will run and allow fallback to CpuAcc. + IRuntime::CreationOptions options; + IRuntimePtr runtime(IRuntime::Create(options)); + + // Builds up the structure of the network. 
+ INetworkPtr net(INetwork::Create()); + + Pooling2dDescriptor desc; + + IConnectableLayer* input0 = net->AddInputLayer(0, "input0"); + IConnectableLayer* input1 = net->AddInputLayer(1, "input1"); + IConnectableLayer* add = net->AddAdditionLayer("add"); + IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling"); + IConnectableLayer* output = net->AddOutputLayer(0, "output"); + + input0->GetOutputSlot(0).Connect(add->GetInputSlot(0)); + input1->GetOutputSlot(0).Connect(add->GetInputSlot(1)); + add->GetOutputSlot(0).Connect(pooling->GetInputSlot(0)); + pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0)); + + TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32); + TensorInfo poolingInfo = TensorInfo({ 1, 2, 1, 1 }, DataType::Float32); + + input0->GetOutputSlot(0).SetTensorInfo(info); + input1->GetOutputSlot(0).SetTensorInfo(info); + add->GetOutputSlot(0).SetTensorInfo(info); + pooling->GetOutputSlot(0).SetTensorInfo(poolingInfo); + + // optimize the network + std::vector backends = { "MockRef", Compute::CpuAcc }; + IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec()); + + OptimizedNetwork* optNetObjPtr = PolymorphicDowncast(optNet.get()); + Graph& graph = optNetObjPtr->GetGraph(); + + armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0"); + armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1"); + armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "add"); + armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "[ add (0) -> pooling (0) ]"); + armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "pooling"); + armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "output"); + + // Checks order is valid. 
+ BOOST_TEST(CheckOrder(graph, layer0, layer1)); + BOOST_TEST(CheckOrder(graph, layer1, layer2)); + BOOST_TEST(CheckOrder(graph, layer2, layer3)); + BOOST_TEST(CheckOrder(graph, layer3, layer4)); + BOOST_TEST(CheckOrder(graph, layer4, layer5)); + + // Load it into the runtime. It should pass. + NetworkId netId; + std::string ignoredErrorMessage; + INetworkProperties networkProperties(true, true); + + runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties); + + // Creates structures for input & output + std::vector inputData0 + { + 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f + }; + std::vector inputData1 + { + 0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f + }; + + std::vector outputData(2); + + std::vector expectedOutput + { + 6.0f, 12.0f + }; + + InputTensors inputTensors + { + { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) }, + { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) } + }; + OutputTensors outputTensors + { + { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) } + }; + + runtime->GetProfiler(netId)->EnableProfiling(true); + + // Do the inference + runtime->EnqueueWorkload(netId, inputTensors, outputTensors); + + // Retrieve the Profiler.Print() output to get the workload execution + ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance(); + std::stringstream ss; + profilerManager.GetProfiler()->Print(ss);; + std::string dump = ss.str(); + + // Contains CopyMemGeneric between the backends + std::size_t found = dump.find("CopyMemGeneric"); + BOOST_TEST(found != std::string::npos); + + // Contains SyncMemGeneric for the output + found = dump.find("SyncMemGeneric"); + BOOST_TEST(found != std::string::npos); + + // Does not contain ImportMemGeneric + found = dump.find("ImportMemGeneric"); + BOOST_TEST(found == std::string::npos); + + // Use memory copy between 
backends + BOOST_TEST((layer3->GetType() == LayerType::MemCopy)); + + // Check output is as expected + BOOST_TEST(outputData == expectedOutput); +} + +BOOST_AUTO_TEST_CASE(FallbackImportFromCpuAcc) +{ + using namespace armnn; + + // Create a mock backend object + MockImportBackendInitialiser initialiser; // Register the Mock Backend + auto backendObjPtr = CreateBackendObject(MockImportBackendId()); + BOOST_TEST((backendObjPtr != nullptr)); + + BackendIdSet backendIds = BackendRegistryInstance().GetBackendIds(); + if (backendIds.find("MockRef") == backendIds.end()) + { + std::string message = "Cannot load MockRef"; + BOOST_FAIL(message); + } + + // Create runtime in which test will run and allow fallback to CpuAcc. + IRuntime::CreationOptions options; + IRuntimePtr runtime(IRuntime::Create(options)); + + // Builds up the structure of the network. + INetworkPtr net(INetwork::Create()); + + IConnectableLayer* input0 = net->AddInputLayer(0, "input0"); + IConnectableLayer* input1 = net->AddInputLayer(1, "input1"); + IConnectableLayer* input2 = net->AddInputLayer(2, "input2"); + IConnectableLayer* sub = net->AddSubtractionLayer("sub"); + IConnectableLayer* add = net->AddAdditionLayer("add"); + IConnectableLayer* output = net->AddOutputLayer(0, "output"); + + input0->GetOutputSlot(0).Connect(sub->GetInputSlot(0)); + input1->GetOutputSlot(0).Connect(sub->GetInputSlot(1)); + input2->GetOutputSlot(0).Connect(add->GetInputSlot(0)); + sub->GetOutputSlot(0).Connect(add->GetInputSlot(1)); + add->GetOutputSlot(0).Connect(output->GetInputSlot(0)); + + TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32); + + input0->GetOutputSlot(0).SetTensorInfo(info); + input1->GetOutputSlot(0).SetTensorInfo(info); + input2->GetOutputSlot(0).SetTensorInfo(info); + sub->GetOutputSlot(0).SetTensorInfo(info); + add->GetOutputSlot(0).SetTensorInfo(info); + + // optimize the network + std::vector backends = { "MockRef", Compute::CpuAcc }; + IOptimizedNetworkPtr optNet = Optimize(*net, 
backends, runtime->GetDeviceSpec()); + + OptimizedNetwork* optNetObjPtr = PolymorphicDowncast(optNet.get()); + Graph& graph = optNetObjPtr->GetGraph(); + + armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0"); + armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1"); + armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2"); + armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "sub"); + armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ sub (0) -> add (1) ]"); + armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "add"); + armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "output"); + + // Checks order is valid. + BOOST_TEST(CheckOrder(graph, layer0, layer1)); + BOOST_TEST(CheckOrder(graph, layer1, layer2)); + BOOST_TEST(CheckOrder(graph, layer2, layer3)); + BOOST_TEST(CheckOrder(graph, layer3, layer4)); + BOOST_TEST(CheckOrder(graph, layer4, layer5)); + BOOST_TEST(CheckOrder(graph, layer5, layer6)); + + // Load it into the runtime. It should pass. 
+ NetworkId netId; + std::string ignoredErrorMessage; + INetworkProperties networkProperties(true, true); + + runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties); + + // Creates structures for input & output + std::vector inputData0 + { + 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 0.0f + }; + std::vector inputData1 + { + 0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f + }; + std::vector inputData2 + { + 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f + }; + + std::vector outputData(12); + + std::vector expectedOutput + { + 13.0f, 11.0f, 11.0f, 9.0f, 7.0f, 7.0f, 7.0f, 5.0f, 5.0f, 3.0f, 3.0f, -5.0f + }; + + InputTensors inputTensors + { + { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) }, + { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) }, + { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) } + }; + OutputTensors outputTensors + { + { 0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) } + }; + + runtime->GetProfiler(netId)->EnableProfiling(true); + + // Do the inference + runtime->EnqueueWorkload(netId, inputTensors, outputTensors); + + // Retrieve the Profiler.Print() output to get the workload execution + ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance(); + std::stringstream ss; + profilerManager.GetProfiler()->Print(ss);; + std::string dump = ss.str(); + + // Contains ImportMemGeneric + std::size_t found = dump.find("ImportMemGeneric"); + BOOST_TEST(found != std::string::npos); + + // Contains SyncMemGeneric + found = dump.find("SyncMemGeneric"); + BOOST_TEST(found != std::string::npos); + + // Does not contain CopyMemGeneric + found = dump.find("CopyMemGeneric"); + BOOST_TEST(found == std::string::npos); + + // Use memory import between backends + BOOST_TEST((layer4->GetType() == LayerType::MemImport)); + + // 
Check output is as expected + BOOST_TEST(outputData == expectedOutput); +} + +BOOST_AUTO_TEST_CASE(FallbackPaddingCopyFromCpuAcc) +{ + using namespace armnn; + + // Create a mock backend object + MockImportBackendInitialiser initialiser; // Register the Mock Backend + auto backendObjPtr = CreateBackendObject(MockImportBackendId()); + BOOST_TEST((backendObjPtr != nullptr)); + + BackendIdSet backendIds = BackendRegistryInstance().GetBackendIds(); + if (backendIds.find("MockRef") == backendIds.end()) + { + std::string message = "Cannot load MockRef"; + BOOST_FAIL(message); + } + + // Create runtime in which test will run and allow fallback to CpuAcc. + IRuntime::CreationOptions options; + IRuntimePtr runtime(IRuntime::Create(options)); + + // Builds up the structure of the network. + INetworkPtr net(INetwork::Create()); + + Pooling2dDescriptor desc; + + IConnectableLayer* input0 = net->AddInputLayer(0, "input0"); + IConnectableLayer* input1 = net->AddInputLayer(1, "input1"); + IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling"); + IConnectableLayer* add = net->AddAdditionLayer("add"); + IConnectableLayer* output = net->AddOutputLayer(0, "output"); + + input0->GetOutputSlot(0).Connect(pooling->GetInputSlot(0)); + input1->GetOutputSlot(0).Connect(add->GetInputSlot(1)); + pooling->GetOutputSlot(0).Connect(add->GetInputSlot(0)); + add->GetOutputSlot(0).Connect(output->GetInputSlot(0)); + + TensorInfo inputInfo = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32); + TensorInfo poolingInfo = TensorInfo({ 1, 2, 1, 1 }, DataType::Float32); + + input0->GetOutputSlot(0).SetTensorInfo(inputInfo); + input1->GetOutputSlot(0).SetTensorInfo(poolingInfo); + pooling->GetOutputSlot(0).SetTensorInfo(poolingInfo); + add->GetOutputSlot(0).SetTensorInfo(poolingInfo); + + // optimize the network + std::vector backends = { "MockRef", Compute::CpuAcc }; + IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec()); + + OptimizedNetwork* optNetObjPtr = 
PolymorphicDowncast(optNet.get()); + Graph& graph = optNetObjPtr->GetGraph(); + + armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0"); + armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1"); + armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "pooling"); + armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "[ pooling (0) -> add (0) ]"); + armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "add"); + armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "output"); + + // Checks order is valid. + BOOST_TEST(CheckOrder(graph, layer0, layer1)); + BOOST_TEST(CheckOrder(graph, layer1, layer2)); + BOOST_TEST(CheckOrder(graph, layer2, layer3)); + BOOST_TEST(CheckOrder(graph, layer3, layer4)); + BOOST_TEST(CheckOrder(graph, layer4, layer5)); + + // Load it into the runtime. It should pass. + NetworkId netId; + std::string ignoredErrorMessage; + INetworkProperties networkProperties(true, true); + + runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties); + + // Creates structures for input & output + std::vector inputData0 + { + 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f + }; + std::vector inputData1 + { + -1.0f, 3.0f + }; + + std::vector outputData(2); + + std::vector expectedOutput + { + 5.0f, 15.0f + }; + + InputTensors inputTensors + { + { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) }, + { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) } + }; + OutputTensors outputTensors + { + { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) } + }; + + runtime->GetProfiler(netId)->EnableProfiling(true); + + // Do the inference + runtime->EnqueueWorkload(netId, inputTensors, outputTensors); + + // Retrieve the Profiler.Print() output to get the workload execution + ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance(); + std::stringstream ss; + 
profilerManager.GetProfiler()->Print(ss);; + std::string dump = ss.str(); + + // Contains CopyMemGeneric between the backends + std::size_t found = dump.find("CopyMemGeneric"); + BOOST_TEST(found != std::string::npos); + + // Contains SyncMemGeneric for the output + found = dump.find("SyncMemGeneric"); + BOOST_TEST(found != std::string::npos); + + // Does not contain ImportMemGeneric + found = dump.find("ImportMemGeneric"); + BOOST_TEST(found == std::string::npos); + + // Use memory import between backends + BOOST_TEST((layer3->GetType() == LayerType::MemCopy)); + + // Check output is as expected + BOOST_TEST(outputData == expectedOutput); +} + +BOOST_AUTO_TEST_SUITE_END() diff --git a/src/backends/neon/test/NeonTensorHandleTests.cpp b/src/backends/neon/test/NeonTensorHandleTests.cpp index 8b3e3fdc99..c6a562f0e5 100644 --- a/src/backends/neon/test/NeonTensorHandleTests.cpp +++ b/src/backends/neon/test/NeonTensorHandleTests.cpp @@ -12,6 +12,7 @@ #include #include +#include #include @@ -160,4 +161,77 @@ BOOST_AUTO_TEST_CASE(ConcatOnXorYSubTensorsNoPaddinRequiredTest) } } +BOOST_AUTO_TEST_CASE(NeonTensorHandleFactoryMemoryManaged) +{ + std::shared_ptr memoryManager = std::make_shared( + std::make_unique(), + BaseMemoryManager::MemoryAffinity::Offset); + NeonTensorHandleFactory handleFactory(memoryManager); + TensorInfo info({ 1, 1, 2, 1 }, DataType::Float32); + + // create TensorHandle with memory managed + auto handle = handleFactory.CreateTensorHandle(info, true); + handle->Manage(); + handle->Allocate(); + + memoryManager->Acquire(); + { + float* buffer = reinterpret_cast(handle->Map()); + BOOST_CHECK(buffer != nullptr); // Yields a valid pointer + buffer[0] = 1.5f; + buffer[1] = 2.5f; + BOOST_CHECK(buffer[0] == 1.5f); // Memory is writable and readable + BOOST_CHECK(buffer[1] == 2.5f); // Memory is writable and readable + } + memoryManager->Release(); + + memoryManager->Acquire(); + { + float* buffer = reinterpret_cast(handle->Map()); + BOOST_CHECK(buffer != 
nullptr); // Yields a valid pointer + buffer[0] = 3.5f; + buffer[1] = 4.5f; + BOOST_CHECK(buffer[0] == 3.5f); // Memory is writable and readable + BOOST_CHECK(buffer[1] == 4.5f); // Memory is writable and readable + } + memoryManager->Release(); + + float testPtr[2] = { 2.5f, 5.5f }; + // Cannot import as import is disabled + BOOST_CHECK(!handle->Import(static_cast(testPtr), MemorySource::Malloc)); +} + +BOOST_AUTO_TEST_CASE(NeonTensorHandleFactoryImport) +{ + std::shared_ptr memoryManager = std::make_shared( + std::make_unique(), + BaseMemoryManager::MemoryAffinity::Offset); + NeonTensorHandleFactory handleFactory(memoryManager); + TensorInfo info({ 1, 1, 2, 1 }, DataType::Float32); + + // create TensorHandle without memory managed + auto handle = handleFactory.CreateTensorHandle(info, false); + handle->Manage(); + handle->Allocate(); + memoryManager->Acquire(); + + // No buffer allocated when import is enabled + BOOST_CHECK((PolymorphicDowncast(handle.get()))->GetTensor().buffer() == nullptr); + + float testPtr[2] = { 2.5f, 5.5f }; + // Correctly import + BOOST_CHECK(handle->Import(static_cast(testPtr), MemorySource::Malloc)); + float* buffer = reinterpret_cast(handle->Map()); + BOOST_CHECK(buffer != nullptr); // Yields a valid pointer after import + BOOST_CHECK(buffer == testPtr); // buffer is pointing to testPtr + // Memory is writable and readable with correct value + BOOST_CHECK(buffer[0] == 2.5f); + BOOST_CHECK(buffer[1] == 5.5f); + buffer[0] = 3.5f; + buffer[1] = 10.0f; + BOOST_CHECK(buffer[0] == 3.5f); + BOOST_CHECK(buffer[1] == 10.0f); + memoryManager->Release(); +} + BOOST_AUTO_TEST_SUITE_END() -- cgit v1.2.1