From e5f0b2409c2e557a5a78e2f4659d203154289b23 Mon Sep 17 00:00:00 2001 From: Narumol Prangnawarat Date: Fri, 7 May 2021 17:52:36 +0100 Subject: IVGCVSW-5818 Enable import on GPU Signed-off-by: Narumol Prangnawarat Change-Id: I4e4eb107aa2bfa09625840d738001f33152e6792 --- src/backends/backendsCommon/IBackendInternal.cpp | 18 ++++ src/backends/cl/ClBackend.cpp | 41 +++++++- src/backends/cl/ClBackend.hpp | 13 ++- src/backends/cl/ClImportTensorHandleFactory.cpp | 20 ++++ src/backends/cl/ClImportTensorHandleFactory.hpp | 6 ++ src/backends/cl/backend.mk | 1 + src/backends/cl/test/ClFallbackTests.cpp | 91 ++++++++++++----- src/backends/cl/test/ClImportTensorHandleTests.cpp | 112 ++++++++++++++++++++- src/backends/neon/test/NeonFallbackTests.cpp | 73 ++++++++++---- 9 files changed, 324 insertions(+), 51 deletions(-) (limited to 'src/backends') diff --git a/src/backends/backendsCommon/IBackendInternal.cpp b/src/backends/backendsCommon/IBackendInternal.cpp index b08dff84ed..31706eb1e7 100644 --- a/src/backends/backendsCommon/IBackendInternal.cpp +++ b/src/backends/backendsCommon/IBackendInternal.cpp @@ -76,6 +76,17 @@ IBackendInternal::IWorkloadFactoryPtr IBackendInternal::CreateWorkloadFactory( return CreateWorkloadFactory(tensorHandleFactoryRegistry); } +IBackendInternal::IWorkloadFactoryPtr IBackendInternal::CreateWorkloadFactory( + class TensorHandleFactoryRegistry& tensorHandleFactoryRegistry, + const ModelOptions& modelOptions, + MemorySourceFlags inputFlags, + MemorySourceFlags outputFlags) const +{ + IgnoreUnused(inputFlags); + IgnoreUnused(outputFlags); + return CreateWorkloadFactory(tensorHandleFactoryRegistry, modelOptions); +} + IBackendInternal::IBackendContextPtr IBackendInternal::CreateBackendContext(const IRuntime::CreationOptions&) const { return IBackendContextPtr{}; @@ -147,6 +158,13 @@ bool IBackendInternal::SupportsTensorAllocatorAPI() const return !GetHandleFactoryPreferences().empty(); } +void IBackendInternal::RegisterTensorHandleFactories(class TensorHandleFactoryRegistry& registry, + MemorySourceFlags /*inputFlags*/, + MemorySourceFlags /*outputFlags*/) +{ + return RegisterTensorHandleFactories(registry); +} + ITensorHandleFactory::FactoryId IBackendInternal::GetBackwardCompatibleFavoriteHandleFactory() { auto favorites = GetHandleFactoryPreferences(); diff --git a/src/backends/cl/ClBackend.cpp b/src/backends/cl/ClBackend.cpp index f97cb4bba8..35770d9219 100644 --- a/src/backends/cl/ClBackend.cpp +++ b/src/backends/cl/ClBackend.cpp @@ -4,12 +4,13 @@ // #include "ClBackend.hpp" +#include "ClBackendContext.hpp" #include "ClBackendId.hpp" #include "ClBackendModelContext.hpp" -#include "ClWorkloadFactory.hpp" -#include "ClBackendContext.hpp" +#include "ClImportTensorHandleFactory.hpp" #include "ClLayerSupport.hpp" #include "ClTensorHandleFactory.hpp" +#include "ClWorkloadFactory.hpp" #include #include @@ -71,6 +72,8 @@ IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory( registry.RegisterMemoryManager(memoryManager); registry.RegisterFactory(std::make_unique(memoryManager)); + registry.RegisterFactory(std::make_unique( + static_cast(MemorySource::Malloc), static_cast(MemorySource::Malloc))); return std::make_unique( PolymorphicPointerDowncast(memoryManager)); @@ -83,6 +86,24 @@ IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory( registry.RegisterMemoryManager(memoryManager); registry.RegisterFactory(std::make_unique(memoryManager)); + registry.RegisterFactory(std::make_unique( + static_cast(MemorySource::Malloc), static_cast(MemorySource::Malloc))); + + return std::make_unique( + PolymorphicPointerDowncast(memoryManager), CreateBackendSpecificModelContext(modelOptions)); +} + +IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory( + TensorHandleFactoryRegistry& registry, + const ModelOptions& modelOptions, + MemorySourceFlags inputFlags, + MemorySourceFlags outputFlags) const +{ + auto memoryManager = std::make_shared(std::make_unique()); + + registry.RegisterMemoryManager(memoryManager); + registry.RegisterFactory(std::make_unique(memoryManager)); + registry.RegisterFactory(std::make_unique(inputFlags, outputFlags)); return std::make_unique( PolymorphicPointerDowncast(memoryManager), CreateBackendSpecificModelContext(modelOptions)); @@ -90,7 +111,8 @@ IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory( std::vector ClBackend::GetHandleFactoryPreferences() const { - return std::vector {ClTensorHandleFactory::GetIdStatic()}; + return std::vector {ClTensorHandleFactory::GetIdStatic(), + ClImportTensorHandleFactory::GetIdStatic()}; } void ClBackend::RegisterTensorHandleFactories(TensorHandleFactoryRegistry& registry) @@ -99,6 +121,19 @@ void ClBackend::RegisterTensorHandleFactories(TensorHandleFactoryRegistry& regis registry.RegisterMemoryManager(mgr); registry.RegisterFactory(std::make_unique(mgr)); + registry.RegisterFactory(std::make_unique( + static_cast(MemorySource::Malloc), static_cast(MemorySource::Malloc))); +} + +void ClBackend::RegisterTensorHandleFactories(TensorHandleFactoryRegistry& registry, + MemorySourceFlags inputFlags, + MemorySourceFlags outputFlags) +{ + auto mgr = std::make_shared(std::make_unique()); + + registry.RegisterMemoryManager(mgr); + registry.RegisterFactory(std::make_unique(mgr)); + registry.RegisterFactory(std::make_unique(inputFlags, outputFlags)); } IBackendInternal::IBackendContextPtr ClBackend::CreateBackendContext(const IRuntime::CreationOptions& options) const diff --git a/src/backends/cl/ClBackend.hpp b/src/backends/cl/ClBackend.hpp index f9a5745eb3..252d87edea 100644 --- a/src/backends/cl/ClBackend.hpp +++ b/src/backends/cl/ClBackend.hpp @@ -30,16 +30,25 @@ public: IBackendInternal::IWorkloadFactoryPtr CreateWorkloadFactory( TensorHandleFactoryRegistry& registry) const override; - IWorkloadFactoryPtr CreateWorkloadFactory( const IMemoryManagerSharedPtr& memoryManager, - const ModelOptions& modelOptions) const override; + IWorkloadFactoryPtr CreateWorkloadFactory(const IMemoryManagerSharedPtr& memoryManager, + const ModelOptions& modelOptions) const override; IWorkloadFactoryPtr CreateWorkloadFactory(class TensorHandleFactoryRegistry& tensorHandleFactoryRegistry, const ModelOptions& modelOptions) const override; + IWorkloadFactoryPtr CreateWorkloadFactory(class TensorHandleFactoryRegistry& tensorHandleFactoryRegistry, + const ModelOptions& modelOptions, + MemorySourceFlags inputFlags, + MemorySourceFlags outputFlags) const override; + std::vector GetHandleFactoryPreferences() const override; void RegisterTensorHandleFactories(TensorHandleFactoryRegistry& registry) override; + void RegisterTensorHandleFactories(TensorHandleFactoryRegistry& registry, + MemorySourceFlags inputFlags, + MemorySourceFlags outputFlags) override; + IBackendInternal::IBackendContextPtr CreateBackendContext(const IRuntime::CreationOptions&) const override; IBackendInternal::IBackendProfilingContextPtr CreateBackendProfilingContext( const IRuntime::CreationOptions&, IBackendProfilingPtr& backendProfiling) override; diff --git a/src/backends/cl/ClImportTensorHandleFactory.cpp b/src/backends/cl/ClImportTensorHandleFactory.cpp index 594e05423e..26d5f9c47a 100644 --- a/src/backends/cl/ClImportTensorHandleFactory.cpp +++ b/src/backends/cl/ClImportTensorHandleFactory.cpp @@ -106,6 +106,11 @@ bool ClImportTensorHandleFactory::SupportsSubTensors() const return true; } +bool ClImportTensorHandleFactory::SupportsMapUnmap() const +{ + return false; +} + MemorySourceFlags ClImportTensorHandleFactory::GetExportFlags() const { return m_ExportFlags; @@ -116,4 +121,19 @@ MemorySourceFlags ClImportTensorHandleFactory::GetImportFlags() const return m_ImportFlags; } +std::vector ClImportTensorHandleFactory::GetCapabilities(const IConnectableLayer* layer, + const IConnectableLayer* connectedLayer, + CapabilityClass capabilityClass) +{ + IgnoreUnused(layer); + IgnoreUnused(connectedLayer); + std::vector capabilities; + if (capabilityClass == CapabilityClass::FallbackImportDisabled) + { + Capability paddingCapability(CapabilityClass::FallbackImportDisabled, true); + capabilities.push_back(paddingCapability); + } + return capabilities; +} + } // namespace armnn \ No newline at end of file diff --git a/src/backends/cl/ClImportTensorHandleFactory.hpp b/src/backends/cl/ClImportTensorHandleFactory.hpp index ee2f84efda..7e22949647 100644 --- a/src/backends/cl/ClImportTensorHandleFactory.hpp +++ b/src/backends/cl/ClImportTensorHandleFactory.hpp @@ -58,10 +58,16 @@ public: bool SupportsSubTensors() const override; + bool SupportsMapUnmap() const override; + MemorySourceFlags GetExportFlags() const override; MemorySourceFlags GetImportFlags() const override; + std::vector GetCapabilities(const IConnectableLayer* layer, + const IConnectableLayer* connectedLayer, + CapabilityClass capabilityClass) override; + private: MemorySourceFlags m_ImportFlags; MemorySourceFlags m_ExportFlags; diff --git a/src/backends/cl/backend.mk b/src/backends/cl/backend.mk index 976f614cff..e6c289cf39 100644 --- a/src/backends/cl/backend.mk +++ b/src/backends/cl/backend.mk @@ -20,6 +20,7 @@ BACKEND_SOURCES := \ ClContextControl.cpp \ ClContextDeserializer.cpp \ ClContextSerializer.cpp \ + ClImportTensorHandleFactory.cpp \ ClLayerSupport.cpp \ ClRegistryInitializer.cpp \ ClTensorHandleFactory.cpp \ diff --git a/src/backends/cl/test/ClFallbackTests.cpp b/src/backends/cl/test/ClFallbackTests.cpp index eec3afe447..183b8caa2e 100644 --- a/src/backends/cl/test/ClFallbackTests.cpp +++ b/src/backends/cl/test/ClFallbackTests.cpp @@ -11,7 +11,7 @@ BOOST_AUTO_TEST_SUITE(ClFallback) -BOOST_AUTO_TEST_CASE(ClImportEnabledFallbackToNeon, * boost::unit_test::disabled()) +BOOST_AUTO_TEST_CASE(ClImportEnabledFallbackToNeon) { using namespace armnn; @@ -34,7 +34,7 @@ BOOST_AUTO_TEST_CASE(ClImportEnabledFallbackToNeon, * boost::unit_test::disabled add->GetOutputSlot(0).Connect(sub->GetInputSlot(1)); sub->GetOutputSlot(0).Connect(output->GetInputSlot(0)); - TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32); + TensorInfo info = TensorInfo({ 1, 2, 4, 2 }, DataType::Float32); input0->GetOutputSlot(0).SetTensorInfo(info); input1->GetOutputSlot(0).SetTensorInfo(info); @@ -82,30 +82,49 @@ BOOST_AUTO_TEST_CASE(ClImportEnabledFallbackToNeon, * boost::unit_test::disabled runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties); // Creates structures for input & output - std::vector inputData0 + std::vector inputValue0 { - 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f + 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f, 1.0f, 1.0f, 2.0f, 2.0f }; - std::vector inputData1 + std::vector inputValue1 { - 0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f + 0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 0.0f, 1.0f, 1.0f, 2.0f }; std::vector inputData2 { - 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f + 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 12.0f, 11.0f, 10.0f, 9.0f }; - std::vector outputData(12); + std::vector outputData(16); std::vector expectedOutput { - 11.0f, 9.0f, 7.0f, 5.0f, 3.0f, 1.0f, -1.0f, -3.0f, -5.0f, -7.0f, -9.0f, -11.0f + 11.0f, 9.0f, 7.0f, 5.0f, 3.0f, 1.0f, -1.0f, -3.0f, -5.0f, -7.0f, -9.0f, -11.0f, 11.0f, 9.0f, 7.0f, 5.0f }; + // Prepare aligned data + unsigned int numElements = info.GetNumElements(); + size_t totalBytes = numElements * sizeof(float); + const size_t alignment = 64; + size_t space = totalBytes + alignment + alignment; + auto inputData0 = std::make_unique(space); + void* alignedInputPtr0 = inputData0.get(); + BOOST_CHECK(std::align(alignment, totalBytes, alignedInputPtr0, space)); + + auto* intputPtr0 = reinterpret_cast(alignedInputPtr0); + std::copy(inputValue0.begin(), inputValue0.end(), intputPtr0); + + auto inputData1 = std::make_unique(space); + void* alignedInputPtr1 = inputData1.get(); + BOOST_CHECK(std::align(alignment, totalBytes, alignedInputPtr1, space)); + + auto* intputPtr1 = reinterpret_cast(alignedInputPtr1); + std::copy(inputValue1.begin(), inputValue1.end(), intputPtr1); + InputTensors inputTensors { - { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) }, - { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) }, + { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), alignedInputPtr0) }, + { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), alignedInputPtr1) }, { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) } }; OutputTensors outputTensors @@ -134,6 +153,8 @@ BOOST_AUTO_TEST_CASE(ClImportEnabledFallbackToNeon, * boost::unit_test::disabled // Check output is as expected BOOST_TEST(outputData == expectedOutput); + + runtime->UnloadNetwork(netId); } BOOST_AUTO_TEST_CASE(ClImportDisabledFallbackToNeon) @@ -258,7 +279,7 @@ BOOST_AUTO_TEST_CASE(ClImportDisabledFallbackToNeon) BOOST_TEST(outputData == expectedOutput); } -BOOST_AUTO_TEST_CASE(ClImportEnabledFallbackSubgraphToNeon, * boost::unit_test::disabled()) +BOOST_AUTO_TEST_CASE(ClImportEnabledFallbackSubgraphToNeon) { using namespace armnn; @@ -269,6 +290,10 @@ BOOST_AUTO_TEST_CASE(ClImportEnabledFallbackSubgraphToNeon, * boost::unit_test:: INetworkPtr net(INetwork::Create()); Pooling2dDescriptor desc; + desc.m_PoolWidth = 2; + desc.m_PoolHeight = 2; + desc.m_StrideX = 2; + desc.m_StrideY = 2; IConnectableLayer* input0 = net->AddInputLayer(0, "input0"); IConnectableLayer* input1 = net->AddInputLayer(1, "input1"); @@ -285,8 +310,8 @@ BOOST_AUTO_TEST_CASE(ClImportEnabledFallbackSubgraphToNeon, * boost::unit_test:: sub->GetOutputSlot(0).Connect(pooling->GetInputSlot(0)); pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0)); - TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32); - TensorInfo poolingInfo = TensorInfo({ 1, 2, 1, 1 }, DataType::Float32); + TensorInfo info = TensorInfo({ 1, 2, 4, 2 }, DataType::Float32); + TensorInfo poolingInfo = TensorInfo({ 1, 2, 2, 1 }, DataType::Float32); input0->GetOutputSlot(0).SetTensorInfo(info); input1->GetOutputSlot(0).SetTensorInfo(info); @@ -340,27 +365,45 @@ BOOST_AUTO_TEST_CASE(ClImportEnabledFallbackSubgraphToNeon, * boost::unit_test:: runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties); // Creates structures for input & output - std::vector inputData0 + std::vector inputValue0 { - 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f + 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f, 1.0f, 1.0f, 2.0f, 2.0f }; - std::vector inputData1 + std::vector inputValue1 { - 0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f + 0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 0.0f, 1.0f, 1.0f, 2.0f }; std::vector inputData2 { - 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f + 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 12.0f, 11.0f, 10.0f, 9.0f }; - std::vector outputData(2); + std::vector outputData(4); - std::vector expectedOutput{ 11.0f, -1.0f }; + std::vector expectedOutput{ 11.0f, 3.0f, -5.0f, 11.0f }; + + unsigned int numElements = info.GetNumElements(); + size_t totalBytes = numElements * sizeof(float); + const size_t alignment = 64; + size_t space = totalBytes + alignment + alignment; + auto inputData0 = std::make_unique(space); + void* alignedInputPtr0 = inputData0.get(); + BOOST_CHECK(std::align(alignment, totalBytes, alignedInputPtr0, space)); + + auto* intputPtr0 = reinterpret_cast(alignedInputPtr0); + std::copy(inputValue0.begin(), inputValue0.end(), intputPtr0); + + auto inputData1 = std::make_unique(space); + void* alignedInputPtr1 = inputData1.get(); + BOOST_CHECK(std::align(alignment, totalBytes, alignedInputPtr1, space)); + + auto* intputPtr1 = reinterpret_cast(alignedInputPtr1); + std::copy(inputValue1.begin(), inputValue1.end(), intputPtr1); InputTensors inputTensors { - { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) }, - { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) }, + { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), alignedInputPtr0) }, + { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), alignedInputPtr1) }, { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) } }; OutputTensors outputTensors @@ -393,6 +436,8 @@ BOOST_AUTO_TEST_CASE(ClImportEnabledFallbackSubgraphToNeon, * boost::unit_test:: // Check output is as expected BOOST_TEST(outputData == expectedOutput); + + runtime->UnloadNetwork(netId); } BOOST_AUTO_TEST_CASE(ClImportDisableFallbackSubgraphToNeon) diff --git a/src/backends/cl/test/ClImportTensorHandleTests.cpp b/src/backends/cl/test/ClImportTensorHandleTests.cpp index bfb74af801..85ff35f0af 100644 --- a/src/backends/cl/test/ClImportTensorHandleTests.cpp +++ b/src/backends/cl/test/ClImportTensorHandleTests.cpp @@ -11,6 +11,9 @@ #include +#include +#include + using namespace armnn; BOOST_AUTO_TEST_SUITE(ClImportTensorHandleTests) @@ -38,7 +41,7 @@ BOOST_FIXTURE_TEST_CASE(ClMallocImport, ClContextControlFixture) const size_t totalBytes = tensor.info()->total_size(); const size_t alignment = arm_compute::CLKernelLibrary::get().get_device().getInfo(); - size_t space = totalBytes + alignment; + size_t space = totalBytes + alignment + alignment; auto testData = std::make_unique(space); void* alignedPtr = testData.get(); BOOST_CHECK(std::align(alignment, totalBytes, alignedPtr, space)); @@ -57,7 +60,7 @@ BOOST_FIXTURE_TEST_CASE(ClMallocImport, ClContextControlFixture) // Validate result by checking that the output has no negative values for(unsigned int i = 0; i < numElements; ++i) { - BOOST_ASSERT(typedPtr[i] >= 0); + BOOST_TEST(typedPtr[i] >= 0); } } @@ -78,7 +81,7 @@ BOOST_FIXTURE_TEST_CASE(ClIncorrectMemorySourceImport, ClContextControlFixture) const size_t totalBytes = tensor.info()->total_size(); const size_t alignment = arm_compute::CLKernelLibrary::get().get_device().getInfo(); - size_t space = totalBytes + alignment; + size_t space = totalBytes + alignment + alignment; auto testData = std::make_unique(space); void* alignedPtr = testData.get(); BOOST_CHECK(std::align(alignment, totalBytes, alignedPtr, space)); @@ -108,4 +111,105 @@ BOOST_FIXTURE_TEST_CASE(ClInvalidMemorySourceImport, ClContextControlFixture) BOOST_CHECK_THROW(handle->Import(inputData.data(), invalidMemSource), MemoryImportException); } -BOOST_AUTO_TEST_SUITE_END() \ No newline at end of file +BOOST_FIXTURE_TEST_CASE(ClImportEndToEnd, ClContextControlFixture) +{ + // Create runtime in which test will run + IRuntime::CreationOptions options; + IRuntimePtr runtime(armnn::IRuntime::Create(options)); + + // build up the structure of the network + INetworkPtr net(INetwork::Create()); + + IConnectableLayer* input = net->AddInputLayer(0, "Input"); + + ActivationDescriptor descriptor; + descriptor.m_Function = ActivationFunction::ReLu; + IConnectableLayer* activation = net->AddActivationLayer(descriptor, "Activation"); + + IConnectableLayer* output = net->AddOutputLayer(0, "Output"); + + input->GetOutputSlot(0).Connect(activation->GetInputSlot(0)); + activation->GetOutputSlot(0).Connect(output->GetInputSlot(0)); + + TensorInfo tensorInfo = TensorInfo({ 1, 24, 16, 3 }, DataType::Float32); + unsigned int numElements = tensorInfo.GetNumElements(); + size_t totalBytes = numElements * sizeof(float); + + input->GetOutputSlot(0).SetTensorInfo(tensorInfo); + activation->GetOutputSlot(0).SetTensorInfo(tensorInfo); + + // Optimize the network + OptimizerOptions optOptions; + optOptions.m_ImportEnabled = true; + std::vector backends = {armnn::Compute::GpuAcc}; + IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions); + BOOST_CHECK(optNet); + + // Loads it into the runtime. + NetworkId netId; + std::string ignoredErrorMessage; + // Enable Importing + INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc); + runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties); + + // Creates structures for input & output + const size_t alignment = + arm_compute::CLKernelLibrary::get().get_device().getInfo(); + size_t space = totalBytes + alignment + alignment; + auto inputData = std::make_unique(space); + void* alignedInputPtr = inputData.get(); + BOOST_CHECK(std::align(alignment, totalBytes, alignedInputPtr, space)); + + // Input with negative values + auto* intputPtr = reinterpret_cast(alignedInputPtr); + std::fill_n(intputPtr, numElements, -5.0f); + + auto outputData = std::make_unique(space); + void* alignedOutputPtr = outputData.get(); + BOOST_CHECK(std::align(alignment, totalBytes, alignedOutputPtr, space)); + + InputTensors inputTensors + { + {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), alignedInputPtr)}, + }; + OutputTensors outputTensors + { + {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), alignedOutputPtr)} + }; + + runtime->GetProfiler(netId)->EnableProfiling(true); + + // Do the inference + runtime->EnqueueWorkload(netId, inputTensors, outputTensors); + + // Retrieve the Profiler.Print() output to get the workload execution + ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance(); + std::stringstream ss; + profilerManager.GetProfiler()->Print(ss);; + std::string dump = ss.str(); + + // Contains ActivationWorkload + std::size_t found = dump.find("ActivationWorkload"); + BOOST_TEST(found != std::string::npos); + + // Contains SyncMemGeneric + found = dump.find("SyncMemGeneric"); + BOOST_TEST(found != std::string::npos); + + // Does not contain CopyMemGeneric + found = dump.find("CopyMemGeneric"); + BOOST_TEST(found == std::string::npos); + + // Check output is as expected + // Validate result by checking that the output has no negative values + auto* outputResult = reinterpret_cast(alignedOutputPtr); + BOOST_TEST(outputResult); + for(unsigned int i = 0; i < numElements; ++i) + { + BOOST_TEST(outputResult[i] >= 0); + } + + runtime->UnloadNetwork(netId); +} + +BOOST_AUTO_TEST_SUITE_END() diff --git a/src/backends/neon/test/NeonFallbackTests.cpp b/src/backends/neon/test/NeonFallbackTests.cpp index 8dc592db5d..383a5f654c 100644 --- a/src/backends/neon/test/NeonFallbackTests.cpp +++ b/src/backends/neon/test/NeonFallbackTests.cpp @@ -16,7 +16,7 @@ BOOST_AUTO_TEST_CASE(FallbackImportToCpuAcc) { using namespace armnn; - // Create a mock backend object + // Create a mock backend objectN MockImportBackendInitialiser initialiser; // Register the Mock Backend auto backendObjPtr = CreateBackendObject(MockImportBackendId()); BOOST_TEST((backendObjPtr != nullptr)); @@ -677,7 +677,7 @@ BOOST_AUTO_TEST_CASE(FallbackDisableImportFromCpuAcc) } #if defined(ARMCOMPUTECL_ENABLED) -BOOST_AUTO_TEST_CASE(NeonImportEnabledFallbackToCl, * boost::unit_test::disabled()) +BOOST_AUTO_TEST_CASE(NeonImportEnabledFallbackToCl) { using namespace armnn; @@ -700,7 +700,7 @@ BOOST_AUTO_TEST_CASE(NeonImportEnabledFallbackToCl, * boost::unit_test::disabled add->GetOutputSlot(0).Connect(sub->GetInputSlot(1)); sub->GetOutputSlot(0).Connect(output->GetInputSlot(0)); - TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32); + TensorInfo info = TensorInfo({ 1, 2, 4, 2 }, DataType::Float32); input0->GetOutputSlot(0).SetTensorInfo(info); input1->GetOutputSlot(0).SetTensorInfo(info); @@ -752,29 +752,43 @@ BOOST_AUTO_TEST_CASE(NeonImportEnabledFallbackToCl, * boost::unit_test::disabled // Creates structures for input & output std::vector inputData0 { - 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f + 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f, 1.0f, 1.0f, 2.0f, 2.0f }; std::vector inputData1 { - 0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f + 0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 0.0f, 1.0f, 1.0f, 2.0f }; std::vector inputData2 { - 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f + 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 12.0f, 11.0f, 10.0f, 9.0f }; - std::vector outputData(12); + std::vector outputData(16); std::vector expectedOutput { - 11.0f, 9.0f, 7.0f, 5.0f, 3.0f, 1.0f, -1.0f, -3.0f, -5.0f, -7.0f, -9.0f, -11.0f + 11.0f, 9.0f, 7.0f, 5.0f, 3.0f, 1.0f, -1.0f, -3.0f, -5.0f, -7.0f, -9.0f, -11.0f, 11.0f, 9.0f, 7.0f, 5.0f }; + // Creates structures for input & output + unsigned int numElements = info.GetNumElements(); + size_t totalBytes = numElements * sizeof(float); + + // Prepare aligned data + const size_t alignment = 64; + size_t space = totalBytes + alignment + alignment; + auto inputData = std::make_unique(space); + void* alignedInputPtr = inputData.get(); + BOOST_CHECK(std::align(alignment, totalBytes, alignedInputPtr, space)); + + auto* intputPtr = reinterpret_cast(alignedInputPtr); + std::copy(inputData2.begin(), inputData2.end(), intputPtr); + InputTensors inputTensors { { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) }, { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) }, - { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) } + { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), alignedInputPtr) } }; OutputTensors outputTensors { @@ -801,7 +815,11 @@ BOOST_AUTO_TEST_CASE(NeonImportEnabledFallbackToCl, * boost::unit_test::disabled BOOST_TEST(found != std::string::npos); // Check output is as expected - BOOST_TEST(outputData == expectedOutput); + for(unsigned int i = 0; i < numElements; ++i) + { + BOOST_TEST(outputData[i] == expectedOutput[i]); + } + runtime->UnloadNetwork(netId); } BOOST_AUTO_TEST_CASE(NeonImportDisabledFallbackToCl) @@ -926,7 +944,7 @@ BOOST_AUTO_TEST_CASE(NeonImportDisabledFallbackToCl) BOOST_TEST(outputData == expectedOutput); } -BOOST_AUTO_TEST_CASE(NeonImportEnabledFallbackSubgraphToCl, * boost::unit_test::disabled()) +BOOST_AUTO_TEST_CASE(NeonImportEnabledFallbackSubgraphToCl) { using namespace armnn; @@ -937,6 +955,10 @@ BOOST_AUTO_TEST_CASE(NeonImportEnabledFallbackSubgraphToCl, * boost::unit_test:: INetworkPtr net(INetwork::Create()); Pooling2dDescriptor desc; + desc.m_PoolWidth = 2; + desc.m_PoolHeight = 2; + desc.m_StrideX = 2; + desc.m_StrideY = 2; IConnectableLayer* input0 = net->AddInputLayer(0, "input0"); IConnectableLayer* input1 = net->AddInputLayer(1, "input1"); @@ -953,8 +975,8 @@ BOOST_AUTO_TEST_CASE(NeonImportEnabledFallbackSubgraphToCl, * boost::unit_test:: sub->GetOutputSlot(0).Connect(pooling->GetInputSlot(0)); pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0)); - TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32); - TensorInfo poolingInfo = TensorInfo({ 1, 2, 1, 1 }, DataType::Float32); + TensorInfo info = TensorInfo({ 1, 2, 4, 2 }, DataType::Float32); + TensorInfo poolingInfo = TensorInfo({ 1, 2, 2, 1 }, DataType::Float32); input0->GetOutputSlot(0).SetTensorInfo(info); input1->GetOutputSlot(0).SetTensorInfo(info); @@ -1012,26 +1034,38 @@ BOOST_AUTO_TEST_CASE(NeonImportEnabledFallbackSubgraphToCl, * boost::unit_test:: // Creates structures for input & output std::vector inputData0 { - 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f + 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f, 1.0f, 1.0f, 2.0f, 2.0f }; std::vector inputData1 { - 0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f + 0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 0.0f, 1.0f, 1.0f, 2.0f }; std::vector inputData2 { - 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f + 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 12.0f, 11.0f, 10.0f, 9.0f }; - std::vector outputData(2); + std::vector outputData(4); - std::vector expectedOutput{ 11.0f, -1.0f }; + std::vector expectedOutput{ 11.0f, 3.0f, -5.0f, 11.0f }; + + // Prepare aligned data + unsigned int numElements = info.GetNumElements(); + size_t totalBytes = numElements * sizeof(float); + const size_t alignment = 64; + size_t space = totalBytes + alignment + alignment; + auto inputData = std::make_unique(space); + void* alignedInputPtr = inputData.get(); + BOOST_CHECK(std::align(alignment, totalBytes, alignedInputPtr, space)); + + auto* intputPtr = reinterpret_cast(alignedInputPtr); + std::copy(inputData2.begin(), inputData2.end(), intputPtr); InputTensors inputTensors { { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) }, { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) }, - { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) } + { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), alignedInputPtr) } }; OutputTensors outputTensors { @@ -1067,6 +1101,7 @@ BOOST_AUTO_TEST_CASE(NeonImportEnabledFallbackSubgraphToCl, * boost::unit_test:: // Check output is as expected BOOST_TEST(outputData == expectedOutput); + runtime->UnloadNetwork(netId); } BOOST_AUTO_TEST_CASE(NeonImportDisableFallbackSubgraphToCl) -- cgit v1.2.1