From e5f0b2409c2e557a5a78e2f4659d203154289b23 Mon Sep 17 00:00:00 2001 From: Narumol Prangnawarat Date: Fri, 7 May 2021 17:52:36 +0100 Subject: IVGCVSW-5818 Enable import on GPU Signed-off-by: Narumol Prangnawarat Change-Id: I4e4eb107aa2bfa09625840d738001f33152e6792 --- src/backends/cl/test/ClFallbackTests.cpp | 91 ++++++++++++----- src/backends/cl/test/ClImportTensorHandleTests.cpp | 112 ++++++++++++++++++++- 2 files changed, 176 insertions(+), 27 deletions(-) (limited to 'src/backends/cl/test') diff --git a/src/backends/cl/test/ClFallbackTests.cpp b/src/backends/cl/test/ClFallbackTests.cpp index eec3afe447..183b8caa2e 100644 --- a/src/backends/cl/test/ClFallbackTests.cpp +++ b/src/backends/cl/test/ClFallbackTests.cpp @@ -11,7 +11,7 @@ BOOST_AUTO_TEST_SUITE(ClFallback) -BOOST_AUTO_TEST_CASE(ClImportEnabledFallbackToNeon, * boost::unit_test::disabled()) +BOOST_AUTO_TEST_CASE(ClImportEnabledFallbackToNeon) { using namespace armnn; @@ -34,7 +34,7 @@ BOOST_AUTO_TEST_CASE(ClImportEnabledFallbackToNeon, * boost::unit_test::disabled add->GetOutputSlot(0).Connect(sub->GetInputSlot(1)); sub->GetOutputSlot(0).Connect(output->GetInputSlot(0)); - TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32); + TensorInfo info = TensorInfo({ 1, 2, 4, 2 }, DataType::Float32); input0->GetOutputSlot(0).SetTensorInfo(info); input1->GetOutputSlot(0).SetTensorInfo(info); @@ -82,30 +82,49 @@ BOOST_AUTO_TEST_CASE(ClImportEnabledFallbackToNeon, * boost::unit_test::disabled runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties); // Creates structures for input & output - std::vector inputData0 + std::vector inputValue0 { - 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f + 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f, 1.0f, 1.0f, 2.0f, 2.0f }; - std::vector inputData1 + std::vector inputValue1 { - 0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f + 0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 0.0f, 1.0f, 1.0f, 2.0f }; std::vector inputData2 { - 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f + 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 12.0f, 11.0f, 10.0f, 9.0f }; - std::vector outputData(12); + std::vector outputData(16); std::vector expectedOutput { - 11.0f, 9.0f, 7.0f, 5.0f, 3.0f, 1.0f, -1.0f, -3.0f, -5.0f, -7.0f, -9.0f, -11.0f + 11.0f, 9.0f, 7.0f, 5.0f, 3.0f, 1.0f, -1.0f, -3.0f, -5.0f, -7.0f, -9.0f, -11.0f, 11.0f, 9.0f, 7.0f, 5.0f }; + // Prepare aligned data + unsigned int numElements = info.GetNumElements(); + size_t totalBytes = numElements * sizeof(float); + const size_t alignment = 64; + size_t space = totalBytes + alignment + alignment; + auto inputData0 = std::make_unique(space); + void* alignedInputPtr0 = inputData0.get(); + BOOST_CHECK(std::align(alignment, totalBytes, alignedInputPtr0, space)); + + auto* intputPtr0 = reinterpret_cast(alignedInputPtr0); + std::copy(inputValue0.begin(), inputValue0.end(), intputPtr0); + + auto inputData1 = std::make_unique(space); + void* alignedInputPtr1 = inputData1.get(); + BOOST_CHECK(std::align(alignment, totalBytes, alignedInputPtr1, space)); + + auto* intputPtr1 = reinterpret_cast(alignedInputPtr1); + std::copy(inputValue1.begin(), inputValue1.end(), intputPtr1); + InputTensors inputTensors { - { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) }, - { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) }, + { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), alignedInputPtr0) }, + { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), alignedInputPtr1) }, { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) } }; OutputTensors outputTensors @@ -134,6 +153,8 @@ BOOST_AUTO_TEST_CASE(ClImportEnabledFallbackToNeon, * boost::unit_test::disabled // Check output is as expected BOOST_TEST(outputData == expectedOutput); + + runtime->UnloadNetwork(netId); } BOOST_AUTO_TEST_CASE(ClImportDisabledFallbackToNeon) @@ -258,7 +279,7 @@ BOOST_AUTO_TEST_CASE(ClImportDisabledFallbackToNeon) BOOST_TEST(outputData == expectedOutput); } -BOOST_AUTO_TEST_CASE(ClImportEnabledFallbackSubgraphToNeon, * boost::unit_test::disabled()) +BOOST_AUTO_TEST_CASE(ClImportEnabledFallbackSubgraphToNeon) { using namespace armnn; @@ -269,6 +290,10 @@ BOOST_AUTO_TEST_CASE(ClImportEnabledFallbackSubgraphToNeon, * boost::unit_test:: INetworkPtr net(INetwork::Create()); Pooling2dDescriptor desc; + desc.m_PoolWidth = 2; + desc.m_PoolHeight = 2; + desc.m_StrideX = 2; + desc.m_StrideY = 2; IConnectableLayer* input0 = net->AddInputLayer(0, "input0"); IConnectableLayer* input1 = net->AddInputLayer(1, "input1"); @@ -285,8 +310,8 @@ BOOST_AUTO_TEST_CASE(ClImportEnabledFallbackSubgraphToNeon, * boost::unit_test:: sub->GetOutputSlot(0).Connect(pooling->GetInputSlot(0)); pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0)); - TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32); - TensorInfo poolingInfo = TensorInfo({ 1, 2, 1, 1 }, DataType::Float32); + TensorInfo info = TensorInfo({ 1, 2, 4, 2 }, DataType::Float32); + TensorInfo poolingInfo = TensorInfo({ 1, 2, 2, 1 }, DataType::Float32); input0->GetOutputSlot(0).SetTensorInfo(info); input1->GetOutputSlot(0).SetTensorInfo(info); @@ -340,27 +365,45 @@ BOOST_AUTO_TEST_CASE(ClImportEnabledFallbackSubgraphToNeon, * boost::unit_test:: runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties); // Creates structures for input & output - std::vector inputData0 + std::vector inputValue0 { - 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f + 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f, 1.0f, 1.0f, 2.0f, 2.0f }; - std::vector inputData1 + std::vector inputValue1 { - 0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f + 0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 0.0f, 1.0f, 1.0f, 2.0f }; std::vector inputData2 { - 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f + 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 12.0f, 11.0f, 10.0f, 9.0f }; - std::vector outputData(2); + std::vector outputData(4); - std::vector expectedOutput{ 11.0f, -1.0f }; + std::vector expectedOutput{ 11.0f, 3.0f, -5.0f, 11.0f }; + + unsigned int numElements = info.GetNumElements(); + size_t totalBytes = numElements * sizeof(float); + const size_t alignment = 64; + size_t space = totalBytes + alignment + alignment; + auto inputData0 = std::make_unique(space); + void* alignedInputPtr0 = inputData0.get(); + BOOST_CHECK(std::align(alignment, totalBytes, alignedInputPtr0, space)); + + auto* intputPtr0 = reinterpret_cast(alignedInputPtr0); + std::copy(inputValue0.begin(), inputValue0.end(), intputPtr0); + + auto inputData1 = std::make_unique(space); + void* alignedInputPtr1 = inputData1.get(); + BOOST_CHECK(std::align(alignment, totalBytes, alignedInputPtr1, space)); + + auto* intputPtr1 = reinterpret_cast(alignedInputPtr1); + std::copy(inputValue1.begin(), inputValue1.end(), intputPtr1); InputTensors inputTensors { - { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) }, - { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) }, + { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), alignedInputPtr0) }, + { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), alignedInputPtr1) }, { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) } }; OutputTensors outputTensors @@ -393,6 +436,8 @@ BOOST_AUTO_TEST_CASE(ClImportEnabledFallbackSubgraphToNeon, * boost::unit_test:: // Check output is as expected BOOST_TEST(outputData == expectedOutput); + + runtime->UnloadNetwork(netId); } BOOST_AUTO_TEST_CASE(ClImportDisableFallbackSubgraphToNeon) diff --git a/src/backends/cl/test/ClImportTensorHandleTests.cpp b/src/backends/cl/test/ClImportTensorHandleTests.cpp index bfb74af801..85ff35f0af 100644 --- a/src/backends/cl/test/ClImportTensorHandleTests.cpp +++ b/src/backends/cl/test/ClImportTensorHandleTests.cpp @@ -11,6 +11,9 @@ #include +#include +#include + using namespace armnn; BOOST_AUTO_TEST_SUITE(ClImportTensorHandleTests) @@ -38,7 +41,7 @@ BOOST_FIXTURE_TEST_CASE(ClMallocImport, ClContextControlFixture) const size_t totalBytes = tensor.info()->total_size(); const size_t alignment = arm_compute::CLKernelLibrary::get().get_device().getInfo(); - size_t space = totalBytes + alignment; + size_t space = totalBytes + alignment + alignment; auto testData = std::make_unique(space); void* alignedPtr = testData.get(); BOOST_CHECK(std::align(alignment, totalBytes, alignedPtr, space)); @@ -57,7 +60,7 @@ BOOST_FIXTURE_TEST_CASE(ClMallocImport, ClContextControlFixture) // Validate result by checking that the output has no negative values for(unsigned int i = 0; i < numElements; ++i) { - BOOST_ASSERT(typedPtr[i] >= 0); + BOOST_TEST(typedPtr[i] >= 0); } } @@ -78,7 +81,7 @@ BOOST_FIXTURE_TEST_CASE(ClIncorrectMemorySourceImport, ClContextControlFixture) const size_t totalBytes = tensor.info()->total_size(); const size_t alignment = arm_compute::CLKernelLibrary::get().get_device().getInfo(); - size_t space = totalBytes + alignment; + size_t space = totalBytes + alignment + alignment; auto testData = std::make_unique(space); void* alignedPtr = testData.get(); BOOST_CHECK(std::align(alignment, totalBytes, alignedPtr, space)); @@ -108,4 +111,105 @@ BOOST_FIXTURE_TEST_CASE(ClInvalidMemorySourceImport, ClContextControlFixture) BOOST_CHECK_THROW(handle->Import(inputData.data(), invalidMemSource), MemoryImportException); } -BOOST_AUTO_TEST_SUITE_END() \ No newline at end of file +BOOST_FIXTURE_TEST_CASE(ClImportEndToEnd, ClContextControlFixture) +{ + // Create runtime in which test will run + IRuntime::CreationOptions options; + IRuntimePtr runtime(armnn::IRuntime::Create(options)); + + // build up the structure of the network + INetworkPtr net(INetwork::Create()); + + IConnectableLayer* input = net->AddInputLayer(0, "Input"); + + ActivationDescriptor descriptor; + descriptor.m_Function = ActivationFunction::ReLu; + IConnectableLayer* activation = net->AddActivationLayer(descriptor, "Activation"); + + IConnectableLayer* output = net->AddOutputLayer(0, "Output"); + + input->GetOutputSlot(0).Connect(activation->GetInputSlot(0)); + activation->GetOutputSlot(0).Connect(output->GetInputSlot(0)); + + TensorInfo tensorInfo = TensorInfo({ 1, 24, 16, 3 }, DataType::Float32); + unsigned int numElements = tensorInfo.GetNumElements(); + size_t totalBytes = numElements * sizeof(float); + + input->GetOutputSlot(0).SetTensorInfo(tensorInfo); + activation->GetOutputSlot(0).SetTensorInfo(tensorInfo); + + // Optimize the network + OptimizerOptions optOptions; + optOptions.m_ImportEnabled = true; + std::vector backends = {armnn::Compute::GpuAcc}; + IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions); + BOOST_CHECK(optNet); + + // Loads it into the runtime. + NetworkId netId; + std::string ignoredErrorMessage; + // Enable Importing + INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc); + runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties); + + // Creates structures for input & output + const size_t alignment = + arm_compute::CLKernelLibrary::get().get_device().getInfo(); + size_t space = totalBytes + alignment + alignment; + auto inputData = std::make_unique(space); + void* alignedInputPtr = inputData.get(); + BOOST_CHECK(std::align(alignment, totalBytes, alignedInputPtr, space)); + + // Input with negative values + auto* intputPtr = reinterpret_cast(alignedInputPtr); + std::fill_n(intputPtr, numElements, -5.0f); + + auto outputData = std::make_unique(space); + void* alignedOutputPtr = outputData.get(); + BOOST_CHECK(std::align(alignment, totalBytes, alignedOutputPtr, space)); + + InputTensors inputTensors + { + {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), alignedInputPtr)}, + }; + OutputTensors outputTensors + { + {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), alignedOutputPtr)} + }; + + runtime->GetProfiler(netId)->EnableProfiling(true); + + // Do the inference + runtime->EnqueueWorkload(netId, inputTensors, outputTensors); + + // Retrieve the Profiler.Print() output to get the workload execution + ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance(); + std::stringstream ss; + profilerManager.GetProfiler()->Print(ss);; + std::string dump = ss.str(); + + // Contains ActivationWorkload + std::size_t found = dump.find("ActivationWorkload"); + BOOST_TEST(found != std::string::npos); + + // Contains SyncMemGeneric + found = dump.find("SyncMemGeneric"); + BOOST_TEST(found != std::string::npos); + + // Does not contain CopyMemGeneric + found = dump.find("CopyMemGeneric"); + BOOST_TEST(found == std::string::npos); + + // Check output is as expected + // Validate result by checking that the output has no negative values + auto* outputResult = reinterpret_cast(alignedOutputPtr); + BOOST_TEST(outputResult); + for(unsigned int i = 0; i < numElements; ++i) + { + BOOST_TEST(outputResult[i] >= 0); + } + + runtime->UnloadNetwork(netId); +} + +BOOST_AUTO_TEST_SUITE_END() -- cgit v1.2.1