From 70104000ddcf3bc1a1d21f16d1468456ca17b80a Mon Sep 17 00:00:00 2001 From: Aron Virginas-Tar Date: Wed, 24 Oct 2018 15:33:28 +0100 Subject: IVGCVSW-2073: Move remaining backend-specific tests from armnn to backends Change-Id: I45fd5b6dd32c435b78a54dc377a623e60978ce13 --- CMakeLists.txt | 4 +- src/armnn/test/EndToEndTest.cpp | 380 +----------- src/armnn/test/GraphUtils.hpp | 3 +- src/armnn/test/JsonPrinterTests.cpp | 378 ------------ src/armnn/test/NetworkTests.cpp | 640 +-------------------- src/backends/cl/backend.mk | 3 + src/backends/cl/test/CMakeLists.txt | 3 + src/backends/cl/test/ClEndToEndTests.cpp | 18 + src/backends/cl/test/ClJsonPrinterTests.cpp | 23 + src/backends/cl/test/ClOptimizedNetworkTests.cpp | 101 ++++ src/backends/neon/backend.mk | 3 + src/backends/neon/test/CMakeLists.txt | 3 + src/backends/neon/test/NeonEndToEndTests.cpp | 52 ++ src/backends/neon/test/NeonJsonPrinterTests.cpp | 22 + .../neon/test/NeonOptimizedNetworkTests.cpp | 70 +++ src/backends/reference/backend.mk | 3 + src/backends/reference/test/CMakeLists.txt | 3 + src/backends/reference/test/RefEndToEndTests.cpp | 251 ++++++++ .../reference/test/RefJsonPrinterTests.cpp | 22 + .../reference/test/RefOptimizedNetworkTests.cpp | 212 +++++++ src/backends/test/EndToEndTestImpl.hpp | 102 ++++ src/backends/test/JsonPrinterTestImpl.hpp | 354 ++++++++++++ src/backends/test/OptimizedNetworkTests.cpp | 329 +++++++++++ 23 files changed, 1587 insertions(+), 1392 deletions(-) delete mode 100644 src/armnn/test/JsonPrinterTests.cpp create mode 100644 src/backends/cl/test/ClEndToEndTests.cpp create mode 100644 src/backends/cl/test/ClJsonPrinterTests.cpp create mode 100644 src/backends/cl/test/ClOptimizedNetworkTests.cpp create mode 100644 src/backends/neon/test/NeonEndToEndTests.cpp create mode 100644 src/backends/neon/test/NeonJsonPrinterTests.cpp create mode 100644 src/backends/neon/test/NeonOptimizedNetworkTests.cpp create mode 100644 src/backends/reference/test/RefEndToEndTests.cpp create mode 100644 src/backends/reference/test/RefJsonPrinterTests.cpp create mode 100644 src/backends/reference/test/RefOptimizedNetworkTests.cpp create mode 100644 src/backends/test/EndToEndTestImpl.hpp create mode 100644 src/backends/test/JsonPrinterTestImpl.hpp create mode 100644 src/backends/test/OptimizedNetworkTests.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 5cdc07da35..257a49d192 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -337,7 +337,6 @@ if(BUILD_UNIT_TESTS) src/armnn/test/UnitTests.hpp src/armnn/test/EndToEndTest.cpp src/armnn/test/UtilsTests.cpp - src/armnn/test/JsonPrinterTests.cpp src/armnn/test/GraphTests.cpp src/armnn/test/OptimizerTests.cpp src/armnn/test/ProfilerTests.cpp @@ -366,6 +365,8 @@ if(BUILD_UNIT_TESTS) src/backends/test/Conv2dTestImpl.hpp src/backends/test/ActivationTestImpl.hpp src/backends/test/ActivationFixture.hpp + src/backends/test/EndToEndTestImpl.hpp + src/backends/test/JsonPrinterTestImpl.hpp src/backends/test/Pooling2dTestImpl.hpp src/backends/test/ReshapeTestImpl.hpp src/backends/test/PermuteTestImpl.hpp @@ -373,6 +374,7 @@ if(BUILD_UNIT_TESTS) src/backends/test/SplitterTestImpl.hpp src/backends/test/NormTestImpl.hpp src/backends/test/BatchNormTestImpl.hpp + src/backends/test/OptimizedNetworkTests.cpp src/backends/test/WorkloadTestUtils.hpp src/backends/test/QuantizeHelper.hpp) diff --git a/src/armnn/test/EndToEndTest.cpp b/src/armnn/test/EndToEndTest.cpp index d34bf69548..4f202f174e 100644 --- a/src/armnn/test/EndToEndTest.cpp +++ b/src/armnn/test/EndToEndTest.cpp @@ -2,14 +2,15 @@ // 
Copyright © 2017 Arm Ltd. All rights reserved. // SPDX-License-Identifier: MIT // -#include #include #include #include #include + #include +#include #include @@ -17,12 +18,14 @@ BOOST_AUTO_TEST_SUITE(EndToEnd) namespace { + template bool IsFloatIterFunc(T iter) { boost::ignore_unused(iter); return IsFloatingPointIterator::value; } + } //namespace BOOST_AUTO_TEST_CASE(QuantizedHelper) @@ -44,381 +47,6 @@ BOOST_AUTO_TEST_CASE(QuantizedHelper) BOOST_TEST(IsFloatIterFunc(&ints[0]) == false); } -BOOST_AUTO_TEST_CASE(Unsigned8) -{ - using namespace armnn; - - // Create runtime in which test will run - armnn::IRuntime::CreationOptions options; - armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options)); - - // Builds up the structure of the network. - armnn::INetworkPtr net(INetwork::Create()); - - IConnectableLayer* input = net->AddInputLayer(0, "input"); - IConnectableLayer* softmax = net->AddSoftmaxLayer(SoftmaxDescriptor(), "softmax"); - IConnectableLayer* output = net->AddOutputLayer(0, "output"); - - input->GetOutputSlot(0).Connect(softmax->GetInputSlot(0)); - softmax->GetOutputSlot(0).Connect(output->GetInputSlot(0)); - - // Sets the tensors in the network. - TensorInfo inputTensorInfo(TensorShape({1, 5}), DataType::QuantisedAsymm8); - inputTensorInfo.SetQuantizationOffset(100); - inputTensorInfo.SetQuantizationScale(10000.0f); - input->GetOutputSlot(0).SetTensorInfo(inputTensorInfo); - - TensorInfo outputTensorInfo(TensorShape({1, 5}), DataType::QuantisedAsymm8); - outputTensorInfo.SetQuantizationOffset(0); - outputTensorInfo.SetQuantizationScale(1.0f/255.0f); - softmax->GetOutputSlot(0).SetTensorInfo(outputTensorInfo); - - // optimize the network - std::vector backends = {armnn::Compute::CpuRef}; - IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec()); - - // Loads it into the runtime. - NetworkId netId; - auto error = runtime->LoadNetwork(netId, std::move(optNet)); - BOOST_TEST(error == Status::Success); - - // Creates structures for input & output. - std::vector inputData - { - 1, 10, 3, 200, 5 // Some inputs - one of which is sufficiently larger than the others to saturate softmax. - }; - std::vector outputData(5); - - armnn::InputTensors inputTensors - { - {0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())} - }; - armnn::OutputTensors outputTensors - { - {0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())} - }; - - // Does the inference. - runtime->EnqueueWorkload(netId, inputTensors, outputTensors); - - // Checks the results. - BOOST_TEST(outputData[0] == 0); - BOOST_TEST(outputData[1] == 0); - BOOST_TEST(outputData[2] == 0); - BOOST_TEST(outputData[3] == 255); // softmax has been saturated. - BOOST_TEST(outputData[4] == 0); -} - -template -void ConstantUsageTest(const std::vector& computeDevice, - const armnn::TensorInfo& commonTensorInfo, - const std::vector& inputData, - const std::vector& constantData, - const std::vector& expectedOutputData) -{ - using namespace armnn; - - // Create runtime in which test will run - armnn::IRuntime::CreationOptions options; - armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options)); - - // Builds up the structure of the network. 
- INetworkPtr net(INetwork::Create()); - - IConnectableLayer* input = net->AddInputLayer(0); - IConnectableLayer* constant = net->AddConstantLayer(ConstTensor(commonTensorInfo, constantData)); - IConnectableLayer* add = net->AddAdditionLayer(); - IConnectableLayer* output = net->AddOutputLayer(0); - - input->GetOutputSlot(0).Connect(add->GetInputSlot(0)); - constant->GetOutputSlot(0).Connect(add->GetInputSlot(1)); - add->GetOutputSlot(0).Connect(output->GetInputSlot(0)); - - // Sets the tensors in the network. - input->GetOutputSlot(0).SetTensorInfo(commonTensorInfo); - constant->GetOutputSlot(0).SetTensorInfo(commonTensorInfo); - add->GetOutputSlot(0).SetTensorInfo(commonTensorInfo); - - // optimize the network - IOptimizedNetworkPtr optNet = Optimize(*net, computeDevice, runtime->GetDeviceSpec()); - - // Loads it into the runtime. - NetworkId netId; - runtime->LoadNetwork(netId, std::move(optNet)); - - // Creates structures for input & output. - std::vector outputData(inputData.size()); - - InputTensors inputTensors - { - {0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())} - }; - OutputTensors outputTensors - { - {0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())} - }; - - // Does the inference. - runtime->EnqueueWorkload(netId, inputTensors, outputTensors); - - // Checks the results. - BOOST_TEST(outputData == expectedOutputData); -} - -static void ConstantUsageFloat32Test(const std::vector& computeDevice) -{ - const armnn::TensorInfo commonTensorInfo({ 2, 3 }, armnn::DataType::Float32); - - ConstantUsageTest(computeDevice, - commonTensorInfo, - std::vector{ 1.f, 2.f, 3.f, 4.f, 5.f, 6.f }, // Input. - std::vector{ 6.f, 5.f, 4.f, 3.f, 2.f, 1.f }, // Const input. - std::vector{ 7.f, 7.f, 7.f, 7.f, 7.f, 7.f } // Expected output. - ); -} - -static void ConstantUsageUint8Test(const std::vector& computeDevice) -{ - armnn::TensorInfo commonTensorInfo({ 2, 3 }, armnn::DataType::QuantisedAsymm8); - - const float scale = 0.023529f; - const int8_t offset = -43; - - commonTensorInfo.SetQuantizationScale(scale); - commonTensorInfo.SetQuantizationOffset(offset); - - ConstantUsageTest(computeDevice, - commonTensorInfo, - QuantizedVector(scale, offset, { 1.f, 2.f, 3.f, 4.f, 5.f, 6.f }), // Input. - QuantizedVector(scale, offset, { 6.f, 5.f, 4.f, 3.f, 2.f, 1.f }), // Const input. - QuantizedVector(scale, offset, { 7.f, 7.f, 7.f, 7.f, 7.f, 7.f }) // Expected output. - ); -} - -BOOST_AUTO_TEST_CASE(ConstantUsage_Ref_Float32) -{ - std::vector backends = {armnn::Compute::CpuRef}; - ConstantUsageFloat32Test(backends); -} - -#if ARMCOMPUTENEON_ENABLED -BOOST_AUTO_TEST_CASE(ConstantUsage_Neon_Float32) -{ - ConstantUsageFloat32Test({armnn::Compute::CpuAcc}); -} -#endif - -#if ARMCOMPUTECL_ENABLED -BOOST_AUTO_TEST_CASE(ConstantUsage_Cl_Float32) -{ - ConstantUsageFloat32Test({armnn::Compute::GpuAcc}); -} -#endif - -BOOST_AUTO_TEST_CASE(ConstantUsage_Ref_Uint8) -{ - std::vector backends = {armnn::Compute::CpuRef}; - ConstantUsageUint8Test(backends); -} - -BOOST_AUTO_TEST_CASE(TrivialAdd) -{ - // This test was designed to match "AddTwo" in android nn/runtime/test/TestTrivialModel.cpp. - - using namespace armnn; - - // Create runtime in which test will run - armnn::IRuntime::CreationOptions options; - armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options)); - - // Builds up the structure of the network. 
- armnn::INetworkPtr net(INetwork::Create()); - - IConnectableLayer* input1 = net->AddInputLayer(0); - IConnectableLayer* input2 = net->AddInputLayer(1); - IConnectableLayer* add = net->AddAdditionLayer(); - IConnectableLayer* output = net->AddOutputLayer(0); - - input1->GetOutputSlot(0).Connect(add->GetInputSlot(0)); - input2->GetOutputSlot(0).Connect(add->GetInputSlot(1)); - add->GetOutputSlot(0).Connect(output->GetInputSlot(0)); - - // Sets the tensors in the network. - TensorInfo tensorInfo(TensorShape({3, 4}), DataType::Float32); - input1->GetOutputSlot(0).SetTensorInfo(tensorInfo); - input2->GetOutputSlot(0).SetTensorInfo(tensorInfo); - add->GetOutputSlot(0).SetTensorInfo(tensorInfo); - - // optimize the network - std::vector backends = {armnn::Compute::CpuRef}; - IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec()); - - // Loads it into the runtime. - NetworkId netId; - runtime->LoadNetwork(netId, std::move(optNet)); - - // Creates structures for input & output - matching android nn test. - std::vector input1Data - { - 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f - }; - std::vector input2Data - { - 100.f, 200.f, 300.f, 400.f, 500.f, 600.f, 700.f, 800.f, 900.f, 1000.f, 1100.f, 1200.f - }; - std::vector outputData(12); - - InputTensors inputTensors - { - {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), input1Data.data())}, - {1,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), input2Data.data())} - }; - OutputTensors outputTensors - { - {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())} - }; - - // Does the inference. - runtime->EnqueueWorkload(netId, inputTensors, outputTensors); - - // Checks the results - BOOST_TEST(outputData[0] == 101); - BOOST_TEST(outputData[1] == 202); - BOOST_TEST(outputData[2] == 303); - BOOST_TEST(outputData[3] == 404); - BOOST_TEST(outputData[4] == 505); - BOOST_TEST(outputData[5] == 606); - BOOST_TEST(outputData[6] == 707); - BOOST_TEST(outputData[7] == 808); - BOOST_TEST(outputData[8] == 909); - BOOST_TEST(outputData[9] == 1010); - BOOST_TEST(outputData[10] == 1111); - BOOST_TEST(outputData[11] == 1212); -} - -BOOST_AUTO_TEST_CASE(MultipleOutputs) -{ - using namespace armnn; - - // Create runtime in which test will run - armnn::IRuntime::CreationOptions options; - armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options)); - - // Builds up the structure of the network. 
- INetworkPtr net(INetwork::Create()); - - IConnectableLayer* input = net->AddInputLayer(0); - - // ReLu1 - ActivationDescriptor activation1Descriptor; - activation1Descriptor.m_Function = ActivationFunction::BoundedReLu; - activation1Descriptor.m_A = 1.f; - activation1Descriptor.m_B = -1.f; - IConnectableLayer* activation1 = net->AddActivationLayer(activation1Descriptor); - - // ReLu6 - ActivationDescriptor activation2Descriptor; - activation2Descriptor.m_Function = ActivationFunction::BoundedReLu; - activation2Descriptor.m_A = 6.0f; - IConnectableLayer* activation2 = net->AddActivationLayer(activation2Descriptor); - - // BoundedReLu(min=2, max=5) - ActivationDescriptor activation3Descriptor; - activation3Descriptor.m_Function = ActivationFunction::BoundedReLu; - activation3Descriptor.m_A = 5.0f; - activation3Descriptor.m_B = 2.0f; - IConnectableLayer* activation3 = net->AddActivationLayer(activation3Descriptor); - - IConnectableLayer* output1 = net->AddOutputLayer(0); - IConnectableLayer* output2 = net->AddOutputLayer(1); - IConnectableLayer* output3 = net->AddOutputLayer(2); - - input->GetOutputSlot(0).Connect(activation1->GetInputSlot(0)); - input->GetOutputSlot(0).Connect(activation2->GetInputSlot(0)); - input->GetOutputSlot(0).Connect(activation3->GetInputSlot(0)); - - activation1->GetOutputSlot(0).Connect(output1->GetInputSlot(0)); - activation2->GetOutputSlot(0).Connect(output2->GetInputSlot(0)); - activation3->GetOutputSlot(0).Connect(output3->GetInputSlot(0)); - - // Sets the tensors in the network. - TensorInfo tensorInfo(TensorShape({ 10 }), DataType::Float32); - input->GetOutputSlot(0).SetTensorInfo(tensorInfo); - activation1->GetOutputSlot(0).SetTensorInfo(tensorInfo); - activation2->GetOutputSlot(0).SetTensorInfo(tensorInfo); - activation3->GetOutputSlot(0).SetTensorInfo(tensorInfo); - - // optimize the network - std::vector backends = {armnn::Compute::CpuRef}; - IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec()); - - // Loads it into the runtime. - NetworkId netId; - runtime->LoadNetwork(netId, std::move(optNet)); - - // Creates structures for input & output. - const std::vector inputData{ 3.f, 5.f, 2.f, 3.f, 7.f, 0.f, -2.f, -1.f, 3.f, 3.f }; - - std::vector output1Data(inputData.size()); - std::vector output2Data(inputData.size()); - std::vector output3Data(inputData.size()); - - InputTensors inputTensors - { - {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())} - }; - OutputTensors outputTensors - { - {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), output1Data.data())}, - {1,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 1), output2Data.data())}, - {2,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 2), output3Data.data())} - }; - - // Does the inference. - runtime->EnqueueWorkload(netId, inputTensors, outputTensors); - - // Checks the results. - BOOST_TEST(output1Data == std::vector({ 1.f, 1.f, 1.f, 1.f, 1.f, 0.f, -1.f, -1.f, 1.f, 1.f })); // ReLu1 - BOOST_TEST(output2Data == std::vector({ 3.f, 5.f, 2.f, 3.f, 6.f, 0.f, 0.f, 0.f, 3.f, 3.f })); // ReLu6 - BOOST_TEST(output3Data == std::vector({ 3.f, 5.f, 2.f, 3.f, 5.f, 2.f, 2.f, 2.f, 3.f, 3.f })); // [2, 5] -} - -#if ARMCOMPUTENEON_ENABLED -BOOST_AUTO_TEST_CASE(FallbackToCpuRef) -{ - using namespace armnn; - - // Create runtime in which test will run and allow fallback to CpuRef. - IRuntime::CreationOptions options; - IRuntimePtr runtime(IRuntime::Create(options)); - - // Builds up the structure of the network. 
- INetworkPtr net(INetwork::Create()); - - IConnectableLayer* input = net->AddInputLayer(0); - - // This layer configuration isn't supported by CpuAcc but we allow fallback to CpuRef so it shoud pass. - NormalizationDescriptor descriptor; - IConnectableLayer* pooling = net->AddNormalizationLayer(descriptor); - - IConnectableLayer* output = net->AddOutputLayer(0); - - input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0)); - pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0)); - - input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 4 }, DataType::Float32)); - pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 4 }, DataType::Float32)); - - // optimize the network - std::vector backends = {Compute::CpuAcc, Compute::CpuRef}; - IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec()); - - // Load it into the runtime. It should pass. - NetworkId netId; - BOOST_TEST(runtime->LoadNetwork(netId, std::move(optNet)) == Status::Success); -} -#endif // ARMCOMPUTENEON_ENABLED - BOOST_AUTO_TEST_CASE(ErrorOnLoadNetwork) { using namespace armnn; diff --git a/src/armnn/test/GraphUtils.hpp b/src/armnn/test/GraphUtils.hpp index 4d385de92f..3325405eaf 100644 --- a/src/armnn/test/GraphUtils.hpp +++ b/src/armnn/test/GraphUtils.hpp @@ -4,7 +4,8 @@ // #pragma once -#include "Graph.hpp" +#include + #include namespace diff --git a/src/armnn/test/JsonPrinterTests.cpp b/src/armnn/test/JsonPrinterTests.cpp deleted file mode 100644 index 93f32cc540..0000000000 --- a/src/armnn/test/JsonPrinterTests.cpp +++ /dev/null @@ -1,378 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -BOOST_FIXTURE_TEST_SUITE(JsonPrinterTests, ClProfilingContextControlFixture) - -bool AreMatchingPair(const char opening, const char closing) -{ - return (opening == '{' && closing == '}') || (opening == '[' && closing == ']'); -} - -bool AreParenthesesMatching(const std::string& exp) -{ - std::stack expStack; - for (size_t i = 0; i < exp.length(); ++i) - { - if (exp[i] == '{' || exp[i] == '[') - { - expStack.push(exp[i]); - } - else if (exp[i] == '}' || exp[i] == ']') - { - if (expStack.empty() || !AreMatchingPair(expStack.top(), exp[i])) - { - return false; - } - else - { - expStack.pop(); - } - } - } - return expStack.empty(); -} - -std::vector ExtractMeasurements(const std::string& exp) -{ - std::vector numbers; - bool inArray = false; - std::string numberString; - for (size_t i = 0; i < exp.size(); ++i) - { - if (exp[i] == '[') - { - inArray = true; - } - else if (exp[i] == ']' && inArray) - { - try - { - boost::trim_if(numberString, boost::is_any_of("\t,\n")); - numbers.push_back(std::stod(numberString)); - } - catch (std::invalid_argument const& e) - { - BOOST_FAIL("Could not convert measurements to double: " + numberString); - } - - numberString.clear(); - inArray = false; - } - else if (exp[i] == ',' && inArray) - { - try - { - boost::trim_if(numberString, boost::is_any_of("\t,\n")); - numbers.push_back(std::stod(numberString)); - } - catch (std::invalid_argument const& e) - { - BOOST_FAIL("Could not convert measurements to double: " + numberString); - } - numberString.clear(); - } - else if (exp[i] != '[' && inArray && exp[i] != ',' && exp[i] != ' ') - { - numberString += exp[i]; - } - } - return numbers; -} - -std::vector ExtractSections(const std::string& exp) -{ - std::vector sections; - - std::stack s; - 
for (size_t i = 0; i < exp.size(); i++) - { - if (exp.at(i) == '{') - { - s.push(i); - } - else if (exp.at(i) == '}') - { - size_t from = s.top(); - s.pop(); - sections.push_back(exp.substr(from, i - from + 1)); - } - } - - return sections; -} - -std::string SoftmaxProfilerTestSetupHelper(const std::vector& backends) -{ - using namespace armnn; - - BOOST_CHECK(!backends.empty()); - - ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance(); - - // Create runtime in which test will run - IRuntime::CreationOptions options; - options.m_EnableGpuProfiling = backends.front() == armnn::Compute::GpuAcc; - IRuntimePtr runtime(IRuntime::Create(options)); - - // build up the structure of the network - INetworkPtr net(INetwork::Create()); - - IConnectableLayer* input = net->AddInputLayer(0, "input"); - IConnectableLayer* softmax = net->AddSoftmaxLayer(SoftmaxDescriptor(), "softmax"); - IConnectableLayer* output = net->AddOutputLayer(0, "output"); - - input->GetOutputSlot(0).Connect(softmax->GetInputSlot(0)); - softmax->GetOutputSlot(0).Connect(output->GetInputSlot(0)); - - // set the tensors in the network - TensorInfo inputTensorInfo(TensorShape({1, 5}), DataType::QuantisedAsymm8); - inputTensorInfo.SetQuantizationOffset(100); - inputTensorInfo.SetQuantizationScale(10000.0f); - input->GetOutputSlot(0).SetTensorInfo(inputTensorInfo); - - TensorInfo outputTensorInfo(TensorShape({1, 5}), DataType::QuantisedAsymm8); - outputTensorInfo.SetQuantizationOffset(0); - outputTensorInfo.SetQuantizationScale(1.0f / 256.0f); - softmax->GetOutputSlot(0).SetTensorInfo(outputTensorInfo); - - // optimize the network - IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec()); - if(!optNet) - { - BOOST_FAIL("Error occurred during Optimization, Optimize() returned nullptr."); - } - // load it into the runtime - NetworkId netId; - auto error = runtime->LoadNetwork(netId, std::move(optNet)); - BOOST_TEST(error == Status::Success); - - // create structures for input & output - std::vector inputData - { - 1, 10, 3, 200, 5 - // one of inputs is sufficiently larger than the others to saturate softmax - }; - std::vector outputData(5); - - armnn::InputTensors inputTensors - { - {0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())} - }; - armnn::OutputTensors outputTensors - { - {0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())} - }; - - runtime->GetProfiler(netId)->EnableProfiling(true); - - // do the inferences - runtime->EnqueueWorkload(netId, inputTensors, outputTensors); - runtime->EnqueueWorkload(netId, inputTensors, outputTensors); - runtime->EnqueueWorkload(netId, inputTensors, outputTensors); - - // retrieve the Profiler.Print() output - std::stringstream ss; - profilerManager.GetProfiler()->Print(ss); - - return ss.str(); -} - -void SoftmaxProfilerTestValidationHelper(std::string& result, const std::string& testData) -{ - // ensure all measurements are greater than zero - std::vector measurementsVector = ExtractMeasurements(result); - BOOST_CHECK(!measurementsVector.empty()); - - // check sections contain raw and unit tags - // first ensure Parenthesis are balanced - if (AreParenthesesMatching(result)) - { - // remove parent sections that will not have raw or unit tag - std::vector sectionVector = ExtractSections(result); - for (size_t i = 0; i < sectionVector.size(); ++i) - { - if (boost::contains(sectionVector[i], "\"ArmNN\":") - || boost::contains(sectionVector[i], "\"inference_measurements\":")) - { - 
sectionVector.erase(sectionVector.begin() + static_cast(i)); - } - } - BOOST_CHECK(!sectionVector.empty()); - - BOOST_CHECK(std::all_of(sectionVector.begin(), sectionVector.end(), - [](std::string i) { return boost::contains(i, "\"raw\":"); })); - - BOOST_CHECK(std::all_of(sectionVector.begin(), sectionVector.end(), - [](std::string i) { return boost::contains(i, "\"unit\":"); })); - } - - // remove the time measurements as they vary from test to test - result.erase(std::remove_if (result.begin(),result.end(), - [](char c) { return c == '.'; }), result.end()); - result.erase(std::remove_if (result.begin(), result.end(), &isdigit), result.end()); - result.erase(std::remove_if (result.begin(),result.end(), - [](char c) { return c == '\t'; }), result.end()); - - BOOST_CHECK(boost::contains(result, "ArmNN")); - BOOST_CHECK(boost::contains(result, "inference_measurements")); - BOOST_CHECK(boost::contains(result, "layer_measurements")); - BOOST_CHECK_EQUAL(result, testData); - - // ensure no spare parenthesis present in print output - BOOST_CHECK(AreParenthesesMatching(result)); -} - -void SetupSoftmaxProfilerWithSpecifiedBackendsAndValidateJSONPrinterResult( - const std::vector& backends) -{ - // setup the test fixture and obtain JSON Printer result - std::string result = SoftmaxProfilerTestSetupHelper(backends); - - std::string backend = "Ref"; - std::string changeLine31 = "\n},\n\"CopyMemGeneric_Execute\": {"; - std::string changeLine39 = "us\""; - std::string changeLine40; - std::string changeLine45; - - if (backends[0] == armnn::Compute::GpuAcc) { - backend = "Cl"; - changeLine31 = ",\n\"OpenClKernelTimer/: softmax_layer_max_shift_exp_sum_quantized_serial GWS[,,]\": {"; - changeLine39 = R"(us" -}, -"OpenClKernelTimer/: softmax_layer_norm_quantized GWS[,,]": { -"raw": [ -, -, - -], -"unit": "us")"; - - changeLine40 = R"( -}, -"CopyMemGeneric_Execute": { -"raw": [ -, -, - -], -"unit": "us")"; - changeLine45 = "}\n"; - } - else if (backends[0] == armnn::Compute::CpuAcc) - { - backend = "Neon"; - changeLine31 = ",\n\"NeonKernelTimer/: NEFillBorderKernel\": {"; - changeLine39 = R"(us" -}, -"NeonKernelTimer/: NELogitsDMaxKernel": { -"raw": [ -, -, - -], -"unit": "us" -}, -"NeonKernelTimer/: NELogitsDSoftmaxKernel": { -"raw": [ -, -, - -], -"unit": "us")"; - changeLine40 = R"( -}, -"CopyMemGeneric_Execute": { -"raw": [ -, -, - -], -"unit": "us")"; - changeLine45 = "}\n"; - } - - std::string testData = R"({ -"ArmNN": { -"inference_measurements": { -"raw": [ -, -, - -], -"unit": "us", -"layer_measurements": { -"raw": [ -, -, - -], -"unit": "us", -"CopyMemGeneric_Execute": { -"raw": [ -, -, - -], -"unit": "us" -}, -")" + backend + R"(SoftmaxUintWorkload_Execute": { -"raw": [ -, -, - -], -"unit": "us")" + changeLine31 + R"( -"raw": [ -, -, - -], -"unit": ")" + changeLine39 + R"( -})" + changeLine40 + R"( -} -} -} -} -)" + changeLine45 + R"()"; - - // validate the JSON Printer result - SoftmaxProfilerTestValidationHelper(result, testData); -} - -BOOST_AUTO_TEST_CASE(SoftmaxProfilerJSONPrinterCpuRefTest) -{ - SetupSoftmaxProfilerWithSpecifiedBackendsAndValidateJSONPrinterResult({armnn::Compute::CpuRef}); -} - - -#if ARMCOMPUTENEON_ENABLED -BOOST_AUTO_TEST_CASE(SoftmaxProfilerJSONPrinterCpuAccTest) -{ - SetupSoftmaxProfilerWithSpecifiedBackendsAndValidateJSONPrinterResult({armnn::Compute::CpuAcc}); -} -#endif - -#if ARMCOMPUTECL_ENABLED -BOOST_AUTO_TEST_CASE(SoftmaxProfilerJSONPrinterGpuAccTest) -{ - SetupSoftmaxProfilerWithSpecifiedBackendsAndValidateJSONPrinterResult({armnn::Compute::GpuAcc}); -} 
-#endif - -BOOST_AUTO_TEST_SUITE_END() diff --git a/src/armnn/test/NetworkTests.cpp b/src/armnn/test/NetworkTests.cpp index 4f8dd7ea7b..91ff7fa983 100644 --- a/src/armnn/test/NetworkTests.cpp +++ b/src/armnn/test/NetworkTests.cpp @@ -2,16 +2,13 @@ // Copyright © 2017 Arm Ltd. All rights reserved. // SPDX-License-Identifier: MIT // -#include + +#include "GraphUtils.hpp" #include -#include -#include -#include -#include -#include +#include -#include "GraphUtils.hpp" +#include namespace { @@ -43,54 +40,6 @@ BOOST_AUTO_TEST_CASE(LayerGuids) BOOST_TEST(inputId != outputId); } -BOOST_AUTO_TEST_CASE(SerializeToDot) -{ - armnn::Network net; - - //Defines layers. - auto input = net.AddInputLayer(0); - auto add = net.AddAdditionLayer(); - auto output = net.AddOutputLayer(0); - - // Connects layers. - input->GetOutputSlot(0).Connect(add->GetInputSlot(0)); - input->GetOutputSlot(0).Connect(add->GetInputSlot(1)); - add->GetOutputSlot(0).Connect(output->GetInputSlot(0)); - - armnn::TensorShape shape({4}); - armnn::TensorInfo info(shape, armnn::DataType::Float32); - input->GetOutputSlot(0).SetTensorInfo(info); - add->GetOutputSlot(0).SetTensorInfo(info); - - armnn::IRuntime::CreationOptions options; - armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options)); - - std::vector backends = {armnn::Compute::CpuRef}; - armnn::IOptimizedNetworkPtr optimizedNet = armnn::Optimize(net, backends, runtime->GetDeviceSpec()); - - std::ostringstream ss; - optimizedNet->SerializeToDot(ss); - - auto inputId = input->GetGuid(); - auto addId = add->GetGuid(); - auto outputId = output->GetGuid(); - - std::stringstream expected; - expected << - "digraph Optimized {\n" - " node [shape=\"record\"];\n" - " edge [fontsize=8 fontcolor=\"blue\" fontname=\"arial-bold\"];\n" - " " << inputId << " [label=\"{Input}\"];\n" - " " << addId << " [label=\"{Addition}\"];\n" - " " << outputId << " [label=\"{Output}\"];\n" - " " << inputId << " -> " << addId << " [label=< [4] >];\n" - " " << inputId << " -> " << addId << " [label=< [4] >];\n" - " " << addId << " -> " << outputId << " [label=< [4] >];\n" - "}\n"; - - BOOST_TEST(ss.str() == expected.str()); -} - BOOST_AUTO_TEST_CASE(NetworkBasic) { armnn::Network net; @@ -417,585 +366,4 @@ BOOST_AUTO_TEST_CASE(NetworkModification_SplitterMultiplication) prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0)); } -BOOST_AUTO_TEST_CASE(OptimizeValidateCpuRefWorkloads) -{ - const armnn::TensorInfo desc({3, 5}, armnn::DataType::Float32); - - armnn::Network net; - - armnn::NormalizationDescriptor nmDesc; - armnn::ActivationDescriptor acDesc; - - // in - // | - // nm - // / | - // ac | - // \ | - // ml - // | - // sm - // | - // ot - armnn::IConnectableLayer* layer = net.AddInputLayer(0, "in"); - layer->GetOutputSlot(0).SetTensorInfo(desc); - - armnn::IConnectableLayer* const normLayer = net.AddNormalizationLayer(nmDesc, "nm"); - - layer->GetOutputSlot(0).Connect(normLayer->GetInputSlot(0)); - normLayer->GetOutputSlot(0).SetTensorInfo(desc); - - layer = net.AddActivationLayer(acDesc, "ac"); - - normLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0)); - layer->GetOutputSlot(0).SetTensorInfo(desc); - - armnn::IConnectableLayer* prevLayer = layer; - layer = net.AddMultiplicationLayer("ml"); - - prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0)); - normLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1)); - layer->GetOutputSlot(0).SetTensorInfo(desc); - - prevLayer = layer; - armnn::SoftmaxDescriptor softmaxDescriptor; - layer = net.AddSoftmaxLayer(softmaxDescriptor, "sm"); - - 
prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0)); - layer->GetOutputSlot(0).SetTensorInfo(desc); - - prevLayer = layer; - layer = net.AddOutputLayer(0, "ot"); - - prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0)); - - armnn::IRuntime::CreationOptions options; - armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options)); - - std::vector backends = { armnn::Compute::CpuRef }; - armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(net, backends, runtime->GetDeviceSpec()); - static_cast(optNet.get())->GetGraph().AllocateDynamicBuffers(); - BOOST_CHECK(optNet); - - // Validates workloads. - armnn::RefWorkloadFactory fact; - for (auto&& layer : static_cast(optNet.get())->GetGraph()) - { - BOOST_CHECK_NO_THROW( - layer->CreateWorkload(static_cast(optNet.get())->GetGraph(), fact)); - } -} - -#if ARMCOMPUTENEON_ENABLED -BOOST_AUTO_TEST_CASE(OptimizeValidateCpuAccDeviceSupportLayerNoFallback) -{ - // build up the structure of the network - armnn::INetworkPtr net(armnn::INetwork::Create()); - - armnn::IConnectableLayer* input = net->AddInputLayer(0); - - armnn::IConnectableLayer* output = net->AddOutputLayer(0); - - input->GetOutputSlot(0).Connect(output->GetInputSlot(0)); - - input->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32)); - - armnn::IRuntime::CreationOptions options; - armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options)); - - std::vector backends = { armnn::Compute::CpuAcc }; - armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec()); - BOOST_CHECK(optNet); - // validate workloads - armnn::NeonWorkloadFactory fact; - for (auto&& layer : static_cast(optNet.get())->GetGraph()) - { - BOOST_CHECK(layer->GetBackendId() == armnn::Compute::CpuAcc); - BOOST_CHECK_NO_THROW( - layer->CreateWorkload(static_cast(optNet.get())->GetGraph(), fact)); - } -} -#endif // ARMCOMPUTENEON_ENABLED - -#if ARMCOMPUTECL_ENABLED -BOOST_AUTO_TEST_CASE(OptimizeValidateGpuDeviceSupportLayerNoFallback) -{ - // build up the structure of the network - armnn::INetworkPtr net(armnn::INetwork::Create()); - - armnn::IConnectableLayer* input = net->AddInputLayer(0); - - armnn::IConnectableLayer* output = net->AddOutputLayer(0); - - input->GetOutputSlot(0).Connect(output->GetInputSlot(0)); - - input->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32)); - - armnn::IRuntime::CreationOptions options; - armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options)); - - std::vector backends = { armnn::Compute::GpuAcc }; - armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec()); - BOOST_CHECK(optNet); - // validate workloads - armnn::ClWorkloadFactory fact; - for (auto&& layer : static_cast(optNet.get())->GetGraph()) - { - BOOST_CHECK(layer->GetBackendId() == armnn::Compute::GpuAcc); - BOOST_CHECK_NO_THROW( - layer->CreateWorkload(static_cast(optNet.get())->GetGraph(), fact)); - } -} -#endif // ARMCOMPUTECL_ENABLED - -BOOST_AUTO_TEST_CASE(OptimizeValidateDeviceNonSupportLayerNoFallback) -{ - // build up the structure of the network - armnn::INetworkPtr net(armnn::INetwork::Create()); - - armnn::IConnectableLayer* input = net->AddInputLayer(0); - - // This layer configuration isn't supported by CpuAcc and isn't allowed to fall back, so Optimize will return null. 
- armnn::NormalizationDescriptor descriptor; - armnn::IConnectableLayer* normalize = net->AddNormalizationLayer(descriptor); - - armnn::IConnectableLayer* output = net->AddOutputLayer(0); - - input->GetOutputSlot(0).Connect(normalize->GetInputSlot(0)); - normalize->GetOutputSlot(0).Connect(output->GetInputSlot(0)); - - input->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32)); - normalize->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32)); - - armnn::IRuntime::CreationOptions options; - armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options)); - - std::vector backends = { armnn::Compute::CpuAcc }; - armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec()); - BOOST_CHECK(!optNet); -} - -BOOST_AUTO_TEST_CASE(OptimizeValidateDeviceNonSupportLayerWithFallback) -{ - // build up the structure of the network - armnn::INetworkPtr net(armnn::INetwork::Create()); - - armnn::IConnectableLayer* input = net->AddInputLayer(0); - - // This layer configuration isn't supported by CpuAcc but it allows to fallback to CpuRef. - armnn::NormalizationDescriptor descriptor; - armnn::IConnectableLayer* normalize = net->AddNormalizationLayer(descriptor); - - armnn::IConnectableLayer* output = net->AddOutputLayer(0); - - input->GetOutputSlot(0).Connect(normalize->GetInputSlot(0)); - normalize->GetOutputSlot(0).Connect(output->GetInputSlot(0)); - - input->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32)); - normalize->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32)); - - armnn::IRuntime::CreationOptions options; - armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options)); - - std::vector backends = { armnn::Compute::CpuAcc, armnn::Compute::CpuRef }; - armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec()); - BOOST_REQUIRE(optNet); - - for (auto&& layer : static_cast(optNet.get())->GetGraph()) - { - // If NEON is enabled, Input and Output layers are supported by CpuAcc, - // the other layers are supported by CpuRef. - // If NEON is not enabled, all layers are supported by CpuRef. 
-#if ARMCOMPUTENEON_ENABLED - if (layer->GetType() == armnn::LayerType::Input || layer->GetType() == armnn::LayerType::Output) - { - BOOST_CHECK(layer->GetBackendId() == armnn::Compute::CpuAcc); - } - else if (layer->GetType() == armnn::LayerType::Normalization) - { - BOOST_CHECK(layer->GetBackendId() == armnn::Compute::CpuRef); - } -#else - BOOST_CHECK(layer->GetBackendId() == armnn::Compute::CpuRef); -#endif - } -} - -BOOST_AUTO_TEST_CASE(OptimizeValidateWorkloadsUndefinedComputeDevice) -{ - const armnn::TensorInfo desc({3, 5}, armnn::DataType::Float32); - - armnn::Network net; - - armnn::NormalizationDescriptor nmDesc; - armnn::ActivationDescriptor acDesc; - - // in - // | - // nm - // / | - // ac | - // \ | - // ml - // | - // sm - // | - // ot - armnn::IConnectableLayer* layer = net.AddInputLayer(0, "in"); - layer->GetOutputSlot(0).SetTensorInfo(desc); - - armnn::IConnectableLayer* const normLayer = net.AddNormalizationLayer(nmDesc, "nm"); - - layer->GetOutputSlot(0).Connect(normLayer->GetInputSlot(0)); - normLayer->GetOutputSlot(0).SetTensorInfo(desc); - - layer = net.AddActivationLayer(acDesc, "ac"); - - normLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0)); - layer->GetOutputSlot(0).SetTensorInfo(desc); - - armnn::IConnectableLayer* prevLayer = layer; - layer = net.AddMultiplicationLayer("ml"); - - prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0)); - normLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1)); - layer->GetOutputSlot(0).SetTensorInfo(desc); - - prevLayer = layer; - armnn::SoftmaxDescriptor softmaxDescriptor; - layer = net.AddSoftmaxLayer(softmaxDescriptor, "sm"); - - prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0)); - layer->GetOutputSlot(0).SetTensorInfo(desc); - - prevLayer = layer; - layer = net.AddOutputLayer(0, "ot"); - - prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0)); - - armnn::IRuntime::CreationOptions options; - armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options)); - - std::vector backends = { armnn::Compute::Undefined }; - - armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(net, backends, runtime->GetDeviceSpec()); - BOOST_CHECK(!optNet); - -} - -BOOST_AUTO_TEST_CASE(OptimizeValidateWorkloadsUndefinedComputeDeviceWithFallback) -{ - const armnn::TensorInfo desc({3, 5}, armnn::DataType::Float32); - - armnn::Network net; - - armnn::NormalizationDescriptor nmDesc; - armnn::ActivationDescriptor acDesc; - - // in - // | - // nm - // / | - // ac | - // \ | - // ml - // | - // sm - // | - // ot - armnn::IConnectableLayer* layer = net.AddInputLayer(0, "in"); - layer->GetOutputSlot(0).SetTensorInfo(desc); - - armnn::IConnectableLayer* const normLayer = net.AddNormalizationLayer(nmDesc, "nm"); - - layer->GetOutputSlot(0).Connect(normLayer->GetInputSlot(0)); - normLayer->GetOutputSlot(0).SetTensorInfo(desc); - - layer = net.AddActivationLayer(acDesc, "ac"); - - normLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0)); - layer->GetOutputSlot(0).SetTensorInfo(desc); - - armnn::IConnectableLayer* prevLayer = layer; - layer = net.AddMultiplicationLayer("ml"); - - prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0)); - normLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1)); - layer->GetOutputSlot(0).SetTensorInfo(desc); - - prevLayer = layer; - armnn::SoftmaxDescriptor softmaxDescriptor; - layer = net.AddSoftmaxLayer(softmaxDescriptor, "sm"); - - prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0)); - layer->GetOutputSlot(0).SetTensorInfo(desc); - - prevLayer = layer; - layer = net.AddOutputLayer(0, 
"ot"); - - prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0)); - - armnn::IRuntime::CreationOptions options; - armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options)); - - std::vector backends = { armnn::Compute::Undefined, armnn::Compute::CpuRef }; - - armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(net, backends, runtime->GetDeviceSpec()); - BOOST_CHECK(optNet); - - // validate workloads - armnn::RefWorkloadFactory fact; - for (auto&& layer : static_cast(optNet.get())->GetGraph()) - { - BOOST_CHECK(layer->GetBackendId() == armnn::Compute::CpuRef); - BOOST_CHECK_NO_THROW( - layer->CreateWorkload(static_cast(optNet.get())->GetGraph(), fact)); - } -} -BOOST_AUTO_TEST_CASE(OptimizeValidateWorkloadsDuplicateComputeDeviceWithFallback) -{ - // build up the structure of the network - armnn::INetworkPtr net(armnn::INetwork::Create()); - - armnn::IConnectableLayer* input = net->AddInputLayer(0); - - // This layer configuration isn't supported by CpuAcc but it allows to fallback to CpuRef. - armnn::NormalizationDescriptor descriptor; - armnn::IConnectableLayer* normalize = net->AddNormalizationLayer(descriptor); - - armnn::IConnectableLayer* output = net->AddOutputLayer(0); - - input->GetOutputSlot(0).Connect(normalize->GetInputSlot(0)); - normalize->GetOutputSlot(0).Connect(output->GetInputSlot(0)); - - input->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32)); - normalize->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32)); - - armnn::IRuntime::CreationOptions options; - armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options)); - - std::vector backends = { armnn::Compute::CpuAcc, - armnn::Compute::GpuAcc, - armnn::Compute::CpuRef }; - - armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec()); - BOOST_REQUIRE(optNet); - - for (auto&& layer : static_cast(optNet.get())->GetGraph()) - { - // If NEON is enabled, Input and Output layers are supported by CpuAcc, - // the other layers are supported by CpuRef. - // If only CL is enabled, Input and Output layers are supported by GpuAcc, - // the other layers are supported by CpuRef. - // If neither NEON, nor CL is enabled, all layers are supported by CpuRef. 
-#if ARMCOMPUTENEON_ENABLED - if (layer->GetType() == armnn::LayerType::Input || layer->GetType() == armnn::LayerType::Output) - { - BOOST_CHECK(layer->GetBackendId() == armnn::Compute::CpuAcc); - } - else if (layer->GetType() == armnn::LayerType::Normalization) - { - BOOST_CHECK(layer->GetBackendId() == armnn::Compute::CpuRef); - } -#elif ARMCOMPUTECL_ENABLED - if (layer->GetType() == armnn::LayerType::Input || layer->GetType() == armnn::LayerType::Output) - { - BOOST_CHECK(layer->GetBackendId() == armnn::Compute::GpuAcc); - } - else if (layer->GetType() == armnn::LayerType::Normalization) - { - BOOST_CHECK(layer->GetBackendId() == armnn::Compute::CpuRef); - } -#else - BOOST_CHECK(layer->GetBackendId() == armnn::Compute::CpuRef); -#endif - } -} - -BOOST_AUTO_TEST_CASE(OptimizeValidateWorkloadsCpuRefPermuteLayer) -{ - // Create runtime in which test will run - armnn::IRuntime::CreationOptions options; - armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options)); - - std::vector backends = {armnn::Compute::CpuRef}; - - // build up the structure of the network - armnn::INetworkPtr net(armnn::INetwork::Create()); - - armnn::IConnectableLayer* input = net->AddInputLayer(0); - - armnn::PermuteDescriptor descriptor({0, 2, 3, 1}); - armnn::IConnectableLayer* permute = net->AddPermuteLayer(descriptor); - - armnn::IConnectableLayer* output = net->AddOutputLayer(0); - - input->GetOutputSlot(0).Connect(permute->GetInputSlot(0)); - permute->GetOutputSlot(0).Connect(output->GetInputSlot(0)); - - input->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32)); - permute->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 4, 1, 4 }, armnn::DataType::Float32)); - - // optimize the network - armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec()); - - for (auto&& layer : static_cast(optNet.get())->GetGraph()) - { - BOOST_CHECK(layer->GetBackendId() == armnn::Compute::CpuRef); - } -} - -BOOST_AUTO_TEST_CASE(OptimizeValidateWorkloadsCpuRefMeanLayer) -{ - // Create runtime in which test will run - armnn::IRuntime::CreationOptions options; - armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options)); - - std::vector backends = {armnn::Compute::CpuRef}; - - // build up the structure of the network - armnn::INetworkPtr net(armnn::INetwork::Create()); - - armnn::IConnectableLayer* input = net->AddInputLayer(0); - - armnn::MeanDescriptor descriptor({ 0, 1 }, false); - armnn::IConnectableLayer* meanLayer = net->AddMeanLayer(descriptor); - - armnn::IConnectableLayer* output = net->AddOutputLayer(0); - - input->GetOutputSlot(0).Connect(meanLayer->GetInputSlot(0)); - meanLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0)); - - input->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 4, 3, 2 }, armnn::DataType::Float32)); - meanLayer->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 2 }, armnn::DataType::Float32)); - - // optimize the network - armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec()); - - for (auto&& layer : static_cast(optNet.get())->GetGraph()) - { - BOOST_CHECK(layer->GetBackendId() == armnn::Compute::CpuRef); - } -} - -BOOST_AUTO_TEST_CASE(FP16TurboModeTestOnCpuRef) -{ - // Test to check when FP16 Turbo mode set - // it converts the FP32 network to FP16 Network - // add FP32ToFP16 conversion layer after the InputLayer - // add FP16ToFP32 conversion layer after the OutputLayer - // checks the other layers if they are supported in FP16 - // if they are not put the conversion 
layers before and after - // if they are not supported in FP16 use FP32 instead - // if there are inverse conversion layers remove them with optimization - // at the moment FloorLayer is not supported in FP16 so it rolls back to FP32 - // and inverse conversion layers are removed by the optimizer - armnn::Network net; - - // Defines layers. - auto input = net.AddInputLayer(0); - auto floor = net.AddFloorLayer(); - auto output = net.AddOutputLayer(0); - - // Connects layers. - input->GetOutputSlot(0).Connect(floor->GetInputSlot(0)); - floor->GetOutputSlot(0).Connect(output->GetInputSlot(0)); - - armnn::TensorShape shape({4}); - armnn::TensorInfo info(shape, armnn::DataType::Float32); - input->GetOutputSlot(0).SetTensorInfo(info); - floor->GetOutputSlot(0).SetTensorInfo(info); - - armnn::IRuntime::CreationOptions options; - armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options)); - - std::vector backends = {armnn::Compute::CpuRef}; - - armnn::OptimizerOptions optimizerOptions; - optimizerOptions.m_ReduceFp32ToFp16 = true; - - armnn::IOptimizedNetworkPtr optimizedNet = armnn::Optimize(net, backends, runtime->GetDeviceSpec(), - optimizerOptions); - - std::ostringstream ss; - optimizedNet->SerializeToDot(ss); - - auto inputId = input->GetGuid(); - auto floorId = floor->GetGuid(); - auto outputId = output->GetGuid(); - - std::stringstream expected; - expected << - "digraph Optimized {\n" - " node [shape=\"record\"];\n" - " edge [fontsize=8 fontcolor=\"blue\" fontname=\"arial-bold\"];\n" - " " << inputId << " [label=\"{Input}\"];\n" - " " << floorId << " [label=\"{Floor}\"];\n" - " " << outputId << " [label=\"{Output}\"];\n" - " " << inputId << " -> " << floorId << " [label=< [4] >];\n" - " " << floorId << " -> " << outputId << " [label=< [4] >];\n" - "}\n"; - - BOOST_TEST(ss.str() == expected.str()); -} - -#if ARMCOMPUTECL_ENABLED -BOOST_AUTO_TEST_CASE(FP16TurboModeTestOnGpuAcc) -{ - // Test to check when Fp16 Turbo mode set - // it converts the Fp32 network to Fp16 Network - // add Fp32ToFp16 conversion layer after the InputLayer - // add Fp16ToFp32 conversion layer after the OutputLayer - // checks the other layers if they are supported in Fp16 - // if they are not put the conversion layers before and after - // if they are not supported in Fp16 use Fp32 instead - // if there are inverse conversion layers remove them with optimization - // at the moment FloorLayer is not supported in Fp16 so it rolls back to Fp32 - // and inverse conversion layers are removed by the optimizer - armnn::Network net; - - // Defines layers. - auto input = net.AddInputLayer(0, "input layer"); - // ReLu1 - armnn::ActivationDescriptor activation1Descriptor; - activation1Descriptor.m_Function = armnn::ActivationFunction::BoundedReLu; - activation1Descriptor.m_A = 1.f; - activation1Descriptor.m_B = -1.f; - auto activation = net.AddActivationLayer(activation1Descriptor, "activation layer"); - auto output = net.AddOutputLayer(0, "output layer"); - - // Connects layers. 
-    input->GetOutputSlot(0).Connect(activation->GetInputSlot(0));
-    activation->GetOutputSlot(0).Connect(output->GetInputSlot(0));
-
-    armnn::TensorShape shape({4});
-    armnn::TensorInfo info(shape, armnn::DataType::Float32);
-    input->GetOutputSlot(0).SetTensorInfo(info);
-    activation->GetOutputSlot(0).SetTensorInfo(info);
-
-    armnn::IRuntime::CreationOptions options;
-    armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
-
-    std::vector backends = {armnn::Compute::GpuAcc};
-
-    armnn::OptimizerOptions optimizerOptions;
-    optimizerOptions.m_ReduceFp32ToFp16 = true;
-
-    armnn::IOptimizedNetworkPtr optimizedNet = armnn::Optimize(
-        net, backends, runtime->GetDeviceSpec(), optimizerOptions);
-
-    const armnn::Graph& graph = static_cast(optimizedNet.get())->GetGraph();
-
-    // Tests that all layers are present in the graph.
-    BOOST_TEST(graph.GetNumLayers() == 5);
-
-    // Tests that the vertices exist and have correct names.
-    BOOST_TEST(GraphHasNamedLayer(graph, "input layer"));
-    BOOST_TEST(GraphHasNamedLayer(graph, "convert_fp32_to_fp16-0-input layer"));
-    BOOST_TEST(GraphHasNamedLayer(graph, "activation layer"));
-    BOOST_TEST(GraphHasNamedLayer(graph, "convert_fp16_to_fp32-0-output layer"));
-    BOOST_TEST(GraphHasNamedLayer(graph, "output layer"));
-}
-#endif
-
 BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/backends/cl/backend.mk b/src/backends/cl/backend.mk
index 1f89f3b0a4..996db3fbfd 100644
--- a/src/backends/cl/backend.mk
+++ b/src/backends/cl/backend.mk
@@ -44,9 +44,12 @@ BACKEND_SOURCES := \
 
 BACKEND_TEST_SOURCES := \
         test/ClCreateWorkloadTests.cpp \
+        test/ClEndToEndTests.cpp \
+        test/ClJsonPrinterTests.cpp \
         test/ClLayerSupportTests.cpp \
         test/ClLayerTests.cpp \
         test/ClMemCopyTests.cpp \
+        test/ClOptimizedNetworkTests.cpp \
         test/ClRuntimeTests.cpp \
         test/Fp16SupportTest.cpp \
         test/OpenClTimerTest.cpp
diff --git a/src/backends/cl/test/CMakeLists.txt b/src/backends/cl/test/CMakeLists.txt
index 69aa08d42b..c017377768 100644
--- a/src/backends/cl/test/CMakeLists.txt
+++ b/src/backends/cl/test/CMakeLists.txt
@@ -6,9 +6,12 @@ list(APPEND armnnClBackendUnitTests_sources
     ClContextControlFixture.hpp
     ClCreateWorkloadTests.cpp
+    ClEndToEndTests.cpp
+    ClJsonPrinterTests.cpp
     ClLayerSupportTests.cpp
     ClLayerTests.cpp
     ClMemCopyTests.cpp
+    ClOptimizedNetworkTests.cpp
     ClRuntimeTests.cpp
     OpenClTimerTest.cpp
 )
diff --git a/src/backends/cl/test/ClEndToEndTests.cpp b/src/backends/cl/test/ClEndToEndTests.cpp
new file mode 100644
index 0000000000..d6fd8875c4
--- /dev/null
+++ b/src/backends/cl/test/ClEndToEndTests.cpp
@@ -0,0 +1,18 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include
+
+#include
+
+BOOST_AUTO_TEST_SUITE(ClEndToEnd)
+
+BOOST_AUTO_TEST_CASE(ConstantUsage_Cl_Float32)
+{
+    std::vector backends = {armnn::Compute::GpuAcc};
+    BOOST_TEST(ConstantUsageFloat32Test(backends));
+}
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/backends/cl/test/ClJsonPrinterTests.cpp b/src/backends/cl/test/ClJsonPrinterTests.cpp
new file mode 100644
index 0000000000..f0b4b7acae
--- /dev/null
+++ b/src/backends/cl/test/ClJsonPrinterTests.cpp
@@ -0,0 +1,23 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include
+
+#include
+#include
+
+#include
+
+#include
+
+BOOST_FIXTURE_TEST_SUITE(ClJsonPrinter, ClProfilingContextControlFixture)
+
+BOOST_AUTO_TEST_CASE(SoftmaxProfilerJsonPrinterGpuAccTest)
+{
+    std::vector backends = {armnn::Compute::GpuAcc};
+    SetupSoftmaxProfilerWithSpecifiedBackendsAndValidateJsonPrinterResult(backends);
+}
+
+BOOST_AUTO_TEST_SUITE_END()
\ No newline at end of file
diff --git a/src/backends/cl/test/ClOptimizedNetworkTests.cpp b/src/backends/cl/test/ClOptimizedNetworkTests.cpp
new file mode 100644
index 0000000000..b39a4b1304
--- /dev/null
+++ b/src/backends/cl/test/ClOptimizedNetworkTests.cpp
@@ -0,0 +1,101 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include
+#include
+
+#include
+
+#include
+
+#include
+
+BOOST_AUTO_TEST_SUITE(ClOptimizedNetwork)
+
+BOOST_AUTO_TEST_CASE(OptimizeValidateGpuDeviceSupportLayerNoFallback)
+{
+    // build up the structure of the network
+    armnn::INetworkPtr net(armnn::INetwork::Create());
+
+    armnn::IConnectableLayer* input = net->AddInputLayer(0);
+    armnn::IConnectableLayer* output = net->AddOutputLayer(0);
+
+    input->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+    input->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32));
+
+    armnn::IRuntime::CreationOptions options;
+    armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
+
+    std::vector backends = { armnn::Compute::GpuAcc };
+    armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec());
+    BOOST_CHECK(optNet);
+    // validate workloads
+    armnn::ClWorkloadFactory fact;
+    for (auto&& layer : static_cast(optNet.get())->GetGraph())
+    {
+        BOOST_CHECK(layer->GetBackendId() == armnn::Compute::GpuAcc);
+        BOOST_CHECK_NO_THROW(
+            layer->CreateWorkload(static_cast(optNet.get())->GetGraph(), fact));
+    }
+}
+
+BOOST_AUTO_TEST_CASE(FP16TurboModeTestOnGpuAcc)
+{
+    // Test to check that, when the Fp16 turbo mode is set, the optimizer converts the
+    // Fp32 network into an Fp16 network: it adds an Fp32ToFp16 conversion layer after the
+    // InputLayer and an Fp16ToFp32 conversion layer before the OutputLayer, checks whether
+    // the remaining layers are supported in Fp16 (layers that are not stay in Fp32, with
+    // conversion layers inserted before and after them), and removes any resulting pairs
+    // of inverse conversion layers as part of the optimization.
+    armnn::Network net;
+
+    // Defines layers.
+    auto input = net.AddInputLayer(0, "input layer");
+    // ReLu1
+    armnn::ActivationDescriptor activation1Descriptor;
+    activation1Descriptor.m_Function = armnn::ActivationFunction::BoundedReLu;
+    activation1Descriptor.m_A = 1.f;
+    activation1Descriptor.m_B = -1.f;
+    auto activation = net.AddActivationLayer(activation1Descriptor, "activation layer");
+    auto output = net.AddOutputLayer(0, "output layer");
+
+    // Connects layers.
+    input->GetOutputSlot(0).Connect(activation->GetInputSlot(0));
+    activation->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+    armnn::TensorShape shape({4});
+    armnn::TensorInfo info(shape, armnn::DataType::Float32);
+    input->GetOutputSlot(0).SetTensorInfo(info);
+    activation->GetOutputSlot(0).SetTensorInfo(info);
+
+    armnn::IRuntime::CreationOptions options;
+    armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
+
+    std::vector backends = {armnn::Compute::GpuAcc};
+
+    armnn::OptimizerOptions optimizerOptions;
+    optimizerOptions.m_ReduceFp32ToFp16 = true;
+
+    armnn::IOptimizedNetworkPtr optimizedNet = armnn::Optimize(
+        net, backends, runtime->GetDeviceSpec(), optimizerOptions);
+
+    const armnn::Graph& graph = static_cast(optimizedNet.get())->GetGraph();
+
+    // Tests that all layers are present in the graph.
+    BOOST_TEST(graph.GetNumLayers() == 5);
+
+    // Tests that the vertices exist and have correct names.
+    BOOST_TEST(GraphHasNamedLayer(graph, "input layer"));
+    BOOST_TEST(GraphHasNamedLayer(graph, "convert_fp32_to_fp16-0-input layer"));
+    BOOST_TEST(GraphHasNamedLayer(graph, "activation layer"));
+    BOOST_TEST(GraphHasNamedLayer(graph, "convert_fp16_to_fp32-0-output layer"));
+    BOOST_TEST(GraphHasNamedLayer(graph, "output layer"));
+}
+
+BOOST_AUTO_TEST_SUITE_END();
\ No newline at end of file
diff --git a/src/backends/neon/backend.mk b/src/backends/neon/backend.mk
index a4e6db9610..8f7e72b17c 100644
--- a/src/backends/neon/backend.mk
+++ b/src/backends/neon/backend.mk
@@ -41,8 +41,11 @@ BACKEND_SOURCES := \
 
 BACKEND_TEST_SOURCES := \
         test/NeonCreateWorkloadTests.cpp \
+        test/NeonEndToEndTests.cpp \
+        test/NeonJsonPrinterTests.cpp \
         test/NeonLayerSupportTests.cpp \
         test/NeonLayerTests.cpp \
         test/NeonMemCopyTests.cpp \
+        test/NeonOptimizedNetworkTests.cpp \
         test/NeonRuntimeTests.cpp \
         test/NeonTimerTest.cpp
diff --git a/src/backends/neon/test/CMakeLists.txt b/src/backends/neon/test/CMakeLists.txt
index e6a28590b5..999bd4f339 100644
--- a/src/backends/neon/test/CMakeLists.txt
+++ b/src/backends/neon/test/CMakeLists.txt
@@ -5,9 +5,12 @@ list(APPEND armnnNeonBackendUnitTests_sources
     NeonCreateWorkloadTests.cpp
+    NeonEndToEndTests.cpp
+    NeonJsonPrinterTests.cpp
     NeonLayerSupportTests.cpp
     NeonLayerTests.cpp
     NeonMemCopyTests.cpp
+    NeonOptimizedNetworkTests.cpp
     NeonRuntimeTests.cpp
     NeonTimerTest.cpp
 )
diff --git a/src/backends/neon/test/NeonEndToEndTests.cpp b/src/backends/neon/test/NeonEndToEndTests.cpp
new file mode 100644
index 0000000000..f9aa8a5df5
--- /dev/null
+++ b/src/backends/neon/test/NeonEndToEndTests.cpp
@@ -0,0 +1,52 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include
+
+#include
+
+BOOST_AUTO_TEST_SUITE(NeonEndToEnd)
+
+BOOST_AUTO_TEST_CASE(ConstantUsage_Neon_Float32)
+{
+    std::vector backends = {armnn::Compute::CpuAcc};
+    BOOST_TEST(ConstantUsageFloat32Test(backends));
+}
+
+BOOST_AUTO_TEST_CASE(FallbackToCpuRef)
+{
+    using namespace armnn;
+
+    // Create runtime in which test will run and allow fallback to CpuRef.
+    IRuntime::CreationOptions options;
+    IRuntimePtr runtime(IRuntime::Create(options));
+
+    // Builds up the structure of the network.
+    INetworkPtr net(INetwork::Create());
+
+    IConnectableLayer* input = net->AddInputLayer(0);
+
+    // This layer configuration isn't supported by CpuAcc but we allow fallback to CpuRef so it should pass.
+    NormalizationDescriptor descriptor;
+    IConnectableLayer* normalize = net->AddNormalizationLayer(descriptor);
+
+    IConnectableLayer* output = net->AddOutputLayer(0);
+
+    input->GetOutputSlot(0).Connect(normalize->GetInputSlot(0));
+    normalize->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+    input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 4 }, DataType::Float32));
+    normalize->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 4 }, DataType::Float32));
+
+    // optimize the network
+    std::vector<BackendId> backends = {Compute::CpuAcc, Compute::CpuRef};
+    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
+
+    // Load it into the runtime. It should pass.
+    NetworkId netId;
+    BOOST_TEST(runtime->LoadNetwork(netId, std::move(optNet)) == Status::Success);
+}
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/backends/neon/test/NeonJsonPrinterTests.cpp b/src/backends/neon/test/NeonJsonPrinterTests.cpp
new file mode 100644
index 0000000000..6213c145ba
--- /dev/null
+++ b/src/backends/neon/test/NeonJsonPrinterTests.cpp
@@ -0,0 +1,22 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include
+
+#include
+
+#include
+
+#include
+
+BOOST_AUTO_TEST_SUITE(NeonJsonPrinter)
+
+BOOST_AUTO_TEST_CASE(SoftmaxProfilerJsonPrinterCpuAccTest)
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::CpuAcc};
+    SetupSoftmaxProfilerWithSpecifiedBackendsAndValidateJsonPrinterResult(backends);
+}
+
+BOOST_AUTO_TEST_SUITE_END()
\ No newline at end of file
diff --git a/src/backends/neon/test/NeonOptimizedNetworkTests.cpp b/src/backends/neon/test/NeonOptimizedNetworkTests.cpp
new file mode 100644
index 0000000000..ae657ba770
--- /dev/null
+++ b/src/backends/neon/test/NeonOptimizedNetworkTests.cpp
@@ -0,0 +1,70 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include
+#include
+#include
+
+#include
+
+#include
+
+BOOST_AUTO_TEST_SUITE(NeonOptimizedNetwork)
+
+BOOST_AUTO_TEST_CASE(OptimizeValidateCpuAccDeviceSupportLayerNoFallback)
+{
+    // build up the structure of the network
+    armnn::INetworkPtr net(armnn::INetwork::Create());
+
+    armnn::IConnectableLayer* input = net->AddInputLayer(0);
+    armnn::IConnectableLayer* output = net->AddOutputLayer(0);
+
+    input->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+    input->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32));
+
+    armnn::IRuntime::CreationOptions options;
+    armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
+
+    std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc };
+    armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec());
+    BOOST_CHECK(optNet);
+    // validate workloads
+    armnn::NeonWorkloadFactory fact;
+    for (auto&& layer : static_cast<armnn::OptimizedNetwork*>(optNet.get())->GetGraph())
+    {
+        BOOST_CHECK(layer->GetBackendId() == armnn::Compute::CpuAcc);
+        BOOST_CHECK_NO_THROW(
+            layer->CreateWorkload(static_cast<armnn::OptimizedNetwork*>(optNet.get())->GetGraph(), fact));
+    }
+}
+
+BOOST_AUTO_TEST_CASE(OptimizeValidateDeviceNonSupportLayerNoFallback)
+{
+    // build up the structure of the network
+    armnn::INetworkPtr net(armnn::INetwork::Create());
+
+    armnn::IConnectableLayer* input = net->AddInputLayer(0);
+
+    // This layer configuration isn't supported by CpuAcc and isn't allowed to fall back, so Optimize will return null.
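+    // (The unsupported configuration here is a default-constructed NormalizationDescriptor,
+    // and CpuAcc is the only backend in the preference list, so the layer cannot be assigned anywhere.)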
+ armnn::NormalizationDescriptor descriptor; + armnn::IConnectableLayer* normalize = net->AddNormalizationLayer(descriptor); + + armnn::IConnectableLayer* output = net->AddOutputLayer(0); + + input->GetOutputSlot(0).Connect(normalize->GetInputSlot(0)); + normalize->GetOutputSlot(0).Connect(output->GetInputSlot(0)); + + input->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32)); + normalize->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32)); + + armnn::IRuntime::CreationOptions options; + armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options)); + + std::vector backends = { armnn::Compute::CpuAcc }; + armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec()); + BOOST_CHECK(!optNet); +} + +BOOST_AUTO_TEST_SUITE_END() \ No newline at end of file diff --git a/src/backends/reference/backend.mk b/src/backends/reference/backend.mk index 455ab4618e..007efceb9b 100644 --- a/src/backends/reference/backend.mk +++ b/src/backends/reference/backend.mk @@ -65,6 +65,9 @@ BACKEND_SOURCES := \ BACKEND_TEST_SOURCES := \ test/RefCreateWorkloadTests.cpp \ + test/RefEndToEndTests.cpp \ + test/RefJsonPrinterTests.cpp \ test/RefLayerSupportTests.cpp \ test/RefLayerTests.cpp \ + test/RefOptimizedNetworkTests.cpp \ test/RefRuntimeTests.cpp diff --git a/src/backends/reference/test/CMakeLists.txt b/src/backends/reference/test/CMakeLists.txt index dea0ef6498..1eec594aa9 100644 --- a/src/backends/reference/test/CMakeLists.txt +++ b/src/backends/reference/test/CMakeLists.txt @@ -5,8 +5,11 @@ list(APPEND armnnRefBackendUnitTests_sources RefCreateWorkloadTests.cpp + RefEndToEndTests.cpp + RefJsonPrinterTests.cpp RefLayerSupportTests.cpp RefLayerTests.cpp + RefOptimizedNetworkTests.cpp RefRuntimeTests.cpp ) diff --git a/src/backends/reference/test/RefEndToEndTests.cpp b/src/backends/reference/test/RefEndToEndTests.cpp new file mode 100644 index 0000000000..8938d6f222 --- /dev/null +++ b/src/backends/reference/test/RefEndToEndTests.cpp @@ -0,0 +1,251 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include + +#include + +BOOST_AUTO_TEST_SUITE(RefEndToEnd) + +BOOST_AUTO_TEST_CASE(ConstantUsage_Ref_Float32) +{ + std::vector backends = {armnn::Compute::CpuRef}; + BOOST_TEST(ConstantUsageFloat32Test(backends)); +} + +BOOST_AUTO_TEST_CASE(ConstantUsage_Ref_Uint8) +{ + std::vector backends = {armnn::Compute::CpuRef}; + BOOST_TEST(ConstantUsageUint8Test(backends)); +} + +BOOST_AUTO_TEST_CASE(Unsigned8) +{ + using namespace armnn; + + // Create runtime in which test will run + armnn::IRuntime::CreationOptions options; + armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options)); + + // Builds up the structure of the network. + armnn::INetworkPtr net(INetwork::Create()); + + IConnectableLayer* input = net->AddInputLayer(0, "input"); + IConnectableLayer* softmax = net->AddSoftmaxLayer(SoftmaxDescriptor(), "softmax"); + IConnectableLayer* output = net->AddOutputLayer(0, "output"); + + input->GetOutputSlot(0).Connect(softmax->GetInputSlot(0)); + softmax->GetOutputSlot(0).Connect(output->GetInputSlot(0)); + + // Sets the tensors in the network. 
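+    // For QuantisedAsymm8, real value = scale * (quantized - offset); the extreme scale and
+    // offset chosen below make one dequantized input vastly larger than the others, which the
+    // saturation checks at the end of this test rely on.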
+ TensorInfo inputTensorInfo(TensorShape({1, 5}), DataType::QuantisedAsymm8); + inputTensorInfo.SetQuantizationOffset(100); + inputTensorInfo.SetQuantizationScale(10000.0f); + input->GetOutputSlot(0).SetTensorInfo(inputTensorInfo); + + TensorInfo outputTensorInfo(TensorShape({1, 5}), DataType::QuantisedAsymm8); + outputTensorInfo.SetQuantizationOffset(0); + outputTensorInfo.SetQuantizationScale(1.0f/255.0f); + softmax->GetOutputSlot(0).SetTensorInfo(outputTensorInfo); + + // optimize the network + std::vector backends = {armnn::Compute::CpuRef}; + IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec()); + + // Loads it into the runtime. + NetworkId netId; + auto error = runtime->LoadNetwork(netId, std::move(optNet)); + BOOST_TEST(error == Status::Success); + + // Creates structures for input & output. + std::vector inputData + { + 1, 10, 3, 200, 5 // Some inputs - one of which is sufficiently larger than the others to saturate softmax. + }; + std::vector outputData(5); + + armnn::InputTensors inputTensors + { + {0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())} + }; + armnn::OutputTensors outputTensors + { + {0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())} + }; + + // Does the inference. + runtime->EnqueueWorkload(netId, inputTensors, outputTensors); + + // Checks the results. + BOOST_TEST(outputData[0] == 0); + BOOST_TEST(outputData[1] == 0); + BOOST_TEST(outputData[2] == 0); + BOOST_TEST(outputData[3] == 255); // softmax has been saturated. + BOOST_TEST(outputData[4] == 0); +} + +BOOST_AUTO_TEST_CASE(TrivialAdd) +{ + // This test was designed to match "AddTwo" in android nn/runtime/test/TestTrivialModel.cpp. + + using namespace armnn; + + // Create runtime in which test will run + armnn::IRuntime::CreationOptions options; + armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options)); + + // Builds up the structure of the network. + armnn::INetworkPtr net(INetwork::Create()); + + IConnectableLayer* input1 = net->AddInputLayer(0); + IConnectableLayer* input2 = net->AddInputLayer(1); + IConnectableLayer* add = net->AddAdditionLayer(); + IConnectableLayer* output = net->AddOutputLayer(0); + + input1->GetOutputSlot(0).Connect(add->GetInputSlot(0)); + input2->GetOutputSlot(0).Connect(add->GetInputSlot(1)); + add->GetOutputSlot(0).Connect(output->GetInputSlot(0)); + + // Sets the tensors in the network. + TensorInfo tensorInfo(TensorShape({3, 4}), DataType::Float32); + input1->GetOutputSlot(0).SetTensorInfo(tensorInfo); + input2->GetOutputSlot(0).SetTensorInfo(tensorInfo); + add->GetOutputSlot(0).SetTensorInfo(tensorInfo); + + // optimize the network + std::vector backends = {armnn::Compute::CpuRef}; + IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec()); + + // Loads it into the runtime. + NetworkId netId; + runtime->LoadNetwork(netId, std::move(optNet)); + + // Creates structures for input & output - matching android nn test. 
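+    // Each expected output element checked below is the elementwise sum input1[i] + input2[i].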
+ std::vector input1Data + { + 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f + }; + std::vector input2Data + { + 100.f, 200.f, 300.f, 400.f, 500.f, 600.f, 700.f, 800.f, 900.f, 1000.f, 1100.f, 1200.f + }; + std::vector outputData(12); + + InputTensors inputTensors + { + {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), input1Data.data())}, + {1,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), input2Data.data())} + }; + OutputTensors outputTensors + { + {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())} + }; + + // Does the inference. + runtime->EnqueueWorkload(netId, inputTensors, outputTensors); + + // Checks the results + BOOST_TEST(outputData[0] == 101); + BOOST_TEST(outputData[1] == 202); + BOOST_TEST(outputData[2] == 303); + BOOST_TEST(outputData[3] == 404); + BOOST_TEST(outputData[4] == 505); + BOOST_TEST(outputData[5] == 606); + BOOST_TEST(outputData[6] == 707); + BOOST_TEST(outputData[7] == 808); + BOOST_TEST(outputData[8] == 909); + BOOST_TEST(outputData[9] == 1010); + BOOST_TEST(outputData[10] == 1111); + BOOST_TEST(outputData[11] == 1212); +} + +BOOST_AUTO_TEST_CASE(MultipleOutputs) +{ + using namespace armnn; + + // Create runtime in which test will run + armnn::IRuntime::CreationOptions options; + armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options)); + + // Builds up the structure of the network. + INetworkPtr net(INetwork::Create()); + + IConnectableLayer* input = net->AddInputLayer(0); + + // ReLu1 + ActivationDescriptor activation1Descriptor; + activation1Descriptor.m_Function = ActivationFunction::BoundedReLu; + activation1Descriptor.m_A = 1.f; + activation1Descriptor.m_B = -1.f; + IConnectableLayer* activation1 = net->AddActivationLayer(activation1Descriptor); + + // ReLu6 + ActivationDescriptor activation2Descriptor; + activation2Descriptor.m_Function = ActivationFunction::BoundedReLu; + activation2Descriptor.m_A = 6.0f; + IConnectableLayer* activation2 = net->AddActivationLayer(activation2Descriptor); + + // BoundedReLu(min=2, max=5) + ActivationDescriptor activation3Descriptor; + activation3Descriptor.m_Function = ActivationFunction::BoundedReLu; + activation3Descriptor.m_A = 5.0f; + activation3Descriptor.m_B = 2.0f; + IConnectableLayer* activation3 = net->AddActivationLayer(activation3Descriptor); + + IConnectableLayer* output1 = net->AddOutputLayer(0); + IConnectableLayer* output2 = net->AddOutputLayer(1); + IConnectableLayer* output3 = net->AddOutputLayer(2); + + input->GetOutputSlot(0).Connect(activation1->GetInputSlot(0)); + input->GetOutputSlot(0).Connect(activation2->GetInputSlot(0)); + input->GetOutputSlot(0).Connect(activation3->GetInputSlot(0)); + + activation1->GetOutputSlot(0).Connect(output1->GetInputSlot(0)); + activation2->GetOutputSlot(0).Connect(output2->GetInputSlot(0)); + activation3->GetOutputSlot(0).Connect(output3->GetInputSlot(0)); + + // Sets the tensors in the network. + TensorInfo tensorInfo(TensorShape({ 10 }), DataType::Float32); + input->GetOutputSlot(0).SetTensorInfo(tensorInfo); + activation1->GetOutputSlot(0).SetTensorInfo(tensorInfo); + activation2->GetOutputSlot(0).SetTensorInfo(tensorInfo); + activation3->GetOutputSlot(0).SetTensorInfo(tensorInfo); + + // optimize the network + std::vector backends = {armnn::Compute::CpuRef}; + IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec()); + + // Loads it into the runtime. + NetworkId netId; + runtime->LoadNetwork(netId, std::move(optNet)); + + // Creates structures for input & output. 
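+    // The same input goes through three BoundedReLu layers, so the outputs checked below are
+    // the input clamped to [-1, 1], [0, 6] and [2, 5] respectively.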
+ const std::vector inputData{ 3.f, 5.f, 2.f, 3.f, 7.f, 0.f, -2.f, -1.f, 3.f, 3.f }; + + std::vector output1Data(inputData.size()); + std::vector output2Data(inputData.size()); + std::vector output3Data(inputData.size()); + + InputTensors inputTensors + { + {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())} + }; + OutputTensors outputTensors + { + {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), output1Data.data())}, + {1,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 1), output2Data.data())}, + {2,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 2), output3Data.data())} + }; + + // Does the inference. + runtime->EnqueueWorkload(netId, inputTensors, outputTensors); + + // Checks the results. + BOOST_TEST(output1Data == std::vector({ 1.f, 1.f, 1.f, 1.f, 1.f, 0.f, -1.f, -1.f, 1.f, 1.f })); // ReLu1 + BOOST_TEST(output2Data == std::vector({ 3.f, 5.f, 2.f, 3.f, 6.f, 0.f, 0.f, 0.f, 3.f, 3.f })); // ReLu6 + BOOST_TEST(output3Data == std::vector({ 3.f, 5.f, 2.f, 3.f, 5.f, 2.f, 2.f, 2.f, 3.f, 3.f })); // [2, 5] +} + +BOOST_AUTO_TEST_SUITE_END() \ No newline at end of file diff --git a/src/backends/reference/test/RefJsonPrinterTests.cpp b/src/backends/reference/test/RefJsonPrinterTests.cpp new file mode 100644 index 0000000000..ee668a2513 --- /dev/null +++ b/src/backends/reference/test/RefJsonPrinterTests.cpp @@ -0,0 +1,22 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include + +#include + +#include + +#include + +BOOST_AUTO_TEST_SUITE(RefJsonPrinter) + +BOOST_AUTO_TEST_CASE(SoftmaxProfilerJsonPrinterCpuRefTest) +{ + std::vector backends = {armnn::Compute::CpuRef}; + SetupSoftmaxProfilerWithSpecifiedBackendsAndValidateJsonPrinterResult(backends); +} + +BOOST_AUTO_TEST_SUITE_END() \ No newline at end of file diff --git a/src/backends/reference/test/RefOptimizedNetworkTests.cpp b/src/backends/reference/test/RefOptimizedNetworkTests.cpp new file mode 100644 index 0000000000..63615e6859 --- /dev/null +++ b/src/backends/reference/test/RefOptimizedNetworkTests.cpp @@ -0,0 +1,212 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include +#include +#include + +#include + +#include + +BOOST_AUTO_TEST_SUITE(RefOptimizedNetwork) + +BOOST_AUTO_TEST_CASE(OptimizeValidateCpuRefWorkloads) +{ + const armnn::TensorInfo desc({3, 5}, armnn::DataType::Float32); + + armnn::Network net; + + armnn::NormalizationDescriptor nmDesc; + armnn::ActivationDescriptor acDesc; + + // in + // | + // nm + // / | + // ac | + // \ | + // ml + // | + // sm + // | + // ot + armnn::IConnectableLayer* layer = net.AddInputLayer(0, "in"); + layer->GetOutputSlot(0).SetTensorInfo(desc); + + armnn::IConnectableLayer* const normLayer = net.AddNormalizationLayer(nmDesc, "nm"); + + layer->GetOutputSlot(0).Connect(normLayer->GetInputSlot(0)); + normLayer->GetOutputSlot(0).SetTensorInfo(desc); + + layer = net.AddActivationLayer(acDesc, "ac"); + + normLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0)); + layer->GetOutputSlot(0).SetTensorInfo(desc); + + armnn::IConnectableLayer* prevLayer = layer; + layer = net.AddMultiplicationLayer("ml"); + + prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0)); + normLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1)); + layer->GetOutputSlot(0).SetTensorInfo(desc); + + prevLayer = layer; + armnn::SoftmaxDescriptor softmaxDescriptor; + layer = net.AddSoftmaxLayer(softmaxDescriptor, "sm"); + + prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0)); + layer->GetOutputSlot(0).SetTensorInfo(desc); + + prevLayer = layer; + layer = net.AddOutputLayer(0, "ot"); + + prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0)); + + armnn::IRuntime::CreationOptions options; + armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options)); + + std::vector backends = { armnn::Compute::CpuRef }; + armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(net, backends, runtime->GetDeviceSpec()); + static_cast(optNet.get())->GetGraph().AllocateDynamicBuffers(); + BOOST_CHECK(optNet); + + // Validates workloads. 
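+    // Creating a workload for every layer with the reference factory verifies that each
+    // optimized layer can actually be executed on CpuRef.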
+    armnn::RefWorkloadFactory fact;
+    for (auto&& layer : static_cast<armnn::OptimizedNetwork*>(optNet.get())->GetGraph())
+    {
+        BOOST_CHECK_NO_THROW(
+            layer->CreateWorkload(static_cast<armnn::OptimizedNetwork*>(optNet.get())->GetGraph(), fact));
+    }
+}
+
+BOOST_AUTO_TEST_CASE(OptimizeValidateWorkloadsCpuRefPermuteLayer)
+{
+    // Create runtime in which test will run
+    armnn::IRuntime::CreationOptions options;
+    armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
+
+    std::vector<armnn::BackendId> backends = {armnn::Compute::CpuRef};
+
+    // build up the structure of the network
+    armnn::INetworkPtr net(armnn::INetwork::Create());
+
+    armnn::IConnectableLayer* input = net->AddInputLayer(0);
+
+    armnn::PermuteDescriptor descriptor({0, 2, 3, 1});
+    armnn::IConnectableLayer* permute = net->AddPermuteLayer(descriptor);
+
+    armnn::IConnectableLayer* output = net->AddOutputLayer(0);
+
+    input->GetOutputSlot(0).Connect(permute->GetInputSlot(0));
+    permute->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+    input->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32));
+    permute->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 4, 1, 4 }, armnn::DataType::Float32));
+
+    // optimize the network
+    armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec());
+
+    for (auto&& layer : static_cast<armnn::OptimizedNetwork*>(optNet.get())->GetGraph())
+    {
+        BOOST_CHECK(layer->GetBackendId() == armnn::Compute::CpuRef);
+    }
+}
+
+BOOST_AUTO_TEST_CASE(OptimizeValidateWorkloadsCpuRefMeanLayer)
+{
+    // Create runtime in which test will run
+    armnn::IRuntime::CreationOptions options;
+    armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
+
+    std::vector<armnn::BackendId> backends = {armnn::Compute::CpuRef};
+
+    // build up the structure of the network
+    armnn::INetworkPtr net(armnn::INetwork::Create());
+
+    armnn::IConnectableLayer* input = net->AddInputLayer(0);
+
+    armnn::MeanDescriptor descriptor({ 0, 1 }, false);
+    armnn::IConnectableLayer* meanLayer = net->AddMeanLayer(descriptor);
+
+    armnn::IConnectableLayer* output = net->AddOutputLayer(0);
+
+    input->GetOutputSlot(0).Connect(meanLayer->GetInputSlot(0));
+    meanLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+    input->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 4, 3, 2 }, armnn::DataType::Float32));
+    meanLayer->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 2 }, armnn::DataType::Float32));
+
+    // optimize the network
+    armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec());
+
+    for (auto&& layer : static_cast<armnn::OptimizedNetwork*>(optNet.get())->GetGraph())
+    {
+        BOOST_CHECK(layer->GetBackendId() == armnn::Compute::CpuRef);
+    }
+}
+
+BOOST_AUTO_TEST_CASE(FP16TurboModeTestOnCpuRef)
+{
+    // Tests that, when the FP16 turbo mode (m_ReduceFp32ToFp16) is set, the optimizer converts
+    // the FP32 network to FP16: it inserts an Fp32ToFp16 conversion layer after the InputLayer
+    // and an Fp16ToFp32 conversion layer before the OutputLayer, then checks every other layer
+    // for FP16 support. Layers without FP16 support are kept in FP32, with conversion layers
+    // inserted around them, and any resulting pairs of inverse conversion layers are removed
+    // by the optimizer. Floor is currently not supported in FP16, so it rolls back to FP32
+    // and the resulting inverse conversion layers are removed by the optimizer.
+    armnn::Network net;
+
+    // Defines layers.
+    auto input = net.AddInputLayer(0);
+    auto floor = net.AddFloorLayer();
+    auto output = net.AddOutputLayer(0);
+
+    // Connects layers.
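+    // Topology: input -> floor -> output. Because Floor rolls back to FP32, the inserted
+    // conversion layers are expected to cancel out and be removed, leaving no conversion
+    // layers in the serialized graph checked below.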
+ input->GetOutputSlot(0).Connect(floor->GetInputSlot(0)); + floor->GetOutputSlot(0).Connect(output->GetInputSlot(0)); + + armnn::TensorShape shape({4}); + armnn::TensorInfo info(shape, armnn::DataType::Float32); + input->GetOutputSlot(0).SetTensorInfo(info); + floor->GetOutputSlot(0).SetTensorInfo(info); + + armnn::IRuntime::CreationOptions options; + armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options)); + + std::vector backends = {armnn::Compute::CpuRef}; + + armnn::OptimizerOptions optimizerOptions; + optimizerOptions.m_ReduceFp32ToFp16 = true; + + armnn::IOptimizedNetworkPtr optimizedNet = armnn::Optimize(net, backends, runtime->GetDeviceSpec(), + optimizerOptions); + + std::ostringstream ss; + optimizedNet->SerializeToDot(ss); + + auto inputId = input->GetGuid(); + auto floorId = floor->GetGuid(); + auto outputId = output->GetGuid(); + + std::stringstream expected; + expected << + "digraph Optimized {\n" + " node [shape=\"record\"];\n" + " edge [fontsize=8 fontcolor=\"blue\" fontname=\"arial-bold\"];\n" + " " << inputId << " [label=\"{Input}\"];\n" + " " << floorId << " [label=\"{Floor}\"];\n" + " " << outputId << " [label=\"{Output}\"];\n" + " " << inputId << " -> " << floorId << " [label=< [4] >];\n" + " " << floorId << " -> " << outputId << " [label=< [4] >];\n" + "}\n"; + + BOOST_TEST(ss.str() == expected.str()); +} + +BOOST_AUTO_TEST_SUITE_END() diff --git a/src/backends/test/EndToEndTestImpl.hpp b/src/backends/test/EndToEndTestImpl.hpp new file mode 100644 index 0000000000..5f17f782f3 --- /dev/null +++ b/src/backends/test/EndToEndTestImpl.hpp @@ -0,0 +1,102 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#include + +#include + +#include + +namespace +{ + +using namespace armnn; + +template +bool ConstantUsageTest(const std::vector& computeDevice, + const TensorInfo& commonTensorInfo, + const std::vector& inputData, + const std::vector& constantData, + const std::vector& expectedOutputData) +{ + // Create runtime in which test will run + IRuntime::CreationOptions options; + IRuntimePtr runtime(IRuntime::Create(options)); + + // Builds up the structure of the network. + INetworkPtr net(INetwork::Create()); + + IConnectableLayer* input = net->AddInputLayer(0); + IConnectableLayer* constant = net->AddConstantLayer(ConstTensor(commonTensorInfo, constantData)); + IConnectableLayer* add = net->AddAdditionLayer(); + IConnectableLayer* output = net->AddOutputLayer(0); + + input->GetOutputSlot(0).Connect(add->GetInputSlot(0)); + constant->GetOutputSlot(0).Connect(add->GetInputSlot(1)); + add->GetOutputSlot(0).Connect(output->GetInputSlot(0)); + + // Sets the tensors in the network. + input->GetOutputSlot(0).SetTensorInfo(commonTensorInfo); + constant->GetOutputSlot(0).SetTensorInfo(commonTensorInfo); + add->GetOutputSlot(0).SetTensorInfo(commonTensorInfo); + + // optimize the network + IOptimizedNetworkPtr optNet = Optimize(*net, computeDevice, runtime->GetDeviceSpec()); + + // Loads it into the runtime. + NetworkId netId; + runtime->LoadNetwork(netId, std::move(optNet)); + + // Creates structures for input & output. + std::vector outputData(inputData.size()); + + InputTensors inputTensors + { + {0, ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())} + }; + OutputTensors outputTensors + { + {0, Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())} + }; + + // Does the inference. + runtime->EnqueueWorkload(netId, inputTensors, outputTensors); + + // Checks the results. 
+    return outputData == expectedOutputData;
+}
+
+inline bool ConstantUsageFloat32Test(const std::vector<BackendId>& backends)
+{
+    const TensorInfo commonTensorInfo({ 2, 3 }, DataType::Float32);
+
+    return ConstantUsageTest(backends,
+                             commonTensorInfo,
+                             std::vector<float>{ 1.f, 2.f, 3.f, 4.f, 5.f, 6.f }, // Input.
+                             std::vector<float>{ 6.f, 5.f, 4.f, 3.f, 2.f, 1.f }, // Const input.
+                             std::vector<float>{ 7.f, 7.f, 7.f, 7.f, 7.f, 7.f }  // Expected output.
+    );
+}
+
+inline bool ConstantUsageUint8Test(const std::vector<BackendId>& backends)
+{
+    TensorInfo commonTensorInfo({ 2, 3 }, DataType::QuantisedAsymm8);
+
+    const float scale = 0.023529f;
+    const int8_t offset = -43;
+
+    commonTensorInfo.SetQuantizationScale(scale);
+    commonTensorInfo.SetQuantizationOffset(offset);
+
+    return ConstantUsageTest(backends,
+                             commonTensorInfo,
+                             QuantizedVector<uint8_t>(scale, offset, { 1.f, 2.f, 3.f, 4.f, 5.f, 6.f }), // Input.
+                             QuantizedVector<uint8_t>(scale, offset, { 6.f, 5.f, 4.f, 3.f, 2.f, 1.f }), // Const input.
+                             QuantizedVector<uint8_t>(scale, offset, { 7.f, 7.f, 7.f, 7.f, 7.f, 7.f })  // Expected output.
+    );
+}
+
+} // anonymous namespace
\ No newline at end of file
diff --git a/src/backends/test/JsonPrinterTestImpl.hpp b/src/backends/test/JsonPrinterTestImpl.hpp
new file mode 100644
index 0000000000..47e0ec761b
--- /dev/null
+++ b/src/backends/test/JsonPrinterTestImpl.hpp
@@ -0,0 +1,354 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+inline bool AreMatchingPair(const char opening, const char closing)
+{
+    return (opening == '{' && closing == '}') || (opening == '[' && closing == ']');
+}
+
+inline bool AreParenthesesMatching(const std::string& exp)
+{
+    std::stack<char> expStack;
+    for (size_t i = 0; i < exp.length(); ++i)
+    {
+        if (exp[i] == '{' || exp[i] == '[')
+        {
+            expStack.push(exp[i]);
+        }
+        else if (exp[i] == '}' || exp[i] == ']')
+        {
+            if (expStack.empty() || !AreMatchingPair(expStack.top(), exp[i]))
+            {
+                return false;
+            }
+            else
+            {
+                expStack.pop();
+            }
+        }
+    }
+    return expStack.empty();
+}
+
+inline std::vector<double> ExtractMeasurements(const std::string& exp)
+{
+    std::vector<double> numbers;
+    bool inArray = false;
+    std::string numberString;
+    for (size_t i = 0; i < exp.size(); ++i)
+    {
+        if (exp[i] == '[')
+        {
+            inArray = true;
+        }
+        else if (exp[i] == ']' && inArray)
+        {
+            try
+            {
+                boost::trim_if(numberString, boost::is_any_of("\t,\n"));
+                numbers.push_back(std::stod(numberString));
+            }
+            catch (std::invalid_argument const& e)
+            {
+                BOOST_FAIL("Could not convert measurements to double: " + numberString);
+            }
+
+            numberString.clear();
+            inArray = false;
+        }
+        else if (exp[i] == ',' && inArray)
+        {
+            try
+            {
+                boost::trim_if(numberString, boost::is_any_of("\t,\n"));
+                numbers.push_back(std::stod(numberString));
+            }
+            catch (std::invalid_argument const& e)
+            {
+                BOOST_FAIL("Could not convert measurements to double: " + numberString);
+            }
+            numberString.clear();
+        }
+        else if (exp[i] != '[' && inArray && exp[i] != ',' && exp[i] != ' ')
+        {
+            numberString += exp[i];
+        }
+    }
+    return numbers;
+}
+
+inline std::vector<std::string> ExtractSections(const std::string& exp)
+{
+    std::vector<std::string> sections;
+
+    std::stack<size_t> s;
+    for (size_t i = 0; i < exp.size(); i++)
+    {
+        if (exp.at(i) == '{')
+        {
+            s.push(i);
+        }
+        else if (exp.at(i) == '}')
+        {
+            size_t from = s.top();
+            s.pop();
+            sections.push_back(exp.substr(from, i - from + 1));
+        }
+    }
+
+    return sections;
+}
+
+inline std::string SoftmaxProfilerTestSetupHelper(const
std::vector& backends) +{ + using namespace armnn; + + BOOST_CHECK(!backends.empty()); + + ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance(); + + // Create runtime in which test will run + IRuntime::CreationOptions options; + options.m_EnableGpuProfiling = backends.front() == armnn::Compute::GpuAcc; + IRuntimePtr runtime(IRuntime::Create(options)); + + // build up the structure of the network + INetworkPtr net(INetwork::Create()); + + IConnectableLayer* input = net->AddInputLayer(0, "input"); + IConnectableLayer* softmax = net->AddSoftmaxLayer(SoftmaxDescriptor(), "softmax"); + IConnectableLayer* output = net->AddOutputLayer(0, "output"); + + input->GetOutputSlot(0).Connect(softmax->GetInputSlot(0)); + softmax->GetOutputSlot(0).Connect(output->GetInputSlot(0)); + + // set the tensors in the network + TensorInfo inputTensorInfo(TensorShape({1, 5}), DataType::QuantisedAsymm8); + inputTensorInfo.SetQuantizationOffset(100); + inputTensorInfo.SetQuantizationScale(10000.0f); + input->GetOutputSlot(0).SetTensorInfo(inputTensorInfo); + + TensorInfo outputTensorInfo(TensorShape({1, 5}), DataType::QuantisedAsymm8); + outputTensorInfo.SetQuantizationOffset(0); + outputTensorInfo.SetQuantizationScale(1.0f / 256.0f); + softmax->GetOutputSlot(0).SetTensorInfo(outputTensorInfo); + + // optimize the network + IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec()); + if(!optNet) + { + BOOST_FAIL("Error occurred during Optimization, Optimize() returned nullptr."); + } + // load it into the runtime + NetworkId netId; + auto error = runtime->LoadNetwork(netId, std::move(optNet)); + BOOST_TEST(error == Status::Success); + + // create structures for input & output + std::vector inputData + { + 1, 10, 3, 200, 5 + // one of inputs is sufficiently larger than the others to saturate softmax + }; + std::vector outputData(5); + + armnn::InputTensors inputTensors + { + {0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())} + }; + armnn::OutputTensors outputTensors + { + {0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())} + }; + + runtime->GetProfiler(netId)->EnableProfiling(true); + + // do the inferences + runtime->EnqueueWorkload(netId, inputTensors, outputTensors); + runtime->EnqueueWorkload(netId, inputTensors, outputTensors); + runtime->EnqueueWorkload(netId, inputTensors, outputTensors); + + // retrieve the Profiler.Print() output + std::stringstream ss; + profilerManager.GetProfiler()->Print(ss); + + return ss.str(); +} + +inline void SoftmaxProfilerTestValidationHelper(std::string& result, const std::string& testData) +{ + // ensure all measurements are greater than zero + std::vector measurementsVector = ExtractMeasurements(result); + BOOST_CHECK(!measurementsVector.empty()); + + // check sections contain raw and unit tags + // first ensure Parenthesis are balanced + if (AreParenthesesMatching(result)) + { + // remove parent sections that will not have raw or unit tag + std::vector sectionVector = ExtractSections(result); + for (size_t i = 0; i < sectionVector.size(); ++i) + { + if (boost::contains(sectionVector[i], "\"ArmNN\":") + || boost::contains(sectionVector[i], "\"inference_measurements\":")) + { + sectionVector.erase(sectionVector.begin() + static_cast(i)); + } + } + BOOST_CHECK(!sectionVector.empty()); + + BOOST_CHECK(std::all_of(sectionVector.begin(), sectionVector.end(), + [](std::string i) { return boost::contains(i, "\"raw\":"); })); + + BOOST_CHECK(std::all_of(sectionVector.begin(), 
sectionVector.end(), + [](std::string i) { return boost::contains(i, "\"unit\":"); })); + } + + // remove the time measurements as they vary from test to test + result.erase(std::remove_if (result.begin(),result.end(), + [](char c) { return c == '.'; }), result.end()); + result.erase(std::remove_if (result.begin(), result.end(), &isdigit), result.end()); + result.erase(std::remove_if (result.begin(),result.end(), + [](char c) { return c == '\t'; }), result.end()); + + BOOST_CHECK(boost::contains(result, "ArmNN")); + BOOST_CHECK(boost::contains(result, "inference_measurements")); + BOOST_CHECK(boost::contains(result, "layer_measurements")); + BOOST_CHECK_EQUAL(result, testData); + + // ensure no spare parenthesis present in print output + BOOST_CHECK(AreParenthesesMatching(result)); +} + +inline void SetupSoftmaxProfilerWithSpecifiedBackendsAndValidateJsonPrinterResult( + const std::vector& backends) +{ + // setup the test fixture and obtain JSON Printer result + std::string result = SoftmaxProfilerTestSetupHelper(backends); + + std::string backend = "Ref"; + std::string changeLine31 = "\n},\n\"CopyMemGeneric_Execute\": {"; + std::string changeLine39 = "us\""; + std::string changeLine40; + std::string changeLine45; + + if (backends[0] == armnn::Compute::GpuAcc) { + backend = "Cl"; + changeLine31 = ",\n\"OpenClKernelTimer/: softmax_layer_max_shift_exp_sum_quantized_serial GWS[,,]\": {"; + changeLine39 = R"(us" +}, +"OpenClKernelTimer/: softmax_layer_norm_quantized GWS[,,]": { +"raw": [ +, +, + +], +"unit": "us")"; + + changeLine40 = R"( +}, +"CopyMemGeneric_Execute": { +"raw": [ +, +, + +], +"unit": "us")"; + changeLine45 = "}\n"; + } + else if (backends[0] == armnn::Compute::CpuAcc) + { + backend = "Neon"; + changeLine31 = ",\n\"NeonKernelTimer/: NEFillBorderKernel\": {"; + changeLine39 = R"(us" +}, +"NeonKernelTimer/: NELogitsDMaxKernel": { +"raw": [ +, +, + +], +"unit": "us" +}, +"NeonKernelTimer/: NELogitsDSoftmaxKernel": { +"raw": [ +, +, + +], +"unit": "us")"; + changeLine40 = R"( +}, +"CopyMemGeneric_Execute": { +"raw": [ +, +, + +], +"unit": "us")"; + changeLine45 = "}\n"; + } + + std::string testData = R"({ +"ArmNN": { +"inference_measurements": { +"raw": [ +, +, + +], +"unit": "us", +"layer_measurements": { +"raw": [ +, +, + +], +"unit": "us", +"CopyMemGeneric_Execute": { +"raw": [ +, +, + +], +"unit": "us" +}, +")" + backend + R"(SoftmaxUintWorkload_Execute": { +"raw": [ +, +, + +], +"unit": "us")" + changeLine31 + R"( +"raw": [ +, +, + +], +"unit": ")" + changeLine39 + R"( +})" + changeLine40 + R"( +} +} +} +} +)" + changeLine45 + R"()"; + + // validate the JSON Printer result + SoftmaxProfilerTestValidationHelper(result, testData); +} diff --git a/src/backends/test/OptimizedNetworkTests.cpp b/src/backends/test/OptimizedNetworkTests.cpp new file mode 100644 index 0000000000..72a35f99e0 --- /dev/null +++ b/src/backends/test/OptimizedNetworkTests.cpp @@ -0,0 +1,329 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include +#include +#include + +#include + +#include + +BOOST_AUTO_TEST_SUITE(OptimizedNetwork) + +BOOST_AUTO_TEST_CASE(SerializeToDot) +{ + armnn::Network net; + + //Defines layers. + auto input = net.AddInputLayer(0); + auto add = net.AddAdditionLayer(); + auto output = net.AddOutputLayer(0); + + // Connects layers. 
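+    // Both addition inputs are fed from the same output slot of the input layer, which is why
+    // the expected dot output below contains two identical input -> addition edges.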
+    input->GetOutputSlot(0).Connect(add->GetInputSlot(0));
+    input->GetOutputSlot(0).Connect(add->GetInputSlot(1));
+    add->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+    armnn::TensorShape shape({4});
+    armnn::TensorInfo info(shape, armnn::DataType::Float32);
+    input->GetOutputSlot(0).SetTensorInfo(info);
+    add->GetOutputSlot(0).SetTensorInfo(info);
+
+    armnn::IRuntime::CreationOptions options;
+    armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
+
+    std::vector<armnn::BackendId> backends = {armnn::Compute::CpuRef};
+    armnn::IOptimizedNetworkPtr optimizedNet = armnn::Optimize(net, backends, runtime->GetDeviceSpec());
+
+    std::ostringstream ss;
+    optimizedNet->SerializeToDot(ss);
+
+    auto inputId = input->GetGuid();
+    auto addId = add->GetGuid();
+    auto outputId = output->GetGuid();
+
+    std::stringstream expected;
+    expected <<
+        "digraph Optimized {\n"
+        "    node [shape=\"record\"];\n"
+        "    edge [fontsize=8 fontcolor=\"blue\" fontname=\"arial-bold\"];\n"
+        "    " << inputId << " [label=\"{Input}\"];\n"
+        "    " << addId << " [label=\"{Addition}\"];\n"
+        "    " << outputId << " [label=\"{Output}\"];\n"
+        "    " << inputId << " -> " << addId << " [label=< [4] >];\n"
+        "    " << inputId << " -> " << addId << " [label=< [4] >];\n"
+        "    " << addId << " -> " << outputId << " [label=< [4] >];\n"
+        "}\n";
+
+    BOOST_TEST(ss.str() == expected.str());
+}
+
+BOOST_AUTO_TEST_CASE(OptimizeValidateDeviceNonSupportLayerNoFallback)
+{
+    // build up the structure of the network
+    armnn::INetworkPtr net(armnn::INetwork::Create());
+
+    armnn::IConnectableLayer* input = net->AddInputLayer(0);
+
+    // This layer configuration isn't supported by CpuAcc and isn't allowed to fall back, so Optimize will return null.
+    armnn::NormalizationDescriptor descriptor;
+    armnn::IConnectableLayer* normalize = net->AddNormalizationLayer(descriptor);
+
+    armnn::IConnectableLayer* output = net->AddOutputLayer(0);
+
+    input->GetOutputSlot(0).Connect(normalize->GetInputSlot(0));
+    normalize->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+    input->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32));
+    normalize->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32));
+
+    armnn::IRuntime::CreationOptions options;
+    armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
+
+    std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc };
+    armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec());
+    BOOST_CHECK(!optNet);
+}
+
+BOOST_AUTO_TEST_CASE(OptimizeValidateDeviceNonSupportLayerWithFallback)
+{
+    // build up the structure of the network
+    armnn::INetworkPtr net(armnn::INetwork::Create());
+
+    armnn::IConnectableLayer* input = net->AddInputLayer(0);
+
+    // This layer configuration isn't supported by CpuAcc, but fallback to CpuRef is allowed.
+    armnn::NormalizationDescriptor descriptor;
+    armnn::IConnectableLayer* normalize = net->AddNormalizationLayer(descriptor);
+
+    armnn::IConnectableLayer* output = net->AddOutputLayer(0);
+
+    input->GetOutputSlot(0).Connect(normalize->GetInputSlot(0));
+    normalize->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+    input->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32));
+    normalize->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32));
+
+    armnn::IRuntime::CreationOptions options;
+    armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
+
+    std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc, armnn::Compute::CpuRef };
+    armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec());
+    BOOST_REQUIRE(optNet);
+
+    for (auto&& layer : static_cast<armnn::OptimizedNetwork*>(optNet.get())->GetGraph())
+    {
+        // If NEON is enabled, Input and Output layers are supported by CpuAcc,
+        // the other layers are supported by CpuRef.
+        // If NEON is not enabled, all layers are supported by CpuRef.
+#if ARMCOMPUTENEON_ENABLED
+        if (layer->GetType() == armnn::LayerType::Input || layer->GetType() == armnn::LayerType::Output)
+        {
+            BOOST_CHECK(layer->GetBackendId() == armnn::Compute::CpuAcc);
+        }
+        else if (layer->GetType() == armnn::LayerType::Normalization)
+        {
+            BOOST_CHECK(layer->GetBackendId() == armnn::Compute::CpuRef);
+        }
+#else
+        BOOST_CHECK(layer->GetBackendId() == armnn::Compute::CpuRef);
+#endif
+    }
+}
+
+BOOST_AUTO_TEST_CASE(OptimizeValidateWorkloadsUndefinedComputeDevice)
+{
+    const armnn::TensorInfo desc({3, 5}, armnn::DataType::Float32);
+
+    armnn::Network net;
+
+    armnn::NormalizationDescriptor nmDesc;
+    armnn::ActivationDescriptor acDesc;
+
+    //    in
+    //     |
+    //    nm
+    //   /  |
+    //  ac  |
+    //   \  |
+    //    ml
+    //     |
+    //    sm
+    //     |
+    //    ot
+    armnn::IConnectableLayer* layer = net.AddInputLayer(0, "in");
+    layer->GetOutputSlot(0).SetTensorInfo(desc);
+
+    armnn::IConnectableLayer* const normLayer = net.AddNormalizationLayer(nmDesc, "nm");
+
+    layer->GetOutputSlot(0).Connect(normLayer->GetInputSlot(0));
+    normLayer->GetOutputSlot(0).SetTensorInfo(desc);
+
+    layer = net.AddActivationLayer(acDesc, "ac");
+
+    normLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
+    layer->GetOutputSlot(0).SetTensorInfo(desc);
+
+    armnn::IConnectableLayer* prevLayer = layer;
+    layer = net.AddMultiplicationLayer("ml");
+
+    prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
+    normLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1));
+    layer->GetOutputSlot(0).SetTensorInfo(desc);
+
+    prevLayer = layer;
+    armnn::SoftmaxDescriptor softmaxDescriptor;
+    layer = net.AddSoftmaxLayer(softmaxDescriptor, "sm");
+
+    prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
+    layer->GetOutputSlot(0).SetTensorInfo(desc);
+
+    prevLayer = layer;
+    layer = net.AddOutputLayer(0, "ot");
+
+    prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
+
+    armnn::IRuntime::CreationOptions options;
+    armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
+
+    std::vector<armnn::BackendId> backends = { armnn::Compute::Undefined };
+
+    armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(net, backends, runtime->GetDeviceSpec());
+    BOOST_CHECK(!optNet);
+}
+
+BOOST_AUTO_TEST_CASE(OptimizeValidateWorkloadsUndefinedComputeDeviceWithFallback)
+{
+    const armnn::TensorInfo desc({3, 5}, armnn::DataType::Float32);
+
+    armnn::Network net;
+
+    armnn::NormalizationDescriptor nmDesc;
+    armnn::ActivationDescriptor acDesc;
+
+    //    in
+    //     |
+    //    nm
+    //   /  |
+    //  ac  |
+    //   \  |
+    //    ml
+    //     |
+    //    sm
+    //     |
+    //    ot
+    armnn::IConnectableLayer* layer = net.AddInputLayer(0, "in");
+    layer->GetOutputSlot(0).SetTensorInfo(desc);
+
+    armnn::IConnectableLayer* const normLayer = net.AddNormalizationLayer(nmDesc, "nm");
+
+    layer->GetOutputSlot(0).Connect(normLayer->GetInputSlot(0));
+    normLayer->GetOutputSlot(0).SetTensorInfo(desc);
+
+    layer = net.AddActivationLayer(acDesc, "ac");
+
+    normLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
+    layer->GetOutputSlot(0).SetTensorInfo(desc);
+
+    armnn::IConnectableLayer* prevLayer = layer;
+    layer = net.AddMultiplicationLayer("ml");
+
+    prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
+    normLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1));
+    layer->GetOutputSlot(0).SetTensorInfo(desc);
+
+    prevLayer = layer;
+    armnn::SoftmaxDescriptor softmaxDescriptor;
+    layer = net.AddSoftmaxLayer(softmaxDescriptor, "sm");
+
+    prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
+    layer->GetOutputSlot(0).SetTensorInfo(desc);
+
+    prevLayer = layer;
+    layer = net.AddOutputLayer(0, "ot");
+
+    prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
+
+    armnn::IRuntime::CreationOptions options;
+    armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
+
+    std::vector<armnn::BackendId> backends = { armnn::Compute::Undefined, armnn::Compute::CpuRef };
+
+    armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(net, backends, runtime->GetDeviceSpec());
+    BOOST_CHECK(optNet);
+
+    // validate workloads
+    armnn::RefWorkloadFactory fact;
+    for (auto&& layer : static_cast<armnn::OptimizedNetwork*>(optNet.get())->GetGraph())
+    {
+        BOOST_CHECK(layer->GetBackendId() == armnn::Compute::CpuRef);
+        BOOST_CHECK_NO_THROW(
+            layer->CreateWorkload(static_cast<armnn::OptimizedNetwork*>(optNet.get())->GetGraph(), fact));
+    }
+}
+
+BOOST_AUTO_TEST_CASE(OptimizeValidateWorkloadsDuplicateComputeDeviceWithFallback)
+{
+    // build up the structure of the network
+    armnn::INetworkPtr net(armnn::INetwork::Create());
+
+    armnn::IConnectableLayer* input = net->AddInputLayer(0);
+
+    // This layer configuration isn't supported by CpuAcc, but fallback to CpuRef is allowed.
+    armnn::NormalizationDescriptor descriptor;
+    armnn::IConnectableLayer* normalize = net->AddNormalizationLayer(descriptor);
+
+    armnn::IConnectableLayer* output = net->AddOutputLayer(0);
+
+    input->GetOutputSlot(0).Connect(normalize->GetInputSlot(0));
+    normalize->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+    input->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32));
+    normalize->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32));
+
+    armnn::IRuntime::CreationOptions options;
+    armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
+
+    std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc,
+                                               armnn::Compute::GpuAcc,
+                                               armnn::Compute::CpuRef };
+
+    armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec());
+    BOOST_REQUIRE(optNet);
+
+    for (auto&& layer : static_cast<armnn::OptimizedNetwork*>(optNet.get())->GetGraph())
+    {
+        // If NEON is enabled, Input and Output layers are supported by CpuAcc,
+        // the other layers are supported by CpuRef.
+        // If only CL is enabled, Input and Output layers are supported by GpuAcc,
+        // the other layers are supported by CpuRef.
+        // If neither NEON nor CL is enabled, all layers are supported by CpuRef.
+#if ARMCOMPUTENEON_ENABLED + if (layer->GetType() == armnn::LayerType::Input || layer->GetType() == armnn::LayerType::Output) + { + BOOST_CHECK(layer->GetBackendId() == armnn::Compute::CpuAcc); + } + else if (layer->GetType() == armnn::LayerType::Normalization) + { + BOOST_CHECK(layer->GetBackendId() == armnn::Compute::CpuRef); + } +#elif ARMCOMPUTECL_ENABLED + if (layer->GetType() == armnn::LayerType::Input || layer->GetType() == armnn::LayerType::Output) + { + BOOST_CHECK(layer->GetBackendId() == armnn::Compute::GpuAcc); + } + else if (layer->GetType() == armnn::LayerType::Normalization) + { + BOOST_CHECK(layer->GetBackendId() == armnn::Compute::CpuRef); + } +#else + BOOST_CHECK(layer->GetBackendId() == armnn::Compute::CpuRef); +#endif + } +} + +BOOST_AUTO_TEST_SUITE_END() -- cgit v1.2.1
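Note for implementers of new backends: after this change a per-backend test file is just a
thin wrapper that points the shared implementations under src/backends/test at one Compute
id. As a sketch, an end-to-end test file for a hypothetical "Sample" backend could look like
the following; the backend name, the CpuRef stand-in id and the exact include paths are
illustrative assumptions, not part of this patch:

    // SampleEndToEndTests.cpp (hypothetical)
    #include <backends/test/EndToEndTestImpl.hpp> // shared helpers added by this patch (path assumed)

    #include <boost/test/unit_test.hpp>

    BOOST_AUTO_TEST_SUITE(SampleEndToEnd)

    // Reuses the shared constant-usage helper against this backend's Compute id.
    BOOST_AUTO_TEST_CASE(ConstantUsage_Sample_Float32)
    {
        std::vector<armnn::BackendId> backends = {armnn::Compute::CpuRef}; // stand-in Compute id
        BOOST_TEST(ConstantUsageFloat32Test(backends));
    }

    BOOST_AUTO_TEST_SUITE_END()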