From 70104000ddcf3bc1a1d21f16d1468456ca17b80a Mon Sep 17 00:00:00 2001 From: Aron Virginas-Tar Date: Wed, 24 Oct 2018 15:33:28 +0100 Subject: IVGCVSW-2073: Move remaining backend-specific tests from armnn to backends Change-Id: I45fd5b6dd32c435b78a54dc377a623e60978ce13 --- src/backends/test/EndToEndTestImpl.hpp | 102 ++++++++ src/backends/test/JsonPrinterTestImpl.hpp | 354 ++++++++++++++++++++++++++++ src/backends/test/OptimizedNetworkTests.cpp | 329 ++++++++++++++++++++++++++ 3 files changed, 785 insertions(+) create mode 100644 src/backends/test/EndToEndTestImpl.hpp create mode 100644 src/backends/test/JsonPrinterTestImpl.hpp create mode 100644 src/backends/test/OptimizedNetworkTests.cpp (limited to 'src/backends/test') diff --git a/src/backends/test/EndToEndTestImpl.hpp b/src/backends/test/EndToEndTestImpl.hpp new file mode 100644 index 0000000000..5f17f782f3 --- /dev/null +++ b/src/backends/test/EndToEndTestImpl.hpp @@ -0,0 +1,102 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#include + +#include + +#include + +namespace +{ + +using namespace armnn; + +template +bool ConstantUsageTest(const std::vector& computeDevice, + const TensorInfo& commonTensorInfo, + const std::vector& inputData, + const std::vector& constantData, + const std::vector& expectedOutputData) +{ + // Create runtime in which test will run + IRuntime::CreationOptions options; + IRuntimePtr runtime(IRuntime::Create(options)); + + // Builds up the structure of the network. + INetworkPtr net(INetwork::Create()); + + IConnectableLayer* input = net->AddInputLayer(0); + IConnectableLayer* constant = net->AddConstantLayer(ConstTensor(commonTensorInfo, constantData)); + IConnectableLayer* add = net->AddAdditionLayer(); + IConnectableLayer* output = net->AddOutputLayer(0); + + input->GetOutputSlot(0).Connect(add->GetInputSlot(0)); + constant->GetOutputSlot(0).Connect(add->GetInputSlot(1)); + add->GetOutputSlot(0).Connect(output->GetInputSlot(0)); + + // Sets the tensors in the network. + input->GetOutputSlot(0).SetTensorInfo(commonTensorInfo); + constant->GetOutputSlot(0).SetTensorInfo(commonTensorInfo); + add->GetOutputSlot(0).SetTensorInfo(commonTensorInfo); + + // optimize the network + IOptimizedNetworkPtr optNet = Optimize(*net, computeDevice, runtime->GetDeviceSpec()); + + // Loads it into the runtime. + NetworkId netId; + runtime->LoadNetwork(netId, std::move(optNet)); + + // Creates structures for input & output. + std::vector outputData(inputData.size()); + + InputTensors inputTensors + { + {0, ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())} + }; + OutputTensors outputTensors + { + {0, Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())} + }; + + // Does the inference. + runtime->EnqueueWorkload(netId, inputTensors, outputTensors); + + // Checks the results. + return outputData == expectedOutputData; +} + +inline bool ConstantUsageFloat32Test(const std::vector& backends) +{ + const TensorInfo commonTensorInfo({ 2, 3 }, DataType::Float32); + + return ConstantUsageTest(backends, + commonTensorInfo, + std::vector{ 1.f, 2.f, 3.f, 4.f, 5.f, 6.f }, // Input. + std::vector{ 6.f, 5.f, 4.f, 3.f, 2.f, 1.f }, // Const input. + std::vector{ 7.f, 7.f, 7.f, 7.f, 7.f, 7.f } // Expected output. + ); +} + +inline bool ConstantUsageUint8Test(const std::vector& backends) +{ + TensorInfo commonTensorInfo({ 2, 3 }, DataType::QuantisedAsymm8); + + const float scale = 0.023529f; + const int8_t offset = -43; + + commonTensorInfo.SetQuantizationScale(scale); + commonTensorInfo.SetQuantizationOffset(offset); + + return ConstantUsageTest(backends, + commonTensorInfo, + QuantizedVector(scale, offset, { 1.f, 2.f, 3.f, 4.f, 5.f, 6.f }), // Input. + QuantizedVector(scale, offset, { 6.f, 5.f, 4.f, 3.f, 2.f, 1.f }), // Const input. + QuantizedVector(scale, offset, { 7.f, 7.f, 7.f, 7.f, 7.f, 7.f }) // Expected output. + ); +} + +} // anonymous namespace \ No newline at end of file diff --git a/src/backends/test/JsonPrinterTestImpl.hpp b/src/backends/test/JsonPrinterTestImpl.hpp new file mode 100644 index 0000000000..47e0ec761b --- /dev/null +++ b/src/backends/test/JsonPrinterTestImpl.hpp @@ -0,0 +1,354 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include + +inline bool AreMatchingPair(const char opening, const char closing) +{ + return (opening == '{' && closing == '}') || (opening == '[' && closing == ']'); +} + +inline bool AreParenthesesMatching(const std::string& exp) +{ + std::stack expStack; + for (size_t i = 0; i < exp.length(); ++i) + { + if (exp[i] == '{' || exp[i] == '[') + { + expStack.push(exp[i]); + } + else if (exp[i] == '}' || exp[i] == ']') + { + if (expStack.empty() || !AreMatchingPair(expStack.top(), exp[i])) + { + return false; + } + else + { + expStack.pop(); + } + } + } + return expStack.empty(); +} + +inline std::vector ExtractMeasurements(const std::string& exp) +{ + std::vector numbers; + bool inArray = false; + std::string numberString; + for (size_t i = 0; i < exp.size(); ++i) + { + if (exp[i] == '[') + { + inArray = true; + } + else if (exp[i] == ']' && inArray) + { + try + { + boost::trim_if(numberString, boost::is_any_of("\t,\n")); + numbers.push_back(std::stod(numberString)); + } + catch (std::invalid_argument const& e) + { + BOOST_FAIL("Could not convert measurements to double: " + numberString); + } + + numberString.clear(); + inArray = false; + } + else if (exp[i] == ',' && inArray) + { + try + { + boost::trim_if(numberString, boost::is_any_of("\t,\n")); + numbers.push_back(std::stod(numberString)); + } + catch (std::invalid_argument const& e) + { + BOOST_FAIL("Could not convert measurements to double: " + numberString); + } + numberString.clear(); + } + else if (exp[i] != '[' && inArray && exp[i] != ',' && exp[i] != ' ') + { + numberString += exp[i]; + } + } + return numbers; +} + +inline std::vector ExtractSections(const std::string& exp) +{ + std::vector sections; + + std::stack s; + for (size_t i = 0; i < exp.size(); i++) + { + if (exp.at(i) == '{') + { + s.push(i); + } + else if (exp.at(i) == '}') + { + size_t from = s.top(); + s.pop(); + sections.push_back(exp.substr(from, i - from + 1)); + } + } + + return sections; +} + +inline std::string SoftmaxProfilerTestSetupHelper(const std::vector& backends) +{ + using namespace armnn; + + BOOST_CHECK(!backends.empty()); + + ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance(); + + // Create runtime in which test will run + IRuntime::CreationOptions options; + options.m_EnableGpuProfiling = backends.front() == armnn::Compute::GpuAcc; + IRuntimePtr runtime(IRuntime::Create(options)); + + // build up the structure of the network + INetworkPtr net(INetwork::Create()); + + IConnectableLayer* input = net->AddInputLayer(0, "input"); + IConnectableLayer* softmax = net->AddSoftmaxLayer(SoftmaxDescriptor(), "softmax"); + IConnectableLayer* output = net->AddOutputLayer(0, "output"); + + input->GetOutputSlot(0).Connect(softmax->GetInputSlot(0)); + softmax->GetOutputSlot(0).Connect(output->GetInputSlot(0)); + + // set the tensors in the network + TensorInfo inputTensorInfo(TensorShape({1, 5}), DataType::QuantisedAsymm8); + inputTensorInfo.SetQuantizationOffset(100); + inputTensorInfo.SetQuantizationScale(10000.0f); + input->GetOutputSlot(0).SetTensorInfo(inputTensorInfo); + + TensorInfo outputTensorInfo(TensorShape({1, 5}), DataType::QuantisedAsymm8); + outputTensorInfo.SetQuantizationOffset(0); + outputTensorInfo.SetQuantizationScale(1.0f / 256.0f); + softmax->GetOutputSlot(0).SetTensorInfo(outputTensorInfo); + + // optimize the network + IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec()); + if(!optNet) + { + BOOST_FAIL("Error occurred during Optimization, Optimize() returned nullptr."); + } + // load it into the runtime + NetworkId netId; + auto error = runtime->LoadNetwork(netId, std::move(optNet)); + BOOST_TEST(error == Status::Success); + + // create structures for input & output + std::vector inputData + { + 1, 10, 3, 200, 5 + // one of inputs is sufficiently larger than the others to saturate softmax + }; + std::vector outputData(5); + + armnn::InputTensors inputTensors + { + {0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())} + }; + armnn::OutputTensors outputTensors + { + {0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())} + }; + + runtime->GetProfiler(netId)->EnableProfiling(true); + + // do the inferences + runtime->EnqueueWorkload(netId, inputTensors, outputTensors); + runtime->EnqueueWorkload(netId, inputTensors, outputTensors); + runtime->EnqueueWorkload(netId, inputTensors, outputTensors); + + // retrieve the Profiler.Print() output + std::stringstream ss; + profilerManager.GetProfiler()->Print(ss); + + return ss.str(); +} + +inline void SoftmaxProfilerTestValidationHelper(std::string& result, const std::string& testData) +{ + // ensure all measurements are greater than zero + std::vector measurementsVector = ExtractMeasurements(result); + BOOST_CHECK(!measurementsVector.empty()); + + // check sections contain raw and unit tags + // first ensure Parenthesis are balanced + if (AreParenthesesMatching(result)) + { + // remove parent sections that will not have raw or unit tag + std::vector sectionVector = ExtractSections(result); + for (size_t i = 0; i < sectionVector.size(); ++i) + { + if (boost::contains(sectionVector[i], "\"ArmNN\":") + || boost::contains(sectionVector[i], "\"inference_measurements\":")) + { + sectionVector.erase(sectionVector.begin() + static_cast(i)); + } + } + BOOST_CHECK(!sectionVector.empty()); + + BOOST_CHECK(std::all_of(sectionVector.begin(), sectionVector.end(), + [](std::string i) { return boost::contains(i, "\"raw\":"); })); + + BOOST_CHECK(std::all_of(sectionVector.begin(), sectionVector.end(), + [](std::string i) { return boost::contains(i, "\"unit\":"); })); + } + + // remove the time measurements as they vary from test to test + result.erase(std::remove_if (result.begin(),result.end(), + [](char c) { return c == '.'; }), result.end()); + result.erase(std::remove_if (result.begin(), result.end(), &isdigit), result.end()); + result.erase(std::remove_if (result.begin(),result.end(), + [](char c) { return c == '\t'; }), result.end()); + + BOOST_CHECK(boost::contains(result, "ArmNN")); + BOOST_CHECK(boost::contains(result, "inference_measurements")); + BOOST_CHECK(boost::contains(result, "layer_measurements")); + BOOST_CHECK_EQUAL(result, testData); + + // ensure no spare parenthesis present in print output + BOOST_CHECK(AreParenthesesMatching(result)); +} + +inline void SetupSoftmaxProfilerWithSpecifiedBackendsAndValidateJsonPrinterResult( + const std::vector& backends) +{ + // setup the test fixture and obtain JSON Printer result + std::string result = SoftmaxProfilerTestSetupHelper(backends); + + std::string backend = "Ref"; + std::string changeLine31 = "\n},\n\"CopyMemGeneric_Execute\": {"; + std::string changeLine39 = "us\""; + std::string changeLine40; + std::string changeLine45; + + if (backends[0] == armnn::Compute::GpuAcc) { + backend = "Cl"; + changeLine31 = ",\n\"OpenClKernelTimer/: softmax_layer_max_shift_exp_sum_quantized_serial GWS[,,]\": {"; + changeLine39 = R"(us" +}, +"OpenClKernelTimer/: softmax_layer_norm_quantized GWS[,,]": { +"raw": [ +, +, + +], +"unit": "us")"; + + changeLine40 = R"( +}, +"CopyMemGeneric_Execute": { +"raw": [ +, +, + +], +"unit": "us")"; + changeLine45 = "}\n"; + } + else if (backends[0] == armnn::Compute::CpuAcc) + { + backend = "Neon"; + changeLine31 = ",\n\"NeonKernelTimer/: NEFillBorderKernel\": {"; + changeLine39 = R"(us" +}, +"NeonKernelTimer/: NELogitsDMaxKernel": { +"raw": [ +, +, + +], +"unit": "us" +}, +"NeonKernelTimer/: NELogitsDSoftmaxKernel": { +"raw": [ +, +, + +], +"unit": "us")"; + changeLine40 = R"( +}, +"CopyMemGeneric_Execute": { +"raw": [ +, +, + +], +"unit": "us")"; + changeLine45 = "}\n"; + } + + std::string testData = R"({ +"ArmNN": { +"inference_measurements": { +"raw": [ +, +, + +], +"unit": "us", +"layer_measurements": { +"raw": [ +, +, + +], +"unit": "us", +"CopyMemGeneric_Execute": { +"raw": [ +, +, + +], +"unit": "us" +}, +")" + backend + R"(SoftmaxUintWorkload_Execute": { +"raw": [ +, +, + +], +"unit": "us")" + changeLine31 + R"( +"raw": [ +, +, + +], +"unit": ")" + changeLine39 + R"( +})" + changeLine40 + R"( +} +} +} +} +)" + changeLine45 + R"()"; + + // validate the JSON Printer result + SoftmaxProfilerTestValidationHelper(result, testData); +} diff --git a/src/backends/test/OptimizedNetworkTests.cpp b/src/backends/test/OptimizedNetworkTests.cpp new file mode 100644 index 0000000000..72a35f99e0 --- /dev/null +++ b/src/backends/test/OptimizedNetworkTests.cpp @@ -0,0 +1,329 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include +#include +#include + +#include + +#include + +BOOST_AUTO_TEST_SUITE(OptimizedNetwork) + +BOOST_AUTO_TEST_CASE(SerializeToDot) +{ + armnn::Network net; + + //Defines layers. + auto input = net.AddInputLayer(0); + auto add = net.AddAdditionLayer(); + auto output = net.AddOutputLayer(0); + + // Connects layers. + input->GetOutputSlot(0).Connect(add->GetInputSlot(0)); + input->GetOutputSlot(0).Connect(add->GetInputSlot(1)); + add->GetOutputSlot(0).Connect(output->GetInputSlot(0)); + + armnn::TensorShape shape({4}); + armnn::TensorInfo info(shape, armnn::DataType::Float32); + input->GetOutputSlot(0).SetTensorInfo(info); + add->GetOutputSlot(0).SetTensorInfo(info); + + armnn::IRuntime::CreationOptions options; + armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options)); + + std::vector backends = {armnn::Compute::CpuRef}; + armnn::IOptimizedNetworkPtr optimizedNet = armnn::Optimize(net, backends, runtime->GetDeviceSpec()); + + std::ostringstream ss; + optimizedNet->SerializeToDot(ss); + + auto inputId = input->GetGuid(); + auto addId = add->GetGuid(); + auto outputId = output->GetGuid(); + + std::stringstream expected; + expected << + "digraph Optimized {\n" + " node [shape=\"record\"];\n" + " edge [fontsize=8 fontcolor=\"blue\" fontname=\"arial-bold\"];\n" + " " << inputId << " [label=\"{Input}\"];\n" + " " << addId << " [label=\"{Addition}\"];\n" + " " << outputId << " [label=\"{Output}\"];\n" + " " << inputId << " -> " << addId << " [label=< [4] >];\n" + " " << inputId << " -> " << addId << " [label=< [4] >];\n" + " " << addId << " -> " << outputId << " [label=< [4] >];\n" + "}\n"; + + BOOST_TEST(ss.str() == expected.str()); +} + +BOOST_AUTO_TEST_CASE(OptimizeValidateDeviceNonSupportLayerNoFallback) +{ + // build up the structure of the network + armnn::INetworkPtr net(armnn::INetwork::Create()); + + armnn::IConnectableLayer* input = net->AddInputLayer(0); + + // This layer configuration isn't supported by CpuAcc and isn't allowed to fall back, so Optimize will return null. + armnn::NormalizationDescriptor descriptor; + armnn::IConnectableLayer* normalize = net->AddNormalizationLayer(descriptor); + + armnn::IConnectableLayer* output = net->AddOutputLayer(0); + + input->GetOutputSlot(0).Connect(normalize->GetInputSlot(0)); + normalize->GetOutputSlot(0).Connect(output->GetInputSlot(0)); + + input->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32)); + normalize->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32)); + + armnn::IRuntime::CreationOptions options; + armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options)); + + std::vector backends = { armnn::Compute::CpuAcc }; + armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec()); + BOOST_CHECK(!optNet); +} + +BOOST_AUTO_TEST_CASE(OptimizeValidateDeviceNonSupportLayerWithFallback) +{ + // build up the structure of the network + armnn::INetworkPtr net(armnn::INetwork::Create()); + + armnn::IConnectableLayer* input = net->AddInputLayer(0); + + // This layer configuration isn't supported by CpuAcc but it allows to fallback to CpuRef. + armnn::NormalizationDescriptor descriptor; + armnn::IConnectableLayer* normalize = net->AddNormalizationLayer(descriptor); + + armnn::IConnectableLayer* output = net->AddOutputLayer(0); + + input->GetOutputSlot(0).Connect(normalize->GetInputSlot(0)); + normalize->GetOutputSlot(0).Connect(output->GetInputSlot(0)); + + input->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32)); + normalize->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32)); + + armnn::IRuntime::CreationOptions options; + armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options)); + + std::vector backends = { armnn::Compute::CpuAcc, armnn::Compute::CpuRef }; + armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec()); + BOOST_REQUIRE(optNet); + + for (auto&& layer : static_cast(optNet.get())->GetGraph()) + { + // If NEON is enabled, Input and Output layers are supported by CpuAcc, + // the other layers are supported by CpuRef. + // If NEON is not enabled, all layers are supported by CpuRef. +#if ARMCOMPUTENEON_ENABLED + if (layer->GetType() == armnn::LayerType::Input || layer->GetType() == armnn::LayerType::Output) + { + BOOST_CHECK(layer->GetBackendId() == armnn::Compute::CpuAcc); + } + else if (layer->GetType() == armnn::LayerType::Normalization) + { + BOOST_CHECK(layer->GetBackendId() == armnn::Compute::CpuRef); + } +#else + BOOST_CHECK(layer->GetBackendId() == armnn::Compute::CpuRef); +#endif + } +} + +BOOST_AUTO_TEST_CASE(OptimizeValidateWorkloadsUndefinedComputeDevice) +{ + const armnn::TensorInfo desc({3, 5}, armnn::DataType::Float32); + + armnn::Network net; + + armnn::NormalizationDescriptor nmDesc; + armnn::ActivationDescriptor acDesc; + + // in + // | + // nm + // / | + // ac | + // \ | + // ml + // | + // sm + // | + // ot + armnn::IConnectableLayer* layer = net.AddInputLayer(0, "in"); + layer->GetOutputSlot(0).SetTensorInfo(desc); + + armnn::IConnectableLayer* const normLayer = net.AddNormalizationLayer(nmDesc, "nm"); + + layer->GetOutputSlot(0).Connect(normLayer->GetInputSlot(0)); + normLayer->GetOutputSlot(0).SetTensorInfo(desc); + + layer = net.AddActivationLayer(acDesc, "ac"); + + normLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0)); + layer->GetOutputSlot(0).SetTensorInfo(desc); + + armnn::IConnectableLayer* prevLayer = layer; + layer = net.AddMultiplicationLayer("ml"); + + prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0)); + normLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1)); + layer->GetOutputSlot(0).SetTensorInfo(desc); + + prevLayer = layer; + armnn::SoftmaxDescriptor softmaxDescriptor; + layer = net.AddSoftmaxLayer(softmaxDescriptor, "sm"); + + prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0)); + layer->GetOutputSlot(0).SetTensorInfo(desc); + + prevLayer = layer; + layer = net.AddOutputLayer(0, "ot"); + + prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0)); + + armnn::IRuntime::CreationOptions options; + armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options)); + + std::vector backends = { armnn::Compute::Undefined }; + + armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(net, backends, runtime->GetDeviceSpec()); + BOOST_CHECK(!optNet); + +} + +BOOST_AUTO_TEST_CASE(OptimizeValidateWorkloadsUndefinedComputeDeviceWithFallback) +{ + const armnn::TensorInfo desc({3, 5}, armnn::DataType::Float32); + + armnn::Network net; + + armnn::NormalizationDescriptor nmDesc; + armnn::ActivationDescriptor acDesc; + + // in + // | + // nm + // / | + // ac | + // \ | + // ml + // | + // sm + // | + // ot + armnn::IConnectableLayer* layer = net.AddInputLayer(0, "in"); + layer->GetOutputSlot(0).SetTensorInfo(desc); + + armnn::IConnectableLayer* const normLayer = net.AddNormalizationLayer(nmDesc, "nm"); + + layer->GetOutputSlot(0).Connect(normLayer->GetInputSlot(0)); + normLayer->GetOutputSlot(0).SetTensorInfo(desc); + + layer = net.AddActivationLayer(acDesc, "ac"); + + normLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0)); + layer->GetOutputSlot(0).SetTensorInfo(desc); + + armnn::IConnectableLayer* prevLayer = layer; + layer = net.AddMultiplicationLayer("ml"); + + prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0)); + normLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1)); + layer->GetOutputSlot(0).SetTensorInfo(desc); + + prevLayer = layer; + armnn::SoftmaxDescriptor softmaxDescriptor; + layer = net.AddSoftmaxLayer(softmaxDescriptor, "sm"); + + prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0)); + layer->GetOutputSlot(0).SetTensorInfo(desc); + + prevLayer = layer; + layer = net.AddOutputLayer(0, "ot"); + + prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0)); + + armnn::IRuntime::CreationOptions options; + armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options)); + + std::vector backends = { armnn::Compute::Undefined, armnn::Compute::CpuRef }; + + armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(net, backends, runtime->GetDeviceSpec()); + BOOST_CHECK(optNet); + + // validate workloads + armnn::RefWorkloadFactory fact; + for (auto&& layer : static_cast(optNet.get())->GetGraph()) + { + BOOST_CHECK(layer->GetBackendId() == armnn::Compute::CpuRef); + BOOST_CHECK_NO_THROW( + layer->CreateWorkload(static_cast(optNet.get())->GetGraph(), fact)); + } +} + +BOOST_AUTO_TEST_CASE(OptimizeValidateWorkloadsDuplicateComputeDeviceWithFallback) +{ + // build up the structure of the network + armnn::INetworkPtr net(armnn::INetwork::Create()); + + armnn::IConnectableLayer* input = net->AddInputLayer(0); + + // This layer configuration isn't supported by CpuAcc but it allows to fallback to CpuRef. + armnn::NormalizationDescriptor descriptor; + armnn::IConnectableLayer* normalize = net->AddNormalizationLayer(descriptor); + + armnn::IConnectableLayer* output = net->AddOutputLayer(0); + + input->GetOutputSlot(0).Connect(normalize->GetInputSlot(0)); + normalize->GetOutputSlot(0).Connect(output->GetInputSlot(0)); + + input->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32)); + normalize->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32)); + + armnn::IRuntime::CreationOptions options; + armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options)); + + std::vector backends = { armnn::Compute::CpuAcc, + armnn::Compute::GpuAcc, + armnn::Compute::CpuRef }; + + armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec()); + BOOST_REQUIRE(optNet); + + for (auto&& layer : static_cast(optNet.get())->GetGraph()) + { + // If NEON is enabled, Input and Output layers are supported by CpuAcc, + // the other layers are supported by CpuRef. + // If only CL is enabled, Input and Output layers are supported by GpuAcc, + // the other layers are supported by CpuRef. + // If neither NEON, nor CL is enabled, all layers are supported by CpuRef. +#if ARMCOMPUTENEON_ENABLED + if (layer->GetType() == armnn::LayerType::Input || layer->GetType() == armnn::LayerType::Output) + { + BOOST_CHECK(layer->GetBackendId() == armnn::Compute::CpuAcc); + } + else if (layer->GetType() == armnn::LayerType::Normalization) + { + BOOST_CHECK(layer->GetBackendId() == armnn::Compute::CpuRef); + } +#elif ARMCOMPUTECL_ENABLED + if (layer->GetType() == armnn::LayerType::Input || layer->GetType() == armnn::LayerType::Output) + { + BOOST_CHECK(layer->GetBackendId() == armnn::Compute::GpuAcc); + } + else if (layer->GetType() == armnn::LayerType::Normalization) + { + BOOST_CHECK(layer->GetBackendId() == armnn::Compute::CpuRef); + } +#else + BOOST_CHECK(layer->GetBackendId() == armnn::Compute::CpuRef); +#endif + } +} + +BOOST_AUTO_TEST_SUITE_END() -- cgit v1.2.1