Diffstat (limited to 'src/backends/test')
-rw-r--r--  src/backends/test/EndToEndTestImpl.hpp       102
-rw-r--r--  src/backends/test/JsonPrinterTestImpl.hpp    354
-rw-r--r--  src/backends/test/OptimizedNetworkTests.cpp  329
3 files changed, 785 insertions(+), 0 deletions(-)
diff --git a/src/backends/test/EndToEndTestImpl.hpp b/src/backends/test/EndToEndTestImpl.hpp
new file mode 100644
index 0000000000..5f17f782f3
--- /dev/null
+++ b/src/backends/test/EndToEndTestImpl.hpp
@@ -0,0 +1,102 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#pragma once
+
+#include <armnn/ArmNN.hpp>
+
+#include <backends/test/QuantizeHelper.hpp>
+
+#include <vector>
+
+namespace
+{
+
+using namespace armnn;
+
+template<typename T>
+bool ConstantUsageTest(const std::vector<BackendId>& computeDevice,
+ const TensorInfo& commonTensorInfo,
+ const std::vector<T>& inputData,
+ const std::vector<T>& constantData,
+ const std::vector<T>& expectedOutputData)
+{
+ // Create the runtime in which the test will run
+ IRuntime::CreationOptions options;
+ IRuntimePtr runtime(IRuntime::Create(options));
+
+ // Builds up the structure of the network.
+ INetworkPtr net(INetwork::Create());
+
+ IConnectableLayer* input = net->AddInputLayer(0);
+ IConnectableLayer* constant = net->AddConstantLayer(ConstTensor(commonTensorInfo, constantData));
+ IConnectableLayer* add = net->AddAdditionLayer();
+ IConnectableLayer* output = net->AddOutputLayer(0);
+
+ input->GetOutputSlot(0).Connect(add->GetInputSlot(0));
+ constant->GetOutputSlot(0).Connect(add->GetInputSlot(1));
+ add->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+ // Sets the tensors in the network.
+ input->GetOutputSlot(0).SetTensorInfo(commonTensorInfo);
+ constant->GetOutputSlot(0).SetTensorInfo(commonTensorInfo);
+ add->GetOutputSlot(0).SetTensorInfo(commonTensorInfo);
+
+ // optimize the network
+ IOptimizedNetworkPtr optNet = Optimize(*net, computeDevice, runtime->GetDeviceSpec());
+
+ // Loads it into the runtime.
+ NetworkId netId;
+ runtime->LoadNetwork(netId, std::move(optNet));
+
+ // Creates structures for input & output.
+ std::vector<T> outputData(inputData.size());
+
+ InputTensors inputTensors
+ {
+ {0, ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())}
+ };
+ OutputTensors outputTensors
+ {
+ {0, Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
+ };
+
+ // Does the inference.
+ runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
+
+ // Checks the results.
+ return outputData == expectedOutputData;
+}
+
+inline bool ConstantUsageFloat32Test(const std::vector<BackendId>& backends)
+{
+ const TensorInfo commonTensorInfo({ 2, 3 }, DataType::Float32);
+
+ return ConstantUsageTest(backends,
+ commonTensorInfo,
+ std::vector<float>{ 1.f, 2.f, 3.f, 4.f, 5.f, 6.f }, // Input.
+ std::vector<float>{ 6.f, 5.f, 4.f, 3.f, 2.f, 1.f }, // Const input.
+ std::vector<float>{ 7.f, 7.f, 7.f, 7.f, 7.f, 7.f } // Expected output.
+ );
+}
+
+inline bool ConstantUsageUint8Test(const std::vector<BackendId>& backends)
+{
+ TensorInfo commonTensorInfo({ 2, 3 }, DataType::QuantisedAsymm8);
+
+ const float scale = 0.023529f;
+ const int8_t offset = -43;
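+ // (Observation, not in the original source: the scale is approximately
+ // 6/255, so together with the offset every quantised test value, including
+ // the expected 7.0f outputs, fits within the uint8 range.)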
+
+ commonTensorInfo.SetQuantizationScale(scale);
+ commonTensorInfo.SetQuantizationOffset(offset);
+
+ return ConstantUsageTest(backends,
+ commonTensorInfo,
+ QuantizedVector<uint8_t>(scale, offset, { 1.f, 2.f, 3.f, 4.f, 5.f, 6.f }), // Input.
+ QuantizedVector<uint8_t>(scale, offset, { 6.f, 5.f, 4.f, 3.f, 2.f, 1.f }), // Const input.
+ QuantizedVector<uint8_t>(scale, offset, { 7.f, 7.f, 7.f, 7.f, 7.f, 7.f }) // Expected output.
+ );
+}
+
+} // anonymous namespace
\ No newline at end of file
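
A backend test suite would typically drive these helpers from a Boost.Test
case and assert on the returned bool. A minimal sketch, assuming the CpuRef
backend (the suite and case names here are illustrative, not part of this
commit):

    BOOST_AUTO_TEST_CASE(ConstantUsage_Ref_Float32)
    {
        std::vector<armnn::BackendId> backends = { armnn::Compute::CpuRef };
        BOOST_TEST(ConstantUsageFloat32Test(backends));
    }
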
diff --git a/src/backends/test/JsonPrinterTestImpl.hpp b/src/backends/test/JsonPrinterTestImpl.hpp
new file mode 100644
index 0000000000..47e0ec761b
--- /dev/null
+++ b/src/backends/test/JsonPrinterTestImpl.hpp
@@ -0,0 +1,354 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include <armnn/Descriptors.hpp>
+#include <armnn/IRuntime.hpp>
+#include <armnn/INetwork.hpp>
+#include <armnn/Profiling.hpp>
+
+#include <boost/test/unit_test.hpp>
+#include <boost/algorithm/string.hpp>
+#include <boost/lexical_cast.hpp>
+
+#include <sstream>
+#include <stack>
+#include <string>
+#include <vector>
+
+inline bool AreMatchingPair(const char opening, const char closing)
+{
+ return (opening == '{' && closing == '}') || (opening == '[' && closing == ']');
+}
+
+inline bool AreParenthesesMatching(const std::string& exp)
+{
+ std::stack<char> expStack;
+ for (size_t i = 0; i < exp.length(); ++i)
+ {
+ if (exp[i] == '{' || exp[i] == '[')
+ {
+ expStack.push(exp[i]);
+ }
+ else if (exp[i] == '}' || exp[i] == ']')
+ {
+ if (expStack.empty() || !AreMatchingPair(expStack.top(), exp[i]))
+ {
+ return false;
+ }
+ else
+ {
+ expStack.pop();
+ }
+ }
+ }
+ return expStack.empty();
+}
+
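+// Collects every number that appears inside a '[' ... ']' span; e.g. the JSON
+// fragment "raw": [ 1.5, 2.5, 3.5 ] yields {1.5, 2.5, 3.5}. (Illustrative
+// values; comment added for clarity.)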
+inline std::vector<double> ExtractMeasurements(const std::string& exp)
+{
+ std::vector<double> numbers;
+ bool inArray = false;
+ std::string numberString;
+ for (size_t i = 0; i < exp.size(); ++i)
+ {
+ if (exp[i] == '[')
+ {
+ inArray = true;
+ }
+ else if (exp[i] == ']' && inArray)
+ {
+ try
+ {
+ boost::trim_if(numberString, boost::is_any_of("\t,\n"));
+ numbers.push_back(std::stod(numberString));
+ }
+ catch (std::invalid_argument const& e)
+ {
+ BOOST_FAIL("Could not convert measurements to double: " + numberString);
+ }
+
+ numberString.clear();
+ inArray = false;
+ }
+ else if (exp[i] == ',' && inArray)
+ {
+ try
+ {
+ boost::trim_if(numberString, boost::is_any_of("\t,\n"));
+ numbers.push_back(std::stod(numberString));
+ }
+ catch (std::invalid_argument const& e)
+ {
+ BOOST_FAIL("Could not convert measurements to double: " + numberString);
+ }
+ numberString.clear();
+ }
+ else if (exp[i] != '[' && inArray && exp[i] != ',' && exp[i] != ' ')
+ {
+ numberString += exp[i];
+ }
+ }
+ return numbers;
+}
+
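+// Returns every '{' ... '}' delimited substring of exp; because a section is
+// emitted when its closing brace is found, nested sections appear before the
+// sections that enclose them. (Comment added for clarity.)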
+inline std::vector<std::string> ExtractSections(const std::string& exp)
+{
+ std::vector<std::string> sections;
+
+ std::stack<size_t> s;
+ for (size_t i = 0; i < exp.size(); i++)
+ {
+ if (exp.at(i) == '{')
+ {
+ s.push(i);
+ }
+ else if (exp.at(i) == '}')
+ {
+ size_t from = s.top();
+ s.pop();
+ sections.push_back(exp.substr(from, i - from + 1));
+ }
+ }
+
+ return sections;
+}
+
+inline std::string SoftmaxProfilerTestSetupHelper(const std::vector<armnn::BackendId>& backends)
+{
+ using namespace armnn;
+
+ BOOST_CHECK(!backends.empty());
+
+ ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
+
+ // Create the runtime in which the test will run
+ IRuntime::CreationOptions options;
+ options.m_EnableGpuProfiling = backends.front() == armnn::Compute::GpuAcc;
+ IRuntimePtr runtime(IRuntime::Create(options));
+
+ // build up the structure of the network
+ INetworkPtr net(INetwork::Create());
+
+ IConnectableLayer* input = net->AddInputLayer(0, "input");
+ IConnectableLayer* softmax = net->AddSoftmaxLayer(SoftmaxDescriptor(), "softmax");
+ IConnectableLayer* output = net->AddOutputLayer(0, "output");
+
+ input->GetOutputSlot(0).Connect(softmax->GetInputSlot(0));
+ softmax->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+ // set the tensors in the network
+ TensorInfo inputTensorInfo(TensorShape({1, 5}), DataType::QuantisedAsymm8);
+ inputTensorInfo.SetQuantizationOffset(100);
+ inputTensorInfo.SetQuantizationScale(10000.0f);
+ input->GetOutputSlot(0).SetTensorInfo(inputTensorInfo);
+
+ TensorInfo outputTensorInfo(TensorShape({1, 5}), DataType::QuantisedAsymm8);
+ outputTensorInfo.SetQuantizationOffset(0);
+ outputTensorInfo.SetQuantizationScale(1.0f / 256.0f);
+ softmax->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
+
+ // optimize the network
+ IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
+ if (!optNet)
+ {
+ BOOST_FAIL("Error occurred during Optimization, Optimize() returned nullptr.");
+ }
+ // load it into the runtime
+ NetworkId netId;
+ auto error = runtime->LoadNetwork(netId, std::move(optNet));
+ BOOST_TEST(error == Status::Success);
+
+ // create structures for input & output
+ std::vector<uint8_t> inputData
+ {
+ 1, 10, 3, 200, 5
+ // one of the inputs is sufficiently larger than the others to saturate the softmax
+ };
+ std::vector<uint8_t> outputData(5);
+
+ armnn::InputTensors inputTensors
+ {
+ {0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())}
+ };
+ armnn::OutputTensors outputTensors
+ {
+ {0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
+ };
+
+ runtime->GetProfiler(netId)->EnableProfiling(true);
+
+ // do the inferences
+ runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
+ runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
+ runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
+
+ // retrieve the Profiler.Print() output
+ std::stringstream ss;
+ profilerManager.GetProfiler()->Print(ss);
+
+ return ss.str();
+}
+
+inline void SoftmaxProfilerTestValidationHelper(std::string& result, const std::string& testData)
+{
+ // ensure at least one measurement was recorded
+ std::vector<double> measurementsVector = ExtractMeasurements(result);
+ BOOST_CHECK(!measurementsVector.empty());
+
+ // check sections contain raw and unit tags
+ // first ensure the parentheses are balanced
+ if (AreParenthesesMatching(result))
+ {
+ // remove the parent sections, which will not have raw or unit tags
+ std::vector<std::string> sectionVector = ExtractSections(result);
+ for (size_t i = 0; i < sectionVector.size();)
+ {
+ if (boost::contains(sectionVector[i], "\"ArmNN\":")
+ || boost::contains(sectionVector[i], "\"inference_measurements\":"))
+ {
+ // erase without advancing, so the element shifted into slot i is not skipped
+ sectionVector.erase(sectionVector.begin() + static_cast<int>(i));
+ }
+ else
+ {
+ ++i;
+ }
+ }
+ BOOST_CHECK(!sectionVector.empty());
+
+ BOOST_CHECK(std::all_of(sectionVector.begin(), sectionVector.end(),
+ [](std::string i) { return boost::contains(i, "\"raw\":"); }));
+
+ BOOST_CHECK(std::all_of(sectionVector.begin(), sectionVector.end(),
+ [](std::string i) { return boost::contains(i, "\"unit\":"); }));
+ }
+
+ // remove the time measurements as they vary from test to test
+ result.erase(std::remove_if(result.begin(), result.end(),
+ [](char c) { return c == '.'; }), result.end());
+ result.erase(std::remove_if(result.begin(), result.end(), &isdigit), result.end());
+ result.erase(std::remove_if(result.begin(), result.end(),
+ [](char c) { return c == '\t'; }), result.end());
+
+ BOOST_CHECK(boost::contains(result, "ArmNN"));
+ BOOST_CHECK(boost::contains(result, "inference_measurements"));
+ BOOST_CHECK(boost::contains(result, "layer_measurements"));
+ BOOST_CHECK_EQUAL(result, testData);
+
+ // ensure no spare parentheses are present in the print output
+ BOOST_CHECK(AreParenthesesMatching(result));
+}
+
+inline void SetupSoftmaxProfilerWithSpecifiedBackendsAndValidateJsonPrinterResult(
+ const std::vector<armnn::BackendId>& backends)
+{
+ // set up the test fixture and obtain the JSON Printer result
+ std::string result = SoftmaxProfilerTestSetupHelper(backends);
+
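+ // (Comment added for clarity: the changeLineNN strings below splice the
+ // backend-specific timer entries into the expected JSON; the NN values
+ // appear to refer to line positions in the Ref baseline output.)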
+ std::string backend = "Ref";
+ std::string changeLine31 = "\n},\n\"CopyMemGeneric_Execute\": {";
+ std::string changeLine39 = "us\"";
+ std::string changeLine40;
+ std::string changeLine45;
+
+ if (backends[0] == armnn::Compute::GpuAcc)
+ {
+ backend = "Cl";
+ changeLine31 = ",\n\"OpenClKernelTimer/: softmax_layer_max_shift_exp_sum_quantized_serial GWS[,,]\": {";
+ changeLine39 = R"(us"
+},
+"OpenClKernelTimer/: softmax_layer_norm_quantized GWS[,,]": {
+"raw": [
+,
+,
+
+],
+"unit": "us")";
+
+ changeLine40 = R"(
+},
+"CopyMemGeneric_Execute": {
+"raw": [
+,
+,
+
+],
+"unit": "us")";
+ changeLine45 = "}\n";
+ }
+ else if (backends[0] == armnn::Compute::CpuAcc)
+ {
+ backend = "Neon";
+ changeLine31 = ",\n\"NeonKernelTimer/: NEFillBorderKernel\": {";
+ changeLine39 = R"(us"
+},
+"NeonKernelTimer/: NELogitsDMaxKernel": {
+"raw": [
+,
+,
+
+],
+"unit": "us"
+},
+"NeonKernelTimer/: NELogitsDSoftmaxKernel": {
+"raw": [
+,
+,
+
+],
+"unit": "us")";
+ changeLine40 = R"(
+},
+"CopyMemGeneric_Execute": {
+"raw": [
+,
+,
+
+],
+"unit": "us")";
+ changeLine45 = "}\n";
+ }
+
+ std::string testData = R"({
+"ArmNN": {
+"inference_measurements": {
+"raw": [
+,
+,
+
+],
+"unit": "us",
+"layer_measurements": {
+"raw": [
+,
+,
+
+],
+"unit": "us",
+"CopyMemGeneric_Execute": {
+"raw": [
+,
+,
+
+],
+"unit": "us"
+},
+")" + backend + R"(SoftmaxUintWorkload_Execute": {
+"raw": [
+,
+,
+
+],
+"unit": "us")" + changeLine31 + R"(
+"raw": [
+,
+,
+
+],
+"unit": ")" + changeLine39 + R"(
+})" + changeLine40 + R"(
+}
+}
+}
+}
+)" + changeLine45 + R"()";
+
+ // validate the JSON Printer result
+ SoftmaxProfilerTestValidationHelper(result, testData);
+}
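
As with the end-to-end helpers above, a backend suite would forward its
backend list to this entry point. A hypothetical CpuRef case (name assumed,
not part of this commit):

    BOOST_AUTO_TEST_CASE(JsonPrinterCpuRefTest)
    {
        std::vector<armnn::BackendId> backends = { armnn::Compute::CpuRef };
        SetupSoftmaxProfilerWithSpecifiedBackendsAndValidateJsonPrinterResult(backends);
    }
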
diff --git a/src/backends/test/OptimizedNetworkTests.cpp b/src/backends/test/OptimizedNetworkTests.cpp
new file mode 100644
index 0000000000..72a35f99e0
--- /dev/null
+++ b/src/backends/test/OptimizedNetworkTests.cpp
@@ -0,0 +1,329 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include <armnn/ArmNN.hpp>
+#include <armnn/Graph.hpp>
+#include <armnn/Network.hpp>
+
+#include <backends/reference/RefWorkloadFactory.hpp>
+
+#include <boost/test/unit_test.hpp>
+
+BOOST_AUTO_TEST_SUITE(OptimizedNetwork)
+
+BOOST_AUTO_TEST_CASE(SerializeToDot)
+{
+ armnn::Network net;
+
+ // Defines layers.
+ auto input = net.AddInputLayer(0);
+ auto add = net.AddAdditionLayer();
+ auto output = net.AddOutputLayer(0);
+
+ // Connects layers.
+ input->GetOutputSlot(0).Connect(add->GetInputSlot(0));
+ input->GetOutputSlot(0).Connect(add->GetInputSlot(1));
+ add->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+ armnn::TensorShape shape({4});
+ armnn::TensorInfo info(shape, armnn::DataType::Float32);
+ input->GetOutputSlot(0).SetTensorInfo(info);
+ add->GetOutputSlot(0).SetTensorInfo(info);
+
+ armnn::IRuntime::CreationOptions options;
+ armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
+
+ std::vector<armnn::BackendId> backends = {armnn::Compute::CpuRef};
+ armnn::IOptimizedNetworkPtr optimizedNet = armnn::Optimize(net, backends, runtime->GetDeviceSpec());
+
+ std::ostringstream ss;
+ optimizedNet->SerializeToDot(ss);
+
+ auto inputId = input->GetGuid();
+ auto addId = add->GetGuid();
+ auto outputId = output->GetGuid();
+
+ std::stringstream expected;
+ expected <<
+ "digraph Optimized {\n"
+ " node [shape=\"record\"];\n"
+ " edge [fontsize=8 fontcolor=\"blue\" fontname=\"arial-bold\"];\n"
+ " " << inputId << " [label=\"{Input}\"];\n"
+ " " << addId << " [label=\"{Addition}\"];\n"
+ " " << outputId << " [label=\"{Output}\"];\n"
+ " " << inputId << " -> " << addId << " [label=< [4] >];\n"
+ " " << inputId << " -> " << addId << " [label=< [4] >];\n"
+ " " << addId << " -> " << outputId << " [label=< [4] >];\n"
+ "}\n";
+
+ BOOST_TEST(ss.str() == expected.str());
+}
+
+BOOST_AUTO_TEST_CASE(OptimizeValidateDeviceNonSupportLayerNoFallback)
+{
+ // build up the structure of the network
+ armnn::INetworkPtr net(armnn::INetwork::Create());
+
+ armnn::IConnectableLayer* input = net->AddInputLayer(0);
+
+ // This layer configuration isn't supported by CpuAcc and isn't allowed to fall back, so Optimize will return null.
+ armnn::NormalizationDescriptor descriptor;
+ armnn::IConnectableLayer* normalize = net->AddNormalizationLayer(descriptor);
+
+ armnn::IConnectableLayer* output = net->AddOutputLayer(0);
+
+ input->GetOutputSlot(0).Connect(normalize->GetInputSlot(0));
+ normalize->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+ input->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32));
+ normalize->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32));
+
+ armnn::IRuntime::CreationOptions options;
+ armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
+
+ std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc };
+ armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec());
+ BOOST_CHECK(!optNet);
+}
+
+BOOST_AUTO_TEST_CASE(OptimizeValidateDeviceNonSupportLayerWithFallback)
+{
+ // build up the structure of the network
+ armnn::INetworkPtr net(armnn::INetwork::Create());
+
+ armnn::IConnectableLayer* input = net->AddInputLayer(0);
+
+ // This layer configuration isn't supported by CpuAcc, but it is allowed to fall back to CpuRef.
+ armnn::NormalizationDescriptor descriptor;
+ armnn::IConnectableLayer* normalize = net->AddNormalizationLayer(descriptor);
+
+ armnn::IConnectableLayer* output = net->AddOutputLayer(0);
+
+ input->GetOutputSlot(0).Connect(normalize->GetInputSlot(0));
+ normalize->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+ input->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32));
+ normalize->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32));
+
+ armnn::IRuntime::CreationOptions options;
+ armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
+
+ std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc, armnn::Compute::CpuRef };
+ armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec());
+ BOOST_REQUIRE(optNet);
+
+ for (auto&& layer : static_cast<armnn::OptimizedNetwork*>(optNet.get())->GetGraph())
+ {
+ // If NEON is enabled, Input and Output layers are supported by CpuAcc,
+ // the other layers are supported by CpuRef.
+ // If NEON is not enabled, all layers are supported by CpuRef.
+#if ARMCOMPUTENEON_ENABLED
+ if (layer->GetType() == armnn::LayerType::Input || layer->GetType() == armnn::LayerType::Output)
+ {
+ BOOST_CHECK(layer->GetBackendId() == armnn::Compute::CpuAcc);
+ }
+ else if (layer->GetType() == armnn::LayerType::Normalization)
+ {
+ BOOST_CHECK(layer->GetBackendId() == armnn::Compute::CpuRef);
+ }
+#else
+ BOOST_CHECK(layer->GetBackendId() == armnn::Compute::CpuRef);
+#endif
+ }
+}
+
+BOOST_AUTO_TEST_CASE(OptimizeValidateWorkloadsUndefinedComputeDevice)
+{
+ const armnn::TensorInfo desc({3, 5}, armnn::DataType::Float32);
+
+ armnn::Network net;
+
+ armnn::NormalizationDescriptor nmDesc;
+ armnn::ActivationDescriptor acDesc;
+
+ // in
+ // |
+ // nm
+ // / |
+ // ac |
+ // \ |
+ // ml
+ // |
+ // sm
+ // |
+ // ot
+ armnn::IConnectableLayer* layer = net.AddInputLayer(0, "in");
+ layer->GetOutputSlot(0).SetTensorInfo(desc);
+
+ armnn::IConnectableLayer* const normLayer = net.AddNormalizationLayer(nmDesc, "nm");
+
+ layer->GetOutputSlot(0).Connect(normLayer->GetInputSlot(0));
+ normLayer->GetOutputSlot(0).SetTensorInfo(desc);
+
+ layer = net.AddActivationLayer(acDesc, "ac");
+
+ normLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
+ layer->GetOutputSlot(0).SetTensorInfo(desc);
+
+ armnn::IConnectableLayer* prevLayer = layer;
+ layer = net.AddMultiplicationLayer("ml");
+
+ prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
+ normLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1));
+ layer->GetOutputSlot(0).SetTensorInfo(desc);
+
+ prevLayer = layer;
+ armnn::SoftmaxDescriptor softmaxDescriptor;
+ layer = net.AddSoftmaxLayer(softmaxDescriptor, "sm");
+
+ prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
+ layer->GetOutputSlot(0).SetTensorInfo(desc);
+
+ prevLayer = layer;
+ layer = net.AddOutputLayer(0, "ot");
+
+ prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
+
+ armnn::IRuntime::CreationOptions options;
+ armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
+
+ std::vector<armnn::BackendId> backends = { armnn::Compute::Undefined };
+
+ armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(net, backends, runtime->GetDeviceSpec());
+ BOOST_CHECK(!optNet);
+}
+
+BOOST_AUTO_TEST_CASE(OptimizeValidateWorkloadsUndefinedComputeDeviceWithFallback)
+{
+ const armnn::TensorInfo desc({3, 5}, armnn::DataType::Float32);
+
+ armnn::Network net;
+
+ armnn::NormalizationDescriptor nmDesc;
+ armnn::ActivationDescriptor acDesc;
+
+ // in
+ // |
+ // nm
+ // / |
+ // ac |
+ // \ |
+ // ml
+ // |
+ // sm
+ // |
+ // ot
+ armnn::IConnectableLayer* layer = net.AddInputLayer(0, "in");
+ layer->GetOutputSlot(0).SetTensorInfo(desc);
+
+ armnn::IConnectableLayer* const normLayer = net.AddNormalizationLayer(nmDesc, "nm");
+
+ layer->GetOutputSlot(0).Connect(normLayer->GetInputSlot(0));
+ normLayer->GetOutputSlot(0).SetTensorInfo(desc);
+
+ layer = net.AddActivationLayer(acDesc, "ac");
+
+ normLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
+ layer->GetOutputSlot(0).SetTensorInfo(desc);
+
+ armnn::IConnectableLayer* prevLayer = layer;
+ layer = net.AddMultiplicationLayer("ml");
+
+ prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
+ normLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1));
+ layer->GetOutputSlot(0).SetTensorInfo(desc);
+
+ prevLayer = layer;
+ armnn::SoftmaxDescriptor softmaxDescriptor;
+ layer = net.AddSoftmaxLayer(softmaxDescriptor, "sm");
+
+ prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
+ layer->GetOutputSlot(0).SetTensorInfo(desc);
+
+ prevLayer = layer;
+ layer = net.AddOutputLayer(0, "ot");
+
+ prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
+
+ armnn::IRuntime::CreationOptions options;
+ armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
+
+ std::vector<armnn::BackendId> backends = { armnn::Compute::Undefined, armnn::Compute::CpuRef };
+
+ armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(net, backends, runtime->GetDeviceSpec());
+ BOOST_CHECK(optNet);
+
+ // validate workloads
+ armnn::RefWorkloadFactory fact;
+ for (auto&& layer : static_cast<armnn::OptimizedNetwork*>(optNet.get())->GetGraph())
+ {
+ BOOST_CHECK(layer->GetBackendId() == armnn::Compute::CpuRef);
+ BOOST_CHECK_NO_THROW(
+ layer->CreateWorkload(static_cast<armnn::OptimizedNetwork*>(optNet.get())->GetGraph(), fact));
+ }
+}
+
+BOOST_AUTO_TEST_CASE(OptimizeValidateWorkloadsDuplicateComputeDeviceWithFallback)
+{
+ // build up the structure of the network
+ armnn::INetworkPtr net(armnn::INetwork::Create());
+
+ armnn::IConnectableLayer* input = net->AddInputLayer(0);
+
+ // This layer configuration isn't supported by CpuAcc, but it is allowed to fall back to CpuRef.
+ armnn::NormalizationDescriptor descriptor;
+ armnn::IConnectableLayer* normalize = net->AddNormalizationLayer(descriptor);
+
+ armnn::IConnectableLayer* output = net->AddOutputLayer(0);
+
+ input->GetOutputSlot(0).Connect(normalize->GetInputSlot(0));
+ normalize->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+ input->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32));
+ normalize->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32));
+
+ armnn::IRuntime::CreationOptions options;
+ armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
+
+ std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc,
+ armnn::Compute::GpuAcc,
+ armnn::Compute::CpuRef };
+
+ armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec());
+ BOOST_REQUIRE(optNet);
+
+ for (auto&& layer : static_cast<armnn::OptimizedNetwork*>(optNet.get())->GetGraph())
+ {
+ // If NEON is enabled, Input and Output layers are supported by CpuAcc,
+ // the other layers are supported by CpuRef.
+ // If only CL is enabled, Input and Output layers are supported by GpuAcc,
+ // the other layers are supported by CpuRef.
+ // If neither NEON, nor CL is enabled, all layers are supported by CpuRef.
+#if ARMCOMPUTENEON_ENABLED
+ if (layer->GetType() == armnn::LayerType::Input || layer->GetType() == armnn::LayerType::Output)
+ {
+ BOOST_CHECK(layer->GetBackendId() == armnn::Compute::CpuAcc);
+ }
+ else if (layer->GetType() == armnn::LayerType::Normalization)
+ {
+ BOOST_CHECK(layer->GetBackendId() == armnn::Compute::CpuRef);
+ }
+#elif ARMCOMPUTECL_ENABLED
+ if (layer->GetType() == armnn::LayerType::Input || layer->GetType() == armnn::LayerType::Output)
+ {
+ BOOST_CHECK(layer->GetBackendId() == armnn::Compute::GpuAcc);
+ }
+ else if (layer->GetType() == armnn::LayerType::Normalization)
+ {
+ BOOST_CHECK(layer->GetBackendId() == armnn::Compute::CpuRef);
+ }
+#else
+ BOOST_CHECK(layer->GetBackendId() == armnn::Compute::CpuRef);
+#endif
+ }
+}
+
+BOOST_AUTO_TEST_SUITE_END()
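
These cases register with Boost.Test, so assuming they are linked into ArmNN's
usual UnitTests binary, the suite can be run on its own with the standard
Boost.Test filter (binary name assumed):

    ./UnitTests --run_test=OptimizedNetwork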