aboutsummaryrefslogtreecommitdiff
path: root/src/armnn/test
diff options
context:
space:
mode:
Diffstat (limited to 'src/armnn/test')
-rw-r--r--src/armnn/test/CreateWorkload.hpp814
-rw-r--r--src/armnn/test/CreateWorkloadClNeon.hpp107
-rw-r--r--src/armnn/test/EndToEndTest.cpp411
-rw-r--r--src/armnn/test/GraphTests.cpp497
-rw-r--r--src/armnn/test/GraphUtils.hpp24
-rw-r--r--src/armnn/test/Network_test.cpp425
-rw-r--r--src/armnn/test/RuntimeTests.cpp190
-rw-r--r--src/armnn/test/TensorHelpers.hpp201
-rw-r--r--src/armnn/test/TensorTest.cpp146
-rw-r--r--src/armnn/test/UnitTests.cpp60
-rw-r--r--src/armnn/test/UnitTests.hpp79
-rw-r--r--src/armnn/test/UtilsTests.cpp58
12 files changed, 3012 insertions, 0 deletions
diff --git a/src/armnn/test/CreateWorkload.hpp b/src/armnn/test/CreateWorkload.hpp
new file mode 100644
index 0000000000..d8aa208eb7
--- /dev/null
+++ b/src/armnn/test/CreateWorkload.hpp
@@ -0,0 +1,814 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+#pragma once
+
+#include <boost/test/unit_test.hpp>
+
+#include <boost/cast.hpp>
+
+#include "backends/WorkloadData.hpp"
+#include "Layers.hpp"
+#include "Graph.hpp"
+
+#include <utility>
+
+#include "backends/CpuTensorHandle.hpp"
+
+using namespace armnn;
+
+namespace
+{
+
+using namespace std;
+
+// Calls CreateWorkload for a layer, and checks the returned pointer is of the correct type.
+// Also verifies that the factory reports the layer as supported for the layer's data type.
+template<typename Workload>
+std::unique_ptr<Workload> MakeAndCheckWorkload(Layer& layer, Graph& graph, const IWorkloadFactory& factory)
+{
+    std::unique_ptr<IWorkload> workload = layer.CreateWorkload(graph, factory);
+    // polymorphic_downcast checks (via assert in debug builds) that the dynamic type is really Workload*.
+    BOOST_TEST(workload.get() == boost::polymorphic_downcast<Workload*>(workload.get()),
+               "Cannot convert to derived class");
+    std::string reasonIfUnsupported;
+    BOOST_TEST(factory.IsLayerSupported(layer, layer.GetDataType(), reasonIfUnsupported));
+    // Transfer ownership into a correctly-typed smart pointer; the downcast was validated above.
+    return std::unique_ptr<Workload>(static_cast<Workload*>(workload.release()));
+}
+
+// Connects the given output slot of 'from' to the given input slot of 'to',
+// and sets the supplied tensor info on that output slot.
+void Connect(Layer* from, Layer* to, const TensorInfo& tensorInfo, unsigned int fromIndex = 0, unsigned int toIndex = 0)
+{
+    from->GetOutputSlot(fromIndex).Connect(to->GetInputSlot(toIndex));
+    from->GetOutputHandler(fromIndex).SetTensorInfo(tensorInfo);
+}
+
+// Helper function to create tensor handles for all layers in the graph, assuming they all use
+// the same factory. Layers are visited in topological order so producers are processed first.
+void CreateTensorHandles(armnn::Graph& graph, armnn::IWorkloadFactory& factory)
+{
+    for (auto&& layer : graph.TopologicalSort())
+    {
+        layer->CreateTensorHandles(graph, factory);
+    }
+}
+
+/////////////////////////////////////////////////////////////////////////////////////////////
+// The following functions are called by backends/test/CreateWorkload*.cpp
+// They build very simple graphs, and then create a workload.
+// Some checks are performed on the workload to ensure parameters have been passed correctly.
+// They return the created workloads so that backend-specific checks can be performed.
+/////////////////////////////////////////////////////////////////////////////////////////////
+
+// Builds the graph Input -> Activation(Abs, A=3.5, B=-10) -> Output, creates the activation
+// workload via the given factory and checks the queue descriptor carries the expected
+// parameters with one input and one output. Returns the workload for backend-specific checks.
+template <typename ActivationWorkload>
+std::unique_ptr<ActivationWorkload> CreateActivationWorkloadTest(armnn::IWorkloadFactory& factory,
+                                                                 armnn::Graph&            graph)
+{
+    // create the layer we're testing
+    ActivationDescriptor layerDesc;
+    layerDesc.m_Function = ActivationFunction::Abs;
+    layerDesc.m_A        = 3.5f;
+    layerDesc.m_B        = -10.0f;
+
+    ActivationLayer* const layer = graph.AddLayer<ActivationLayer>(layerDesc, "layer");
+
+    // create extra layers
+    Layer* const input = graph.AddLayer<InputLayer>(0, "input");
+    Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
+
+    // connect up
+    armnn::TensorInfo tensorInfo({1, 1}, ActivationWorkload::ms_DataType);
+
+    Connect(input, layer, tensorInfo);
+    Connect(layer, output, tensorInfo);
+
+    CreateTensorHandles(graph, factory);
+
+    // make the workload and check it
+    auto workload = MakeAndCheckWorkload<ActivationWorkload>(*layer, graph, factory);
+
+    ActivationQueueDescriptor queueDescriptor = workload->GetData();
+    BOOST_TEST(queueDescriptor.m_Inputs.size() == 1);
+    BOOST_TEST(queueDescriptor.m_Outputs.size() == 1);
+    BOOST_TEST(queueDescriptor.m_Parameters.m_A == 3.5f);
+    BOOST_TEST(queueDescriptor.m_Parameters.m_B == -10.0f);
+    BOOST_TEST((queueDescriptor.m_Parameters.m_Function == ActivationFunction::Abs));
+
+    // return so we can do extra, backend-specific tests
+    return workload;
+}
+
+// Builds the graph (Input1, Input2) -> Addition -> Output, creates the addition workload and
+// checks the queue descriptor has two inputs and one output. Returns the workload for
+// backend-specific checks.
+template <typename AdditionWorkload>
+std::unique_ptr<AdditionWorkload> CreateAdditionWorkloadTest(armnn::IWorkloadFactory& factory,
+                                                             armnn::Graph&            graph)
+{
+    // create the layer we're testing
+    Layer* const layer = graph.AddLayer<AdditionLayer>("layer");
+
+    // create extra layers
+    Layer* const input1 = graph.AddLayer<InputLayer>(1, "input1");
+    Layer* const input2 = graph.AddLayer<InputLayer>(2, "input2");
+    Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
+
+    // connect up
+    armnn::TensorInfo tensorInfo({2, 3}, AdditionWorkload::ms_DataType);
+    Connect(input1, layer, tensorInfo, 0, 0);
+    Connect(input2, layer, tensorInfo, 0, 1);
+    Connect(layer, output, tensorInfo);
+    CreateTensorHandles(graph, factory);
+
+    // make the workload and check it
+    auto workload = MakeAndCheckWorkload<AdditionWorkload>(*layer, graph, factory);
+
+    AdditionQueueDescriptor queueDescriptor = workload->GetData();
+    BOOST_TEST(queueDescriptor.m_Inputs.size() == 2);
+    BOOST_TEST(queueDescriptor.m_Outputs.size() == 1);
+
+    // return so we can do extra, backend-specific tests
+    return workload;
+}
+
+// Builds the graph Input -> BatchNormalization(eps=0.05) -> Output with allocated mean,
+// variance, beta and gamma tensors of shape {3}, creates the workload and checks the queue
+// descriptor carries the parameters and the four weight tensor infos correctly.
+template <typename BatchNormalizationFloat32Workload>
+std::unique_ptr<BatchNormalizationFloat32Workload> CreateBatchNormalizationWorkloadTest(
+    armnn::IWorkloadFactory& factory, armnn::Graph& graph)
+{
+    // create the layer we're testing
+    BatchNormalizationDescriptor layerDesc;
+    layerDesc.m_Eps = 0.05f;
+
+    BatchNormalizationLayer* const layer = graph.AddLayer<BatchNormalizationLayer>(layerDesc, "layer");
+
+    armnn::TensorInfo weightInfo({3}, armnn::DataType::Float32);
+    layer->m_Mean     = std::make_unique<ScopedCpuTensorHandle>(weightInfo);
+    layer->m_Variance = std::make_unique<ScopedCpuTensorHandle>(weightInfo);
+    layer->m_Beta     = std::make_unique<ScopedCpuTensorHandle>(weightInfo);
+    layer->m_Gamma    = std::make_unique<ScopedCpuTensorHandle>(weightInfo);
+    layer->m_Mean->Allocate();
+    layer->m_Variance->Allocate();
+    layer->m_Beta->Allocate();
+    layer->m_Gamma->Allocate();
+
+    // create extra layers
+    Layer* const input = graph.AddLayer<InputLayer>(0, "input");
+    Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
+
+    // connect up
+    armnn::TensorInfo tensorInfo({2, 3, 1, 1}, armnn::DataType::Float32);
+    Connect(input, layer, tensorInfo);
+    Connect(layer, output, tensorInfo);
+    CreateTensorHandles(graph, factory);
+
+    // make the workload and check it
+    auto workload = MakeAndCheckWorkload<BatchNormalizationFloat32Workload>(*layer, graph, factory);
+
+    BatchNormalizationQueueDescriptor queueDescriptor = workload->GetData();
+    BOOST_TEST(queueDescriptor.m_Parameters.m_Eps == 0.05f);
+    BOOST_TEST(queueDescriptor.m_Inputs.size() == 1);
+    BOOST_TEST(queueDescriptor.m_Outputs.size() == 1);
+    BOOST_TEST((queueDescriptor.m_Mean->GetTensorInfo() == TensorInfo({3}, DataType::Float32)));
+    BOOST_TEST((queueDescriptor.m_Variance->GetTensorInfo() == TensorInfo({3}, DataType::Float32)));
+    BOOST_TEST((queueDescriptor.m_Gamma->GetTensorInfo() == TensorInfo({3}, DataType::Float32)));
+    BOOST_TEST((queueDescriptor.m_Beta->GetTensorInfo() == TensorInfo({3}, DataType::Float32)));
+
+    // return so we can do extra, backend-specific tests
+    return workload;
+}
+
+// Builds the graph Input -> Convolution2d -> Output with asymmetric padding (3,3,1,1),
+// strides (2,4) and bias enabled, creates the convolution workload and checks the queue
+// descriptor carries the parameters and the weight/bias tensor infos correctly.
+template <typename Convolution2dWorkload>
+std::unique_ptr<Convolution2dWorkload> CreateConvolution2dWorkloadTest(armnn::IWorkloadFactory& factory,
+                                                                       armnn::Graph&            graph)
+{
+    // create the layer we're testing
+    Convolution2dDescriptor layerDesc;
+    layerDesc.m_PadLeft = 3;
+    layerDesc.m_PadRight = 3;
+    layerDesc.m_PadTop = 1;
+    layerDesc.m_PadBottom = 1;
+    layerDesc.m_StrideX = 2;
+    layerDesc.m_StrideY = 4;
+    layerDesc.m_BiasEnabled = true;
+
+    Convolution2dLayer* const layer = graph.AddLayer<Convolution2dLayer>(layerDesc, "layer");
+
+    layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(TensorInfo({2, 3, 5, 3},
+        Convolution2dWorkload::ms_DataType));
+    layer->m_Bias   = std::make_unique<ScopedCpuTensorHandle>
+        (TensorInfo({2}, GetBiasDataType(Convolution2dWorkload::ms_DataType)));
+
+    layer->m_Weight->Allocate();
+    layer->m_Bias->Allocate();
+
+    // create extra layers
+    Layer* const input = graph.AddLayer<InputLayer>(0, "input");
+    Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
+
+    // connect up
+    Connect(input, layer, TensorInfo({2, 3, 8, 16}, Convolution2dWorkload::ms_DataType));
+    Connect(layer, output, TensorInfo({2, 2, 2, 10}, Convolution2dWorkload::ms_DataType));
+    CreateTensorHandles(graph, factory);
+
+    // make the workload and check it
+    auto workload = MakeAndCheckWorkload<Convolution2dWorkload>(*layer, graph, factory);
+
+    Convolution2dQueueDescriptor queueDescriptor = workload->GetData();
+    BOOST_TEST(queueDescriptor.m_Parameters.m_StrideX == 2);
+    BOOST_TEST(queueDescriptor.m_Parameters.m_StrideY == 4);
+    BOOST_TEST(queueDescriptor.m_Parameters.m_PadLeft == 3);
+    BOOST_TEST(queueDescriptor.m_Parameters.m_PadRight == 3);
+    BOOST_TEST(queueDescriptor.m_Parameters.m_PadTop == 1);
+    BOOST_TEST(queueDescriptor.m_Parameters.m_PadBottom == 1);
+    BOOST_TEST(queueDescriptor.m_Parameters.m_BiasEnabled == true);
+
+    BOOST_TEST(queueDescriptor.m_Inputs.size() == 1);
+    BOOST_TEST(queueDescriptor.m_Outputs.size() == 1);
+    BOOST_TEST((queueDescriptor.m_Weight->GetTensorInfo() == TensorInfo({2, 3, 5, 3},
+        Convolution2dWorkload::ms_DataType)));
+    BOOST_TEST((queueDescriptor.m_Bias->GetTensorInfo() ==
+        TensorInfo({2}, GetBiasDataType(Convolution2dWorkload::ms_DataType))));
+
+    // return so we can do extra, backend-specific tests
+    return workload;
+}
+
+// Builds the graph Input -> Convolution2d -> Output with uniform 1-padding, unit strides and
+// bias enabled, creates the convolution workload and checks the queue descriptor carries the
+// parameters and weight/bias tensor infos correctly. For QuantisedAsymm8 workloads the
+// input/weight tensors use quantization scale 1.0 and the output tensor uses scale 2.0.
+template <typename Convolution2dWorkload>
+std::unique_ptr<Convolution2dWorkload> CreateDirectConvolution2dWorkloadTest(armnn::IWorkloadFactory& factory,
+                                                                       armnn::Graph&            graph)
+{
+    // create the layer we're testing
+    Convolution2dDescriptor layerDesc;
+    layerDesc.m_PadLeft = 1;
+    layerDesc.m_PadRight = 1;
+    layerDesc.m_PadTop = 1;
+    layerDesc.m_PadBottom = 1;
+    layerDesc.m_StrideX = 1;
+    layerDesc.m_StrideY = 1;
+    layerDesc.m_BiasEnabled = true;
+
+    Convolution2dLayer* const layer = graph.AddLayer<Convolution2dLayer>(layerDesc, "layer");
+
+    // Use float literals throughout (0.0f, not 0.0) to avoid an implicit double->float narrowing.
+    float inputsQScale = Convolution2dWorkload::ms_DataType == DataType::QuantisedAsymm8 ? 1.0f : 0.0f;
+    float outputQScale = Convolution2dWorkload::ms_DataType == DataType::QuantisedAsymm8 ? 2.0f : 0.0f;
+
+    layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(TensorInfo({ 2, 3, 3, 3 },
+        Convolution2dWorkload::ms_DataType, inputsQScale));
+    layer->m_Bias   = std::make_unique<ScopedCpuTensorHandle>
+        (TensorInfo({2}, GetBiasDataType(Convolution2dWorkload::ms_DataType), inputsQScale));
+    layer->m_Weight->Allocate();
+    layer->m_Bias->Allocate();
+
+    // create extra layers
+    Layer* const input = graph.AddLayer<InputLayer>(0, "input");
+    Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
+
+    // connect up
+    Connect(input, layer, TensorInfo({2, 3, 6, 6}, Convolution2dWorkload::ms_DataType, inputsQScale));
+    Connect(layer, output, TensorInfo({2, 2, 6, 6}, Convolution2dWorkload::ms_DataType, outputQScale));
+    CreateTensorHandles(graph, factory);
+
+    // make the workload and check it
+    auto workload = MakeAndCheckWorkload<Convolution2dWorkload>(*layer, graph, factory);
+
+    Convolution2dQueueDescriptor queueDescriptor = workload->GetData();
+    BOOST_TEST(queueDescriptor.m_Parameters.m_StrideX == 1);
+    BOOST_TEST(queueDescriptor.m_Parameters.m_StrideY == 1);
+    BOOST_TEST(queueDescriptor.m_Parameters.m_PadLeft == 1);
+    BOOST_TEST(queueDescriptor.m_Parameters.m_PadRight == 1);
+    BOOST_TEST(queueDescriptor.m_Parameters.m_PadTop == 1);
+    BOOST_TEST(queueDescriptor.m_Parameters.m_PadBottom == 1);
+    BOOST_TEST(queueDescriptor.m_Parameters.m_BiasEnabled == true);
+
+    BOOST_TEST(queueDescriptor.m_Inputs.size() == 1);
+    BOOST_TEST(queueDescriptor.m_Outputs.size() == 1);
+    BOOST_TEST((queueDescriptor.m_Weight->GetTensorInfo() == TensorInfo({2, 3, 3, 3},
+        Convolution2dWorkload::ms_DataType, inputsQScale)));
+    BOOST_TEST((queueDescriptor.m_Bias->GetTensorInfo()
+                == TensorInfo({2}, GetBiasDataType(Convolution2dWorkload::ms_DataType), inputsQScale)));
+
+    // return so we can do extra, backend-specific tests
+    return workload;
+}
+
+// Builds the graph Input -> DepthwiseConvolution2d -> Output with asymmetric padding (3,3,1,1),
+// strides (2,4) and bias enabled, creates the workload and checks the queue descriptor carries
+// the parameters and the weight ({3,3,5,3}) / bias ({9}) tensor infos correctly.
+template <typename DepthwiseConvolution2dFloat32Workload>
+std::unique_ptr<DepthwiseConvolution2dFloat32Workload> CreateDepthwiseConvolution2dWorkloadTest(
+    armnn::IWorkloadFactory& factory, armnn::Graph& graph)
+{
+    // create the layer we're testing
+    DepthwiseConvolution2dDescriptor layerDesc;
+    layerDesc.m_PadLeft         = 3;
+    layerDesc.m_PadRight        = 3;
+    layerDesc.m_PadTop          = 1;
+    layerDesc.m_PadBottom       = 1;
+    layerDesc.m_StrideX         = 2;
+    layerDesc.m_StrideY         = 4;
+    layerDesc.m_BiasEnabled     = true;
+
+    DepthwiseConvolution2dLayer* const layer = graph.AddLayer<DepthwiseConvolution2dLayer>(layerDesc, "layer");
+
+    layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(TensorInfo({3, 3, 5, 3}, DataType::Float32));
+    layer->m_Bias   = std::make_unique<ScopedCpuTensorHandle>(TensorInfo({9}, DataType::Float32));
+    layer->m_Weight->Allocate();
+    layer->m_Bias->Allocate();
+
+    // create extra layers
+    Layer* const input = graph.AddLayer<InputLayer>(0, "input");
+    Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
+
+    // connect up
+    Connect(input, layer, TensorInfo({2, 3, 8, 16}, armnn::DataType::Float32));
+    Connect(layer, output, TensorInfo({2, 9, 2, 10}, armnn::DataType::Float32));
+    CreateTensorHandles(graph, factory);
+
+    // make the workload and check it
+    auto workload = MakeAndCheckWorkload<DepthwiseConvolution2dFloat32Workload>(*layer, graph, factory);
+
+    DepthwiseConvolution2dQueueDescriptor queueDescriptor = workload->GetData();
+    BOOST_TEST(queueDescriptor.m_Parameters.m_StrideX == 2);
+    BOOST_TEST(queueDescriptor.m_Parameters.m_StrideY == 4);
+    BOOST_TEST(queueDescriptor.m_Parameters.m_PadLeft == 3);
+    BOOST_TEST(queueDescriptor.m_Parameters.m_PadRight == 3);
+    BOOST_TEST(queueDescriptor.m_Parameters.m_PadTop == 1);
+    BOOST_TEST(queueDescriptor.m_Parameters.m_PadBottom == 1);
+    BOOST_TEST(queueDescriptor.m_Parameters.m_BiasEnabled == true);
+
+    BOOST_TEST(queueDescriptor.m_Inputs.size() == 1);
+    BOOST_TEST(queueDescriptor.m_Outputs.size() == 1);
+    BOOST_TEST((queueDescriptor.m_Weight->GetTensorInfo() == TensorInfo({3, 3, 5, 3}, DataType::Float32)));
+    BOOST_TEST((queueDescriptor.m_Bias->GetTensorInfo() == TensorInfo({9}, DataType::Float32)));
+
+    // return so we can do extra, backend-specific tests
+    return workload;
+}
+
+// Builds the graph Input -> FullyConnected -> Output with bias enabled and transposed weight
+// matrix, creates the workload and checks the queue descriptor carries the parameters and the
+// weight ({7,20}) / bias ({7}) tensor infos correctly. For QuantisedAsymm8 workloads the
+// input/weight tensors use quantization scale 1.0 and the output tensor uses scale 2.0.
+template <typename FullyConnectedWorkload>
+std::unique_ptr<FullyConnectedWorkload> CreateFullyConnectedWorkloadTest(armnn::IWorkloadFactory& factory,
+                                                                         armnn::Graph&            graph)
+{
+    // create the layer we're testing
+    FullyConnectedDescriptor layerDesc;
+    layerDesc.m_BiasEnabled = true;
+    layerDesc.m_TransposeWeightMatrix = true;
+
+    FullyConnectedLayer* const layer = graph.AddLayer<FullyConnectedLayer>(layerDesc, "layer");
+
+    // Use float literals throughout (0.0f, not 0.0) to avoid an implicit double->float narrowing.
+    float inputsQScale = FullyConnectedWorkload::ms_DataType == DataType::QuantisedAsymm8 ? 1.0f : 0.0f;
+    float outputQScale = FullyConnectedWorkload::ms_DataType == DataType::QuantisedAsymm8 ? 2.0f : 0.0f;
+
+    layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(TensorInfo({7, 20},
+        FullyConnectedWorkload::ms_DataType, inputsQScale, 0));
+    layer->m_Bias   = std::make_unique<ScopedCpuTensorHandle>(TensorInfo({7},
+        GetBiasDataType(FullyConnectedWorkload::ms_DataType), inputsQScale));
+    layer->m_Weight->Allocate();
+    layer->m_Bias->Allocate();
+
+    // create extra layers
+    Layer* const input = graph.AddLayer<InputLayer>(0, "input");
+    Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
+
+    // connect up
+    Connect(input, layer, TensorInfo({3, 1, 4, 5}, FullyConnectedWorkload::ms_DataType, inputsQScale));
+    Connect(layer, output, TensorInfo({3, 7}, FullyConnectedWorkload::ms_DataType, outputQScale));
+    CreateTensorHandles(graph, factory);
+
+    // make the workload and check it
+    auto workload = MakeAndCheckWorkload<FullyConnectedWorkload>(*layer, graph, factory);
+
+    FullyConnectedQueueDescriptor queueDescriptor = workload->GetData();
+    BOOST_TEST(queueDescriptor.m_Parameters.m_BiasEnabled == true);
+    BOOST_TEST(queueDescriptor.m_Parameters.m_TransposeWeightMatrix == true);
+
+    BOOST_TEST(queueDescriptor.m_Inputs.size() == 1);
+    BOOST_TEST(queueDescriptor.m_Outputs.size() == 1);
+    BOOST_TEST((queueDescriptor.m_Weight->GetTensorInfo() ==
+        TensorInfo({7, 20}, FullyConnectedWorkload::ms_DataType, inputsQScale)));
+    BOOST_TEST((queueDescriptor.m_Bias->GetTensorInfo() ==
+        TensorInfo({7}, GetBiasDataType(FullyConnectedWorkload::ms_DataType), inputsQScale)));
+
+    // return so we can do extra, backend-specific tests
+    return workload;
+}
+
+// Builds the graph (Input1, Input2) -> Multiplication -> Output, creates the multiplication
+// workload and checks the queue descriptor has two inputs and one output. Returns the workload
+// for backend-specific checks.
+template <typename MultiplicationWorkload>
+std::unique_ptr<MultiplicationWorkload> CreateMultiplicationWorkloadTest(armnn::IWorkloadFactory& factory,
+                                                                         armnn::Graph&            graph)
+{
+    // create the layer we're testing
+    Layer* const layer = graph.AddLayer<MultiplicationLayer>("layer");
+
+    // create extra layers
+    Layer* const input1 = graph.AddLayer<InputLayer>(1, "input1");
+    Layer* const input2 = graph.AddLayer<InputLayer>(2, "input2");
+    Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
+
+    // connect up
+    armnn::TensorInfo tensorInfo({2, 3}, MultiplicationWorkload::ms_DataType);
+    Connect(input1, layer, tensorInfo, 0, 0);
+    Connect(input2, layer, tensorInfo, 0, 1);
+    Connect(layer, output, tensorInfo);
+    CreateTensorHandles(graph, factory);
+
+    // make the workload and check it
+    auto workload = MakeAndCheckWorkload<MultiplicationWorkload>(*layer, graph, factory);
+
+    MultiplicationQueueDescriptor queueDescriptor = workload->GetData();
+    BOOST_TEST(queueDescriptor.m_Inputs.size() == 2);
+    BOOST_TEST(queueDescriptor.m_Outputs.size() == 1);
+
+    // return so we can do extra, backend-specific tests
+    return workload;
+}
+
+// Builds the graph Input -> Normalization(Across/LocalBrightness, size=3, alpha=0.5, beta=-1,
+// K=0.2) -> Output, creates the workload and checks the queue descriptor carries all the
+// normalization parameters with one input and one output.
+template <typename NormalizationFloat32Workload>
+std::unique_ptr<NormalizationFloat32Workload> CreateNormalizationWorkloadTest(armnn::IWorkloadFactory& factory,
+                                                                              armnn::Graph&            graph)
+{
+    // create the layer we're testing
+    NormalizationDescriptor layerDesc;
+    layerDesc.m_NormChannelType = NormalizationAlgorithmChannel::Across;
+    layerDesc.m_NormMethodType = NormalizationAlgorithmMethod::LocalBrightness;
+    layerDesc.m_NormSize = 3;
+    layerDesc.m_Alpha = 0.5f;
+    layerDesc.m_Beta = -1.0f;
+    layerDesc.m_K = 0.2f;
+
+    NormalizationLayer* layer = graph.AddLayer<NormalizationLayer>(layerDesc, "layer");
+
+    // create extra layers
+    Layer* const input = graph.AddLayer<InputLayer>(0, "input");
+    Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
+
+    // connect up
+    Connect(input, layer, TensorInfo({3, 5, 5, 1}, armnn::DataType::Float32));
+    Connect(layer, output, TensorInfo({3, 5, 5, 1}, armnn::DataType::Float32));
+    CreateTensorHandles(graph, factory);
+
+    // make the workload and check it
+    auto workload = MakeAndCheckWorkload<NormalizationFloat32Workload>(*layer, graph, factory);
+
+    NormalizationQueueDescriptor queueDescriptor = workload->GetData();
+    BOOST_TEST((queueDescriptor.m_Parameters.m_NormChannelType == NormalizationAlgorithmChannel::Across));
+    BOOST_TEST((queueDescriptor.m_Parameters.m_NormMethodType == NormalizationAlgorithmMethod::LocalBrightness));
+    BOOST_TEST(queueDescriptor.m_Parameters.m_NormSize == 3);
+    BOOST_TEST(queueDescriptor.m_Parameters.m_Alpha == 0.5f);
+    BOOST_TEST(queueDescriptor.m_Parameters.m_Beta == -1.0f);
+    BOOST_TEST(queueDescriptor.m_Parameters.m_K == 0.2f);
+
+    BOOST_TEST(queueDescriptor.m_Inputs.size() == 1);
+    BOOST_TEST(queueDescriptor.m_Outputs.size() == 1);
+
+    // return so we can do extra, backend-specific tests
+    return workload;
+}
+
+// Builds the graph Input -> Pooling2d(Average 3x3, padding (2,2,1,1), strides (2,3), Floor
+// rounding) -> Output, creates the workload and checks the queue descriptor carries all the
+// pooling parameters with one input and one output.
+template <typename Pooling2dWorkload>
+std::unique_ptr<Pooling2dWorkload> CreatePooling2dWorkloadTest(armnn::IWorkloadFactory& factory,
+                                                               armnn::Graph&            graph)
+{
+    // create the layer we're testing
+    Pooling2dDescriptor layerDesc;
+    layerDesc.m_PoolType = PoolingAlgorithm::Average;
+    layerDesc.m_PoolWidth = 3;
+    layerDesc.m_PoolHeight = 3;
+    layerDesc.m_PadLeft = 2;
+    layerDesc.m_PadRight = 2;
+    layerDesc.m_PadTop = 1;
+    layerDesc.m_PadBottom = 1;
+    layerDesc.m_StrideX = 2;
+    layerDesc.m_StrideY = 3;
+    layerDesc.m_OutputShapeRounding = OutputShapeRounding::Floor;
+
+    Pooling2dLayer* const layer = graph.AddLayer<Pooling2dLayer>(layerDesc, "layer");
+
+    // create extra layers
+    Layer* const input = graph.AddLayer<InputLayer>(0, "input");
+    Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
+
+    // connect up
+    Connect(input, layer, TensorInfo({3, 2, 5, 5}, Pooling2dWorkload::ms_DataType));
+    Connect(layer, output, TensorInfo({3, 2, 2, 4}, Pooling2dWorkload::ms_DataType));
+    CreateTensorHandles(graph, factory);
+
+    // make the workload and check it
+    auto workload = MakeAndCheckWorkload<Pooling2dWorkload>(*layer, graph, factory);
+
+    Pooling2dQueueDescriptor queueDescriptor = workload->GetData();
+    BOOST_TEST((queueDescriptor.m_Parameters.m_PoolType == PoolingAlgorithm::Average));
+    BOOST_TEST((queueDescriptor.m_Parameters.m_OutputShapeRounding == OutputShapeRounding::Floor));
+    BOOST_TEST(queueDescriptor.m_Parameters.m_PoolWidth == 3);
+    BOOST_TEST(queueDescriptor.m_Parameters.m_PoolHeight == 3);
+    BOOST_TEST(queueDescriptor.m_Parameters.m_StrideX == 2);
+    BOOST_TEST(queueDescriptor.m_Parameters.m_StrideY == 3);
+    BOOST_TEST(queueDescriptor.m_Parameters.m_PadLeft == 2);
+    BOOST_TEST(queueDescriptor.m_Parameters.m_PadRight == 2);
+    BOOST_TEST(queueDescriptor.m_Parameters.m_PadTop == 1);
+    BOOST_TEST(queueDescriptor.m_Parameters.m_PadBottom == 1);
+
+    BOOST_TEST(queueDescriptor.m_Inputs.size() == 1);
+    BOOST_TEST(queueDescriptor.m_Outputs.size() == 1);
+
+    // return so we can do extra, backend-specific tests
+    return workload;
+}
+
+// Builds the graph Input -> Softmax (default descriptor) -> Output, creates the softmax
+// workload and checks the queue descriptor has one input and one output.
+template <typename SoftmaxWorkload>
+std::unique_ptr<SoftmaxWorkload> CreateSoftmaxWorkloadTest(armnn::IWorkloadFactory& factory,
+                                                           armnn::Graph&            graph)
+{
+    // create the layer we're testing
+    SoftmaxDescriptor softmaxDescriptor;
+    Layer* const layer = graph.AddLayer<SoftmaxLayer>(softmaxDescriptor, "layer");
+
+    // create extra layers
+    Layer* const input = graph.AddLayer<InputLayer>(0, "input");
+    Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
+
+    // connect up
+    armnn::TensorInfo tensorInfo({4, 1}, SoftmaxWorkload::ms_DataType);
+    Connect(input, layer, tensorInfo);
+    Connect(layer, output, tensorInfo);
+    CreateTensorHandles(graph, factory);
+
+    // make the workload and check it
+    auto workload = MakeAndCheckWorkload<SoftmaxWorkload>(*layer, graph, factory);
+
+    SoftmaxQueueDescriptor queueDescriptor = workload->GetData();
+    BOOST_TEST(queueDescriptor.m_Inputs.size() == 1);
+    BOOST_TEST(queueDescriptor.m_Outputs.size() == 1);
+
+    // return so we can do extra, backend-specific tests
+    return workload;
+}
+
+// Builds a graph Input -> Splitter(3 views over a {1,7} tensor) -> three Outputs, deliberately
+// setting view origins and connecting outputs out of order, then checks the created workload's
+// view origins come back in the correct per-view order.
+template<typename SplitterWorkload>
+std::unique_ptr<SplitterWorkload>
+    CreateSplitterWorkloadTest(armnn::IWorkloadFactory& factory, armnn::Graph& graph)
+{
+    // create the layer we're testing
+    ViewsDescriptor layerDesc(3, 2);
+    layerDesc.SetViewOriginCoord(0, 1, 2); // deliberately add these in a weird order
+    layerDesc.SetViewOriginCoord(2, 1, 0);
+    layerDesc.SetViewOriginCoord(1, 1, 3);
+
+    Layer* const layer = graph.AddLayer<SplitterLayer>(layerDesc, "layer");
+
+    // add extra layers
+    Layer* const input = graph.AddLayer<InputLayer>(0, "input");
+    Layer* const output0 = graph.AddLayer<OutputLayer>(0, "output0");
+    Layer* const output1 = graph.AddLayer<OutputLayer>(1, "output1");
+    Layer* const output2 = graph.AddLayer<OutputLayer>(2, "output2");
+
+    // connect up
+    armnn::TensorInfo tensorInfo({1, 7}, SplitterWorkload::ms_DataType);
+    Connect(input, layer, tensorInfo);
+
+    armnn::TensorInfo output0Info({1, 2}, SplitterWorkload::ms_DataType);
+    armnn::TensorInfo output1Info({1, 1}, SplitterWorkload::ms_DataType);
+    armnn::TensorInfo output2Info({1, 4}, SplitterWorkload::ms_DataType);
+    Connect(layer, output1, output1Info, 1, 0); // deliberately connect these up in a weird order
+    Connect(layer, output0, output0Info, 2, 0);
+    Connect(layer, output2, output2Info, 0, 0);
+
+    CreateTensorHandles(graph, factory);
+
+    // make the workload and check it
+    auto workload = MakeAndCheckWorkload<SplitterWorkload>(*layer, graph, factory);
+
+    SplitterQueueDescriptor queueDescriptor = workload->GetData();
+    BOOST_TEST(queueDescriptor.m_Inputs.size() == 1);
+    BOOST_TEST(queueDescriptor.m_Outputs.size() == 3);
+    BOOST_TEST(queueDescriptor.m_ViewOrigins.size() == 3);
+
+    // each view's origin should match what was set on the descriptor, regardless of insert order
+    BOOST_TEST(queueDescriptor.m_ViewOrigins[0].m_Origin[0] == 0);
+    BOOST_TEST(queueDescriptor.m_ViewOrigins[1].m_Origin[0] == 0);
+    BOOST_TEST(queueDescriptor.m_ViewOrigins[2].m_Origin[0] == 0);
+    BOOST_TEST(queueDescriptor.m_ViewOrigins[0].m_Origin[1] == 2);
+    BOOST_TEST(queueDescriptor.m_ViewOrigins[1].m_Origin[1] == 3);
+    BOOST_TEST(queueDescriptor.m_ViewOrigins[2].m_Origin[1] == 0);
+
+    // return so we can do extra, backend-specific tests
+    return workload;
+}
+
+/// This function constructs a graph with both a splitter and a merger, and returns a pair of the workloads.
+/// The splitter divides a {1,1,100,10} tensor into {1,1,60,10} and {1,1,40,10} views; the merger
+/// recombines them, with the two connections deliberately crossed over (outputs flipped round).
+template<typename SplitterWorkload, typename MergerWorkload>
+std::pair<std::unique_ptr<SplitterWorkload>, std::unique_ptr<MergerWorkload>>
+    CreateSplitterMergerWorkloadTest(armnn::IWorkloadFactory& factory, armnn::Graph& graph)
+{
+    static_assert(SplitterWorkload::ms_DataType == MergerWorkload::ms_DataType,
+        "Splitter and merger workloads must have the same data type");
+
+    armnn::TensorInfo inputTensorInfo({ 1, 1, 100, 10 }, SplitterWorkload::ms_DataType);
+    armnn::TensorInfo splitTensorInfo1({ 1, 1, 60, 10 }, SplitterWorkload::ms_DataType);
+    armnn::TensorInfo splitTensorInfo2({ 1, 1, 40, 10 }, SplitterWorkload::ms_DataType);
+
+    //construct the graph
+    Layer* const input = graph.AddLayer<InputLayer>(0, "input");
+
+    armnn::ViewsDescriptor splitterViews(2);
+    splitterViews.SetViewOriginCoord(0, 0, 0);
+    splitterViews.SetViewOriginCoord(0, 1, 0);
+    splitterViews.SetViewOriginCoord(0, 2, 0);
+    splitterViews.SetViewOriginCoord(0, 3, 0);
+
+    splitterViews.SetViewOriginCoord(1, 0, 0);
+    splitterViews.SetViewOriginCoord(1, 1, 0);
+    splitterViews.SetViewOriginCoord(1, 2, 60);
+    splitterViews.SetViewOriginCoord(1, 3, 0);
+
+    Layer* const splitter = graph.AddLayer<SplitterLayer>(splitterViews, "splitter");
+
+    armnn::OriginsDescriptor mergerViews(2);
+    mergerViews.SetViewOriginCoord(0, 0, 0);
+    mergerViews.SetViewOriginCoord(0, 1, 0);
+    mergerViews.SetViewOriginCoord(0, 2, 0);
+    mergerViews.SetViewOriginCoord(0, 3, 0);
+
+    mergerViews.SetViewOriginCoord(1, 0, 0);
+    mergerViews.SetViewOriginCoord(1, 1, 0);
+    mergerViews.SetViewOriginCoord(1, 2, 40);
+    mergerViews.SetViewOriginCoord(1, 3, 0);
+
+    Layer* const merger = graph.AddLayer<MergerLayer>(mergerViews, "merger");
+
+    Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
+
+    // add connections
+    Connect(input, splitter, inputTensorInfo, 0, 0);
+    Connect(splitter, merger, splitTensorInfo1, 0, 1); // The splitter & merger are connected up
+    Connect(splitter, merger, splitTensorInfo2, 1, 0); // so that the outputs are flipped round
+    Connect(merger, output, inputTensorInfo, 0, 0);
+
+    CreateTensorHandles(graph, factory);
+
+    auto workloadSplitter = MakeAndCheckWorkload<SplitterWorkload>(*splitter, graph, factory);
+    auto workloadMerger = MakeAndCheckWorkload<MergerWorkload>(*merger, graph, factory);
+
+    return {std::move(workloadSplitter), std::move(workloadMerger)};
+}
+
+
+/// This function constructs a graph with a splitter with two outputs. Each of the outputs is then
+/// connected to two different activation layers, giving four activations in total; the created
+/// splitter and activation workloads are returned through the out-parameters.
+template<typename SplitterWorkload, typename ActivationWorkload>
+void CreateSplitterMultipleInputsOneOutputWorkloadTest(armnn::IWorkloadFactory& factory, armnn::Graph& graph,
+                                 std::unique_ptr<SplitterWorkload>& wlSplitter,
+                                 std::unique_ptr<ActivationWorkload>& wlActiv0_0,
+                                 std::unique_ptr<ActivationWorkload>& wlActiv0_1,
+                                 std::unique_ptr<ActivationWorkload>& wlActiv1_0,
+                                 std::unique_ptr<ActivationWorkload>& wlActiv1_1)
+{
+    static_assert(SplitterWorkload::ms_DataType == ActivationWorkload::ms_DataType,
+        "Splitter and activation workloads must have the same data type");
+
+    armnn::TensorInfo inputTensorInfo({ 1, 1, 100, 10 }, SplitterWorkload::ms_DataType);
+    armnn::TensorInfo splitTensorInfo1({ 1, 1, 60, 10 }, SplitterWorkload::ms_DataType);
+    armnn::TensorInfo splitTensorInfo2({ 1, 1, 40, 10 }, SplitterWorkload::ms_DataType);
+
+    //construct the graph
+    Layer* const input = graph.AddLayer<InputLayer>(0, "input");
+
+    armnn::ViewsDescriptor splitterViews(2);
+    splitterViews.SetViewOriginCoord(0, 0, 0);
+    splitterViews.SetViewOriginCoord(0, 1, 0);
+    splitterViews.SetViewOriginCoord(0, 2, 0);
+    splitterViews.SetViewOriginCoord(0, 3, 0);
+
+    splitterViews.SetViewOriginCoord(1, 0, 0);
+    splitterViews.SetViewOriginCoord(1, 1, 0);
+    splitterViews.SetViewOriginCoord(1, 2, 60);
+    splitterViews.SetViewOriginCoord(1, 3, 0);
+
+    Layer* const splitter = graph.AddLayer<SplitterLayer>(splitterViews, "splitter");
+
+    armnn::ActivationDescriptor activationDesc;
+
+    Layer* const activ0_0 = graph.AddLayer<ActivationLayer>(activationDesc, "activ0_0");
+    Layer* const activ0_1 = graph.AddLayer<ActivationLayer>(activationDesc, "activ0_1");
+    Layer* const activ1_0 = graph.AddLayer<ActivationLayer>(activationDesc, "activ1_0");
+    Layer* const activ1_1 = graph.AddLayer<ActivationLayer>(activationDesc, "activ1_1");
+
+    Layer* const output1 = graph.AddLayer<OutputLayer>(1, "output1");
+    Layer* const output2 = graph.AddLayer<OutputLayer>(2, "output2");
+    Layer* const output3 = graph.AddLayer<OutputLayer>(3, "output3");
+    Layer* const output4 = graph.AddLayer<OutputLayer>(4, "output4");
+
+    // add connections
+    Connect(input, splitter, inputTensorInfo, 0, 0);
+    Connect(splitter, activ0_0, splitTensorInfo1, 0, 0);
+    Connect(splitter, activ0_1, splitTensorInfo1, 0, 0);
+
+    Connect(splitter, activ1_0, splitTensorInfo2, 1, 0);
+    Connect(splitter, activ1_1, splitTensorInfo2, 1, 0);
+
+    Connect(activ0_0, output1, splitTensorInfo1, 0, 0);
+    Connect(activ0_1, output2, splitTensorInfo1, 0, 0);
+    Connect(activ1_0, output3, splitTensorInfo2, 0, 0);
+    Connect(activ1_1, output4, splitTensorInfo2, 0, 0);
+
+    CreateTensorHandles(graph, factory);
+
+    auto workloadSplitter = MakeAndCheckWorkload<SplitterWorkload>(*splitter, graph, factory);
+    auto workloadActiv0_0 = MakeAndCheckWorkload<ActivationWorkload>(*activ0_0, graph, factory);
+    auto workloadActiv0_1 = MakeAndCheckWorkload<ActivationWorkload>(*activ0_1, graph, factory);
+    auto workloadActiv1_0 = MakeAndCheckWorkload<ActivationWorkload>(*activ1_0, graph, factory);
+    auto workloadActiv1_1 = MakeAndCheckWorkload<ActivationWorkload>(*activ1_1, graph, factory);
+
+    wlSplitter = std::move(workloadSplitter);
+    wlActiv0_0 = std::move(workloadActiv0_0);
+    wlActiv0_1 = std::move(workloadActiv0_1);
+    wlActiv1_0 = std::move(workloadActiv1_0);
+    wlActiv1_1 = std::move(workloadActiv1_1);
+}
+
+// Builds the graph Input ({2,3,4,4}) -> ResizeBilinear (target 2x2, taken from the output
+// shape) -> Output, creates the workload and checks the queue descriptor has one input and
+// one output.
+template <typename ResizeBilinearWorkload>
+std::unique_ptr<ResizeBilinearWorkload> CreateResizeBilinearWorkloadTest(armnn::IWorkloadFactory& factory,
+                                                                         armnn::Graph&            graph)
+{
+    // create the layer we're testing
+    TensorShape outputShape({ 2, 3, 2, 2 });
+    ResizeBilinearDescriptor resizeDesc;
+    resizeDesc.m_TargetWidth = outputShape[3];
+    resizeDesc.m_TargetHeight = outputShape[2];
+    Layer* const layer = graph.AddLayer<ResizeBilinearLayer>(resizeDesc, "layer");
+
+    // create extra layers
+    Layer* const input = graph.AddLayer<InputLayer>(0, "input");
+    Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
+
+    // connect up
+    armnn::TensorInfo inputTensorInfo({ 2, 3, 4, 4 }, ResizeBilinearWorkload::ms_DataType);
+    armnn::TensorInfo outputTensorInfo(outputShape, ResizeBilinearWorkload::ms_DataType);
+    Connect(input, layer, inputTensorInfo);
+    Connect(layer, output, outputTensorInfo);
+    CreateTensorHandles(graph, factory);
+
+    // make the workload and check it
+    auto workload = MakeAndCheckWorkload<ResizeBilinearWorkload>(*layer, graph, factory);
+
+    ResizeBilinearQueueDescriptor queueDescriptor = workload->GetData();
+    BOOST_TEST(queueDescriptor.m_Inputs.size() == 1);
+    BOOST_TEST(queueDescriptor.m_Outputs.size() == 1);
+
+    // return so we can do extra, backend-specific tests
+    return workload;
+}
+
+// Builds the graph Input -> L2Normalization -> Output over a {5,20,50,67} tensor, creates the
+// workload and checks the queue descriptor has one input and one output.
+template <typename L2NormalizationWorkload>
+std::unique_ptr<L2NormalizationWorkload> CreateL2NormalizationWorkloadTest(armnn::IWorkloadFactory& factory,
+                                                                           armnn::Graph&            graph)
+{
+    // create the layer we're testing
+    Layer* const layer = graph.AddLayer<L2NormalizationLayer>("l2norm");
+
+    // create extra layers
+    Layer* const input = graph.AddLayer<InputLayer>(0, "input");
+    Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
+
+    // connect up
+    armnn::TensorInfo inputTensorInfo({ 5, 20, 50, 67 }, L2NormalizationWorkload::ms_DataType);
+    armnn::TensorInfo outputTensorInfo({ 5, 20, 50, 67 }, L2NormalizationWorkload::ms_DataType);
+    Connect(input, layer, inputTensorInfo);
+    Connect(layer, output, outputTensorInfo);
+    CreateTensorHandles(graph, factory);
+
+    // make the workload and check it
+    auto workload = MakeAndCheckWorkload<L2NormalizationWorkload>(*layer, graph, factory);
+
+    L2NormalizationQueueDescriptor queueDescriptor = workload->GetData();
+    BOOST_TEST(queueDescriptor.m_Inputs.size() == 1);
+    BOOST_TEST(queueDescriptor.m_Outputs.size() == 1);
+
+    // return so we can do extra, backend-specific tests
+    return workload;
+}
+
+// Builds the graph Input ({4,1}) -> Reshape (target {1,4}) -> Output, creates the workload and
+// checks the queue descriptor has one input and one output.
+template <typename ReshapeWorkload>
+std::unique_ptr<ReshapeWorkload> CreateReshapeWorkloadTest(armnn::IWorkloadFactory& factory,
+    armnn::Graph&            graph)
+{
+    // create the layer we're testing
+    TensorShape outputShape({ 1, 4 });
+    ReshapeDescriptor reshapeDesc;
+    reshapeDesc.m_TargetShape = outputShape;
+    Layer* const layer = graph.AddLayer<ReshapeLayer>(reshapeDesc, "layer");
+
+    // create extra layers
+    Layer* const input = graph.AddLayer<InputLayer>(0, "input");
+    Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
+
+    // connect up
+    armnn::TensorInfo inputTensorInfo({ 4, 1 }, ReshapeWorkload::ms_DataType);
+    armnn::TensorInfo outputTensorInfo(outputShape, ReshapeWorkload::ms_DataType);
+    Connect(input, layer, inputTensorInfo);
+    Connect(layer, output, outputTensorInfo);
+    CreateTensorHandles(graph, factory);
+
+    // make the workload and check it
+    auto workload = MakeAndCheckWorkload<ReshapeWorkload>(*layer, graph, factory);
+
+    ReshapeQueueDescriptor queueDescriptor = workload->GetData();
+    BOOST_TEST(queueDescriptor.m_Inputs.size() == 1);
+    BOOST_TEST(queueDescriptor.m_Outputs.size() == 1);
+
+    // return so we can do extra, backend-specific tests
+    return workload;
+}
+
+}
diff --git a/src/armnn/test/CreateWorkloadClNeon.hpp b/src/armnn/test/CreateWorkloadClNeon.hpp
new file mode 100644
index 0000000000..a41a70755f
--- /dev/null
+++ b/src/armnn/test/CreateWorkloadClNeon.hpp
@@ -0,0 +1,107 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+#pragma once
+
+#include "CreateWorkload.hpp"
+#include "backends/RefWorkloadFactory.hpp"
+
+#if ARMCOMPUTECL_ENABLED
+#include "backends/ClTensorHandle.hpp"
+#endif
+
+#if ARMCOMPUTENEON_ENABLED
+#include "backends/NeonTensorHandle.hpp"
+#endif
+
+
+using namespace armnn;
+
+namespace
+{
+
+using namespace std;
+
+// Boost predicate comparing the dimensions of an ACL tensor handle against an
+// expected shape. The loop walks expectedDimensions forward while indexing the
+// ITensorInfo backwards from its last dimension, so the expected list appears
+// to be given in the reverse order of ITensorInfo's indexing - TODO confirm
+// against callers. Returns a failing predicate with a message on mismatch.
+template<typename IComputeTensorHandle>
+boost::test_tools::predicate_result CompareTensorHandleShape(IComputeTensorHandle* tensorHandle,
+ std::initializer_list<unsigned int> expectedDimensions)
+{
+ arm_compute::ITensorInfo* info = tensorHandle->GetTensor().info();
+
+ auto infoNumDims = info->num_dimensions();
+ auto numExpectedDims = expectedDimensions.size();
+ if (infoNumDims != numExpectedDims)
+ {
+ boost::test_tools::predicate_result res(false);
+ res.message() << "Different number of dimensions [" << info->num_dimensions()
+ << "!=" << expectedDimensions.size() << "]";
+ return res;
+ }
+
+ size_t i = info->num_dimensions() - 1;
+
+ for (unsigned int expectedDimension : expectedDimensions)
+ {
+ if (info->dimension(i) != expectedDimension)
+ {
+ boost::test_tools::predicate_result res(false);
+ res.message() << "Different dimension [" << info->dimension(i) << "!=" << expectedDimension << "]";
+ return res;
+ }
+
+ i--; // wraps to SIZE_MAX after the last element, but the loop ends before i is used again
+ }
+
+ return true;
+}
+
+// Builds input -> MemCopy(layer1) -> MemCopy(layer2) -> output where layer1's
+// handles come from the backend factory under test and its neighbours from the
+// reference (CPU) factory, so layer1 copies CPU -> device and layer2 copies
+// device -> CPU. Checks both workloads' handle types and shapes.
+template<template <DataType> class CopyFromCpuWorkload, template <DataType> class CopyToCpuWorkload,
+ typename IComputeTensorHandle>
+void CreateMemCopyWorkloads(IWorkloadFactory& factory)
+{
+ Graph graph;
+ RefWorkloadFactory refFactory;
+
+ // create the layers we're testing
+ Layer* const layer1 = graph.AddLayer<MemCopyLayer>("layer1");
+ Layer* const layer2 = graph.AddLayer<MemCopyLayer>("layer2");
+
+ // create extra layers
+ Layer* const input = graph.AddLayer<InputLayer>(0, "input");
+ Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
+
+ // connect up
+ TensorInfo tensorInfo({2, 3}, DataType::Float32);
+ Connect(input, layer1, tensorInfo);
+ Connect(layer1, layer2, tensorInfo);
+ Connect(layer2, output, tensorInfo);
+
+ // layer1's output handles come from the device factory; the rest are CPU-side
+ input->CreateTensorHandles(graph, refFactory);
+ layer1->CreateTensorHandles(graph, factory);
+ layer2->CreateTensorHandles(graph, refFactory);
+ output->CreateTensorHandles(graph, refFactory);
+
+ // make the workloads and check them
+ auto workload1 = MakeAndCheckWorkload<CopyFromCpuWorkload<DataType::Float32>>(*layer1, graph, factory);
+ auto workload2 = MakeAndCheckWorkload<CopyToCpuWorkload<DataType::Float32>>(*layer2, graph, refFactory);
+
+ // workload1: CPU tensor in, compute-device tensor out
+ MemCopyQueueDescriptor queueDescriptor1 = workload1->GetData();
+ BOOST_TEST(queueDescriptor1.m_Inputs.size() == 1);
+ BOOST_TEST(queueDescriptor1.m_Outputs.size() == 1);
+ auto inputHandle1 = boost::polymorphic_downcast<ConstCpuTensorHandle*>(queueDescriptor1.m_Inputs[0]);
+ auto outputHandle1 = boost::polymorphic_downcast<IComputeTensorHandle*>(queueDescriptor1.m_Outputs[0]);
+ BOOST_TEST((inputHandle1->GetTensorInfo() == TensorInfo({2, 3}, DataType::Float32)));
+ BOOST_TEST(CompareTensorHandleShape<IComputeTensorHandle>(outputHandle1, {2, 3}));
+
+
+ // workload2: compute-device tensor in, CPU tensor out
+ MemCopyQueueDescriptor queueDescriptor2 = workload2->GetData();
+ BOOST_TEST(queueDescriptor2.m_Inputs.size() == 1);
+ BOOST_TEST(queueDescriptor2.m_Outputs.size() == 1);
+ auto inputHandle2 = boost::polymorphic_downcast<IComputeTensorHandle*>(queueDescriptor2.m_Inputs[0]);
+ auto outputHandle2 = boost::polymorphic_downcast<CpuTensorHandle*>(queueDescriptor2.m_Outputs[0]);
+ BOOST_TEST(CompareTensorHandleShape<IComputeTensorHandle>(inputHandle2, {2, 3}));
+ BOOST_TEST((outputHandle2->GetTensorInfo() == TensorInfo({2, 3}, DataType::Float32)));
+}
+
+} \ No newline at end of file
diff --git a/src/armnn/test/EndToEndTest.cpp b/src/armnn/test/EndToEndTest.cpp
new file mode 100644
index 0000000000..77a1f071a8
--- /dev/null
+++ b/src/armnn/test/EndToEndTest.cpp
@@ -0,0 +1,411 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+#include <boost/test/unit_test.hpp>
+
+#include "armnn/Descriptors.hpp"
+#include "armnn/IRuntime.hpp"
+#include "armnn/INetwork.hpp"
+
+#include "backends/test/QuantizeHelper.hpp"
+#include <boost/core/ignore_unused.hpp>
+
+BOOST_AUTO_TEST_SUITE(EndToEnd)
+
+namespace
+{
+// Deduction helper: forwards the iterator's type to the IsFloatingPointIterator
+// trait so test cases can query it from a value rather than spelling the type.
+template<typename T>
+bool IsFloatIterFunc(T iter)
+{
+ boost::ignore_unused(iter);
+ return IsFloatingPointIterator<T>::value;
+}
+} //namespace
+
+// Checks IsFloatingPointIterator classifies container iterators and raw
+// pointers correctly: true for float/double element types, false for int.
+BOOST_AUTO_TEST_CASE(QuantizedHelper)
+{
+ std::vector<float> fArray;
+ BOOST_TEST(IsFloatIterFunc(fArray.begin()) == true);
+ BOOST_TEST(IsFloatIterFunc(fArray.cbegin()) == true);
+
+ std::vector<double> dArray;
+ BOOST_TEST(IsFloatIterFunc(dArray.begin()) == true);
+
+ std::vector<int> iArray;
+ BOOST_TEST(IsFloatIterFunc(iArray.begin()) == false);
+
+ // raw pointers count as iterators too
+ float floats[5];
+ BOOST_TEST(IsFloatIterFunc(&floats[0]) == true);
+
+ int ints[5];
+ BOOST_TEST(IsFloatIterFunc(&ints[0]) == false);
+}
+
+// End-to-end: runs a quantised (QAsymm8) input -> softmax -> output network on
+// CpuRef and checks that one dominant input saturates the softmax output.
+BOOST_AUTO_TEST_CASE(Unsigned8)
+{
+ using namespace armnn;
+
+ // Create runtime in which test will run
+ armnn::IRuntimePtr runtime(armnn::IRuntime::Create(armnn::Compute::CpuRef));
+
+ // build up the structure of the network
+ armnn::INetworkPtr net(INetwork::Create());
+
+ IConnectableLayer* input = net->AddInputLayer(0, "input");
+ IConnectableLayer* softmax = net->AddSoftmaxLayer(SoftmaxDescriptor(), "softmax");
+ IConnectableLayer* output = net->AddOutputLayer(0, "output");
+
+ input->GetOutputSlot(0).Connect(softmax->GetInputSlot(0));
+ softmax->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+ // set the tensors in the network; the large input scale amplifies the
+ // difference between input values so softmax saturates
+ TensorInfo inputTensorInfo(TensorShape({1, 5}), DataType::QuantisedAsymm8);
+ inputTensorInfo.SetQuantizationOffset(100);
+ inputTensorInfo.SetQuantizationScale(10000.0f);
+ input->GetOutputSlot(0).SetTensorInfo(inputTensorInfo);
+
+ // output covers [0,1] across the full uint8 range
+ TensorInfo outputTensorInfo(TensorShape({1, 5}), DataType::QuantisedAsymm8);
+ outputTensorInfo.SetQuantizationOffset(0);
+ outputTensorInfo.SetQuantizationScale(1.0f/255.0f);
+ softmax->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
+
+ // optimize the network
+ IOptimizedNetworkPtr optNet = Optimize(*net, runtime->GetDeviceSpec());
+
+ // load it into the runtime
+ NetworkId netId;
+ runtime->LoadNetwork(netId, std::move(optNet));
+
+ // create structures for input & output
+ std::vector<uint8_t> inputData
+ {
+ 1, 10, 3, 200, 5 // some inputs - one of which is sufficiently larger than the others to saturate softmax
+ };
+ std::vector<uint8_t> outputData(5);
+
+ armnn::InputTensors inputTensors
+ {
+ {0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())}
+ };
+ armnn::OutputTensors outputTensors
+ {
+ {0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
+ };
+
+ // do the inference
+ runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
+
+ // check the results
+ BOOST_TEST(outputData[0] == 0);
+ BOOST_TEST(outputData[1] == 0);
+ BOOST_TEST(outputData[2] == 0);
+ BOOST_TEST(outputData[3] == 255); // softmax has been saturated
+ BOOST_TEST(outputData[4] == 0);
+}
+
+// Generic helper: builds input + constant -> addition -> output with all
+// tensors sharing commonTensorInfo, runs one inference on computeDevice and
+// checks the output equals expectedOutputData exactly.
+template <typename T>
+void ConstantUsageTest(armnn::Compute computeDevice,
+ const armnn::TensorInfo& commonTensorInfo,
+ const std::vector<T>& inputData,
+ const std::vector<T>& constantData,
+ const std::vector<T>& expectedOutputData)
+{
+ using namespace armnn;
+
+ // Create runtime in which test will run
+ armnn::IRuntimePtr runtime(armnn::IRuntime::Create(computeDevice));
+
+ // build up the structure of the network
+ INetworkPtr net(INetwork::Create());
+
+ IConnectableLayer* input = net->AddInputLayer(0);
+ IConnectableLayer* constant = net->AddConstantLayer(ConstTensor(commonTensorInfo, constantData));
+ IConnectableLayer* add = net->AddAdditionLayer();
+ IConnectableLayer* output = net->AddOutputLayer(0);
+
+ input->GetOutputSlot(0).Connect(add->GetInputSlot(0));
+ constant->GetOutputSlot(0).Connect(add->GetInputSlot(1));
+ add->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+ // set the tensors in the network
+ input->GetOutputSlot(0).SetTensorInfo(commonTensorInfo);
+ constant->GetOutputSlot(0).SetTensorInfo(commonTensorInfo);
+ add->GetOutputSlot(0).SetTensorInfo(commonTensorInfo);
+
+ // optimize the network
+ IOptimizedNetworkPtr optNet = Optimize(*net, runtime->GetDeviceSpec());
+
+ // load it into the runtime
+ NetworkId netId;
+ runtime->LoadNetwork(netId, std::move(optNet));
+
+ // create structures for input & output
+ std::vector<T> outputData(inputData.size());
+
+ InputTensors inputTensors
+ {
+ {0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())}
+ };
+ OutputTensors outputTensors
+ {
+ {0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
+ };
+
+ // do the inference
+ runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
+
+ // check the results
+ BOOST_TEST(outputData == expectedOutputData);
+}
+
+// Float32 instantiation of ConstantUsageTest: 1..6 plus 6..1 gives all 7s.
+static void ConstantUsageFloat32Test(armnn::Compute computeDevice)
+{
+ const armnn::TensorInfo commonTensorInfo({ 2, 3 }, armnn::DataType::Float32);
+
+ ConstantUsageTest(computeDevice,
+ commonTensorInfo,
+ std::vector<float>{ 1.f, 2.f, 3.f, 4.f, 5.f, 6.f }, // input
+ std::vector<float>{ 6.f, 5.f, 4.f, 3.f, 2.f, 1.f }, // const input
+ std::vector<float>{ 7.f, 7.f, 7.f, 7.f, 7.f, 7.f } // expected output
+ );
+}
+
+// QAsymm8 instantiation: same values as the float test, but quantised with a
+// shared scale/offset so the expected sum is also representable exactly.
+static void ConstantUsageUint8Test(armnn::Compute computeDevice)
+{
+ armnn::TensorInfo commonTensorInfo({ 2, 3 }, armnn::DataType::QuantisedAsymm8);
+
+ const float scale = 0.023529f;
+ const int8_t offset = -43;
+
+ commonTensorInfo.SetQuantizationScale(scale);
+ commonTensorInfo.SetQuantizationOffset(offset);
+
+ ConstantUsageTest(computeDevice,
+ commonTensorInfo,
+ QuantizedVector<uint8_t>(scale, offset, { 1.f, 2.f, 3.f, 4.f, 5.f, 6.f }), // input
+ QuantizedVector<uint8_t>(scale, offset, { 6.f, 5.f, 4.f, 3.f, 2.f, 1.f }), // const input
+ QuantizedVector<uint8_t>(scale, offset, { 7.f, 7.f, 7.f, 7.f, 7.f, 7.f }) // expected output
+ );
+}
+
+// The cases below run the ConstantUsage helpers on each backend that is
+// compiled in; Neon/CL variants are guarded by their build flags.
+BOOST_AUTO_TEST_CASE(ConstantUsage_Ref_Float32)
+{
+ ConstantUsageFloat32Test(armnn::Compute::CpuRef);
+}
+
+#if ARMCOMPUTENEON_ENABLED
+BOOST_AUTO_TEST_CASE(ConstantUsage_Neon_Float32)
+{
+ ConstantUsageFloat32Test(armnn::Compute::CpuAcc);
+}
+#endif
+
+#if ARMCOMPUTECL_ENABLED
+BOOST_AUTO_TEST_CASE(ConstantUsage_Cl_Float32)
+{
+ ConstantUsageFloat32Test(armnn::Compute::GpuAcc);
+}
+#endif
+
+BOOST_AUTO_TEST_CASE(ConstantUsage_Ref_Uint8)
+{
+ ConstantUsageUint8Test(armnn::Compute::CpuRef);
+}
+
+// End-to-end: two inputs added together on CpuRef, mirroring the Android NN
+// "AddTwo" test. Also fixes the input binding: the second ConstTensor now
+// queries the tensor info for binding id 1 instead of reusing binding 0
+// (previously masked because both inputs share the same TensorInfo).
+BOOST_AUTO_TEST_CASE(TrivialAdd)
+{
+ // This test was designed to match "AddTwo" in android nn/runtime/test/TestTrivialModel.cpp
+
+ using namespace armnn;
+
+ // Create runtime in which test will run
+ armnn::IRuntimePtr runtime(armnn::IRuntime::Create(armnn::Compute::CpuRef));
+
+ // build up the structure of the network
+ armnn::INetworkPtr net(INetwork::Create());
+
+ IConnectableLayer* input1 = net->AddInputLayer(0);
+ IConnectableLayer* input2 = net->AddInputLayer(1);
+ IConnectableLayer* add = net->AddAdditionLayer();
+ IConnectableLayer* output = net->AddOutputLayer(0);
+
+ input1->GetOutputSlot(0).Connect(add->GetInputSlot(0));
+ input2->GetOutputSlot(0).Connect(add->GetInputSlot(1));
+ add->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+ // set the tensors in the network
+ TensorInfo tensorInfo(TensorShape({3, 4}), DataType::Float32);
+ input1->GetOutputSlot(0).SetTensorInfo(tensorInfo);
+ input2->GetOutputSlot(0).SetTensorInfo(tensorInfo);
+ add->GetOutputSlot(0).SetTensorInfo(tensorInfo);
+
+ // optimize the network
+ IOptimizedNetworkPtr optNet = Optimize(*net, runtime->GetDeviceSpec());
+
+ // load it into the runtime
+ NetworkId netId;
+ runtime->LoadNetwork(netId, std::move(optNet));
+
+ // create structures for input & output - matching android nn test
+ std::vector<float> input1Data
+ {
+ 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f
+ };
+ std::vector<float> input2Data
+ {
+ 100.f, 200.f, 300.f, 400.f, 500.f, 600.f, 700.f, 800.f, 900.f, 1000.f, 1100.f, 1200.f
+ };
+ std::vector<float> outputData(12);
+
+ InputTensors inputTensors
+ {
+ {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), input1Data.data())},
+ // Fixed: query the info for input binding 1 (was incorrectly binding 0)
+ {1,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), input2Data.data())}
+ };
+ OutputTensors outputTensors
+ {
+ {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
+ };
+
+ // do the inference
+ runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
+
+ // check the results
+ BOOST_TEST(outputData[0] == 101);
+ BOOST_TEST(outputData[1] == 202);
+ BOOST_TEST(outputData[2] == 303);
+ BOOST_TEST(outputData[3] == 404);
+ BOOST_TEST(outputData[4] == 505);
+ BOOST_TEST(outputData[5] == 606);
+ BOOST_TEST(outputData[6] == 707);
+ BOOST_TEST(outputData[7] == 808);
+ BOOST_TEST(outputData[8] == 909);
+ BOOST_TEST(outputData[9] == 1010);
+ BOOST_TEST(outputData[10] == 1111);
+ BOOST_TEST(outputData[11] == 1212);
+}
+
+// End-to-end: one input fans out to three differently-configured BoundedReLu
+// layers, each with its own output binding; a single inference must fill all
+// three outputs with their respective clamped results.
+BOOST_AUTO_TEST_CASE(MultipleOutputs)
+{
+ using namespace armnn;
+
+ // Create runtime in which test will run
+ armnn::IRuntimePtr runtime(armnn::IRuntime::Create(armnn::Compute::CpuRef));
+
+ // build up the structure of the network
+ INetworkPtr net(INetwork::Create());
+
+ IConnectableLayer* input = net->AddInputLayer(0);
+
+ // ReLu1: clamp to [-1, 1]
+ ActivationDescriptor activation1Descriptor;
+ activation1Descriptor.m_Function = ActivationFunction::BoundedReLu;
+ activation1Descriptor.m_A = 1.f;
+ activation1Descriptor.m_B = -1.f;
+ IConnectableLayer* activation1 = net->AddActivationLayer(activation1Descriptor);
+
+ // ReLu6: clamp to [0, 6] (m_B defaults to 0)
+ ActivationDescriptor activation2Descriptor;
+ activation2Descriptor.m_Function = ActivationFunction::BoundedReLu;
+ activation2Descriptor.m_A = 6.0f;
+ IConnectableLayer* activation2 = net->AddActivationLayer(activation2Descriptor);
+
+ // BoundedReLu(min=2, max=5)
+ ActivationDescriptor activation3Descriptor;
+ activation3Descriptor.m_Function = ActivationFunction::BoundedReLu;
+ activation3Descriptor.m_A = 5.0f;
+ activation3Descriptor.m_B = 2.0f;
+ IConnectableLayer* activation3 = net->AddActivationLayer(activation3Descriptor);
+
+ IConnectableLayer* output1 = net->AddOutputLayer(0);
+ IConnectableLayer* output2 = net->AddOutputLayer(1);
+ IConnectableLayer* output3 = net->AddOutputLayer(2);
+
+ input->GetOutputSlot(0).Connect(activation1->GetInputSlot(0));
+ input->GetOutputSlot(0).Connect(activation2->GetInputSlot(0));
+ input->GetOutputSlot(0).Connect(activation3->GetInputSlot(0));
+
+ activation1->GetOutputSlot(0).Connect(output1->GetInputSlot(0));
+ activation2->GetOutputSlot(0).Connect(output2->GetInputSlot(0));
+ activation3->GetOutputSlot(0).Connect(output3->GetInputSlot(0));
+
+ // set the tensors in the network
+ TensorInfo tensorInfo(TensorShape({ 10 }), DataType::Float32);
+ input->GetOutputSlot(0).SetTensorInfo(tensorInfo);
+ activation1->GetOutputSlot(0).SetTensorInfo(tensorInfo);
+ activation2->GetOutputSlot(0).SetTensorInfo(tensorInfo);
+ activation3->GetOutputSlot(0).SetTensorInfo(tensorInfo);
+
+ // optimize the network
+ IOptimizedNetworkPtr optNet = Optimize(*net, runtime->GetDeviceSpec());
+
+ // load it into the runtime
+ NetworkId netId;
+ runtime->LoadNetwork(netId, std::move(optNet));
+
+ // create structures for input & output
+ const std::vector<float> inputData{ 3.f, 5.f, 2.f, 3.f, 7.f, 0.f, -2.f, -1.f, 3.f, 3.f };
+
+ std::vector<float> output1Data(inputData.size());
+ std::vector<float> output2Data(inputData.size());
+ std::vector<float> output3Data(inputData.size());
+
+ InputTensors inputTensors
+ {
+ {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())}
+ };
+ OutputTensors outputTensors
+ {
+ {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), output1Data.data())},
+ {1,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 1), output2Data.data())},
+ {2,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 2), output3Data.data())}
+ };
+
+ // do the inference
+ runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
+
+ // check the results
+ BOOST_TEST(output1Data == std::vector<float>({ 1.f, 1.f, 1.f, 1.f, 1.f, 0.f, -1.f, -1.f, 1.f, 1.f })); // ReLu1
+ BOOST_TEST(output2Data == std::vector<float>({ 3.f, 5.f, 2.f, 3.f, 6.f, 0.f, 0.f, 0.f, 3.f, 3.f })); // ReLu6
+ BOOST_TEST(output3Data == std::vector<float>({ 3.f, 5.f, 2.f, 3.f, 5.f, 2.f, 2.f, 2.f, 3.f, 3.f })); // [2, 5]
+}
+
+#if ARMCOMPUTENEON_ENABLED
+// Checks that LoadNetwork reports Failure when a network contains a layer
+// unsupported by CpuAcc and CpuRef fallback is disabled.
+// Cleanup: the local previously named 'pooling' actually holds a
+// normalization layer; renamed for clarity (behaviour unchanged).
+BOOST_AUTO_TEST_CASE(ErrorOnLoadNetwork)
+{
+ using namespace armnn;
+
+ // Create runtime in which test will run
+ // Note we don't allow falling back to CpuRef if an operation (excluding inputs, outputs, etc.) isn't supported
+ armnn::IRuntime::CreationOptions options(armnn::Compute::CpuAcc);
+ options.m_UseCpuRefAsFallback = false;
+ armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
+
+ // build up the structure of the network
+ INetworkPtr net(INetwork::Create());
+
+ IConnectableLayer* input = net->AddInputLayer(0);
+
+ // This layer configuration isn't supported by CpuAcc and isn't allowed to fall back, so LoadNetwork will fail.
+ NormalizationDescriptor descriptor;
+ IConnectableLayer* normLayer = net->AddNormalizationLayer(descriptor);
+
+ IConnectableLayer* output = net->AddOutputLayer(0);
+
+ input->GetOutputSlot(0).Connect(normLayer->GetInputSlot(0));
+ normLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+ input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 4 }, DataType::Float32));
+ normLayer->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 4 }, DataType::Float32));
+
+ // optimize the network
+ IOptimizedNetworkPtr optNet = Optimize(*net, runtime->GetDeviceSpec());
+
+ // Load it into the runtime. It should fail.
+ NetworkId netId;
+ BOOST_TEST(runtime->LoadNetwork(netId, std::move(optNet)) == Status::Failure);
+}
+#endif // ARMCOMPUTENEON_ENABLED
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/armnn/test/GraphTests.cpp b/src/armnn/test/GraphTests.cpp
new file mode 100644
index 0000000000..473cda1247
--- /dev/null
+++ b/src/armnn/test/GraphTests.cpp
@@ -0,0 +1,497 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+#include <boost/test/unit_test.hpp>
+
+#include "armnn/ArmNN.hpp"
+#include "Graph.hpp"
+#include "Layer.hpp"
+#include "Layers.hpp"
+#include "armnn/TypesUtils.hpp"
+#include "armnn/Exceptions.hpp"
+
+#include "GraphUtils.hpp"
+#include "backends/CpuTensorHandle.hpp"
+
+#include <boost/cast.hpp>
+
+/// checks that first comes before second in the order
+/// Returns false if 'first' is not present (the search for 'second' then
+/// starts at end()). NOTE(review): if first == second, 'second' is found at
+/// firstPos itself and this returns true - callers should pass distinct layers.
+bool CheckOrder(const armnn::Graph& graph, const armnn::Layer* first, const armnn::Layer* second)
+{
+ graph.Print(); // debugging aid: dump the graph before the ordering check
+
+ const auto& order = graph.TopologicalSort();
+
+ auto firstPos = std::find(order.begin(), order.end(), first);
+ auto secondPos = std::find(firstPos, order.end(), second);
+
+ return (secondPos != order.end());
+}
+
+// Returns the first layer in the graph whose name matches, or nullptr if none does.
+static armnn::Layer* GetFirstLayerWithName(armnn::Graph& graph, const std::string& name)
+{
+ for (auto&& layer : graph)
+ {
+ if (layer->GetNameStr() == name)
+ {
+ return layer;
+ }
+ }
+ return nullptr;
+}
+
+BOOST_AUTO_TEST_SUITE(Graph)
+
+// Smoke test: a layer can be added to a graph and found by name afterwards.
+BOOST_AUTO_TEST_CASE(ClassGraph)
+{
+ armnn::Graph graph;
+ BOOST_CHECK_NO_THROW(graph.AddLayer<armnn::InputLayer>(0, "layerA"));
+ BOOST_TEST(GraphHasNamedLayer(graph, "layerA"));
+}
+
+// Builds a diamond-shaped graph and checks TopologicalSort respects every edge.
+BOOST_AUTO_TEST_CASE(TopologicalSort)
+{
+ armnn::Graph graph;
+
+ armnn::ActivationDescriptor activationDefaults;
+
+ BOOST_CHECK_NO_THROW(graph.AddLayer<armnn::InputLayer>(0, "layerA"));
+ BOOST_CHECK_NO_THROW(graph.AddLayer<armnn::ActivationLayer>(activationDefaults, "layerB"));
+ BOOST_CHECK_NO_THROW(graph.AddLayer<armnn::AdditionLayer>("layerC"));
+ BOOST_CHECK_NO_THROW(graph.AddLayer<armnn::OutputLayer>(0, "output"));
+ BOOST_CHECK_NO_THROW(graph.AddLayer<armnn::ActivationLayer>(activationDefaults, "layerD"));
+ BOOST_CHECK_NO_THROW(graph.AddLayer<armnn::ActivationLayer>(activationDefaults, "layerE"));
+
+ armnn::Layer* const layerA = GetFirstLayerWithName(graph, "layerA");
+ armnn::Layer* const layerB = GetFirstLayerWithName(graph, "layerB");
+ armnn::Layer* const layerC = GetFirstLayerWithName(graph, "layerC");
+ armnn::Layer* const layerO = GetFirstLayerWithName(graph, "output");
+ armnn::Layer* const layerE = GetFirstLayerWithName(graph, "layerE");
+ armnn::Layer* const layerD = GetFirstLayerWithName(graph, "layerD");
+
+ // simple graph which branches and rejoins
+ // A
+ // / \'
+ // D E
+ // \ |
+ // \ B
+ // \|
+ // C
+ layerA->GetOutputSlot(0).Connect(layerD->GetInputSlot(0));
+ layerA->GetOutputSlot(0).Connect(layerE->GetInputSlot(0));
+ layerE->GetOutputSlot(0).Connect(layerB->GetInputSlot(0));
+ layerD->GetOutputSlot(0).Connect(layerC->GetInputSlot(0));
+ layerB->GetOutputSlot(0).Connect(layerC->GetInputSlot(1));
+ layerC->GetOutputSlot(0).Connect(layerO->GetInputSlot(0));
+
+ // check order is valid
+ BOOST_TEST(CheckOrder(graph, layerA, layerD));
+ BOOST_TEST(CheckOrder(graph, layerA, layerE));
+ BOOST_TEST(CheckOrder(graph, layerD, layerC));
+ BOOST_TEST(CheckOrder(graph, layerE, layerB));
+ BOOST_TEST(CheckOrder(graph, layerB, layerC));
+}
+
+// Checks InsertNewLayer splices a new layer into an existing connection
+// (rewiring producer -> new layer -> consumer) while keeping the topological
+// order of all surrounding layers valid.
+BOOST_AUTO_TEST_CASE(InsertNewLayer)
+{
+ armnn::Graph graph;
+ armnn::TensorInfo tensorInfo({ 1, 1, 1, 1 }, armnn::DataType::Float32);
+
+ std::vector<armnn::Layer*> order;
+
+ armnn::ActivationDescriptor activationDefaults;
+ BOOST_CHECK_NO_THROW(graph.AddLayer<armnn::InputLayer>(0, "layerA"));
+ BOOST_CHECK_NO_THROW(graph.AddLayer<armnn::ActivationLayer>(activationDefaults, "layerB"));
+ BOOST_CHECK_NO_THROW(graph.AddLayer<armnn::ActivationLayer>(activationDefaults, "layerC"));
+ BOOST_CHECK_NO_THROW(graph.AddLayer<armnn::AdditionLayer>("layerD"));
+ BOOST_CHECK_NO_THROW(graph.AddLayer<armnn::OutputLayer>(0, "output"));
+
+ armnn::Layer* const layerA = GetFirstLayerWithName(graph, "layerA");
+ armnn::Layer* const layerB = GetFirstLayerWithName(graph, "layerB");
+ armnn::Layer* const layerC = GetFirstLayerWithName(graph, "layerC");
+ armnn::Layer* const layerD = GetFirstLayerWithName(graph, "layerD");
+ armnn::Layer* const layerO = GetFirstLayerWithName(graph, "output");
+
+ // A
+ // / \'
+ // B C
+ // \ /
+ // D
+ layerA->GetOutputSlot(0).SetTensorInfo(tensorInfo);
+ layerB->GetOutputSlot(0).SetTensorInfo(tensorInfo);
+ layerC->GetOutputSlot(0).SetTensorInfo(tensorInfo);
+ layerD->GetOutputSlot(0).SetTensorInfo(tensorInfo);
+
+ layerA->GetOutputSlot(0).Connect(layerB->GetInputSlot(0));
+ layerA->GetOutputSlot(0).Connect(layerC->GetInputSlot(0));
+ layerB->GetOutputSlot(0).Connect(layerD->GetInputSlot(0));
+ layerC->GetOutputSlot(0).Connect(layerD->GetInputSlot(1));
+ layerD->GetOutputSlot(0).Connect(layerO->GetInputSlot(0));
+
+ // check order is valid
+ BOOST_TEST(CheckOrder(graph, layerA, layerB));
+ BOOST_TEST(CheckOrder(graph, layerA, layerC));
+ BOOST_TEST(CheckOrder(graph, layerB, layerD));
+ BOOST_TEST(CheckOrder(graph, layerC, layerD));
+
+ // Insert E on the C -> D connection:
+ // A
+ // / \'
+ // B C
+ // \ |
+ // \ E
+ // \|
+ // D
+ BOOST_CHECK_NO_THROW(graph.InsertNewLayer<armnn::ActivationLayer>(layerD->GetInputSlot(1),
+ activationDefaults,
+ "layerE"));
+
+ armnn::Layer* const layerE = GetFirstLayerWithName(graph, "layerE");
+
+ // check order is valid
+ BOOST_TEST(CheckOrder(graph, layerA, layerB));
+ BOOST_TEST(CheckOrder(graph, layerA, layerC));
+ BOOST_TEST(CheckOrder(graph, layerB, layerD));
+ BOOST_TEST(CheckOrder(graph, layerC, layerE));
+ BOOST_TEST(CheckOrder(graph, layerE, layerD));
+
+ // Insert F on the A -> C connection:
+ // A
+ // /|
+ // / F
+ // / |
+ // B C
+ // \ |
+ // \ E
+ // \|
+ // D
+ BOOST_CHECK_NO_THROW(graph.InsertNewLayer<armnn::ActivationLayer>(layerC->GetInputSlot(0),
+ activationDefaults,
+ "layerF"));
+
+ armnn::Layer* const layerF = GetFirstLayerWithName(graph, "layerF");
+
+ // check order is valid
+ BOOST_TEST(CheckOrder(graph, layerA, layerB));
+ BOOST_TEST(CheckOrder(graph, layerA, layerF));
+ BOOST_TEST(CheckOrder(graph, layerF, layerC));
+ BOOST_TEST(CheckOrder(graph, layerB, layerD));
+ BOOST_TEST(CheckOrder(graph, layerC, layerE));
+ BOOST_TEST(CheckOrder(graph, layerE, layerD));
+}
+
+namespace
+{
+ // A directed edge (source layer, destination layer) within a graph.
+ using Edge = std::pair<const armnn::Layer*, const armnn::Layer*>;
+}
+
+// Collects every (source, destination) layer pair by walking each layer's
+// output slots and their connections.
+static std::vector<Edge> GetEdgeList(const armnn::Graph& graph)
+{
+ std::vector<Edge> edges;
+
+ for (auto&& srcLayer: graph)
+ {
+ const unsigned int numOutputSlots = srcLayer->GetNumOutputSlots();
+ for (unsigned int s = 0; s < numOutputSlots; ++s)
+ {
+ const armnn::IOutputSlot& outputSlot = srcLayer->GetOutputSlot(s);
+ const unsigned int numConnections = outputSlot.GetNumConnections();
+ for (unsigned int c = 0; c < numConnections; ++c)
+ {
+ auto inputSlot = boost::polymorphic_downcast<const armnn::InputSlot*>(outputSlot.GetConnection(c));
+ edges.emplace_back(srcLayer, &inputSlot->GetOwningLayer());
+ }
+ }
+ }
+
+ return edges;
+}
+
+// Verifies that after Graph::AddCopyLayers every edge of the new graph is
+// either an original edge (both ends on the same compute device) or part of a
+// path through a copy layer bridging two different compute devices, and that
+// every original edge is accounted for.
+// Fix: the "!adjLayer" error branch used to dereference the null adjLayer
+// pointer while building its message; the message no longer does so.
+static void TestGraphAfterAddingCopyLayers(const armnn::Graph& graph, const armnn::Graph& origGraph)
+{
+ std::vector<Edge> origEdges = GetEdgeList(origGraph);
+ std::vector<Edge> newEdges = GetEdgeList(graph);
+
+ // Adding copy layers should not produce any duplicate edges
+ {
+ std::vector<Edge> sortedNewEdges = newEdges;
+ std::sort(sortedNewEdges.begin(), sortedNewEdges.end());
+
+ auto last = std::unique(sortedNewEdges.begin(), sortedNewEdges.end());
+ BOOST_CHECK_MESSAGE(last == sortedNewEdges.end(), "New graph contains duplicate edges!");
+ }
+
+ // Each new edge must be tested
+ while (!newEdges.empty())
+ {
+ const Edge edge = std::move(newEdges.back());
+ newEdges.pop_back();
+
+ // Edge present in the original graph? (matched by layer names)
+ int originalEdge = -1;
+ for (unsigned int i = 0; i < origEdges.size(); i++)
+ {
+ const Edge& origEdge = origEdges[i];
+ if (origEdge.first->GetNameStr() == edge.first->GetNameStr() &&
+ origEdge.second->GetNameStr() == edge.second->GetNameStr())
+ {
+ originalEdge = boost::numeric_cast<int>(i);
+ }
+ }
+
+ if (originalEdge != -1)
+ {
+ // Each vertex should correspond to a layer.
+ const armnn::Layer* srcLayer = edge.first;
+ const armnn::Layer* dstLayer = edge.second;
+ BOOST_TEST(srcLayer);
+ BOOST_TEST(dstLayer);
+
+ // Both layers must have the same compute device.
+ if (srcLayer && dstLayer)
+ {
+ BOOST_TEST((srcLayer->GetComputeDevice() == dstLayer->GetComputeDevice()));
+ }
+
+ // Mark edge in original graph as observed (by deleting it)
+ origEdges.erase(origEdges.begin() + originalEdge);
+ }
+ else
+ {
+ // Edge did not exist in the original graph.
+ // It must then be an edge connecting a layer and a copy layer.
+ const armnn::Layer* srcLayer = edge.first;
+ const armnn::Layer* dstLayer = edge.second;
+
+ if (srcLayer == nullptr || dstLayer == nullptr)
+ {
+ BOOST_ERROR("At least one of the two ends of a new edge (" << edge.first << ", " << edge.second << ") "
+ "introduced after adding copy layers to a graph correspond is not known to the graph");
+ continue;
+ }
+
+ // One and only one of the two layers referenced by the edge should be present in the original graph.
+ const bool srcLayerInOrigGraph = GraphHasNamedLayer(origGraph, edge.first->GetNameStr());
+ const bool dstLayerInOrigGraph = GraphHasNamedLayer(origGraph, edge.second->GetNameStr());
+
+ if (srcLayerInOrigGraph == dstLayerInOrigGraph)
+ {
+ BOOST_ERROR("A new edge ("
+ << edge.first->GetName()
+ << ", "
+ << edge.second->GetName()
+ << ") introduced after adding copy "
+ "layers to a graph is invalid. One of the ends should be present in the original "
+ "graph and the other should not, but "
+ << (srcLayerInOrigGraph ? "both are" : "none are"));
+ continue;
+ }
+
+ const armnn::Layer* copyLayer = srcLayerInOrigGraph ? edge.second : edge.first;
+ const armnn::Layer* nonCopyLayer = srcLayerInOrigGraph ? srcLayer : dstLayer;
+
+ // Find all edges connecting the copy layer to other layers
+ std::vector<Edge> adjEdges;
+ auto it = newEdges.begin();
+ while (it != newEdges.end())
+ {
+ Edge& newEdge = *it;
+ if (copyLayer == (srcLayerInOrigGraph ? newEdge.first : newEdge.second))
+ {
+ adjEdges.push_back(newEdge);
+
+ // Since the adjacent edge is immediately tested below, no need to consider it afterwards
+ it = newEdges.erase(it);
+ }
+ else
+ {
+ it++;
+ }
+ }
+
+ if (adjEdges.empty())
+ {
+ BOOST_ERROR("An edge connecting a layer and a copy layer exists, (" << edge.first << ", " <<
+ edge.second << "), but no other edges connecting the copy layer '" << copyLayer->GetName()
+ << "' to other layers could be found");
+ continue;
+ }
+
+ // Test adjacent edges now
+ for (const Edge& adjEdge : adjEdges)
+ {
+ // The adjacent edge must connect the copy layer to another layer
+ const armnn::Layer* adjLayer = srcLayerInOrigGraph ? adjEdge.second : adjEdge.first;
+
+ if (!adjLayer)
+ {
+ // Fixed: adjLayer is null on this path, so the message must not dereference it.
+ BOOST_ERROR("An edge (" << adjEdge.first << ", " << adjEdge.second << ") is adjacent to an edge "
+ "connecting a layer and a copy layer, (" << edge.first << ", " << edge.second << "), "
+ "but the non-copy end of the former does not correspond to a layer");
+ continue;
+ }
+
+ // Both layers must have different compute devices
+ BOOST_TEST((nonCopyLayer->GetComputeDevice() != adjLayer->GetComputeDevice()));
+
+ // There must exist an edge connecting both layers directly in the original graph
+ {
+ const armnn::Layer* origEdgeN1 = srcLayerInOrigGraph ? nonCopyLayer : adjLayer;
+ const armnn::Layer* origEdgeN2 = srcLayerInOrigGraph ? adjLayer : nonCopyLayer;
+ auto origEdgeIter = std::find(origEdges.begin(), origEdges.end(),
+ Edge(origEdgeN1, origEdgeN2));
+
+ if (origEdgeIter != origEdges.end())
+ {
+ origEdges.erase(origEdgeIter);
+ }
+ else
+ {
+ BOOST_ERROR("An edge (" << adjEdge.first << ", " << adjEdge.second << ") is adjacent to an "
+ "edge connecting a layer and a copy layer, (" << edge.first << ", " << edge.second <<
+ "), but there is no edge connecting the layers in the original graph");
+ }
+ }
+ }
+ }
+ }
+
+ BOOST_TEST(origEdges.empty(), "Not all of the edges in the original graph correspond to paths in the new graph");
+}
+
+// Fixture building a multi-branch CpuRef graph (input -> conv1 -> conv2, both
+// convs feeding a merger -> activation -> softmax -> output) used by the
+// AddCopyLayers tests. AddLayer also stamps m_TensorDesc on every output slot.
+// Cleanup: removed the stray ';' that followed AddLayer's closing brace.
+struct CopyLayersFixture
+{
+ CopyLayersFixture()
+ {
+ using namespace armnn;
+ using namespace std;
+
+ Layer* const inputLayer = AddLayer<InputLayer>(0, "input");
+ inputLayer->SetComputeDevice(Compute::CpuRef);
+
+ Convolution2dDescriptor convolutionDefaults;
+ Layer* const convLayer1 = AddLayer<Convolution2dLayer>(convolutionDefaults, "conv1");
+ convLayer1->SetComputeDevice(Compute::CpuRef);
+
+ inputLayer->GetOutputSlot(0).Connect(convLayer1->GetInputSlot(0));
+
+ Layer* const convLayer2 = AddLayer<Convolution2dLayer>(convolutionDefaults, "conv2");
+ convLayer2->SetComputeDevice(Compute::CpuRef);
+
+ convLayer1->GetOutputSlot(0).Connect(convLayer2->GetInputSlot(0));
+
+ armnn::OriginsDescriptor mergerDefaults(2);
+ Layer* const mergerLayer = AddLayer<MergerLayer>(mergerDefaults, "merger");
+ mergerLayer->SetComputeDevice(armnn::Compute::CpuRef);
+
+ convLayer1->GetOutputSlot(0).Connect(mergerLayer->GetInputSlot(0));
+ convLayer2->GetOutputSlot(0).Connect(mergerLayer->GetInputSlot(1));
+
+ armnn::ActivationDescriptor activationDefaults;
+ Layer* const actLayer = AddLayer<ActivationLayer>(activationDefaults, "act");
+ actLayer->SetComputeDevice(armnn::Compute::CpuRef);
+
+ mergerLayer->GetOutputSlot(0).Connect(actLayer->GetInputSlot(0));
+
+ armnn::SoftmaxDescriptor softmaxDefaults;
+ Layer* const softmaxLayer = AddLayer<SoftmaxLayer>(softmaxDefaults, "softmax");
+ softmaxLayer->SetComputeDevice(armnn::Compute::CpuRef);
+
+ actLayer->GetOutputSlot(0).Connect(softmaxLayer->GetInputSlot(0));
+
+ Layer* const outputLayer = AddLayer<OutputLayer>(0, "output");
+ outputLayer->SetComputeDevice(armnn::Compute::CpuRef);
+
+ softmaxLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
+ }
+
+ armnn::TensorInfo m_TensorDesc;
+ armnn::Graph m_Graph;
+
+private:
+
+ // Adds a layer to m_Graph and sets m_TensorDesc on all of its output slots.
+ template <typename LayerType, typename... Args>
+ LayerType* AddLayer(Args&&... args)
+ {
+ LayerType* const layer = m_Graph.AddLayer<LayerType>(std::forward<Args>(args)...);
+
+ for (auto slot = layer->BeginOutputSlots(); slot != layer->EndOutputSlots(); ++slot)
+ {
+ slot->SetTensorInfo(m_TensorDesc);
+ }
+
+ return layer;
+ }
+};
+
+// Checks AddCopyLayers produces a graph consistent with the original per
+// TestGraphAfterAddingCopyLayers.
+BOOST_FIXTURE_TEST_CASE(AddCopyLayers, CopyLayersFixture)
+{
+ const armnn::Graph origGraph(m_Graph);
+ m_Graph.AddCopyLayers();
+
+ TestGraphAfterAddingCopyLayers(m_Graph, origGraph);
+}
+
+// Checks AddCopyLayers is idempotent: repeated calls leave the edge list unchanged.
+BOOST_FIXTURE_TEST_CASE(AddCopyLayersSeveralTimes, CopyLayersFixture)
+{
+ m_Graph.AddCopyLayers();
+
+ // Calling AddCopyLayers() several times should not change the connections
+ const std::vector<Edge> edges = GetEdgeList(m_Graph);
+ for (int i = 0; i < 4; ++i)
+ {
+ m_Graph.AddCopyLayers();
+ const std::vector<Edge> otherEdges = GetEdgeList(m_Graph);
+ BOOST_TEST((edges == otherEdges));
+ }
+}
+
+// Two parallel device-crossing connections (splitter on GpuAcc feeding both
+// inputs of an addition on CpuRef) each need their own copy layer; after
+// AddCopyLayers the edge list must contain 7 distinct edges, i.e. the copy
+// layers must not collide (by identity/name) and duplicate no edge.
+BOOST_AUTO_TEST_CASE(CopyLayersAddedBetweenSameLayersHaveDifferentNames)
+{
+ armnn::Graph graph;
+
+ armnn::InputLayer* const inputLayer = graph.AddLayer<armnn::InputLayer>(0, "input");
+ inputLayer->SetComputeDevice(armnn::Compute::CpuRef);
+
+ armnn::ViewsDescriptor splitterDesc(2);
+ armnn::SplitterLayer* const splitterLayer = graph.AddLayer<armnn::SplitterLayer>(splitterDesc, "splitter");
+ splitterLayer->SetComputeDevice(armnn::Compute::GpuAcc);
+
+ armnn::AdditionLayer* const additionLayer = graph.AddLayer<armnn::AdditionLayer>("addition");
+ additionLayer->SetComputeDevice(armnn::Compute::CpuRef);
+
+ armnn::OutputLayer* const outputLayer = graph.AddLayer<armnn::OutputLayer>(0, "output");
+ outputLayer->SetComputeDevice(armnn::Compute::CpuRef);
+
+ inputLayer->GetOutputSlot(0).Connect(splitterLayer->GetInputSlot(0));
+ splitterLayer->GetOutputSlot(0).Connect(additionLayer->GetInputSlot(0));
+ splitterLayer->GetOutputSlot(1).Connect(additionLayer->GetInputSlot(1));
+ additionLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
+
+ graph.AddCopyLayers();
+
+ std::vector<Edge> edges = GetEdgeList(graph);
+ BOOST_CHECK(edges.size() == 7u);
+ std::sort(edges.begin(), edges.end());
+ auto last = std::unique(edges.begin(), edges.end());
+ BOOST_CHECK_MESSAGE(last == edges.end(), "Found duplicated edges after AddCopyLayers()");
+}
+
+// Two layers may share the same name; the graph still connects and sorts them
+// correctly (input before output).
+BOOST_AUTO_TEST_CASE(DuplicateLayerNames)
+{
+ armnn::Graph graph;
+
+ armnn::InputLayer* const inputLayer = graph.AddLayer<armnn::InputLayer>(0, "layer");
+ inputLayer->SetComputeDevice(armnn::Compute::CpuRef);
+
+ armnn::OutputLayer* const outputLayer = graph.AddLayer<armnn::OutputLayer>(0, "layer");
+ outputLayer->SetComputeDevice(armnn::Compute::CpuRef);
+
+ inputLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
+
+ auto it = graph.TopologicalSort().begin();
+ BOOST_TEST(((*it)->GetType() == armnn::LayerType::Input));
+ BOOST_TEST(((*std::next(it))->GetType() == armnn::LayerType::Output));
+}
diff --git a/src/armnn/test/GraphUtils.hpp b/src/armnn/test/GraphUtils.hpp
new file mode 100644
index 0000000000..3ff7d2f67b
--- /dev/null
+++ b/src/armnn/test/GraphUtils.hpp
@@ -0,0 +1,24 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+#pragma once
+
+#include "Graph.hpp"
+#include <string>
+
+namespace
+{
+
+// Returns true if any layer in the given graph has the given name.
+bool GraphHasNamedLayer(const armnn::Graph& graph, const std::string& name)
+{
+ for (auto&& layer : graph)
+ {
+ if (layer->GetName() == name)
+ {
+ return true;
+ }
+ }
+ return false;
+}
+} \ No newline at end of file
diff --git a/src/armnn/test/Network_test.cpp b/src/armnn/test/Network_test.cpp
new file mode 100644
index 0000000000..523d47b169
--- /dev/null
+++ b/src/armnn/test/Network_test.cpp
@@ -0,0 +1,425 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+#include <boost/test/unit_test.hpp>
+
+#include "armnn/ArmNN.hpp"
+#include "Network.hpp"
+#include "Graph.hpp"
+#include "backends/RefWorkloadFactory.hpp"
+
+#include "GraphUtils.hpp"
+
+namespace
+{
+
+// Returns true only when every input slot of the given layer has a connection.
+bool AreAllLayerInputSlotsConnected(const armnn::IConnectableLayer& layer)
+{
+ for (unsigned int slotIndex = 0; slotIndex < layer.GetNumInputSlots(); ++slotIndex)
+ {
+ if (layer.GetInputSlot(slotIndex).GetConnection() == nullptr)
+ {
+ return false;
+ }
+ }
+ return true;
+}
+
+}
+
+BOOST_AUTO_TEST_SUITE(Network)
+
+BOOST_AUTO_TEST_CASE(NetworkBasic)
+{
+ // An empty network must be able to print its (empty) graph successfully.
+ armnn::Network net;
+ BOOST_TEST(net.PrintGraph() == armnn::Status::Success);
+}
+
+BOOST_AUTO_TEST_CASE(LayerNamesAreOptionalForINetwork)
+{
+ // Adding layers through the INetwork interface without supplying a name
+ // must compile and not throw.
+ armnn::Network net;
+ armnn::INetwork& inet = net;
+ inet.AddInputLayer(0);
+ inet.AddAdditionLayer();
+ inet.AddActivationLayer(armnn::ActivationDescriptor());
+ inet.AddOutputLayer(0);
+}
+
+BOOST_AUTO_TEST_CASE(LayerNamesAreOptionalForNetwork)
+{
+ // Same as above, but through the concrete Network class.
+ armnn::Network net;
+ net.AddInputLayer(0);
+ net.AddAdditionLayer();
+ net.AddActivationLayer(armnn::ActivationDescriptor());
+ net.AddOutputLayer(0);
+}
+
+// Builds an 11-layer network through the public Network API, then verifies the
+// layer count, the layer names, and the wiring of every output/input slot pair.
+BOOST_AUTO_TEST_CASE(NetworkModification)
+{
+ armnn::Network net;
+
+ armnn::IConnectableLayer* const inputLayer = net.AddInputLayer(0, "input layer");
+ BOOST_TEST(inputLayer);
+
+ unsigned int dims[] = { 10,1,1,1 };
+ std::vector<float> convWeightsData(10);
+ armnn::ConstTensor weights(armnn::TensorInfo(4, dims, armnn::DataType::Float32), convWeightsData);
+
+ armnn::Convolution2dDescriptor convDesc2d;
+ armnn::IConnectableLayer* const convLayer = net.AddConvolution2dLayer(convDesc2d, weights, "conv layer");
+ BOOST_TEST(convLayer);
+
+ inputLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(0));
+
+ armnn::FullyConnectedDescriptor fullyConnectedDesc;
+ armnn::IConnectableLayer* const fullyConnectedLayer = net.AddFullyConnectedLayer(fullyConnectedDesc,
+ weights,
+ "fully connected");
+ BOOST_TEST(fullyConnectedLayer);
+
+ convLayer->GetOutputSlot(0).Connect(fullyConnectedLayer->GetInputSlot(0));
+
+ armnn::Pooling2dDescriptor pooling2dDesc;
+ armnn::IConnectableLayer* const poolingLayer = net.AddPooling2dLayer(pooling2dDesc, "pooling2d");
+ BOOST_TEST(poolingLayer);
+
+ fullyConnectedLayer->GetOutputSlot(0).Connect(poolingLayer->GetInputSlot(0));
+
+ armnn::ActivationDescriptor activationDesc;
+ armnn::IConnectableLayer* const activationLayer = net.AddActivationLayer(activationDesc, "activation");
+ BOOST_TEST(activationLayer);
+
+ poolingLayer->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
+
+ armnn::NormalizationDescriptor normalizationDesc;
+ armnn::IConnectableLayer* const normalizationLayer = net.AddNormalizationLayer(normalizationDesc, "normalization");
+ BOOST_TEST(normalizationLayer);
+
+ activationLayer->GetOutputSlot(0).Connect(normalizationLayer->GetInputSlot(0));
+
+ armnn::SoftmaxDescriptor softmaxDesc;
+ armnn::IConnectableLayer* const softmaxLayer = net.AddSoftmaxLayer(softmaxDesc, "softmax");
+ BOOST_TEST(softmaxLayer);
+
+ normalizationLayer->GetOutputSlot(0).Connect(softmaxLayer->GetInputSlot(0));
+
+ armnn::BatchNormalizationDescriptor batchNormDesc;
+
+ // The tensor contents don't matter here - this test exercises graph
+ // construction only, not execution.
+ armnn::TensorInfo tensorInfo({ 1 }, armnn::DataType::Float32);
+ std::vector<float> data(tensorInfo.GetNumBytes() / sizeof(float));
+ armnn::ConstTensor invalidTensor(tensorInfo, data);
+
+ armnn::IConnectableLayer* const batchNormalizationLayer = net.AddBatchNormalizationLayer(batchNormDesc,
+ invalidTensor,
+ invalidTensor,
+ invalidTensor,
+ invalidTensor,
+ "batch norm");
+ BOOST_TEST(batchNormalizationLayer);
+
+ softmaxLayer->GetOutputSlot(0).Connect(batchNormalizationLayer->GetInputSlot(0));
+
+ // The two-input layers below are fed the same output on both input slots.
+ armnn::IConnectableLayer* const additionLayer = net.AddAdditionLayer("addition");
+ BOOST_TEST(additionLayer);
+
+ batchNormalizationLayer->GetOutputSlot(0).Connect(additionLayer->GetInputSlot(0));
+ batchNormalizationLayer->GetOutputSlot(0).Connect(additionLayer->GetInputSlot(1));
+
+ armnn::IConnectableLayer* const multiplicationLayer = net.AddMultiplicationLayer("multiplication");
+ BOOST_TEST(multiplicationLayer);
+
+ additionLayer->GetOutputSlot(0).Connect(multiplicationLayer->GetInputSlot(0));
+ additionLayer->GetOutputSlot(0).Connect(multiplicationLayer->GetInputSlot(1));
+
+ armnn::IConnectableLayer* const outputLayer = net.AddOutputLayer(0, "output layer");
+ BOOST_TEST(outputLayer);
+
+ multiplicationLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
+
+ //Test that all layers are present in the graph
+ BOOST_TEST(net.GetGraph().GetNumLayers() == 11);
+
+ //Test that the vertices exist and have correct names
+ BOOST_TEST(GraphHasNamedLayer(net.GetGraph(), "input layer"));
+ BOOST_TEST(GraphHasNamedLayer(net.GetGraph(), "conv layer"));
+ BOOST_TEST(GraphHasNamedLayer(net.GetGraph(), "fully connected"));
+ BOOST_TEST(GraphHasNamedLayer(net.GetGraph(), "pooling2d"));
+ BOOST_TEST(GraphHasNamedLayer(net.GetGraph(), "activation"));
+ BOOST_TEST(GraphHasNamedLayer(net.GetGraph(), "normalization"));
+ BOOST_TEST(GraphHasNamedLayer(net.GetGraph(), "softmax"));
+ BOOST_TEST(GraphHasNamedLayer(net.GetGraph(), "batch norm"));
+ BOOST_TEST(GraphHasNamedLayer(net.GetGraph(), "addition"));
+ BOOST_TEST(GraphHasNamedLayer(net.GetGraph(), "multiplication"));
+ BOOST_TEST(GraphHasNamedLayer(net.GetGraph(), "output layer"));
+
+ // Helper: verifies a one-output-to-one-input connection between two layers,
+ // checking the link from both the source and target side.
+ auto checkOneOutputToOneInputConnection = []
+ (const armnn::IConnectableLayer* const srcLayer,
+ const armnn::IConnectableLayer* const tgtLayer,
+ int expectedSrcNumInputs = 1,
+ int expectedDstNumOutputs = 1)
+ {
+ BOOST_TEST(srcLayer->GetNumInputSlots() == expectedSrcNumInputs);
+ BOOST_TEST(srcLayer->GetNumOutputSlots() == 1);
+ BOOST_TEST(tgtLayer->GetNumInputSlots() == 1);
+ BOOST_TEST(tgtLayer->GetNumOutputSlots() == expectedDstNumOutputs);
+
+ BOOST_TEST(srcLayer->GetOutputSlot(0).GetNumConnections() == 1);
+ BOOST_TEST(srcLayer->GetOutputSlot(0).GetConnection(0) == &tgtLayer->GetInputSlot(0));
+ BOOST_TEST(&srcLayer->GetOutputSlot(0) == tgtLayer->GetInputSlot(0).GetConnection());
+ };
+ // Helper: verifies one output feeding both input slots of a two-input layer.
+ auto checkOneOutputToTwoInputsConnections = []
+ (const armnn::IConnectableLayer* const srcLayer,
+ const armnn::IConnectableLayer* const tgtLayer,
+ int expectedSrcNumInputs,
+ int expectedDstNumOutputs = 1)
+ {
+ BOOST_TEST(srcLayer->GetNumInputSlots() == expectedSrcNumInputs);
+ BOOST_TEST(srcLayer->GetNumOutputSlots() == 1);
+ BOOST_TEST(tgtLayer->GetNumInputSlots() == 2);
+ BOOST_TEST(tgtLayer->GetNumOutputSlots() == expectedDstNumOutputs);
+
+ BOOST_TEST(srcLayer->GetOutputSlot(0).GetNumConnections() == 2);
+ for (unsigned int i = 0; i < srcLayer->GetOutputSlot(0).GetNumConnections(); ++i)
+ {
+ BOOST_TEST(srcLayer->GetOutputSlot(0).GetConnection(i) == &tgtLayer->GetInputSlot(i));
+ BOOST_TEST(&srcLayer->GetOutputSlot(0) == tgtLayer->GetInputSlot(i).GetConnection());
+ }
+ };
+
+ BOOST_TEST(AreAllLayerInputSlotsConnected(*convLayer));
+ BOOST_TEST(AreAllLayerInputSlotsConnected(*fullyConnectedLayer));
+ BOOST_TEST(AreAllLayerInputSlotsConnected(*poolingLayer));
+ BOOST_TEST(AreAllLayerInputSlotsConnected(*activationLayer));
+ BOOST_TEST(AreAllLayerInputSlotsConnected(*normalizationLayer));
+ BOOST_TEST(AreAllLayerInputSlotsConnected(*softmaxLayer));
+ BOOST_TEST(AreAllLayerInputSlotsConnected(*batchNormalizationLayer));
+ BOOST_TEST(AreAllLayerInputSlotsConnected(*additionLayer));
+ BOOST_TEST(AreAllLayerInputSlotsConnected(*multiplicationLayer));
+ BOOST_TEST(AreAllLayerInputSlotsConnected(*outputLayer));
+
+ // Check connectivity
+ checkOneOutputToOneInputConnection(inputLayer, convLayer, 0);
+ checkOneOutputToOneInputConnection(convLayer, fullyConnectedLayer);
+ checkOneOutputToOneInputConnection(fullyConnectedLayer, poolingLayer);
+ checkOneOutputToOneInputConnection(poolingLayer, activationLayer);
+ checkOneOutputToOneInputConnection(activationLayer, normalizationLayer);
+ checkOneOutputToOneInputConnection(normalizationLayer, softmaxLayer);
+ checkOneOutputToOneInputConnection(softmaxLayer, batchNormalizationLayer);
+ checkOneOutputToTwoInputsConnections(batchNormalizationLayer, additionLayer, 1);
+ checkOneOutputToTwoInputsConnections(additionLayer, multiplicationLayer, 2);
+ checkOneOutputToOneInputConnection(multiplicationLayer, outputLayer, 2, 0);
+}
+
+// Splits the input into two views, softmaxes each, and re-joins them with a
+// merger layer; verifies splitter and merger slot wiring from both directions.
+BOOST_AUTO_TEST_CASE(NetworkModification_SplitterMerger)
+{
+ armnn::Network net;
+
+ // Add an input layer and an input tensor descriptor.
+ armnn::IConnectableLayer* inputLayer = net.AddInputLayer(0, "input layer");
+ BOOST_TEST(inputLayer);
+
+ // Add a splitter layer
+ armnn::ViewsDescriptor splitterDesc(2,4);
+
+ armnn::IConnectableLayer* splitterLayer = net.AddSplitterLayer(splitterDesc, "splitter layer");
+ BOOST_TEST(splitterLayer);
+
+ inputLayer->GetOutputSlot(0).Connect(splitterLayer->GetInputSlot(0));
+
+ // Add a softmax layer 1
+ armnn::SoftmaxDescriptor softmaxDescriptor;
+ armnn::IConnectableLayer* softmaxLayer1 = net.AddSoftmaxLayer(softmaxDescriptor, "softmax_1");
+ BOOST_TEST(softmaxLayer1);
+
+ splitterLayer->GetOutputSlot(0).Connect(softmaxLayer1->GetInputSlot(0));
+
+ // Add a softmax layer 2
+ armnn::IConnectableLayer* softmaxLayer2 = net.AddSoftmaxLayer(softmaxDescriptor, "softmax_2");
+ BOOST_TEST(softmaxLayer2);
+
+ splitterLayer->GetOutputSlot(1).Connect(softmaxLayer2->GetInputSlot(0));
+
+ // Add a merger layer
+ armnn::OriginsDescriptor mergerDesc(2, 4);
+
+ armnn::IConnectableLayer* mergerLayer = net.AddMergerLayer(mergerDesc, "merger layer");
+ BOOST_TEST(mergerLayer);
+
+ softmaxLayer1->GetOutputSlot(0).Connect(mergerLayer->GetInputSlot(0));
+ softmaxLayer2->GetOutputSlot(0).Connect(mergerLayer->GetInputSlot(1));
+
+ // Add an output layer
+ armnn::IConnectableLayer* outputLayer = net.AddOutputLayer(0, "output layer");
+ BOOST_TEST(outputLayer);
+
+ mergerLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
+
+ // Each splitter view must feed exactly one softmax, and vice versa.
+ BOOST_TEST(splitterLayer->GetNumOutputSlots() == 2);
+ BOOST_TEST(splitterLayer->GetOutputSlot(0).GetConnection(0) == &softmaxLayer1->GetInputSlot(0));
+ BOOST_TEST(&splitterLayer->GetOutputSlot(0) == softmaxLayer1->GetInputSlot(0).GetConnection());
+ BOOST_TEST(splitterLayer->GetOutputSlot(1).GetConnection(0) == &softmaxLayer2->GetInputSlot(0));
+ BOOST_TEST(&splitterLayer->GetOutputSlot(1) == softmaxLayer2->GetInputSlot(0).GetConnection());
+
+ // Each softmax must feed the corresponding merger input.
+ BOOST_TEST(mergerLayer->GetNumInputSlots() == 2);
+ BOOST_TEST(softmaxLayer1->GetOutputSlot(0).GetConnection(0) == &mergerLayer->GetInputSlot(0));
+ BOOST_TEST(&softmaxLayer1->GetOutputSlot(0) == mergerLayer->GetInputSlot(0).GetConnection());
+ BOOST_TEST(softmaxLayer2->GetOutputSlot(0).GetConnection(0) == &mergerLayer->GetInputSlot(1));
+ BOOST_TEST(&softmaxLayer2->GetOutputSlot(0) == mergerLayer->GetInputSlot(1).GetConnection());
+}
+
+// Splits the input in two, softmaxes each half and recombines the halves with an
+// addition layer; every AddXxxLayer call is checked for a valid return.
+BOOST_AUTO_TEST_CASE(NetworkModification_SplitterAddition)
+{
+ armnn::Network net;
+
+ // Add an input layer and an input tensor descriptor.
+ armnn::IConnectableLayer* layer = net.AddInputLayer(0, "input layer");
+ BOOST_TEST(layer);
+
+ // Add a splitter layer
+ armnn::ViewsDescriptor splitterDesc(2,4);
+
+ armnn::IConnectableLayer* const splitterLayer = net.AddSplitterLayer(splitterDesc, "splitter layer");
+ BOOST_TEST(splitterLayer);
+
+ layer->GetOutputSlot(0).Connect(splitterLayer->GetInputSlot(0));
+
+ // Add a softmax layer 1
+ armnn::SoftmaxDescriptor softmaxDescriptor;
+ armnn::IConnectableLayer* const softmax1Layer = net.AddSoftmaxLayer(softmaxDescriptor, "softmax_1");
+ BOOST_TEST(softmax1Layer);
+
+ splitterLayer->GetOutputSlot(0).Connect(softmax1Layer->GetInputSlot(0));
+
+ // Add a softmax layer 2
+ armnn::IConnectableLayer* const softmax2Layer = net.AddSoftmaxLayer(softmaxDescriptor, "softmax_2");
+ BOOST_TEST(softmax2Layer);
+
+ splitterLayer->GetOutputSlot(1).Connect(softmax2Layer->GetInputSlot(0));
+
+ // Add addition layer
+ layer = net.AddAdditionLayer("add layer");
+ BOOST_TEST(layer);
+
+ softmax1Layer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
+ softmax2Layer->GetOutputSlot(0).Connect(layer->GetInputSlot(1));
+
+ // Add an output layer. Check the returned pointer BEFORE dereferencing it in
+ // Connect (the original checked it only afterwards), consistent with the
+ // SplitterMultiplication test below.
+ armnn::IConnectableLayer* prevLayer = layer;
+ layer = net.AddOutputLayer(0, "output layer");
+ BOOST_TEST(layer);
+
+ prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
+}
+
+// Same topology as SplitterAddition, but recombining the two softmaxed halves
+// with a multiplication layer instead.
+BOOST_AUTO_TEST_CASE(NetworkModification_SplitterMultiplication)
+{
+ armnn::Network net;
+
+ // Add an input layer and an input tensor descriptor.
+ armnn::IConnectableLayer* layer = net.AddInputLayer(0, "input layer");
+ BOOST_TEST(layer);
+
+ // Add a splitter layer
+ armnn::ViewsDescriptor splitterDesc(2,4);
+ armnn::IConnectableLayer* const splitterLayer = net.AddSplitterLayer(splitterDesc, "splitter layer");
+ BOOST_TEST(splitterLayer);
+
+ layer->GetOutputSlot(0).Connect(splitterLayer->GetInputSlot(0));
+
+ // Add a softmax layer 1
+ armnn::SoftmaxDescriptor softmaxDescriptor;
+ armnn::IConnectableLayer* const softmax1Layer = net.AddSoftmaxLayer(softmaxDescriptor, "softmax_1");
+ BOOST_TEST(softmax1Layer);
+
+ splitterLayer->GetOutputSlot(0).Connect(softmax1Layer->GetInputSlot(0));
+
+ // Add a softmax layer 2
+ armnn::IConnectableLayer* const softmax2Layer = net.AddSoftmaxLayer(softmaxDescriptor, "softmax_2");
+ BOOST_TEST(softmax2Layer);
+
+ splitterLayer->GetOutputSlot(1).Connect(softmax2Layer->GetInputSlot(0));
+
+ // Add multiplication layer
+ layer = net.AddMultiplicationLayer("multiplication layer");
+ BOOST_TEST(layer);
+
+ softmax1Layer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
+ softmax2Layer->GetOutputSlot(0).Connect(layer->GetInputSlot(1));
+
+ // Add an output layer
+ armnn::IConnectableLayer* prevLayer = layer;
+ layer = net.AddOutputLayer(0, "output layer");
+ BOOST_TEST(layer);
+
+ prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
+}
+
+// Builds a small diamond-shaped network, optimizes it for CpuRef, and checks that
+// every layer in the optimized graph can create its workload without throwing.
+BOOST_AUTO_TEST_CASE(ValidateWorkloads)
+{
+ const armnn::TensorInfo desc({3, 5}, armnn::DataType::Float32);
+
+ armnn::Network net;
+
+ armnn::NormalizationDescriptor nmDesc;
+ armnn::ActivationDescriptor acDesc;
+
+ // in
+ // |
+ // nm
+ // / |
+ // ac |
+ // \ |
+ // ml
+ // |
+ // sm
+ // |
+ // ot
+ armnn::IConnectableLayer* layer = net.AddInputLayer(0, "in");
+ layer->GetOutputSlot(0).SetTensorInfo(desc);
+
+ armnn::IConnectableLayer* const normLayer = net.AddNormalizationLayer(nmDesc, "nm");
+
+ layer->GetOutputSlot(0).Connect(normLayer->GetInputSlot(0));
+ normLayer->GetOutputSlot(0).SetTensorInfo(desc);
+
+ layer = net.AddActivationLayer(acDesc, "ac");
+
+ normLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
+ layer->GetOutputSlot(0).SetTensorInfo(desc);
+
+ armnn::IConnectableLayer* prevLayer = layer;
+ layer = net.AddMultiplicationLayer("ml");
+
+ prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
+ normLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1));
+ layer->GetOutputSlot(0).SetTensorInfo(desc);
+
+ prevLayer = layer;
+ armnn::SoftmaxDescriptor softmaxDescriptor;
+ layer = net.AddSoftmaxLayer(softmaxDescriptor, "sm");
+
+ prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
+ layer->GetOutputSlot(0).SetTensorInfo(desc);
+
+ prevLayer = layer;
+ layer = net.AddOutputLayer(0, "ot");
+
+ prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
+
+ armnn::DeviceSpec spec;
+ spec.DefaultComputeDevice = armnn::Compute::CpuRef;
+
+ armnn::IOptimizedNetworkPtr optNet = Optimize(net, spec);
+ // Hoist the optimized graph into a local instead of repeating the downcast
+ // three times as the original did.
+ armnn::Graph& optGraph = static_cast<armnn::OptimizedNetwork*>(optNet.get())->GetGraph();
+ optGraph.AllocateDynamicBuffers();
+
+ // Validate workloads: each layer must create one on the reference backend.
+ // (The loop variable is named optLayer to avoid shadowing the outer 'layer'.)
+ armnn::RefWorkloadFactory fact;
+ for (auto&& optLayer : optGraph)
+ {
+ BOOST_CHECK_NO_THROW(optLayer->CreateWorkload(optGraph, fact));
+ }
+}
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/armnn/test/RuntimeTests.cpp b/src/armnn/test/RuntimeTests.cpp
new file mode 100644
index 0000000000..117df5e55a
--- /dev/null
+++ b/src/armnn/test/RuntimeTests.cpp
@@ -0,0 +1,190 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+#include <boost/test/unit_test.hpp>
+
+#include "armnn/TypesUtils.hpp"
+
+#include "armnn/IRuntime.hpp"
+#include "armnn/INetwork.hpp"
+#include "armnn/Descriptors.hpp"
+#include "Runtime.hpp"
+
+#ifdef WITH_VALGRIND
+#include "valgrind/memcheck.h"
+#endif
+
+#include <boost/core/ignore_unused.hpp>
+
+namespace armnn
+{
+
+// Test helper that pre-reserves the Runtime's loaded-networks container so that
+// loading a network later does not allocate and skew the Valgrind leak deltas.
+// NOTE(review): accesses the private m_LoadedNetworks member - presumably this
+// function is declared a friend of Runtime; confirm in Runtime.hpp.
+void RuntimeLoadedNetworksReserve(armnn::Runtime* runtime)
+{
+ runtime->m_LoadedNetworks.reserve(1);
+}
+
+}
+
+BOOST_AUTO_TEST_SUITE(Runtime)
+
+BOOST_AUTO_TEST_CASE(RuntimeUnloadNetwork)
+{
+ // build 2 mock-networks and load them into the runtime
+ armnn::IRuntimePtr runtime(armnn::IRuntime::Create(armnn::Compute::CpuRef));
+
+ // mock network 1
+ armnn::NetworkId networkIdentifier1 = 1;
+ armnn::INetworkPtr mockNetwork1(armnn::INetwork::Create());
+ mockNetwork1->AddInputLayer(0, "test layer");
+ runtime->LoadNetwork(networkIdentifier1, Optimize(*mockNetwork1, runtime->GetDeviceSpec()));
+
+ // mock network 2
+ armnn::NetworkId networkIdentifier2 = 2;
+ armnn::INetworkPtr mockNetwork2(armnn::INetwork::Create());
+ mockNetwork2->AddInputLayer(0, "test layer");
+ runtime->LoadNetwork(networkIdentifier2, Optimize(*mockNetwork2, runtime->GetDeviceSpec()));
+
+ // unload one by its networkID
+ BOOST_TEST(runtime->UnloadNetwork(networkIdentifier1) == armnn::Status::Success);
+
+ // unloading the same network a second time must report failure
+ BOOST_TEST(runtime->UnloadNetwork(networkIdentifier1) == armnn::Status::Failure);
+}
+
+#if defined(ARMCOMPUTECL_ENABLED) && defined(WITH_VALGRIND)
+// Loads and unloads a small GpuAcc network and uses Valgrind client requests to
+// check that no memory is leaked and reachable memory stays within a threshold.
+BOOST_AUTO_TEST_CASE(RuntimeMemoryUsage)
+{
+ // From documentation:
+
+ // This means that no pointer to the block can be found. The block is classified as "lost",
+ // because the programmer could not possibly have freed it at program exit, since no pointer to it exists.
+ unsigned long leakedBefore = 0;
+ unsigned long leakedAfter = 0;
+
+ // A start-pointer or chain of start-pointers to the block is found. Since the block is still pointed at,
+ // the programmer could, at least in principle, have freed it before program exit.
+ // We want to test this in case memory is not freed as early as it could have been
+ unsigned long reachableBefore = 0;
+ unsigned long reachableAfter = 0;
+
+ // needed as out params but we don't test them
+ unsigned long dubious = 0;
+ unsigned long suppressed = 0;
+
+ // ensure that runtime is large enough before checking for memory leaks
+ // otherwise when loading the network it will automatically reserve memory that won't be released until destruction
+ armnn::NetworkId networkIdentifier;
+ armnn::Runtime runtime(armnn::Compute::GpuAcc);
+ armnn::RuntimeLoadedNetworksReserve(&runtime);
+
+ // check for leaks before we load the network and record them so that we can see the delta after unloading
+ VALGRIND_DO_QUICK_LEAK_CHECK;
+ VALGRIND_COUNT_LEAKS(leakedBefore, dubious, reachableBefore, suppressed);
+
+ // build a mock-network and load it into the runtime
+ // (scoped so all local network objects are destroyed before the leak re-check)
+ {
+ armnn::TensorInfo inputTensorInfo(armnn::TensorShape({ 7, 7 }), armnn::DataType::Float32);
+ armnn::TensorInfo outputTensorInfo(armnn::TensorShape({ 7, 7 }), armnn::DataType::Float32);
+
+ armnn::INetworkPtr mockNetwork(armnn::INetwork::Create());
+
+ armnn::IConnectableLayer* input = mockNetwork->AddInputLayer(0, "input");
+ armnn::IConnectableLayer* layer = mockNetwork->AddActivationLayer(armnn::ActivationDescriptor(), "test");
+ armnn::IConnectableLayer* output = mockNetwork->AddOutputLayer(0, "output");
+
+ input->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
+ layer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+ // set the tensors in the network
+ input->GetOutputSlot(0).SetTensorInfo(inputTensorInfo);
+ layer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
+
+ // optimize the network
+ armnn::IOptimizedNetworkPtr optNet = Optimize(*mockNetwork, runtime.GetDeviceSpec());
+
+ runtime.LoadNetwork(networkIdentifier, std::move(optNet));
+ }
+
+ runtime.UnloadNetwork(networkIdentifier);
+
+ VALGRIND_DO_ADDED_LEAK_CHECK;
+ VALGRIND_COUNT_LEAKS(leakedAfter, dubious, reachableAfter, suppressed);
+
+ // if we're not running under Valgrind, these vars will have been initialised to 0, so this will always pass
+ BOOST_TEST(leakedBefore == leakedAfter);
+
+ // Allow a reasonable threshold on still-reachable memory, to account for memory
+ // retained by ACL caches between the before/after checks.
+ BOOST_TEST(reachableAfter - reachableBefore < 30000);
+
+ // these are needed because VALGRIND_COUNT_LEAKS is a macro that assigns to the parameters
+ // so they are assigned to, but still considered unused, causing a warning
+ boost::ignore_unused(dubious);
+ boost::ignore_unused(suppressed);
+}
+#endif
+
+#ifdef WITH_VALGRIND
+// run with the following command to get all the amazing output (in the devenv/build folder) :)
+// valgrind --leak-check=full --show-leak-kinds=all --log-file=Valgrind_Memcheck_Leak_Report.txt armnn/test/UnitTests
+// CpuRef variant of the leak test: load/unload a trivial network and require both
+// the "definitely lost" and "still reachable" Valgrind counts to be unchanged.
+BOOST_AUTO_TEST_CASE(RuntimeMemoryLeak)
+{
+ // From documentation:
+
+ // This means that no pointer to the block can be found. The block is classified as "lost",
+ // because the programmer could not possibly have freed it at program exit, since no pointer to it exists.
+ unsigned long leakedBefore = 0;
+ unsigned long leakedAfter = 0;
+
+ // A start-pointer or chain of start-pointers to the block is found. Since the block is still pointed at,
+ // the programmer could, at least in principle, have freed it before program exit.
+ // We want to test this in case memory is not freed as early as it could have been
+ unsigned long reachableBefore = 0;
+ unsigned long reachableAfter = 0;
+
+ // needed as out params but we don't test them
+ unsigned long dubious = 0;
+ unsigned long suppressed = 0;
+
+ armnn::NetworkId networkIdentifier1 = 1;
+
+ // ensure that runtime is large enough before checking for memory leaks
+ // otherwise when loading the network it will automatically reserve memory that won't be released until destruction
+ armnn::Runtime runtime(armnn::Compute::CpuRef);
+ armnn::RuntimeLoadedNetworksReserve(&runtime);
+
+ // check for leaks before we load the network and record them so that we can see the delta after unloading
+ VALGRIND_DO_QUICK_LEAK_CHECK;
+ VALGRIND_COUNT_LEAKS(leakedBefore, dubious, reachableBefore, suppressed);
+
+ // build a mock-network and load it into the runtime
+ // (scoped so all local network objects are destroyed before the leak re-check)
+ {
+ unsigned int inputShape[] = {1, 7, 1, 1};
+ armnn::TensorInfo inputTensorInfo(4, inputShape, armnn::DataType::Float32);
+
+ std::unique_ptr<armnn::Network> mockNetwork1 = std::make_unique<armnn::Network>();
+ mockNetwork1->AddInputLayer(0, "test layer");
+
+ armnn::DeviceSpec device;
+ device.DefaultComputeDevice = armnn::Compute::CpuRef;
+
+ runtime.LoadNetwork(networkIdentifier1, Optimize(*mockNetwork1, device));
+ }
+
+ runtime.UnloadNetwork(networkIdentifier1);
+
+ VALGRIND_DO_ADDED_LEAK_CHECK;
+ VALGRIND_COUNT_LEAKS(leakedAfter, dubious, reachableAfter, suppressed);
+
+ // if we're not running under Valgrind, these vars will have been initialised to 0, so this will always pass
+ BOOST_TEST(leakedBefore == leakedAfter);
+ BOOST_TEST(reachableBefore == reachableAfter);
+
+ // these are needed because VALGRIND_COUNT_LEAKS is a macro that assigns to the parameters
+ // so they are assigned to, but still considered unused, causing a warning
+ boost::ignore_unused(dubious);
+ boost::ignore_unused(suppressed);
+}
+#endif
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/armnn/test/TensorHelpers.hpp b/src/armnn/test/TensorHelpers.hpp
new file mode 100644
index 0000000000..e4ff899a4e
--- /dev/null
+++ b/src/armnn/test/TensorHelpers.hpp
@@ -0,0 +1,201 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+#pragma once
+
+#include <armnn/TensorFwd.hpp>
+#include <boost/test/unit_test.hpp>
+#include <boost/multi_array.hpp>
+#include <vector>
+#include <array>
+
+#include <boost/assert.hpp>
+#include <boost/test/tools/floating_point_comparison.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/numeric/conversion/cast.hpp>
+
+#include "armnn/Tensor.hpp"
+
+#include "backends/test/QuantizeHelper.hpp"
+
+#include <cmath>
+
+constexpr float g_FloatCloseToZeroTolerance = 1.0e-7f;
+
+// Comparer for quantized (integer) types: values are considered equal when they
+// differ by at most one quantization step.
+template<typename T, bool isQuantized = true>
+struct SelectiveComparer
+{
+    static bool Compare(T a, T b)
+    {
+        // Written as a ternary instead of std::max/std::min: this header never
+        // includes <algorithm>, so the original relied on a transitive include.
+        return (a > b ? a - b : b - a) <= 1;
+    }
+};
+
+// Comparer for non-quantized (floating point) types: approximate comparison with
+// a 1% relative tolerance, plus an absolute tolerance for values near zero.
+template<typename T>
+struct SelectiveComparer<T, false>
+{
+    static bool Compare(T a, T b)
+    {
+        // if a or b is zero, percent_tolerance does an exact match, so compare to a small, constant tolerance instead
+        if (a == 0.0f || b == 0.0f)
+        {
+            return std::abs(a - b) <= g_FloatCloseToZeroTolerance;
+        }
+        // For unquantized floats we use a tolerance of 1%.
+        boost::math::fpc::close_at_tolerance<float> comparer(boost::math::fpc::percent_tolerance(1.0f));
+        return comparer(a, b);
+    }
+};
+
+// Dispatches to the exact or approximate comparer depending on whether T is a
+// quantized type.
+template<typename T>
+bool SelectiveCompare(T a, T b)
+{
+    return SelectiveComparer<T, armnn::IsQuantizedType<T>()>::Compare(a, b);
+}
+
+
+
+// Compares two multi_arrays element-wise with SelectiveCompare, returning a
+// predicate_result whose message lists up to the first three differing indices.
+template <typename T, std::size_t n>
+boost::test_tools::predicate_result CompareTensors(const boost::multi_array<T, n>& a,
+ const boost::multi_array<T, n>& b)
+{
+ // check they are same shape
+ for (unsigned int i=0; i<n; i++)
+ {
+ if (a.shape()[i] != b.shape()[i])
+ {
+ boost::test_tools::predicate_result res(false);
+ res.message() << "Different shapes ["
+ << a.shape()[i]
+ << "!="
+ << b.shape()[i]
+ << "]";
+ return res;
+ }
+ }
+
+ // now compare element-wise
+
+ // fun iteration over n dimensions
+ // NOTE(review): this odometer-style loop assumes every extent is non-zero; a
+ // zero-sized dimension would dereference out of range - confirm callers never
+ // pass empty tensors.
+ std::array<unsigned int, n> indices;
+ for (unsigned int i = 0; i < n; i++)
+ {
+ indices[i] = 0;
+ }
+
+ std::stringstream errorString;
+ int numFailedElements = 0;
+ constexpr int maxReportedDifferences = 3;
+
+ while (true)
+ {
+ bool comparison = SelectiveCompare(a(indices), b(indices));
+ if (!comparison)
+ {
+ ++numFailedElements;
+
+ if (numFailedElements <= maxReportedDifferences)
+ {
+ if (numFailedElements >= 2)
+ {
+ errorString << ", ";
+ }
+ errorString << "[";
+ for (unsigned int i = 0; i < n; ++i)
+ {
+ errorString << indices[i];
+ if (i != n - 1)
+ {
+ errorString << ",";
+ }
+ }
+ errorString << "]";
+
+ // unary + promotes char-sized types so they print as numbers, not characters
+ errorString << " (" << +a(indices) << " != " << +b(indices) << ")";
+ }
+ }
+
+ // advance the multi-dimensional index, carrying overflow into outer dimensions
+ ++indices[n - 1];
+ for (unsigned int i=n-1; i>0; i--)
+ {
+ if (indices[i] == a.shape()[i])
+ {
+ indices[i] = 0;
+ ++indices[i - 1];
+ }
+ }
+
+ if (indices[0] == a.shape()[0])
+ {
+ break;
+ }
+ }
+
+ boost::test_tools::predicate_result comparisonResult(true);
+ if (numFailedElements > 0)
+ {
+ comparisonResult = false;
+ comparisonResult.message() << numFailedElements << " different values at: ";
+ if (numFailedElements > maxReportedDifferences)
+ {
+ errorString << ", ... (and " << (numFailedElements - maxReportedDifferences) << " other differences)";
+ }
+ comparisonResult.message() << errorString.str();
+ }
+
+ return comparisonResult;
+}
+
+
+// Creates a boost::multi_array with shape defined by the given TensorInfo.
+// The returned array is default-initialized; contents are unspecified until filled.
+// NOTE(review): assumes n == tensorInfo.GetNumDimensions() - TODO confirm callers.
+template <typename T, std::size_t n>
+boost::multi_array<T, n> MakeTensor(const armnn::TensorInfo& tensorInfo)
+{
+ std::array<unsigned int, n> shape;
+
+ for (unsigned int i = 0; i < n; i++)
+ {
+ shape[i] = tensorInfo.GetShape()[i];
+ }
+
+ return boost::multi_array<T, n>(shape);
+}
+
+// Creates a boost::multi_array with shape defined by the given TensorInfo and contents defined by the given vector.
+// Creates a boost::multi_array with shape defined by the given TensorInfo and contents defined by the given vector.
+// The flat vector is interpreted in row-major order (outermost dimension first).
+// NOTE(review): assumes n == tensorInfo.GetNumDimensions() - TODO confirm callers.
+template <typename T, std::size_t n>
+boost::multi_array<T, n> MakeTensor(const armnn::TensorInfo& tensorInfo, const std::vector<T>& flat)
+{
+ BOOST_ASSERT_MSG(flat.size() == tensorInfo.GetNumElements(), "Wrong number of components supplied to tensor");
+
+ std::array<unsigned int, n> shape;
+
+ for (unsigned int i = 0; i < n; i++)
+ {
+ shape[i] = tensorInfo.GetShape()[i];
+ }
+
+ boost::const_multi_array_ref<T, n> arrayRef(&flat[0], shape);
+ return boost::multi_array<T, n>(arrayRef);
+}
+
+// Fills a tensor with uniformly distributed random values in [min, max), quantized
+// with the TensorInfo's scale and offset. Deterministic for a given seed.
+template <typename T, std::size_t n>
+boost::multi_array<T, n> MakeRandomTensor(const armnn::TensorInfo& tensorInfo,
+ unsigned int seed,
+ float min = -10.0f,
+ float max = 10.0f)
+{
+ boost::random::mt19937 gen(seed);
+ boost::random::uniform_real_distribution<float> dist(min, max);
+
+ std::vector<float> init(tensorInfo.GetNumElements());
+ for (unsigned int i = 0; i < init.size(); i++)
+ {
+ init[i] = dist(gen);
+ }
+ // Quantize the generated floats to T using the tensor's quantization parameters.
+ float qScale = tensorInfo.GetQuantizationScale();
+ int32_t qOffset = tensorInfo.GetQuantizationOffset();
+ return MakeTensor<T, n>(tensorInfo, QuantizedVector<T>(qScale, qOffset, init));
+}
diff --git a/src/armnn/test/TensorTest.cpp b/src/armnn/test/TensorTest.cpp
new file mode 100644
index 0000000000..2bb37f4fb8
--- /dev/null
+++ b/src/armnn/test/TensorTest.cpp
@@ -0,0 +1,146 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+#include <boost/test/unit_test.hpp>
+#include <armnn/Tensor.hpp>
+
+namespace armnn
+{
+
+// Add unit test framework for interpreting TensorInfo type
+// Teaches Boost.Test how to print a TensorInfo in failure messages.
+// Iterates over the actual number of dimensions instead of hard-coding indices
+// 0..3, which read out of range for tensors with fewer than 4 dimensions.
+std::ostream& boost_test_print_type(std::ostream& ostr, const TensorInfo& right)
+{
+    ostr << "TensorInfo[ " << right.GetNumDimensions() << ",";
+    for (unsigned int i = 0; i < right.GetNumDimensions(); ++i)
+    {
+        ostr << right.GetShape()[i];
+        if (i != right.GetNumDimensions() - 1)
+        {
+            ostr << ",";
+        }
+    }
+    ostr << " ]" << std::endl;
+    return ostr;
+}
+
+// Teaches Boost.Test how to print a TensorShape in failure messages.
+// Iterates over the actual number of dimensions instead of hard-coding indices
+// 0..3, which read out of range for shapes with fewer than 4 dimensions.
+std::ostream& boost_test_print_type(std::ostream& ostr, const TensorShape& shape)
+{
+    ostr << "TensorShape[ " << shape.GetNumDimensions() << ",";
+    for (unsigned int i = 0; i < shape.GetNumDimensions(); ++i)
+    {
+        ostr << shape[i];
+        if (i != shape.GetNumDimensions() - 1)
+        {
+            ostr << ",";
+        }
+    }
+    ostr << " ]" << std::endl;
+    return ostr;
+}
+
+} //namespace armnn
+using namespace armnn;
+
+BOOST_AUTO_TEST_SUITE(Tensor)
+
+// Fixture providing a 4D Float32 TensorInfo with shape { 6, 7, 8, 9 }.
+// The user-declared empty destructor (with its stray trailing semicolon) has been
+// removed - the implicit destructor is sufficient (Rule of Zero).
+struct TensorInfoFixture
+{
+    TensorInfoFixture()
+    {
+        unsigned int sizes[] = {6,7,8,9};
+        m_TensorInfo = TensorInfo(4, sizes, DataType::Float32);
+    }
+
+    TensorInfo m_TensorInfo;
+};
+
+BOOST_FIXTURE_TEST_CASE(ConstructShapeUsingListInitialization, TensorInfoFixture)
+{
+ // A list-initialized shape must equal one built from an array of sizes.
+ TensorShape listInitializedShape{ 6, 7, 8, 9 };
+ BOOST_TEST(listInitializedShape == m_TensorInfo.GetShape());
+}
+
+BOOST_FIXTURE_TEST_CASE(ConstructTensorInfo, TensorInfoFixture)
+{
+ // The fixture's shape is { 6, 7, 8, 9 }, outermost dimension first.
+ BOOST_TEST(m_TensorInfo.GetNumDimensions() == 4);
+ BOOST_TEST(m_TensorInfo.GetShape()[0] == 6); // <= Outer most
+ BOOST_TEST(m_TensorInfo.GetShape()[1] == 7);
+ BOOST_TEST(m_TensorInfo.GetShape()[2] == 8);
+ BOOST_TEST(m_TensorInfo.GetShape()[3] == 9); // <= Inner most
+}
+
+BOOST_FIXTURE_TEST_CASE(CopyConstructTensorInfo, TensorInfoFixture)
+{
+ // Copy construction must preserve rank and every dimension.
+ TensorInfo copyConstructed(m_TensorInfo);
+ BOOST_TEST(copyConstructed.GetNumDimensions() == 4);
+ BOOST_TEST(copyConstructed.GetShape()[0] == 6);
+ BOOST_TEST(copyConstructed.GetShape()[1] == 7);
+ BOOST_TEST(copyConstructed.GetShape()[2] == 8);
+ BOOST_TEST(copyConstructed.GetShape()[3] == 9);
+}
+
+BOOST_FIXTURE_TEST_CASE(TensorInfoEquality, TensorInfoFixture)
+{
+ // A copy must compare equal to the original.
+ TensorInfo copyConstructed(m_TensorInfo);
+ BOOST_TEST(copyConstructed == m_TensorInfo);
+}
+
+BOOST_FIXTURE_TEST_CASE(TensorInfoInequality, TensorInfoFixture)
+{
+ // A TensorInfo with a different shape must not compare equal to the fixture's.
+ unsigned int differentSizes[] = {2,3,4,5};
+ const TensorInfo other(4, differentSizes, DataType::Float32);
+
+ BOOST_TEST(other != m_TensorInfo);
+}
+
+BOOST_FIXTURE_TEST_CASE(TensorInfoAssignmentOperator, TensorInfoFixture)
+{
+ // Assignment must produce a TensorInfo equal to the source.
+ TensorInfo copy;
+ copy = m_TensorInfo;
+ BOOST_TEST(copy == m_TensorInfo);
+}
+
+// Helper taking a ConstTensor parameter, so the test below can confirm that both
+// Tensor and ConstTensor are accepted where a ConstTensor is expected.
+void CheckTensor(const ConstTensor& t)
+{
+ t.GetInfo();
+}
+
+BOOST_AUTO_TEST_CASE(TensorVsConstTensor)
+{
+ int mutableDatum = 2;
+ const int immutableDatum = 3;
+
+ // Default-constructed tensors must be assignable across Tensor/ConstTensor.
+ armnn::Tensor uninitializedTensor;
+ armnn::ConstTensor uninitializedTensor2;
+
+ uninitializedTensor2 = uninitializedTensor;
+
+ armnn::Tensor t(TensorInfo(), &mutableDatum);
+ armnn::ConstTensor ct(TensorInfo(), &immutableDatum);
+
+ // Check that both Tensor and ConstTensor can be passed as a ConstTensor
+ CheckTensor(t);
+ CheckTensor(ct);
+}
+
+BOOST_AUTO_TEST_CASE(ModifyTensorInfo)
+{
+ // Every setter on TensorInfo must be reflected by the corresponding getter.
+ TensorInfo info;
+ info.SetShape({ 5, 6, 7, 8 });
+ BOOST_TEST((info.GetShape() == TensorShape({ 5, 6, 7, 8 })));
+ info.SetDataType(DataType::QuantisedAsymm8);
+ BOOST_TEST((info.GetDataType() == DataType::QuantisedAsymm8));
+ info.SetQuantizationScale(10.0f);
+ BOOST_TEST(info.GetQuantizationScale() == 10.0f);
+ info.SetQuantizationOffset(5);
+ BOOST_TEST(info.GetQuantizationOffset() == 5);
+}
+
+BOOST_AUTO_TEST_CASE(TensorShapeOperatorBrackets)
+{
+ // Exercises both the const (by-value) and non-const (by-reference) overloads
+ // of TensorShape::operator[].
+ TensorShape shape({0,1,2,3});
+ // Check version of operator[] which returns an unsigned int
+ BOOST_TEST(shape[2] == 2);
+ // Check the version of operator[] which returns a reference
+ shape[2] = 20;
+ BOOST_TEST(shape[2] == 20);
+}
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/armnn/test/UnitTests.cpp b/src/armnn/test/UnitTests.cpp
new file mode 100644
index 0000000000..0e2f99583f
--- /dev/null
+++ b/src/armnn/test/UnitTests.cpp
@@ -0,0 +1,60 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+#define BOOST_TEST_MODULE UnitTests
+#include <boost/test/unit_test.hpp>
+
+#include "UnitTests.hpp"
+
+// Global fixture which sets up logging (via ConfigureLoggingTest from UnitTests.hpp)
+// once, before any test case in this module runs.
+struct ConfigureLoggingFixture
+{
+ ConfigureLoggingFixture()
+ {
+ ConfigureLoggingTest();
+ }
+};
+
+BOOST_GLOBAL_FIXTURE(ConfigureLoggingFixture);
+
+// On Windows, duplicate the boost test logging output to the Visual Studio output window using OutputDebugString.
+#if defined(_MSC_VER)
+
+#include <boost/iostreams/filtering_stream.hpp>
+#include <boost/iostreams/tee.hpp>
+#include <iostream>
+#include <Windows.h>
+
+using namespace boost::iostreams;
+using namespace std;
+
+// Boost.Iostreams sink which forwards everything written to it to the Windows
+// debugger output (visible in the Visual Studio output window).
+struct DebugOutputSink : boost::iostreams::sink
+{
+ std::streamsize write(const char* s, std::streamsize n)
+ {
+ // The given string is not null-terminated, so we need to copy it.
+ std::string s2(s, boost::numeric_cast<size_t>(n));
+ // Use the explicit narrow-character API: the OutputDebugString macro expands
+ // to OutputDebugStringW in UNICODE builds, which would not accept a char*.
+ OutputDebugStringA(s2.c_str());
+ return n;
+ }
+};
+
+// Global fixture which redirects the Boost.Test log so it is written both to
+// std::cout and, via DebugOutputSink, to the debugger output window.
+// NOTE: the members outlive the stream registered with unit_test_log, since this
+// fixture lives for the whole test run.
+class SetupDebugOutput
+{
+public:
+ SetupDebugOutput()
+ {
+ // Send the output to both cout (as standard) and the debug output.
+ m_OutputStream.push(tee(std::cout));
+ m_OutputStream.push(m_DebugOutputSink);
+
+ boost::unit_test::unit_test_log.set_stream(m_OutputStream);
+ }
+private:
+ filtering_ostream m_OutputStream;
+ DebugOutputSink m_DebugOutputSink;
+};
+
+BOOST_GLOBAL_FIXTURE(SetupDebugOutput);
+
+#endif // defined(_MSC_VER) \ No newline at end of file
diff --git a/src/armnn/test/UnitTests.hpp b/src/armnn/test/UnitTests.hpp
new file mode 100644
index 0000000000..040048ad99
--- /dev/null
+++ b/src/armnn/test/UnitTests.hpp
@@ -0,0 +1,79 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+#pragma once
+
+#include "Logging.hpp"
+#include "armnn/Utils.hpp"
+#include "backends/RefWorkloadFactory.hpp"
+#include "backends/test/LayerTests.hpp"
+#include <boost/test/unit_test.hpp>
+
+// Sets up logging at Fatal severity for both the ArmNN core library and armnnUtils.
+// NOTE(review): the two boolean flags are passed straight through to
+// ConfigureLogging — presumably output/timestamp toggles; confirm against armnn/Utils.hpp.
+inline void ConfigureLoggingTest()
+{
+ // Configure logging for both the ARMNN library and this test program
+ armnn::ConfigureLogging(true, true, armnn::LogSeverity::Fatal);
+ armnnUtils::ConfigureLogging(boost::log::core::get().get(), true, true, armnn::LogSeverity::Fatal);
+}
+
+// The following macros require the caller to have defined FactoryType, with one of the following using statements:
+//
+// using FactoryType = armnn::RefWorkloadFactory;
+// using FactoryType = armnn::ClWorkloadFactory;
+// using FactoryType = armnn::NeonWorkloadFactory;
+
+/// Executes BOOST_TEST on CompareTensors() return value so that the predicate_result message is reported.
+/// If the test reports itself as not supported then the tensors are not compared.
+/// Additionally this checks that the supportedness reported by the test matches the name of the test.
+/// Unsupported tests must be 'tagged' by including "UNSUPPORTED" in their name.
+/// This is useful because it clarifies that the feature being tested is not actually supported
+/// (a passed test with the name of a feature would imply that feature was supported).
+/// If support is added for a feature, the test case will fail because the name incorrectly contains UNSUPPORTED.
+/// If support is removed for a feature, the test case will fail because the name doesn't contain UNSUPPORTED.
+template <typename T, std::size_t n>
+void CompareTestResultIfSupported(const std::string& testName, LayerTestResult<T, n> testResult)
+{
+ // The test name must agree with the supportedness the test reports:
+ // exactly one of "name says UNSUPPORTED" / "result says supported" may hold.
+ const bool nameSaysUnsupported = testName.find("UNSUPPORTED") != std::string::npos;
+ const bool isSupported = testResult.supported;
+ BOOST_CHECK_MESSAGE(nameSaysUnsupported != isSupported,
+ "The test name does not match the supportedness it is reporting");
+ if (isSupported)
+ {
+ BOOST_TEST(CompareTensors(testResult.output, testResult.outputExpected));
+ }
+}
+
+// Instantiates the workload factory under test, runs the given test function
+// against it, and checks the result (when the test reports itself as supported).
+template<typename FactoryType, typename TFuncPtr, typename... Args>
+void RunTestFunction(const char* testName, TFuncPtr testFunction, Args... args)
+{
+ FactoryType workloadFactory;
+ auto result = testFunction(workloadFactory, args...);
+ CompareTestResultIfSupported(testName, result);
+}
+
+// Defines a Boost test case which runs TestFunction against the caller-defined
+// FactoryType and compares the result to the expected output (when supported).
+#define ARMNN_AUTO_TEST_CASE(TestName, TestFunction, ...) \
+ BOOST_AUTO_TEST_CASE(TestName) \
+ { \
+ RunTestFunction<FactoryType>(#TestName, &TestFunction, ##__VA_ARGS__); \
+ }
+
+// Runs the given test function with both the workload factory under test and the
+// reference (CPU) workload factory, so the test can compare the two backends' outputs.
+template<typename FactoryType, typename TFuncPtr, typename... Args>
+void CompareRefTestFunction(const char* testName, TFuncPtr testFunction, Args... args)
+{
+ FactoryType workloadFactory;
+ armnn::RefWorkloadFactory refWorkloadFactory;
+ auto testResult = (*testFunction)(workloadFactory, refWorkloadFactory, args...);
+ CompareTestResultIfSupported(testName, testResult);
+}
+
+// Defines a Boost test case which runs TestFunction against both FactoryType and
+// the reference workload factory, comparing their outputs.
+#define ARMNN_COMPARE_REF_AUTO_TEST_CASE(TestName, TestFunction, ...) \
+ BOOST_AUTO_TEST_CASE(TestName) \
+ { \
+ CompareRefTestFunction<FactoryType>(#TestName, &TestFunction, ##__VA_ARGS__); \
+ }
+
+// As ARMNN_COMPARE_REF_AUTO_TEST_CASE, but the generated test case also uses the given fixture.
+#define ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(TestName, Fixture, TestFunction, ...) \
+ BOOST_FIXTURE_TEST_CASE(TestName, Fixture) \
+ { \
+ CompareRefTestFunction<FactoryType>(#TestName, &TestFunction, ##__VA_ARGS__); \
+ }
diff --git a/src/armnn/test/UtilsTests.cpp b/src/armnn/test/UtilsTests.cpp
new file mode 100644
index 0000000000..11fa51626c
--- /dev/null
+++ b/src/armnn/test/UtilsTests.cpp
@@ -0,0 +1,58 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+#include <boost/test/unit_test.hpp>
+
+#include <armnn/Utils.hpp>
+#include <armnn/Types.hpp>
+#include <armnn/TypesUtils.hpp>
+#include <armnn/Descriptors.hpp>
+
+BOOST_AUTO_TEST_SUITE(Utils)
+
+// Checks the byte size GetDataTypeSize reports for each supported data type.
+BOOST_AUTO_TEST_CASE(DataTypeSize)
+{
+ BOOST_TEST(armnn::GetDataTypeSize(armnn::DataType::Float32) == 4);
+ BOOST_TEST(armnn::GetDataTypeSize(armnn::DataType::QuantisedAsymm8) == 1);
+ BOOST_TEST(armnn::GetDataTypeSize(armnn::DataType::Signed32) == 4);
+}
+
+// Checks the compile-time mapping from C++ element types to armnn::DataType values.
+BOOST_AUTO_TEST_CASE(GetDataTypeTest)
+{
+ BOOST_TEST((armnn::GetDataType<float>() == armnn::DataType::Float32));
+ BOOST_TEST((armnn::GetDataType<uint8_t>() == armnn::DataType::QuantisedAsymm8));
+ BOOST_TEST((armnn::GetDataType<int32_t>() == armnn::DataType::Signed32));
+}
+
+// Five mappings exceed the supported dimension count, so construction must throw.
+BOOST_AUTO_TEST_CASE(PermuteDescriptorWithTooManyMappings)
+{
+ BOOST_CHECK_THROW(armnn::PermuteDescriptor({ 0u, 1u, 2u, 3u, 4u }), armnn::InvalidArgumentException);
+}
+
+// For a 1D permutation the only valid target index is 0; mapping to 1 must throw.
+BOOST_AUTO_TEST_CASE(PermuteDescriptorWithInvalidMappings1d)
+{
+ BOOST_CHECK_THROW(armnn::PermuteDescriptor({ 1u }), armnn::InvalidArgumentException);
+}
+
+// Target index 2 is out of range for a 2D permutation, so construction must throw.
+BOOST_AUTO_TEST_CASE(PermuteDescriptorWithInvalidMappings2d)
+{
+ BOOST_CHECK_THROW(armnn::PermuteDescriptor({ 2u, 0u }), armnn::InvalidArgumentException);
+}
+
+// Target index 3 is out of range for a 3D permutation, so construction must throw.
+BOOST_AUTO_TEST_CASE(PermuteDescriptorWithInvalidMappings3d)
+{
+ BOOST_CHECK_THROW(armnn::PermuteDescriptor({ 0u, 3u, 1u }), armnn::InvalidArgumentException);
+}
+
+// Target index 4 is out of range for a 4D permutation, so construction must throw.
+BOOST_AUTO_TEST_CASE(PermuteDescriptorWithInvalidMappings4d)
+{
+ BOOST_CHECK_THROW(armnn::PermuteDescriptor({ 0u, 1u, 2u, 4u }), armnn::InvalidArgumentException);
+}
+
+// A permutation must be a bijection: the duplicated target index 1 must be rejected.
+BOOST_AUTO_TEST_CASE(PermuteDescriptorWithDuplicatedMappings)
+{
+ BOOST_CHECK_THROW(armnn::PermuteDescriptor({ 1u, 1u, 0u }), armnn::InvalidArgumentException);
+}
+
+BOOST_AUTO_TEST_SUITE_END()