path: root/src/armnn/test
author     telsoa01 <telmo.soares@arm.com>  2018-08-31 09:22:23 +0100
committer  telsoa01 <telmo.soares@arm.com>  2018-08-31 09:22:23 +0100
commit     c577f2c6a3b4ddb6ba87a882723c53a248afbeba (patch)
tree       bd7d4c148df27f8be6649d313efb24f536b7cf34 /src/armnn/test
parent     4c7098bfeab1ffe1cdc77f6c15548d3e73274746 (diff)
download   armnn-c577f2c6a3b4ddb6ba87a882723c53a248afbeba.tar.gz
Release 18.08
Diffstat (limited to 'src/armnn/test')
-rw-r--r--  src/armnn/test/CreateWorkload.hpp              487
-rw-r--r--  src/armnn/test/CreateWorkloadClNeon.hpp         15
-rw-r--r--  src/armnn/test/CsvReaderTest.cpp               124
-rw-r--r--  src/armnn/test/EndToEndTest.cpp                158
-rw-r--r--  src/armnn/test/FP16SupportTest.cpp             114
-rw-r--r--  src/armnn/test/FloatingPointConverterTest.cpp   58
-rw-r--r--  src/armnn/test/GraphTests.cpp                  119
-rw-r--r--  src/armnn/test/InstrumentTests.cpp              62
-rw-r--r--  src/armnn/test/JsonPrinterTests.cpp            378
-rw-r--r--  src/armnn/test/NeonTimerTest.cpp               104
-rw-r--r--  src/armnn/test/NetworkTests.cpp                968
-rw-r--r--  src/armnn/test/Network_test.cpp                483
-rw-r--r--  src/armnn/test/ObservableTest.cpp               94
-rw-r--r--  src/armnn/test/OpenClTimerTest.cpp             137
-rw-r--r--  src/armnn/test/OptimizerTests.cpp              498
-rw-r--r--  src/armnn/test/ProfilerTests.cpp               235
-rw-r--r--  src/armnn/test/ProfilingEventTest.cpp           95
-rw-r--r--  src/armnn/test/RuntimeTests.cpp                251
-rw-r--r--  src/armnn/test/TensorHelpers.hpp                12
-rw-r--r--  src/armnn/test/TensorTest.cpp                    8
-rw-r--r--  src/armnn/test/UnitTests.cpp                     2
-rw-r--r--  src/armnn/test/UnitTests.hpp                    20
-rw-r--r--  src/armnn/test/UtilsTests.cpp                  110
23 files changed, 3706 insertions, 826 deletions
diff --git a/src/armnn/test/CreateWorkload.hpp b/src/armnn/test/CreateWorkload.hpp
index c3f4b8a1bf..ee0c584b13 100644
--- a/src/armnn/test/CreateWorkload.hpp
+++ b/src/armnn/test/CreateWorkload.hpp
@@ -22,7 +22,7 @@ namespace
using namespace std;
-// Calls CreateWorkload for a layer, and checks the returned pointer is of the correct type
+// Calls CreateWorkload for a layer, and checks the returned pointer is of the correct type.
template<typename Workload>
std::unique_ptr<Workload> MakeAndCheckWorkload(Layer& layer, Graph& graph, const IWorkloadFactory& factory)
{
@@ -30,18 +30,19 @@ std::unique_ptr<Workload> MakeAndCheckWorkload(Layer& layer, Graph& graph, const
BOOST_TEST(workload.get() == boost::polymorphic_downcast<Workload*>(workload.get()),
"Cannot convert to derived class");
std::string reasonIfUnsupported;
+ layer.SetComputeDevice(factory.GetCompute());
BOOST_TEST(factory.IsLayerSupported(layer, layer.GetDataType(), reasonIfUnsupported));
return std::unique_ptr<Workload>(static_cast<Workload*>(workload.release()));
}
-// connects two layers
+// Connects two layers.
void Connect(Layer* from, Layer* to, const TensorInfo& tensorInfo, unsigned int fromIndex = 0, unsigned int toIndex = 0)
{
from->GetOutputSlot(fromIndex).Connect(to->GetInputSlot(toIndex));
from->GetOutputHandler(fromIndex).SetTensorInfo(tensorInfo);
}
-// helper function to create tensor handlers for workloads, assuming they all use the same factory
+// Helper function to create tensor handlers for workloads, assuming they all use the same factory.
void CreateTensorHandles(armnn::Graph& graph, armnn::IWorkloadFactory& factory)
{
for (auto&& layer : graph.TopologicalSort())
@@ -57,11 +58,11 @@ void CreateTensorHandles(armnn::Graph& graph, armnn::IWorkloadFactory& factory)
// They return the created workloads so that backend-specific checks can be performed.
/////////////////////////////////////////////////////////////////////////////////////////////
-template <typename ActivationWorkload>
+template <typename ActivationWorkload, armnn::DataType DataType>
std::unique_ptr<ActivationWorkload> CreateActivationWorkloadTest(armnn::IWorkloadFactory& factory,
armnn::Graph& graph)
{
- // create the layer we're testing
+ // Creates the layer we're testing.
ActivationDescriptor layerDesc;
layerDesc.m_Function = ActivationFunction::Abs;
layerDesc.m_A = 3.5f;
@@ -69,19 +70,19 @@ std::unique_ptr<ActivationWorkload> CreateActivationWorkloadTest(armnn::IWorkloa
ActivationLayer* const layer = graph.AddLayer<ActivationLayer>(layerDesc, "layer");
- // create extra layers
+ // Creates extra layers.
Layer* const input = graph.AddLayer<InputLayer>(0, "input");
Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
- // connect up
- armnn::TensorInfo tensorInfo({1, 1}, ActivationWorkload::ms_DataType);
+ // Connects up.
+ armnn::TensorInfo tensorInfo({1, 1}, DataType);
Connect(input, layer, tensorInfo);
Connect(layer, output, tensorInfo);
CreateTensorHandles(graph, factory);
- // make the workload and check it
+ // Makes the workload and checks it.
auto workload = MakeAndCheckWorkload<ActivationWorkload>(*layer, graph, factory);
ActivationQueueDescriptor queueDescriptor = workload->GetData();
@@ -91,51 +92,51 @@ std::unique_ptr<ActivationWorkload> CreateActivationWorkloadTest(armnn::IWorkloa
BOOST_TEST(queueDescriptor.m_Parameters.m_B == -10.0f);
BOOST_TEST((queueDescriptor.m_Parameters.m_Function == ActivationFunction::Abs));
- // return so we can do extra, backend-specific tests
+ // Returns so we can do extra, backend-specific tests.
return workload;
}
-template <typename AdditionWorkload>
+template <typename AdditionWorkload, armnn::DataType DataType>
std::unique_ptr<AdditionWorkload> CreateAdditionWorkloadTest(armnn::IWorkloadFactory& factory,
armnn::Graph& graph)
{
- // create the layer we're testing
+ // Creates the layer we're testing.
Layer* const layer = graph.AddLayer<AdditionLayer>("layer");
- // create extra layers
+ // Creates extra layers.
Layer* const input1 = graph.AddLayer<InputLayer>(1, "input1");
Layer* const input2 = graph.AddLayer<InputLayer>(2, "input2");
Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
- // connect up
- armnn::TensorInfo tensorInfo({2, 3}, AdditionWorkload::ms_DataType);
+ // Connects up.
+ armnn::TensorInfo tensorInfo({2, 3}, DataType);
Connect(input1, layer, tensorInfo, 0, 0);
Connect(input2, layer, tensorInfo, 0, 1);
Connect(layer, output, tensorInfo);
CreateTensorHandles(graph, factory);
- // make the workload and check it
+ // Makes the workload and checks it.
auto workload = MakeAndCheckWorkload<AdditionWorkload>(*layer, graph, factory);
AdditionQueueDescriptor queueDescriptor = workload->GetData();
BOOST_TEST(queueDescriptor.m_Inputs.size() == 2);
BOOST_TEST(queueDescriptor.m_Outputs.size() == 1);
- // return so we can do extra, backend-specific tests
+ // Returns so we can do extra, backend-specific tests.
return workload;
}
-template <typename BatchNormalizationFloat32Workload>
+template <typename BatchNormalizationFloat32Workload, armnn::DataType DataType>
std::unique_ptr<BatchNormalizationFloat32Workload> CreateBatchNormalizationWorkloadTest(
armnn::IWorkloadFactory& factory, armnn::Graph& graph)
{
- // create the layer we're testing
+ // Creates the layer we're testing.
BatchNormalizationDescriptor layerDesc;
layerDesc.m_Eps = 0.05f;
BatchNormalizationLayer* const layer = graph.AddLayer<BatchNormalizationLayer>(layerDesc, "layer");
- armnn::TensorInfo weightInfo({3}, armnn::DataType::Float32);
+ armnn::TensorInfo weightInfo({3}, DataType);
layer->m_Mean = std::make_unique<ScopedCpuTensorHandle>(weightInfo);
layer->m_Variance = std::make_unique<ScopedCpuTensorHandle>(weightInfo);
layer->m_Beta = std::make_unique<ScopedCpuTensorHandle>(weightInfo);
@@ -145,37 +146,37 @@ std::unique_ptr<BatchNormalizationFloat32Workload> CreateBatchNormalizationWorkl
layer->m_Beta->Allocate();
layer->m_Gamma->Allocate();
- // create extra layers
+ // Creates extra layers.
Layer* const input = graph.AddLayer<InputLayer>(0, "input");
Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
- // connect up
- armnn::TensorInfo tensorInfo({2, 3, 1, 1}, armnn::DataType::Float32);
+ // Connects up.
+ armnn::TensorInfo tensorInfo({2, 3, 1, 1}, DataType);
Connect(input, layer, tensorInfo);
Connect(layer, output, tensorInfo);
CreateTensorHandles(graph, factory);
- // make the workload and check it
+ // Makes the workload and checks it.
auto workload = MakeAndCheckWorkload<BatchNormalizationFloat32Workload>(*layer, graph, factory);
BatchNormalizationQueueDescriptor queueDescriptor = workload->GetData();
BOOST_TEST(queueDescriptor.m_Parameters.m_Eps == 0.05f);
BOOST_TEST(queueDescriptor.m_Inputs.size() == 1);
BOOST_TEST(queueDescriptor.m_Outputs.size() == 1);
- BOOST_TEST((queueDescriptor.m_Mean->GetTensorInfo() == TensorInfo({3}, DataType::Float32)));
- BOOST_TEST((queueDescriptor.m_Variance->GetTensorInfo() == TensorInfo({3}, DataType::Float32)));
- BOOST_TEST((queueDescriptor.m_Gamma->GetTensorInfo() == TensorInfo({3}, DataType::Float32)));
- BOOST_TEST((queueDescriptor.m_Beta->GetTensorInfo() == TensorInfo({3}, DataType::Float32)));
+ BOOST_TEST((queueDescriptor.m_Mean->GetTensorInfo() == TensorInfo({3}, DataType)));
+ BOOST_TEST((queueDescriptor.m_Variance->GetTensorInfo() == TensorInfo({3}, DataType)));
+ BOOST_TEST((queueDescriptor.m_Gamma->GetTensorInfo() == TensorInfo({3}, DataType)));
+ BOOST_TEST((queueDescriptor.m_Beta->GetTensorInfo() == TensorInfo({3}, DataType)));
- // return so we can do extra, backend-specific tests
+ // Returns so we can do extra, backend-specific tests.
return workload;
}
-template <typename Convolution2dWorkload>
+template <typename Convolution2dWorkload, armnn::DataType DataType>
std::unique_ptr<Convolution2dWorkload> CreateConvolution2dWorkloadTest(armnn::IWorkloadFactory& factory,
armnn::Graph& graph)
{
- // create the layer we're testing
+ // Creates the layer we're testing.
Convolution2dDescriptor layerDesc;
layerDesc.m_PadLeft = 3;
layerDesc.m_PadRight = 3;
@@ -187,24 +188,22 @@ std::unique_ptr<Convolution2dWorkload> CreateConvolution2dWorkloadTest(armnn::IW
Convolution2dLayer* const layer = graph.AddLayer<Convolution2dLayer>(layerDesc, "layer");
- layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(TensorInfo({2, 3, 5, 3},
- Convolution2dWorkload::ms_DataType));
- layer->m_Bias = std::make_unique<ScopedCpuTensorHandle>
- (TensorInfo({2}, GetBiasDataType(Convolution2dWorkload::ms_DataType)));
+ layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(TensorInfo({2, 3, 5, 3}, DataType));
+ layer->m_Bias = std::make_unique<ScopedCpuTensorHandle>(TensorInfo({2}, GetBiasDataType(DataType)));
layer->m_Weight->Allocate();
layer->m_Bias->Allocate();
- // create extra layers
+ // Creates extra layers.
Layer* const input = graph.AddLayer<InputLayer>(0, "input");
Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
- // connect up
- Connect(input, layer, TensorInfo({2, 3, 8, 16}, Convolution2dWorkload::ms_DataType));
- Connect(layer, output, TensorInfo({2, 2, 2, 10}, Convolution2dWorkload::ms_DataType));
+ // Connects up.
+ Connect(input, layer, TensorInfo({2, 3, 8, 16}, DataType));
+ Connect(layer, output, TensorInfo({2, 2, 2, 10}, DataType));
CreateTensorHandles(graph, factory);
- // make the workload and check it
+ // Makes the workload and checks it.
auto workload = MakeAndCheckWorkload<Convolution2dWorkload>(*layer, graph, factory);
Convolution2dQueueDescriptor queueDescriptor = workload->GetData();
@@ -218,20 +217,123 @@ std::unique_ptr<Convolution2dWorkload> CreateConvolution2dWorkloadTest(armnn::IW
BOOST_TEST(queueDescriptor.m_Inputs.size() == 1);
BOOST_TEST(queueDescriptor.m_Outputs.size() == 1);
- BOOST_TEST((queueDescriptor.m_Weight->GetTensorInfo() == TensorInfo({2, 3, 5, 3},
- Convolution2dWorkload::ms_DataType)));
+ BOOST_TEST((queueDescriptor.m_Weight->GetTensorInfo() == TensorInfo({2, 3, 5, 3}, DataType)));
BOOST_TEST((queueDescriptor.m_Bias->GetTensorInfo() ==
- TensorInfo({2}, GetBiasDataType(Convolution2dWorkload::ms_DataType))));
+ TensorInfo({2}, GetBiasDataType(DataType))));
- // return so we can do extra, backend-specific tests
+ // Returns so we can do extra, backend-specific tests.
return workload;
}
-template <typename Convolution2dWorkload>
+template <typename LstmWorkload>
+std::unique_ptr<LstmWorkload> CreateLstmWorkloadTest(armnn::IWorkloadFactory& factory, armnn::Graph& graph)
+{
+ // This parameter setting is for withCifgWithPeepholeNoProjection.
+ LstmDescriptor layerDesc;
+ layerDesc.m_ActivationFunc = 4;
+ layerDesc.m_ClippingThresCell = 0.0f;
+ layerDesc.m_ClippingThresProj = 0.0f;
+ layerDesc.m_CifgEnabled = true;
+ layerDesc.m_PeepholeEnabled = true;
+ layerDesc.m_ProjectionEnabled = false;
+
+ LstmLayer* const layer = graph.AddLayer<LstmLayer>(layerDesc, "layer");
+ unsigned int batchSize = 2;
+ unsigned int inputSize = 2;
+ unsigned int numUnits = 4;
+ unsigned int outputSize = 4;
+
+ layer->m_BasicParameters.m_InputToForgetWeights = std::make_unique<ScopedCpuTensorHandle>
+ (TensorInfo({ numUnits, inputSize }, DataType::Float32));
+ layer->m_BasicParameters.m_InputToCellWeights = std::make_unique<ScopedCpuTensorHandle>
+ (TensorInfo({ numUnits, inputSize }, DataType::Float32));
+ layer->m_BasicParameters.m_InputToOutputWeights = std::make_unique<ScopedCpuTensorHandle>
+ (TensorInfo({ numUnits, inputSize }, DataType::Float32));
+ layer->m_BasicParameters.m_RecurrentToForgetWeights = std::make_unique<ScopedCpuTensorHandle>
+ (TensorInfo({ numUnits, outputSize }, DataType::Float32));
+ layer->m_BasicParameters.m_RecurrentToCellWeights = std::make_unique<ScopedCpuTensorHandle>
+ (TensorInfo({ numUnits, outputSize }, DataType::Float32));
+ layer->m_BasicParameters.m_RecurrentToOutputWeights = std::make_unique<ScopedCpuTensorHandle>
+ (TensorInfo({ numUnits, outputSize }, DataType::Float32));
+ layer->m_BasicParameters.m_ForgetGateBias = std::make_unique<ScopedCpuTensorHandle>
+ (TensorInfo({ numUnits }, DataType::Float32));
+ layer->m_BasicParameters.m_CellBias = std::make_unique<ScopedCpuTensorHandle>
+ (TensorInfo({ numUnits }, DataType::Float32));
+ layer->m_BasicParameters.m_OutputGateBias = std::make_unique<ScopedCpuTensorHandle>
+ (TensorInfo({ numUnits }, DataType::Float32));
+
+ layer->m_BasicParameters.m_InputToForgetWeights->Allocate();
+ layer->m_BasicParameters.m_InputToCellWeights->Allocate();
+ layer->m_BasicParameters.m_InputToOutputWeights->Allocate();
+ layer->m_BasicParameters.m_RecurrentToForgetWeights->Allocate();
+ layer->m_BasicParameters.m_RecurrentToCellWeights->Allocate();
+ layer->m_BasicParameters.m_RecurrentToOutputWeights->Allocate();
+ layer->m_BasicParameters.m_ForgetGateBias->Allocate();
+ layer->m_BasicParameters.m_CellBias->Allocate();
+ layer->m_BasicParameters.m_OutputGateBias->Allocate();
+
+
+ if (layerDesc.m_PeepholeEnabled)
+ {
+ layer->m_PeepholeParameters.m_CellToForgetWeights = std::make_unique<ScopedCpuTensorHandle>
+ (TensorInfo({ numUnits }, DataType::Float32));
+ layer->m_PeepholeParameters.m_CellToOutputWeights = std::make_unique<ScopedCpuTensorHandle>
+ (TensorInfo({ numUnits }, DataType::Float32));
+ layer->m_PeepholeParameters.m_CellToForgetWeights->Allocate();
+ layer->m_PeepholeParameters.m_CellToOutputWeights->Allocate();
+ }
+
+ // Creates input and output layers.
+ Layer* const input = graph.AddLayer<InputLayer>(0, "input");
+ Layer* const outputStateIn = graph.AddLayer<InputLayer>(1, "outputStateIn");
+ Layer* const cellStateIn = graph.AddLayer<InputLayer>(2, "cellStateIn");
+ Layer* const scratchBuffer = graph.AddLayer<OutputLayer>(0, "scratchBuffer");
+ Layer* const outputStateOut = graph.AddLayer<OutputLayer>(1, "outputStateOut");
+ Layer* const cellStateOut = graph.AddLayer<OutputLayer>(2, "cellStateOut");
+ Layer* const output = graph.AddLayer<OutputLayer>(3, "output");
+
+ // Connects up.
+ armnn::TensorInfo lstmTensorInfo1({ batchSize, inputSize }, DataType::Float32);
+ armnn::TensorInfo lstmTensorInfo2({ batchSize, numUnits}, DataType::Float32);
+ armnn::TensorInfo lstmTensorInfo3({ batchSize, outputSize }, DataType::Float32);
+ armnn::TensorInfo lstmTensorInfoScratchBuff({ batchSize, numUnits*3 }, DataType::Float32);
+ if (layerDesc.m_CifgEnabled)
+ {
+ lstmTensorInfoScratchBuff.SetShape({ batchSize, numUnits*4 });
+ }
+
+ Connect(input, layer, lstmTensorInfo1, 0, 0);
+ Connect(cellStateIn, layer, lstmTensorInfo2, 0, 1);
+ Connect(outputStateIn, layer, lstmTensorInfo3, 0, 2);
+ Connect(layer, scratchBuffer, lstmTensorInfoScratchBuff, 0, 0);
+ Connect(layer, outputStateOut, lstmTensorInfo3, 1, 0);
+ Connect(layer, cellStateOut, lstmTensorInfo2, 2, 0);
+ Connect(layer, output, lstmTensorInfo3, 3, 0);
+
+ CreateTensorHandles(graph, factory);
+
+ // Makes the workload and checks it.
+ auto workload = MakeAndCheckWorkload<LstmWorkload>(*layer, graph, factory);
+ LstmQueueDescriptor queueDescriptor = workload->GetData();
+ BOOST_TEST(queueDescriptor.m_Parameters.m_ActivationFunc == 4);
+ BOOST_TEST(queueDescriptor.m_Parameters.m_ClippingThresCell == 0.0f);
+ BOOST_TEST(queueDescriptor.m_Parameters.m_ClippingThresProj == 0.0f);
+ BOOST_TEST(queueDescriptor.m_Inputs.size() == 3);
+ BOOST_TEST(queueDescriptor.m_Outputs.size() == 4);
+
+ BOOST_TEST((queueDescriptor.m_InputToForgetWeights->GetTensorInfo() == TensorInfo({ numUnits, inputSize },
+ DataType::Float32)));
+ BOOST_TEST((queueDescriptor.m_OutputGateBias->GetTensorInfo() == TensorInfo({ numUnits },
+ DataType::Float32)));
+ BOOST_TEST((queueDescriptor.m_CellBias->GetTensorInfo() == TensorInfo({ numUnits }, DataType::Float32)));
+ return workload;
+}
+
+template <typename Convolution2dWorkload, armnn::DataType DataType>
std::unique_ptr<Convolution2dWorkload> CreateDirectConvolution2dWorkloadTest(armnn::IWorkloadFactory& factory,
armnn::Graph& graph)
{
- // create the layer we're testing
+ // Creates the layer we're testing.
Convolution2dDescriptor layerDesc;
layerDesc.m_PadLeft = 1;
layerDesc.m_PadRight = 1;
@@ -243,26 +345,25 @@ std::unique_ptr<Convolution2dWorkload> CreateDirectConvolution2dWorkloadTest(arm
Convolution2dLayer* const layer = graph.AddLayer<Convolution2dLayer>(layerDesc, "layer");
- float inputsQScale = Convolution2dWorkload::ms_DataType == DataType::QuantisedAsymm8 ? 1.0f : 0.0;
- float outputQScale = Convolution2dWorkload::ms_DataType == DataType::QuantisedAsymm8 ? 2.0f : 0.0;
+ float inputsQScale = DataType == armnn::DataType::QuantisedAsymm8 ? 1.0f : 0.0;
+ float outputQScale = DataType == armnn::DataType::QuantisedAsymm8 ? 2.0f : 0.0;
- layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(TensorInfo({ 2, 3, 3, 3 },
- Convolution2dWorkload::ms_DataType, inputsQScale));
+ layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(TensorInfo({ 2, 3, 3, 3 }, DataType, inputsQScale));
layer->m_Bias = std::make_unique<ScopedCpuTensorHandle>
- (TensorInfo({2}, GetBiasDataType(Convolution2dWorkload::ms_DataType), inputsQScale));
+ (TensorInfo({2}, GetBiasDataType(DataType), inputsQScale));
layer->m_Weight->Allocate();
layer->m_Bias->Allocate();
- // create extra layers
+ // Creates extra layers.
Layer* const input = graph.AddLayer<InputLayer>(0, "input");
Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
- // connect up
- Connect(input, layer, TensorInfo({2, 3, 6, 6}, Convolution2dWorkload::ms_DataType, inputsQScale));
- Connect(layer, output, TensorInfo({2, 2, 6, 6}, Convolution2dWorkload::ms_DataType, outputQScale));
+ // Connects up.
+ Connect(input, layer, TensorInfo({2, 3, 6, 6}, DataType, inputsQScale));
+ Connect(layer, output, TensorInfo({2, 2, 6, 6}, DataType, outputQScale));
CreateTensorHandles(graph, factory);
- // make the workload and check it
+ // Makes the workload and checks it.
auto workload = MakeAndCheckWorkload<Convolution2dWorkload>(*layer, graph, factory);
Convolution2dQueueDescriptor queueDescriptor = workload->GetData();
@@ -277,11 +378,11 @@ std::unique_ptr<Convolution2dWorkload> CreateDirectConvolution2dWorkloadTest(arm
BOOST_TEST(queueDescriptor.m_Inputs.size() == 1);
BOOST_TEST(queueDescriptor.m_Outputs.size() == 1);
BOOST_TEST((queueDescriptor.m_Weight->GetTensorInfo() == TensorInfo({2, 3, 3, 3},
- Convolution2dWorkload::ms_DataType, inputsQScale)));
+ DataType, inputsQScale)));
BOOST_TEST((queueDescriptor.m_Bias->GetTensorInfo()
- == TensorInfo({2}, GetBiasDataType(Convolution2dWorkload::ms_DataType), inputsQScale)));
+ == TensorInfo({2}, GetBiasDataType(DataType), inputsQScale)));
- // return so we can do extra, backend-specific tests
+ // Returns so we can do extra, backend-specific tests.
return workload;
}
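A note on the two convolution helpers above: the bias tensor's type comes from GetBiasDataType(DataType) rather than from DataType itself, because quantized inputs need a wider bias. The mapping below is an assumption about Arm NN's quantized-bias handling, not something shown in this diff, sketched for illustration:

    // Assumed mapping, for illustration only -- not part of this commit.
    BOOST_TEST((GetBiasDataType(armnn::DataType::Float32) == armnn::DataType::Float32));
    BOOST_TEST((GetBiasDataType(armnn::DataType::QuantisedAsymm8) == armnn::DataType::Signed32));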
@@ -289,7 +390,7 @@ template <typename DepthwiseConvolution2dFloat32Workload>
std::unique_ptr<DepthwiseConvolution2dFloat32Workload> CreateDepthwiseConvolution2dWorkloadTest(
armnn::IWorkloadFactory& factory, armnn::Graph& graph)
{
- // create the layer we're testing
+ // Creates the layer we're testing.
DepthwiseConvolution2dDescriptor layerDesc;
layerDesc.m_PadLeft = 3;
layerDesc.m_PadRight = 3;
@@ -306,16 +407,16 @@ std::unique_ptr<DepthwiseConvolution2dFloat32Workload> CreateDepthwiseConvolutio
layer->m_Weight->Allocate();
layer->m_Bias->Allocate();
- // create extra layers
+ // Creates extra layers.
Layer* const input = graph.AddLayer<InputLayer>(0, "input");
Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
- // connect up
+ // Connects up.
Connect(input, layer, TensorInfo({2, 3, 8, 16}, armnn::DataType::Float32));
Connect(layer, output, TensorInfo({2, 9, 2, 10}, armnn::DataType::Float32));
CreateTensorHandles(graph, factory);
- // make the workload and check it
+ // Makes the workload and checks it.
auto workload = MakeAndCheckWorkload<DepthwiseConvolution2dFloat32Workload>(*layer, graph, factory);
DepthwiseConvolution2dQueueDescriptor queueDescriptor = workload->GetData();
@@ -332,41 +433,39 @@ std::unique_ptr<DepthwiseConvolution2dFloat32Workload> CreateDepthwiseConvolutio
BOOST_TEST((queueDescriptor.m_Weight->GetTensorInfo() == TensorInfo({3, 3, 5, 3}, DataType::Float32)));
BOOST_TEST((queueDescriptor.m_Bias->GetTensorInfo() == TensorInfo({9}, DataType::Float32)));
- // return so we can do extra, backend-specific tests
+ // Returns so we can do extra, backend-specific tests.
return workload;
}
-template <typename FullyConnectedWorkload>
+template <typename FullyConnectedWorkload, armnn::DataType DataType>
std::unique_ptr<FullyConnectedWorkload> CreateFullyConnectedWorkloadTest(armnn::IWorkloadFactory& factory,
armnn::Graph& graph)
{
- // create the layer we're testing
+ // Creates the layer we're testing.
FullyConnectedDescriptor layerDesc;
layerDesc.m_BiasEnabled = true;
layerDesc.m_TransposeWeightMatrix = true;
FullyConnectedLayer* const layer = graph.AddLayer<FullyConnectedLayer>(layerDesc, "layer");
- float inputsQScale = FullyConnectedWorkload::ms_DataType == DataType::QuantisedAsymm8 ? 1.0f : 0.0;
- float outputQScale = FullyConnectedWorkload::ms_DataType == DataType::QuantisedAsymm8 ? 2.0f : 0.0;
+ float inputsQScale = DataType == armnn::DataType::QuantisedAsymm8 ? 1.0f : 0.0;
+ float outputQScale = DataType == armnn::DataType::QuantisedAsymm8 ? 2.0f : 0.0;
- layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(TensorInfo({7, 20},
- FullyConnectedWorkload::ms_DataType, inputsQScale, 0));
- layer->m_Bias = std::make_unique<ScopedCpuTensorHandle>(TensorInfo({7},
- GetBiasDataType(FullyConnectedWorkload::ms_DataType), inputsQScale));
+ layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(TensorInfo({7, 20}, DataType, inputsQScale, 0));
+ layer->m_Bias = std::make_unique<ScopedCpuTensorHandle>(TensorInfo({7}, GetBiasDataType(DataType), inputsQScale));
layer->m_Weight->Allocate();
layer->m_Bias->Allocate();
- // create extra layers
+ // Creates extra layers.
Layer* const input = graph.AddLayer<InputLayer>(0, "input");
Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
- // connect up
- Connect(input, layer, TensorInfo({3, 1, 4, 5}, FullyConnectedWorkload::ms_DataType, inputsQScale));
- Connect(layer, output, TensorInfo({3, 7}, FullyConnectedWorkload::ms_DataType, outputQScale));
+ // Connects up.
+ Connect(input, layer, TensorInfo({3, 1, 4, 5}, DataType, inputsQScale));
+ Connect(layer, output, TensorInfo({3, 7}, DataType, outputQScale));
CreateTensorHandles(graph, factory);
- // make the workload and check it
+ // Makes the workload and checks it.
auto workload = MakeAndCheckWorkload<FullyConnectedWorkload>(*layer, graph, factory);
FullyConnectedQueueDescriptor queueDescriptor = workload->GetData();
@@ -375,50 +474,48 @@ std::unique_ptr<FullyConnectedWorkload> CreateFullyConnectedWorkloadTest(armnn::
BOOST_TEST(queueDescriptor.m_Inputs.size() == 1);
BOOST_TEST(queueDescriptor.m_Outputs.size() == 1);
- BOOST_TEST((queueDescriptor.m_Weight->GetTensorInfo() ==
- TensorInfo({7, 20}, FullyConnectedWorkload::ms_DataType, inputsQScale)));
- BOOST_TEST((queueDescriptor.m_Bias->GetTensorInfo() ==
- TensorInfo({7}, GetBiasDataType(FullyConnectedWorkload::ms_DataType), inputsQScale)));
+ BOOST_TEST((queueDescriptor.m_Weight->GetTensorInfo() == TensorInfo({7, 20}, DataType, inputsQScale)));
+ BOOST_TEST((queueDescriptor.m_Bias->GetTensorInfo() == TensorInfo({7}, GetBiasDataType(DataType), inputsQScale)));
- // return so we can do extra, backend-specific tests
+ // Returns so we can do extra, backend-specific tests.
return workload;
}
-template <typename MultiplicationWorkload>
+template <typename MultiplicationWorkload, armnn::DataType DataType>
std::unique_ptr<MultiplicationWorkload> CreateMultiplicationWorkloadTest(armnn::IWorkloadFactory& factory,
armnn::Graph& graph)
{
- // create the layer we're testing
+ // Creates the layer we're testing.
Layer* const layer = graph.AddLayer<MultiplicationLayer>("layer");
- // create extra layers
+ // Creates extra layers.
Layer* const input1 = graph.AddLayer<InputLayer>(1, "input1");
Layer* const input2 = graph.AddLayer<InputLayer>(2, "input2");
Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
- // connect up
- armnn::TensorInfo tensorInfo({2, 3}, MultiplicationWorkload::ms_DataType);
+ // Connects up.
+ armnn::TensorInfo tensorInfo({2, 3}, DataType);
Connect(input1, layer, tensorInfo, 0, 0);
Connect(input2, layer, tensorInfo, 0, 1);
Connect(layer, output, tensorInfo);
CreateTensorHandles(graph, factory);
- // make the workload and check it
+ // Makes the workload and checks it.
auto workload = MakeAndCheckWorkload<MultiplicationWorkload>(*layer, graph, factory);
MultiplicationQueueDescriptor queueDescriptor = workload->GetData();
BOOST_TEST(queueDescriptor.m_Inputs.size() == 2);
BOOST_TEST(queueDescriptor.m_Outputs.size() == 1);
- // return so we can do extra, backend-specific tests
+ // Returns so we can do extra, backend-specific tests.
return workload;
}
-template <typename NormalizationFloat32Workload>
+template <typename NormalizationFloat32Workload, armnn::DataType DataType>
std::unique_ptr<NormalizationFloat32Workload> CreateNormalizationWorkloadTest(armnn::IWorkloadFactory& factory,
armnn::Graph& graph)
{
- // create the layer we're testing
+ // Creates the layer we're testing.
NormalizationDescriptor layerDesc;
layerDesc.m_NormChannelType = NormalizationAlgorithmChannel::Across;
layerDesc.m_NormMethodType = NormalizationAlgorithmMethod::LocalBrightness;
@@ -429,16 +526,16 @@ std::unique_ptr<NormalizationFloat32Workload> CreateNormalizationWorkloadTest(ar
NormalizationLayer* layer = graph.AddLayer<NormalizationLayer>(layerDesc, "layer");
- // create extra layers
+ // Creates extra layers.
Layer* const input = graph.AddLayer<InputLayer>(0, "input");
Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
- // connect up
- Connect(input, layer, TensorInfo({3, 5, 5, 1}, armnn::DataType::Float32));
- Connect(layer, output, TensorInfo({3, 5, 5, 1}, armnn::DataType::Float32));
+ // Connects up.
+ Connect(input, layer, TensorInfo({3, 5, 5, 1}, DataType));
+ Connect(layer, output, TensorInfo({3, 5, 5, 1}, DataType));
CreateTensorHandles(graph, factory);
- // make the workload and check it
+ // Makes the workload and checks it.
auto workload = MakeAndCheckWorkload<NormalizationFloat32Workload>(*layer, graph, factory);
NormalizationQueueDescriptor queueDescriptor = workload->GetData();
@@ -452,15 +549,15 @@ std::unique_ptr<NormalizationFloat32Workload> CreateNormalizationWorkloadTest(ar
BOOST_TEST(queueDescriptor.m_Inputs.size() == 1);
BOOST_TEST(queueDescriptor.m_Outputs.size() == 1);
- // return so we can do extra, backend-specific tests
+ // Returns so we can do extra, backend-specific tests.
return workload;
}
-template <typename Pooling2dWorkload>
+template <typename Pooling2dWorkload, armnn::DataType DataType>
std::unique_ptr<Pooling2dWorkload> CreatePooling2dWorkloadTest(armnn::IWorkloadFactory& factory,
armnn::Graph& graph)
{
- // create the layer we're testing
+ // Creates the layer we're testing.
Pooling2dDescriptor layerDesc;
layerDesc.m_PoolType = PoolingAlgorithm::Average;
layerDesc.m_PoolWidth = 3;
@@ -475,16 +572,16 @@ std::unique_ptr<Pooling2dWorkload> CreatePooling2dWorkloadTest(armnn::IWorkloadF
Pooling2dLayer* const layer = graph.AddLayer<Pooling2dLayer>(layerDesc, "layer");
- // create extra layers
+ // Creates extra layers.
Layer* const input = graph.AddLayer<InputLayer>(0, "input");
Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
- // connect up
- Connect(input, layer, TensorInfo({3, 2, 5, 5}, Pooling2dWorkload::ms_DataType));
- Connect(layer, output, TensorInfo({3, 2, 2, 4}, Pooling2dWorkload::ms_DataType));
+ // Connects up.
+ Connect(input, layer, TensorInfo({3, 2, 5, 5}, DataType));
+ Connect(layer, output, TensorInfo({3, 2, 2, 4}, DataType));
CreateTensorHandles(graph, factory);
- // make the workload and check it
+ // Makes the workload and checks it.
auto workload = MakeAndCheckWorkload<Pooling2dWorkload>(*layer, graph, factory);
Pooling2dQueueDescriptor queueDescriptor = workload->GetData();
@@ -502,70 +599,70 @@ std::unique_ptr<Pooling2dWorkload> CreatePooling2dWorkloadTest(armnn::IWorkloadF
BOOST_TEST(queueDescriptor.m_Inputs.size() == 1);
BOOST_TEST(queueDescriptor.m_Outputs.size() == 1);
- // return so we can do extra, backend-specific tests
+ // Returns so we can do extra, backend-specific tests.
return workload;
}
-template <typename SoftmaxWorkload>
+template <typename SoftmaxWorkload, armnn::DataType DataType>
std::unique_ptr<SoftmaxWorkload> CreateSoftmaxWorkloadTest(armnn::IWorkloadFactory& factory,
armnn::Graph& graph)
{
- // create the layer we're testing
+ // Creates the layer we're testing.
SoftmaxDescriptor softmaxDescriptor;
Layer* const layer = graph.AddLayer<SoftmaxLayer>(softmaxDescriptor, "layer");
- // create extra layers
+ // Creates extra layers.
Layer* const input = graph.AddLayer<InputLayer>(0, "input");
Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
- // connect up
- armnn::TensorInfo tensorInfo({4, 1}, SoftmaxWorkload::ms_DataType);
+ // Connects up.
+ armnn::TensorInfo tensorInfo({4, 1}, DataType);
Connect(input, layer, tensorInfo);
Connect(layer, output, tensorInfo);
CreateTensorHandles(graph, factory);
- // make the workload and check it
+ // Makes the workload and checks it.
auto workload = MakeAndCheckWorkload<SoftmaxWorkload>(*layer, graph, factory);
SoftmaxQueueDescriptor queueDescriptor = workload->GetData();
BOOST_TEST(queueDescriptor.m_Inputs.size() == 1);
BOOST_TEST(queueDescriptor.m_Outputs.size() == 1);
- // return so we can do extra, backend-specific tests
+ // Returns so we can do extra, backend-specific tests.
return workload;
}
-template<typename SplitterWorkload>
+template<typename SplitterWorkload, armnn::DataType DataType>
std::unique_ptr<SplitterWorkload>
CreateSplitterWorkloadTest(armnn::IWorkloadFactory& factory, armnn::Graph& graph)
{
- // create the layer we're testing
+ // Creates the layer we're testing.
// NOTE: need three dimensions channels, height/y, width/x because the Compute
// library restricts subtensors to have the same x and y dimensions as
// their parent tensors, and therefore the origin on the x and y dimension
// has to be zero for any view. So we need a third dimension to split...
- // NOTE: arguments are: number of views, number of dimensions
+ // NOTE: arguments are: number of views, number of dimensions.
ViewsDescriptor layerDesc(3, 3);
- // NOTE: arguments are: view, dimension, value
+ // NOTE: arguments are: view, dimension, value.
layerDesc.SetViewOriginCoord(0, 0, 0);
layerDesc.SetViewOriginCoord(1, 0, 1);
layerDesc.SetViewOriginCoord(2, 0, 3);
Layer* const layer = graph.AddLayer<SplitterLayer>(layerDesc, "layer");
- // add extra layers
+ // Adds extra layers.
Layer* const input = graph.AddLayer<InputLayer>(0, "input");
Layer* const output0 = graph.AddLayer<OutputLayer>(0, "output0");
Layer* const output1 = graph.AddLayer<OutputLayer>(1, "output1");
Layer* const output2 = graph.AddLayer<OutputLayer>(2, "output2");
- // connect up
- armnn::TensorInfo tensorInfo({5, 7, 7}, SplitterWorkload::ms_DataType);
+ // Connects up.
+ armnn::TensorInfo tensorInfo({5, 7, 7}, DataType);
Connect(input, layer, tensorInfo);
- armnn::TensorInfo output0Info({1, 7, 7}, SplitterWorkload::ms_DataType);
- armnn::TensorInfo output1Info({2, 7, 7}, SplitterWorkload::ms_DataType);
- armnn::TensorInfo output2Info({2, 7, 7}, SplitterWorkload::ms_DataType);
+ armnn::TensorInfo output0Info({1, 7, 7}, DataType);
+ armnn::TensorInfo output1Info({2, 7, 7}, DataType);
+ armnn::TensorInfo output2Info({2, 7, 7}, DataType);
Connect(layer, output0, output0Info, 0, 0);
Connect(layer, output1, output1Info, 1, 0);
@@ -573,7 +670,7 @@ std::unique_ptr<SplitterWorkload>
CreateTensorHandles(graph, factory);
- // make the workload and check it
+ // Makes the workload and checks it.
auto workload = MakeAndCheckWorkload<SplitterWorkload>(*layer, graph, factory);
SplitterQueueDescriptor queueDescriptor = workload->GetData();
@@ -591,24 +688,21 @@ std::unique_ptr<SplitterWorkload>
BOOST_TEST(queueDescriptor.m_ViewOrigins[1].m_Origin[2] == 0);
BOOST_TEST(queueDescriptor.m_ViewOrigins[2].m_Origin[2] == 0);
- // return so we can do extra, backend-specific tests
+ // Returns so we can do extra, backend-specific tests.
return workload;
}
-/// This function constructs a graph with both a splitter and a merger, and returns a pair of the workloads
-template<typename SplitterWorkload, typename MergerWorkload>
+/// This function constructs a graph with both a splitter and a merger, and returns a pair of the workloads.
+template<typename SplitterWorkload, typename MergerWorkload, armnn::DataType DataType>
std::pair<std::unique_ptr<SplitterWorkload>, std::unique_ptr<MergerWorkload>>
CreateSplitterMergerWorkloadTest(armnn::IWorkloadFactory& factory, armnn::Graph& graph)
{
- static_assert(SplitterWorkload::ms_DataType == MergerWorkload::ms_DataType,
- "Splitter and merger workloads must have the same data type");
+ armnn::TensorInfo inputTensorInfo({ 1, 2, 100, 10 }, DataType);
- armnn::TensorInfo inputTensorInfo({ 1, 2, 100, 10 }, SplitterWorkload::ms_DataType);
+ armnn::TensorInfo splitTensorInfo1({ 1, 1, 100, 10 }, DataType);
+ armnn::TensorInfo splitTensorInfo2({ 1, 1, 100, 10 }, DataType);
- armnn::TensorInfo splitTensorInfo1({ 1, 1, 100, 10 }, SplitterWorkload::ms_DataType);
- armnn::TensorInfo splitTensorInfo2({ 1, 1, 100, 10 }, SplitterWorkload::ms_DataType);
-
- //construct the graph
+ // Constructs the graph.
Layer* const input = graph.AddLayer<InputLayer>(0, "input");
armnn::ViewsDescriptor splitterViews(2);
@@ -641,12 +735,12 @@ std::pair<std::unique_ptr<SplitterWorkload>, std::unique_ptr<MergerWorkload>>
Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
- // add connections
+ // Adds connections.
Connect(input, splitter, inputTensorInfo, 0, 0);
BOOST_TEST_CHECKPOINT("connect input to splitter");
- Connect(splitter, merger, splitTensorInfo1, 0, 1); // The splitter & merger are connected up
+ Connect(splitter, merger, splitTensorInfo1, 0, 1); // The splitter & merger are connected up.
BOOST_TEST_CHECKPOINT("connect splitter[0] to merger[1]");
- Connect(splitter, merger, splitTensorInfo2, 1, 0); // so that the outputs are flipped round
+ Connect(splitter, merger, splitTensorInfo2, 1, 0); // So that the outputs are flipped round.
BOOST_TEST_CHECKPOINT("connect splitter[1] to merger[0]");
Connect(merger, output, inputTensorInfo, 0, 0);
BOOST_TEST_CHECKPOINT("connect merger to output");
@@ -665,7 +759,7 @@ std::pair<std::unique_ptr<SplitterWorkload>, std::unique_ptr<MergerWorkload>>
/// This function constructs a graph with a splitter with two outputs. Each of the outputs is then
/// connected to two different activation layers
-template<typename SplitterWorkload, typename ActivationWorkload>
+template<typename SplitterWorkload, typename ActivationWorkload, armnn::DataType DataType>
void CreateSplitterMultipleInputsOneOutputWorkloadTest(armnn::IWorkloadFactory& factory, armnn::Graph& graph,
std::unique_ptr<SplitterWorkload>& wlSplitter,
std::unique_ptr<ActivationWorkload>& wlActiv0_0,
@@ -673,14 +767,11 @@ void CreateSplitterMultipleInputsOneOutputWorkloadTest(armnn::IWorkloadFactory&
std::unique_ptr<ActivationWorkload>& wlActiv1_0,
std::unique_ptr<ActivationWorkload>& wlActiv1_1)
{
- static_assert(SplitterWorkload::ms_DataType == ActivationWorkload::ms_DataType,
- "Splitter and activation workloads must have the same data type");
-
- armnn::TensorInfo inputTensorInfo ({ 1, 3, 100, 50 }, SplitterWorkload::ms_DataType);
- armnn::TensorInfo splitTensorInfo1({ 1, 1, 100, 50 }, SplitterWorkload::ms_DataType);
- armnn::TensorInfo splitTensorInfo2({ 1, 2, 100, 50 }, SplitterWorkload::ms_DataType);
+ armnn::TensorInfo inputTensorInfo ({ 1, 3, 100, 50 }, DataType);
+ armnn::TensorInfo splitTensorInfo1({ 1, 1, 100, 50 }, DataType);
+ armnn::TensorInfo splitTensorInfo2({ 1, 2, 100, 50 }, DataType);
- //construct the graph
+ // Constructs the graph.
Layer* const input = graph.AddLayer<InputLayer>(0, "input");
armnn::ViewsDescriptor splitterViews(2);
@@ -709,7 +800,7 @@ void CreateSplitterMultipleInputsOneOutputWorkloadTest(armnn::IWorkloadFactory&
Layer* const output3 = graph.AddLayer<OutputLayer>(3, "output3");
Layer* const output4 = graph.AddLayer<OutputLayer>(4, "output4");
- // add connections
+ // Adds connections.
Connect(input, splitter, inputTensorInfo, 0, 0);
Connect(splitter, activ0_0, splitTensorInfo1, 0, 0);
Connect(splitter, activ0_1, splitTensorInfo1, 0, 0);
@@ -737,97 +828,155 @@ void CreateSplitterMultipleInputsOneOutputWorkloadTest(armnn::IWorkloadFactory&
wlActiv1_1 = std::move(workloadActiv1_1);
}
-template <typename ResizeBilinearWorkload>
+template <typename ResizeBilinearWorkload, armnn::DataType DataType>
std::unique_ptr<ResizeBilinearWorkload> CreateResizeBilinearWorkloadTest(armnn::IWorkloadFactory& factory,
armnn::Graph& graph)
{
- // create the layer we're testing
+ // Creates the layer we're testing.
TensorShape outputShape({ 2, 3, 2, 2 });
ResizeBilinearDescriptor resizeDesc;
resizeDesc.m_TargetWidth = outputShape[3];
resizeDesc.m_TargetHeight = outputShape[2];
Layer* const layer = graph.AddLayer<ResizeBilinearLayer>(resizeDesc, "layer");
- // create extra layers
+ // Creates extra layers.
Layer* const input = graph.AddLayer<InputLayer>(0, "input");
Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
- // connect up
- armnn::TensorInfo inputTensorInfo({ 2, 3, 4, 4 }, ResizeBilinearWorkload::ms_DataType);
- armnn::TensorInfo outputTensorInfo(outputShape, ResizeBilinearWorkload::ms_DataType);
+ // Connects up.
+ armnn::TensorInfo inputTensorInfo({ 2, 3, 4, 4 }, DataType);
+ armnn::TensorInfo outputTensorInfo(outputShape, DataType);
Connect(input, layer, inputTensorInfo);
Connect(layer, output, outputTensorInfo);
CreateTensorHandles(graph, factory);
- // make the workload and check it
+ // Makes the workload and checks it.
auto workload = MakeAndCheckWorkload<ResizeBilinearWorkload>(*layer, graph, factory);
ResizeBilinearQueueDescriptor queueDescriptor = workload->GetData();
BOOST_TEST(queueDescriptor.m_Inputs.size() == 1);
BOOST_TEST(queueDescriptor.m_Outputs.size() == 1);
- // return so we can do extra, backend-specific tests
+ // Returns so we can do extra, backend-specific tests.
return workload;
}
-template <typename L2NormalizationWorkload>
+template <typename L2NormalizationWorkload, armnn::DataType DataType>
std::unique_ptr<L2NormalizationWorkload> CreateL2NormalizationWorkloadTest(armnn::IWorkloadFactory& factory,
armnn::Graph& graph)
{
- // create the layer we're testing
+ // Creates the layer we're testing.
Layer* const layer = graph.AddLayer<L2NormalizationLayer>("l2norm");
- // create extra layers
+ // Creates extra layers.
Layer* const input = graph.AddLayer<InputLayer>(0, "input");
Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
- // connect up
- armnn::TensorInfo inputTensorInfo({ 5, 20, 50, 67 }, L2NormalizationWorkload::ms_DataType);
- armnn::TensorInfo outputTensorInfo({ 5, 20, 50, 67 }, L2NormalizationWorkload::ms_DataType);
+ // Connects up.
+ armnn::TensorInfo inputTensorInfo({ 5, 20, 50, 67 }, DataType);
+ armnn::TensorInfo outputTensorInfo({ 5, 20, 50, 67 }, DataType);
Connect(input, layer, inputTensorInfo);
Connect(layer, output, outputTensorInfo);
CreateTensorHandles(graph, factory);
- // make the workload and check it
+ // Makes the workload and checks it.
auto workload = MakeAndCheckWorkload<L2NormalizationWorkload>(*layer, graph, factory);
L2NormalizationQueueDescriptor queueDescriptor = workload->GetData();
BOOST_TEST(queueDescriptor.m_Inputs.size() == 1);
BOOST_TEST(queueDescriptor.m_Outputs.size() == 1);
- // return so we can do extra, backend-specific tests
+ // Returns so we can do extra, backend-specific tests.
return workload;
}
-template <typename ReshapeWorkload>
+template <typename ReshapeWorkload, armnn::DataType DataType>
std::unique_ptr<ReshapeWorkload> CreateReshapeWorkloadTest(armnn::IWorkloadFactory& factory,
armnn::Graph& graph)
{
- // create the layer we're testing
+ // Creates the layer we're testing.
TensorShape outputShape({ 1, 4 });
ReshapeDescriptor reshapeDesc;
reshapeDesc.m_TargetShape = outputShape;
Layer* const layer = graph.AddLayer<ReshapeLayer>(reshapeDesc, "layer");
- // create extra layers
+ // Creates extra layers.
Layer* const input = graph.AddLayer<InputLayer>(0, "input");
Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
- // connect up
- armnn::TensorInfo inputTensorInfo({ 4, 1 }, ReshapeWorkload::ms_DataType);
- armnn::TensorInfo outputTensorInfo(outputShape, ReshapeWorkload::ms_DataType);
+ // Connects up.
+ armnn::TensorInfo inputTensorInfo({ 4, 1 }, DataType);
+ armnn::TensorInfo outputTensorInfo(outputShape, DataType);
Connect(input, layer, inputTensorInfo);
Connect(layer, output, outputTensorInfo);
CreateTensorHandles(graph, factory);
- // make the workload and check it
+ // Makes the workload and checks it.
auto workload = MakeAndCheckWorkload<ReshapeWorkload>(*layer, graph, factory);
ReshapeQueueDescriptor queueDescriptor = workload->GetData();
BOOST_TEST(queueDescriptor.m_Inputs.size() == 1);
BOOST_TEST(queueDescriptor.m_Outputs.size() == 1);
- // return so we can do extra, backend-specific tests
+ // Returns so we can do extra, backend-specific tests.
+ return workload;
+}
+
+template <typename ConvertFp16ToFp32Float32Workload>
+std::unique_ptr<ConvertFp16ToFp32Float32Workload> CreateConvertFp16ToFp32WorkloadTest(
+ armnn::IWorkloadFactory& factory, armnn::Graph& graph)
+{
+ // Creates the layer we're testing.
+ ConvertFp16ToFp32Layer* const layer = graph.AddLayer<ConvertFp16ToFp32Layer>("Fp16ToFp32Converter");
+
+ // Creates extra layers.
+ Layer* const input = graph.AddLayer<InputLayer>(0, "input");
+ Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
+
+ // Connects up.
+ armnn::TensorInfo inputTensorInfo({1, 3, 2, 3}, armnn::DataType::Float16);
+ armnn::TensorInfo outputTensorInfo({1, 3, 2, 3}, armnn::DataType::Float32);
+ Connect(input, layer, inputTensorInfo);
+ Connect(layer, output, outputTensorInfo);
+ CreateTensorHandles(graph, factory);
+
+ // Makes the workload and checks it.
+ auto workload = MakeAndCheckWorkload<ConvertFp16ToFp32Float32Workload>(*layer, graph, factory);
+
+ ConvertFp16ToFp32QueueDescriptor queueDescriptor = workload->GetData();
+ BOOST_TEST(queueDescriptor.m_Inputs.size() == 1);
+ BOOST_TEST(queueDescriptor.m_Outputs.size() == 1);
+
+ // Returns so we can do extra, backend-specific tests.
+ return workload;
+}
+
+template <typename ConvertFp32ToFp16Float16Workload>
+std::unique_ptr<ConvertFp32ToFp16Float16Workload> CreateConvertFp32ToFp16WorkloadTest(
+ armnn::IWorkloadFactory& factory, armnn::Graph& graph)
+{
+ // Creates the layer we're testing.
+ ConvertFp32ToFp16Layer* const layer = graph.AddLayer<ConvertFp32ToFp16Layer>("Fp32ToFp16Converter");
+
+ // Creates extra layers.
+ Layer* const input = graph.AddLayer<InputLayer>(0, "input");
+ Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
+
+ // Connects up.
+ armnn::TensorInfo inputTensorInfo({1, 3, 2, 3}, armnn::DataType::Float32);
+ armnn::TensorInfo outputTensorInfo({1, 3, 2, 3}, armnn::DataType::Float16);
+ Connect(input, layer, inputTensorInfo);
+ Connect(layer, output, outputTensorInfo);
+ CreateTensorHandles(graph, factory);
+
+ // Makes the workload and checks it.
+ auto workload = MakeAndCheckWorkload<ConvertFp32ToFp16Float16Workload>(*layer, graph, factory);
+
+ ConvertFp32ToFp16QueueDescriptor queueDescriptor = workload->GetData();
+ BOOST_TEST(queueDescriptor.m_Inputs.size() == 1);
+ BOOST_TEST(queueDescriptor.m_Outputs.size() == 1);
+
+ // Returns so we can do extra, backend-specific tests.
return workload;
}
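The recurring change in this file replaces the old Workload::ms_DataType static member with an explicit armnn::DataType template parameter, so a single helper now serves every precision. A minimal sketch of a migrated call site (RefActivationFloat32Workload is an assumed workload name, used for illustration only):

    armnn::Graph graph;
    armnn::RefWorkloadFactory factory;

    // The precision is now chosen at the call site instead of being baked into the workload type.
    auto workload = CreateActivationWorkloadTest<RefActivationFloat32Workload,
                                                 armnn::DataType::Float32>(factory, graph);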
diff --git a/src/armnn/test/CreateWorkloadClNeon.hpp b/src/armnn/test/CreateWorkloadClNeon.hpp
index a41a70755f..d92111ac41 100644
--- a/src/armnn/test/CreateWorkloadClNeon.hpp
+++ b/src/armnn/test/CreateWorkloadClNeon.hpp
@@ -56,22 +56,21 @@ boost::test_tools::predicate_result CompareTensorHandleShape(IComputeTensorHandl
return true;
}
-template<template <DataType> class CopyFromCpuWorkload, template <DataType> class CopyToCpuWorkload,
- typename IComputeTensorHandle>
+template<typename IComputeTensorHandle>
void CreateMemCopyWorkloads(IWorkloadFactory& factory)
{
Graph graph;
RefWorkloadFactory refFactory;
- // create the layers we're testing
+ // Creates the layers we're testing.
Layer* const layer1 = graph.AddLayer<MemCopyLayer>("layer1");
Layer* const layer2 = graph.AddLayer<MemCopyLayer>("layer2");
- // create extra layers
+ // Creates extra layers.
Layer* const input = graph.AddLayer<InputLayer>(0, "input");
Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
- // connect up
+ // Connects up.
TensorInfo tensorInfo({2, 3}, DataType::Float32);
Connect(input, layer1, tensorInfo);
Connect(layer1, layer2, tensorInfo);
@@ -83,8 +82,8 @@ void CreateMemCopyWorkloads(IWorkloadFactory& factory)
output->CreateTensorHandles(graph, refFactory);
// make the workloads and check them
- auto workload1 = MakeAndCheckWorkload<CopyFromCpuWorkload<DataType::Float32>>(*layer1, graph, factory);
- auto workload2 = MakeAndCheckWorkload<CopyToCpuWorkload<DataType::Float32>>(*layer2, graph, refFactory);
+ auto workload1 = MakeAndCheckWorkload<CopyMemGenericWorkload>(*layer1, graph, factory);
+ auto workload2 = MakeAndCheckWorkload<CopyMemGenericWorkload>(*layer2, graph, refFactory);
MemCopyQueueDescriptor queueDescriptor1 = workload1->GetData();
BOOST_TEST(queueDescriptor1.m_Inputs.size() == 1);
@@ -104,4 +103,4 @@ void CreateMemCopyWorkloads(IWorkloadFactory& factory)
BOOST_TEST((outputHandle2->GetTensorInfo() == TensorInfo({2, 3}, DataType::Float32)));
}
-} \ No newline at end of file
+} //namespace \ No newline at end of file
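The change above drops the per-direction template-template parameters: both copy directions now resolve to the single CopyMemGenericWorkload, so callers only supply the tensor-handle type. A sketch of a call site under that assumption (IClTensorHandle is an illustrative handle type, not taken from this diff):

    armnn::ClWorkloadFactory factory; // Assumed backend factory, for illustration.
    CreateMemCopyWorkloads<IClTensorHandle>(factory);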
diff --git a/src/armnn/test/CsvReaderTest.cpp b/src/armnn/test/CsvReaderTest.cpp
new file mode 100644
index 0000000000..8df61e1fdd
--- /dev/null
+++ b/src/armnn/test/CsvReaderTest.cpp
@@ -0,0 +1,124 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+#include "CsvReader.hpp"
+
+#include <boost/algorithm/string.hpp>
+#include <boost/test/unit_test.hpp>
+
+#include <iostream>
+#include <string>
+#include <boost/filesystem.hpp>
+
+using namespace armnnUtils;
+
+struct TestHelper {
+
+ TestHelper()
+ {
+ BOOST_TEST_MESSAGE("setup fixture");
+ }
+
+ ~TestHelper()
+ {
+ BOOST_TEST_MESSAGE("teardown fixture");
+ TearDown();
+ }
+
+ std::string CreateTempCsvFile()
+ {
+ std::string fileDir = boost::filesystem::temp_directory_path().c_str();
+ boost::filesystem::path p{fileDir + "/sampleFile.csv"};
+ try
+ {
+ boost::filesystem::ofstream ofs{p};
+ ofs << "airplane, bicycle , bird , \"m,o,n,k,e,y\"\n";
+ ofs << "banana, shoe, \"ice\"";
+ ofs.close();
+ } catch (std::exception &e)
+ {
+ std::cerr << "Unable to write to file at location [" << p.c_str() << "] : " << e.what() << std::endl;
+ BOOST_TEST(false);
+ }
+ return fileDir + "/sampleFile.csv";
+ }
+
+ int CheckStringsMatch(CsvRow &row, unsigned int index, std::string expectedValue)
+ {
+ return row.values.at(index).compare(expectedValue);
+ }
+
+ void TearDown()
+ {
+ RemoveCsvFile();
+ }
+
+ void RemoveCsvFile()
+ {
+ std::string fileDir = boost::filesystem::temp_directory_path().c_str();
+ std::string filePath = fileDir + "/sampleFile.csv";
+ try
+ {
+ boost::filesystem::remove(filePath);
+ }
+ catch (std::exception &e)
+ {
+ std::cerr << "Unable to delete file [" << filePath << "] : " << e.what() << std::endl;
+ BOOST_TEST(false);
+ }
+ }
+};
+
+BOOST_AUTO_TEST_SUITE(CsvReaderTest)
+
+BOOST_FIXTURE_TEST_CASE(TestParseVector, TestHelper)
+{
+ CsvReader reader;
+ std::vector<std::string> csvStrings;
+ csvStrings.reserve(2);
+ csvStrings.push_back("airplane, automobile , bird , \"c,a,t\"");
+ csvStrings.push_back("banana, shoe, \"ice\"");
+
+ std::vector<CsvRow> row = reader.ParseVector(csvStrings);
+ CsvRow row1 = row[0];
+ CsvRow row2 = row[1];
+
+ BOOST_CHECK(row.size() == 2);
+
+ BOOST_CHECK(row1.values.size() == 4);
+ BOOST_CHECK(CheckStringsMatch(row1, 0, "airplane") == 0);
+ BOOST_CHECK(CheckStringsMatch(row1, 1, "automobile") == 0);
+ BOOST_CHECK(CheckStringsMatch(row1, 2, "bird") == 0);
+ BOOST_CHECK(CheckStringsMatch(row1, 3, "c,a,t") == 0);
+
+ BOOST_CHECK(row2.values.size() == 3);
+ BOOST_CHECK(CheckStringsMatch(row2, 0, "banana") == 0);
+ BOOST_CHECK(CheckStringsMatch(row2, 1, "shoe") == 0);
+ BOOST_CHECK(CheckStringsMatch(row2, 2, "ice") == 0);
+}
+
+BOOST_FIXTURE_TEST_CASE(TestLoadingFileFromDisk, TestHelper)
+{
+ CsvReader reader;
+ std::string theFilePath = TestHelper::CreateTempCsvFile();
+
+ std::vector<CsvRow> row = reader.ParseFile(theFilePath);
+ CsvRow row1 = row[0];
+ CsvRow row2 = row[1];
+
+ BOOST_CHECK(row.size() == 2);
+
+ BOOST_CHECK(row1.values.size() == 4);
+ BOOST_CHECK(CheckStringsMatch(row1, 0, "airplane") == 0);
+ BOOST_CHECK(CheckStringsMatch(row1, 1, "bicycle") == 0);
+ BOOST_CHECK(CheckStringsMatch(row1, 2, "bird") == 0);
+ BOOST_CHECK(CheckStringsMatch(row1, 3, "m,o,n,k,e,y") == 0);
+
+ BOOST_CHECK(row2.values.size() == 3);
+ BOOST_CHECK(CheckStringsMatch(row2, 0, "banana") == 0);
+ BOOST_CHECK(CheckStringsMatch(row2, 1, "shoe") == 0);
+ BOOST_CHECK(CheckStringsMatch(row2, 2, "ice") == 0);
+}
+
+BOOST_AUTO_TEST_SUITE_END() \ No newline at end of file
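The new tests pin down the parser's semantics: fields are trimmed of surrounding whitespace, and a double-quoted field keeps its embedded commas as a single value. A minimal sketch of the API exactly as exercised above:

    armnnUtils::CsvReader reader;
    std::vector<armnnUtils::CsvRow> rows = reader.ParseVector({"airplane, bicycle , \"m,o,n,k,e,y\""});
    // rows[0].values == {"airplane", "bicycle", "m,o,n,k,e,y"}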
diff --git a/src/armnn/test/EndToEndTest.cpp b/src/armnn/test/EndToEndTest.cpp
index 5ed84d22d0..4a8a0dfd81 100644
--- a/src/armnn/test/EndToEndTest.cpp
+++ b/src/armnn/test/EndToEndTest.cpp
@@ -11,6 +11,8 @@
#include "backends/test/QuantizeHelper.hpp"
#include <boost/core/ignore_unused.hpp>
+#include <set>
+
BOOST_AUTO_TEST_SUITE(EndToEnd)
namespace
@@ -47,9 +49,10 @@ BOOST_AUTO_TEST_CASE(Unsigned8)
using namespace armnn;
// Create runtime in which test will run
- armnn::IRuntimePtr runtime(armnn::IRuntime::Create(armnn::Compute::CpuRef));
+ armnn::IRuntime::CreationOptions options;
+ armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
- // build up the structure of the network
+ // Builds up the structure of the network.
armnn::INetworkPtr net(INetwork::Create());
IConnectableLayer* input = net->AddInputLayer(0, "input");
@@ -59,7 +62,7 @@ BOOST_AUTO_TEST_CASE(Unsigned8)
input->GetOutputSlot(0).Connect(softmax->GetInputSlot(0));
softmax->GetOutputSlot(0).Connect(output->GetInputSlot(0));
- // set the tensors in the network
+ // Sets the tensors in the network.
TensorInfo inputTensorInfo(TensorShape({1, 5}), DataType::QuantisedAsymm8);
inputTensorInfo.SetQuantizationOffset(100);
inputTensorInfo.SetQuantizationScale(10000.0f);
@@ -71,17 +74,18 @@ BOOST_AUTO_TEST_CASE(Unsigned8)
softmax->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
// optimize the network
- IOptimizedNetworkPtr optNet = Optimize(*net, runtime->GetDeviceSpec());
+ std::vector<armnn::Compute> backends = {armnn::Compute::CpuRef};
+ IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
- // load it into the runtime
+ // Loads it into the runtime.
NetworkId netId;
auto error = runtime->LoadNetwork(netId, std::move(optNet));
BOOST_TEST(error == Status::Success);
- // create structures for input & output
+ // Creates structures for input & output.
std::vector<uint8_t> inputData
{
- 1, 10, 3, 200, 5 // some inputs - one of which is sufficiently larger than the others to saturate softmax
+ 1, 10, 3, 200, 5 // Some inputs - one of which is sufficiently larger than the others to saturate softmax.
};
std::vector<uint8_t> outputData(5);
@@ -94,19 +98,19 @@ BOOST_AUTO_TEST_CASE(Unsigned8)
{0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
};
- // do the inference
+ // Does the inference.
runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
- // check the results
+ // Checks the results.
BOOST_TEST(outputData[0] == 0);
BOOST_TEST(outputData[1] == 0);
BOOST_TEST(outputData[2] == 0);
- BOOST_TEST(outputData[3] == 255); // softmax has been saturated
+ BOOST_TEST(outputData[3] == 255); // softmax has been saturated.
BOOST_TEST(outputData[4] == 0);
}
template <typename T>
-void ConstantUsageTest(armnn::Compute computeDevice,
+void ConstantUsageTest(const std::vector<armnn::Compute>& computeDevice,
const armnn::TensorInfo& commonTensorInfo,
const std::vector<T>& inputData,
const std::vector<T>& constantData,
@@ -115,9 +119,10 @@ void ConstantUsageTest(armnn::Compute computeDevice,
using namespace armnn;
// Create runtime in which test will run
- armnn::IRuntimePtr runtime(armnn::IRuntime::Create(computeDevice));
+ armnn::IRuntime::CreationOptions options;
+ armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
- // build up the structure of the network
+ // Builds up the structure of the network.
INetworkPtr net(INetwork::Create());
IConnectableLayer* input = net->AddInputLayer(0);
@@ -129,19 +134,19 @@ void ConstantUsageTest(armnn::Compute computeDevice,
constant->GetOutputSlot(0).Connect(add->GetInputSlot(1));
add->GetOutputSlot(0).Connect(output->GetInputSlot(0));
- // set the tensors in the network
+ // Sets the tensors in the network.
input->GetOutputSlot(0).SetTensorInfo(commonTensorInfo);
constant->GetOutputSlot(0).SetTensorInfo(commonTensorInfo);
add->GetOutputSlot(0).SetTensorInfo(commonTensorInfo);
// optimize the network
- IOptimizedNetworkPtr optNet = Optimize(*net, runtime->GetDeviceSpec());
+ IOptimizedNetworkPtr optNet = Optimize(*net, computeDevice, runtime->GetDeviceSpec());
- // load it into the runtime
+ // Loads it into the runtime.
NetworkId netId;
runtime->LoadNetwork(netId, std::move(optNet));
- // create structures for input & output
+ // Creates structures for input & output.
std::vector<T> outputData(inputData.size());
InputTensors inputTensors
@@ -153,26 +158,26 @@ void ConstantUsageTest(armnn::Compute computeDevice,
{0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
};
- // do the inference
+ // Does the inference.
runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
- // check the results
+ // Checks the results.
BOOST_TEST(outputData == expectedOutputData);
}
-static void ConstantUsageFloat32Test(armnn::Compute computeDevice)
+static void ConstantUsageFloat32Test(const std::vector<armnn::Compute>& computeDevice)
{
const armnn::TensorInfo commonTensorInfo({ 2, 3 }, armnn::DataType::Float32);
ConstantUsageTest(computeDevice,
commonTensorInfo,
- std::vector<float>{ 1.f, 2.f, 3.f, 4.f, 5.f, 6.f }, // input
- std::vector<float>{ 6.f, 5.f, 4.f, 3.f, 2.f, 1.f }, // const input
- std::vector<float>{ 7.f, 7.f, 7.f, 7.f, 7.f, 7.f } // expected output
+ std::vector<float>{ 1.f, 2.f, 3.f, 4.f, 5.f, 6.f }, // Input.
+ std::vector<float>{ 6.f, 5.f, 4.f, 3.f, 2.f, 1.f }, // Const input.
+ std::vector<float>{ 7.f, 7.f, 7.f, 7.f, 7.f, 7.f } // Expected output.
);
}
-static void ConstantUsageUint8Test(armnn::Compute computeDevice)
+static void ConstantUsageUint8Test(const std::vector<armnn::Compute>& computeDevice)
{
armnn::TensorInfo commonTensorInfo({ 2, 3 }, armnn::DataType::QuantisedAsymm8);
@@ -184,46 +189,49 @@ static void ConstantUsageUint8Test(armnn::Compute computeDevice)
ConstantUsageTest(computeDevice,
commonTensorInfo,
- QuantizedVector<uint8_t>(scale, offset, { 1.f, 2.f, 3.f, 4.f, 5.f, 6.f }), // input
- QuantizedVector<uint8_t>(scale, offset, { 6.f, 5.f, 4.f, 3.f, 2.f, 1.f }), // const input
- QuantizedVector<uint8_t>(scale, offset, { 7.f, 7.f, 7.f, 7.f, 7.f, 7.f }) // expected output
+ QuantizedVector<uint8_t>(scale, offset, { 1.f, 2.f, 3.f, 4.f, 5.f, 6.f }), // Input.
+ QuantizedVector<uint8_t>(scale, offset, { 6.f, 5.f, 4.f, 3.f, 2.f, 1.f }), // Const input.
+ QuantizedVector<uint8_t>(scale, offset, { 7.f, 7.f, 7.f, 7.f, 7.f, 7.f }) // Expected output.
);
}
BOOST_AUTO_TEST_CASE(ConstantUsage_Ref_Float32)
{
- ConstantUsageFloat32Test(armnn::Compute::CpuRef);
+ std::vector<armnn::Compute> backends = {armnn::Compute::CpuRef};
+ ConstantUsageFloat32Test(backends);
}
#if ARMCOMPUTENEON_ENABLED
BOOST_AUTO_TEST_CASE(ConstantUsage_Neon_Float32)
{
- ConstantUsageFloat32Test(armnn::Compute::CpuAcc);
+ ConstantUsageFloat32Test({armnn::Compute::CpuAcc});
}
#endif
#if ARMCOMPUTECL_ENABLED
BOOST_AUTO_TEST_CASE(ConstantUsage_Cl_Float32)
{
- ConstantUsageFloat32Test(armnn::Compute::GpuAcc);
+ ConstantUsageFloat32Test({armnn::Compute::GpuAcc});
}
#endif
BOOST_AUTO_TEST_CASE(ConstantUsage_Ref_Uint8)
{
- ConstantUsageUint8Test(armnn::Compute::CpuRef);
+ std::vector<armnn::Compute> backends = {armnn::Compute::CpuRef};
+ ConstantUsageUint8Test(backends);
}
BOOST_AUTO_TEST_CASE(TrivialAdd)
{
- // This test was designed to match "AddTwo" in android nn/runtime/test/TestTrivialModel.cpp
+ // This test was designed to match "AddTwo" in android nn/runtime/test/TestTrivialModel.cpp.
using namespace armnn;
// Create runtime in which test will run
- armnn::IRuntimePtr runtime(armnn::IRuntime::Create(armnn::Compute::CpuRef));
+ armnn::IRuntime::CreationOptions options;
+ armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
- // build up the structure of the network
+ // Builds up the structure of the network.
armnn::INetworkPtr net(INetwork::Create());
IConnectableLayer* input1 = net->AddInputLayer(0);
@@ -235,20 +243,21 @@ BOOST_AUTO_TEST_CASE(TrivialAdd)
input2->GetOutputSlot(0).Connect(add->GetInputSlot(1));
add->GetOutputSlot(0).Connect(output->GetInputSlot(0));
- // set the tensors in the network
+ // Sets the tensors in the network.
TensorInfo tensorInfo(TensorShape({3, 4}), DataType::Float32);
input1->GetOutputSlot(0).SetTensorInfo(tensorInfo);
input2->GetOutputSlot(0).SetTensorInfo(tensorInfo);
add->GetOutputSlot(0).SetTensorInfo(tensorInfo);
// optimize the network
- IOptimizedNetworkPtr optNet = Optimize(*net, runtime->GetDeviceSpec());
+ std::vector<armnn::Compute> backends = {armnn::Compute::CpuRef};
+ IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
- // load it into the runtime
+ // Loads it into the runtime.
NetworkId netId;
runtime->LoadNetwork(netId, std::move(optNet));
- // create structures for input & output - matching android nn test
+ // Creates structures for input & output - matching android nn test.
std::vector<float> input1Data
{
1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f
@@ -269,10 +278,10 @@ BOOST_AUTO_TEST_CASE(TrivialAdd)
{0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
};
- // do the inference
+ // Does the inference.
runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
- // check the results
+ // Checks the results.
BOOST_TEST(outputData[0] == 101);
BOOST_TEST(outputData[1] == 202);
BOOST_TEST(outputData[2] == 303);
@@ -292,9 +301,10 @@ BOOST_AUTO_TEST_CASE(MultipleOutputs)
using namespace armnn;
// Create runtime in which test will run
- armnn::IRuntimePtr runtime(armnn::IRuntime::Create(armnn::Compute::CpuRef));
+ armnn::IRuntime::CreationOptions options;
+ armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
- // build up the structure of the network
+ // Builds up the structure of the network.
INetworkPtr net(INetwork::Create());
IConnectableLayer* input = net->AddInputLayer(0);
@@ -331,7 +341,7 @@ BOOST_AUTO_TEST_CASE(MultipleOutputs)
activation2->GetOutputSlot(0).Connect(output2->GetInputSlot(0));
activation3->GetOutputSlot(0).Connect(output3->GetInputSlot(0));
- // set the tensors in the network
+ // Sets the tensors in the network.
TensorInfo tensorInfo(TensorShape({ 10 }), DataType::Float32);
input->GetOutputSlot(0).SetTensorInfo(tensorInfo);
activation1->GetOutputSlot(0).SetTensorInfo(tensorInfo);
@@ -339,13 +349,14 @@ BOOST_AUTO_TEST_CASE(MultipleOutputs)
activation3->GetOutputSlot(0).SetTensorInfo(tensorInfo);
// optimize the network
- IOptimizedNetworkPtr optNet = Optimize(*net, runtime->GetDeviceSpec());
+ std::vector<armnn::Compute> backends = {armnn::Compute::CpuRef};
+ IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
- // load it into the runtime
+ // Loads it into the runtime.
NetworkId netId;
runtime->LoadNetwork(netId, std::move(optNet));
- // create structures for input & output
+ // Creates structures for input & output.
const std::vector<float> inputData{ 3.f, 5.f, 2.f, 3.f, 7.f, 0.f, -2.f, -1.f, 3.f, 3.f };
std::vector<float> output1Data(inputData.size());
@@ -363,32 +374,66 @@ BOOST_AUTO_TEST_CASE(MultipleOutputs)
{2,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 2), output3Data.data())}
};
- // do the inference
+ // Does the inference.
runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
- // check the results
+ // Checks the results.
BOOST_TEST(output1Data == std::vector<float>({ 1.f, 1.f, 1.f, 1.f, 1.f, 0.f, -1.f, -1.f, 1.f, 1.f })); // ReLu1
BOOST_TEST(output2Data == std::vector<float>({ 3.f, 5.f, 2.f, 3.f, 6.f, 0.f, 0.f, 0.f, 3.f, 3.f })); // ReLu6
BOOST_TEST(output3Data == std::vector<float>({ 3.f, 5.f, 2.f, 3.f, 5.f, 2.f, 2.f, 2.f, 3.f, 3.f })); // [2, 5]
}
#if ARMCOMPUTENEON_ENABLED
+BOOST_AUTO_TEST_CASE(FallbackToCpuRef)
+{
+ using namespace armnn;
+
+ // Create runtime in which test will run and allow fallback to CpuRef.
+ IRuntime::CreationOptions options;
+ IRuntimePtr runtime(IRuntime::Create(options));
+
+ // Builds up the structure of the network.
+ INetworkPtr net(INetwork::Create());
+
+ IConnectableLayer* input = net->AddInputLayer(0);
+
+ // This layer configuration isn't supported by CpuAcc but we allow fallback to CpuRef so it should pass.
+ NormalizationDescriptor descriptor;
+ IConnectableLayer* pooling = net->AddNormalizationLayer(descriptor);
+
+ IConnectableLayer* output = net->AddOutputLayer(0);
+
+ input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
+ pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+ input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 4 }, DataType::Float32));
+ pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 4 }, DataType::Float32));
+
+ // optimize the network
+ std::vector<Compute> backends = {Compute::CpuAcc, Compute::CpuRef};
+ IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
+
+ // Load it into the runtime. It should pass.
+ NetworkId netId;
+ BOOST_TEST(runtime->LoadNetwork(netId, std::move(optNet)) == Status::Success);
+}
+#endif // ARMCOMPUTENEON_ENABLED
+
BOOST_AUTO_TEST_CASE(ErrorOnLoadNetwork)
{
using namespace armnn;
// Create runtime in which test will run
// Note we don't allow falling back to CpuRef if an operation (excluding inputs, outputs, etc.) isn't supported
- armnn::IRuntime::CreationOptions options(armnn::Compute::CpuAcc);
- options.m_UseCpuRefAsFallback = false;
- armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
+ IRuntime::CreationOptions options;
+ IRuntimePtr runtime(IRuntime::Create(options));
// build up the structure of the network
INetworkPtr net(INetwork::Create());
IConnectableLayer* input = net->AddInputLayer(0);
- // This layer configuration isn't supported by CpuAcc and isn't allowed to fall back, so LoadNetwork will fail.
+ // This layer configuration isn't supported by CpuAcc and isn't allowed to fall back, so Optimize will return null.
NormalizationDescriptor descriptor;
IConnectableLayer* pooling = net->AddNormalizationLayer(descriptor);
@@ -401,12 +446,9 @@ BOOST_AUTO_TEST_CASE(ErrorOnLoadNetwork)
pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 4 }, DataType::Float32));
// optimize the network
- IOptimizedNetworkPtr optNet = Optimize(*net, runtime->GetDeviceSpec());
-
- // Load it into the runtime. It should fail.
- NetworkId netId;
- BOOST_TEST(runtime->LoadNetwork(netId, std::move(optNet)) == Status::Failure);
+ std::vector<Compute> backends = {Compute::CpuAcc};
+ IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
+ BOOST_CHECK(!optNet);
}
-#endif // ARMCOMPUTENEON_ENABLED
BOOST_AUTO_TEST_SUITE_END()
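For reference, the pattern these updated tests converge on, an explicit backend preference list passed to Optimize(), looks like this in isolation. This is a minimal sketch assuming the 18.08 public headers; the helper name is illustrative:

    #include "armnn/ArmNN.hpp"
    #include <vector>

    // Sketch: optimizes a built network against an ordered list of backends.
    armnn::IOptimizedNetworkPtr OptimizeForBackends(const armnn::INetwork& net, armnn::IRuntime& runtime)
    {
        // Backends are tried in order of preference; appending CpuRef lets layers
        // unsupported by CpuAcc fall back to the reference implementation.
        std::vector<armnn::Compute> backends = { armnn::Compute::CpuAcc, armnn::Compute::CpuRef };

        // If some layer is supported by none of the listed backends, Optimize()
        // returns a null pointer rather than failing later in LoadNetwork().
        return armnn::Optimize(net, backends, runtime.GetDeviceSpec());
    }

The FallbackToCpuRef and ErrorOnLoadNetwork cases above pin this behaviour down from both sides: with CpuRef in the list the network loads, without it Optimize() returns null.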
diff --git a/src/armnn/test/FP16SupportTest.cpp b/src/armnn/test/FP16SupportTest.cpp
new file mode 100644
index 0000000000..cc3b60369c
--- /dev/null
+++ b/src/armnn/test/FP16SupportTest.cpp
@@ -0,0 +1,114 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "armnn/ArmNN.hpp"
+#include "armnn/Descriptors.hpp"
+#include "Graph.hpp"
+#include "armnn/IRuntime.hpp"
+#include "armnn/INetwork.hpp"
+#include "Optimizer.hpp"
+#include "backends/CpuTensorHandle.hpp"
+#include "backends/test/QuantizeHelper.hpp"
+
+#include <boost/core/ignore_unused.hpp>
+#include <boost/test/unit_test.hpp>
+
+#include <Half.hpp>
+#include <set>
+
+using namespace armnn;
+
+BOOST_AUTO_TEST_SUITE(Fp16Support)
+
+BOOST_AUTO_TEST_CASE(Fp16DataTypeSupport)
+{
+ Graph graph;
+
+ Layer* const inputLayer1 = graph.AddLayer<InputLayer>(1, "input1");
+ Layer* const inputLayer2 = graph.AddLayer<InputLayer>(2, "input2");
+
+ Layer* const additionLayer = graph.AddLayer<AdditionLayer>("addition");
+ Layer* const outputLayer = graph.AddLayer<armnn::OutputLayer>(0, "output");
+
+ TensorInfo fp16TensorInfo({1, 2, 3, 5}, armnn::DataType::Float16);
+ inputLayer1->GetOutputSlot(0).Connect(additionLayer->GetInputSlot(0));
+ inputLayer2->GetOutputSlot(0).Connect(additionLayer->GetInputSlot(1));
+ additionLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
+
+ inputLayer1->GetOutputSlot().SetTensorInfo(fp16TensorInfo);
+ inputLayer2->GetOutputSlot().SetTensorInfo(fp16TensorInfo);
+ additionLayer->GetOutputSlot().SetTensorInfo(fp16TensorInfo);
+
+ BOOST_CHECK(inputLayer1->GetOutputSlot(0).GetTensorInfo().GetDataType() == armnn::DataType::Float16);
+ BOOST_CHECK(inputLayer2->GetOutputSlot(0).GetTensorInfo().GetDataType() == armnn::DataType::Float16);
+ BOOST_CHECK(additionLayer->GetOutputSlot(0).GetTensorInfo().GetDataType() == armnn::DataType::Float16);
+
+}
+
+BOOST_AUTO_TEST_CASE(Fp16AdditionTest)
+{
+ using namespace half_float::literal;
+ // Create runtime in which test will run
+ IRuntime::CreationOptions options;
+ IRuntimePtr runtime(IRuntime::Create(options));
+
+ // Builds up the structure of the network.
+ INetworkPtr net(INetwork::Create());
+
+ IConnectableLayer* inputLayer1 = net->AddInputLayer(0);
+ IConnectableLayer* inputLayer2 = net->AddInputLayer(1);
+ IConnectableLayer* additionLayer = net->AddAdditionLayer();
+ IConnectableLayer* outputLayer = net->AddOutputLayer(0);
+
+ inputLayer1->GetOutputSlot(0).Connect(additionLayer->GetInputSlot(0));
+ inputLayer2->GetOutputSlot(0).Connect(additionLayer->GetInputSlot(1));
+ additionLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
+
+ // Change the tensor info to Float16.
+ TensorInfo fp16TensorInfo(TensorShape({4}), DataType::Float16);
+ inputLayer1->GetOutputSlot(0).SetTensorInfo(fp16TensorInfo);
+ inputLayer2->GetOutputSlot(0).SetTensorInfo(fp16TensorInfo);
+ additionLayer->GetOutputSlot(0).SetTensorInfo(fp16TensorInfo);
+
+ // optimize the network
+ std::vector<Compute> backends = {Compute::GpuAcc};
+ IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
+
+ // Loads it into the runtime.
+ NetworkId netId;
+ runtime->LoadNetwork(netId, std::move(optNet));
+
+ std::vector<Half> input1Data
+ {
+ 1.0_h, 2.0_h, 3.0_h, 4.0_h
+ };
+
+ std::vector<Half> input2Data
+ {
+ 100.0_h, 200.0_h, 300.0_h, 400.0_h
+ };
+
+ InputTensors inputTensors
+ {
+ {0,ConstTensor(runtime->GetInputTensorInfo(netId, 0), input1Data.data())},
+ {1,ConstTensor(runtime->GetInputTensorInfo(netId, 1), input2Data.data())}
+ };
+
+ std::vector<Half> outputData(input1Data.size());
+ OutputTensors outputTensors
+ {
+ {0,Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
+ };
+
+ // Does the inference.
+ runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
+
+ // Checks the results.
+ BOOST_TEST(outputData == std::vector<Half>({ 101.0_h, 202.0_h, 303.0_h, 404.0_h})); // Add
+}
+
+BOOST_AUTO_TEST_SUITE_END() \ No newline at end of file
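Fp16AdditionTest relies on half_float's user-defined literals and implicit conversions. As a standalone illustration of how armnn::Half behaves outside a network (a sketch assuming only the Half.hpp header used by the test):

    #include <Half.hpp>
    #include <iostream>
    #include <vector>

    int main()
    {
        using namespace half_float::literal;

        std::vector<armnn::Half> values { 100.0_h, 200.0_h, 300.0_h, 400.0_h };

        float sum = 0.0f;
        for (armnn::Half h : values)
        {
            sum += h; // Half converts implicitly to float for arithmetic.
        }

        std::cout << "sum = " << sum << std::endl; // Prints: sum = 1000
        return 0;
    }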
diff --git a/src/armnn/test/FloatingPointConverterTest.cpp b/src/armnn/test/FloatingPointConverterTest.cpp
new file mode 100644
index 0000000000..d936e801ef
--- /dev/null
+++ b/src/armnn/test/FloatingPointConverterTest.cpp
@@ -0,0 +1,58 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "FloatingPointConverter.hpp"
+#include "Half.hpp"
+
+#include <malloc.h>
+#include <iostream>
+#include <algorithm>
+
+#include <boost/test/unit_test.hpp>
+
+BOOST_AUTO_TEST_SUITE(TestFPConversion)
+
+BOOST_AUTO_TEST_CASE(TestConvertFp32ToFp16)
+{
+ using namespace half_float::literal;
+
+ float floatArray[] = { 1.0f, 2.0f, 0.5f, 3.1f, 2.4f,
+ 5.666f, 6.444f, 7.1f, 432.121f, 12.22f };
+ size_t numFloats = sizeof(floatArray) / sizeof(floatArray[0]);
+ std::vector<armnn::Half> convertedBuffer(numFloats, 0.0_h);
+
+ armnnUtils::FloatingPointConverter::ConvertFloat32To16(floatArray, numFloats, convertedBuffer.data());
+
+ for (size_t i = 0; i < numFloats; i++)
+ {
+ armnn::Half expected(floatArray[i]);
+ armnn::Half actual = convertedBuffer[i];
+ BOOST_CHECK_EQUAL(expected, actual);
+
+ float convertedHalf = actual;
+ BOOST_CHECK_CLOSE(floatArray[i], convertedHalf, 0.07);
+ }
+}
+
+BOOST_AUTO_TEST_CASE(TestConvertFp16ToFp32)
+{
+ using namespace half_float::literal;
+
+ armnn::Half halfArray[] = { 1.0_h, 2.0_h, 0.5_h, 3.1_h, 2.4_h,
+ 5.666_h, 6.444_h, 7.1_h, 432.121_h, 12.22_h };
+ size_t numFloats = sizeof(halfArray) / sizeof(halfArray[0]);
+ std::vector<float> convertedBuffer(numFloats, 0.0f);
+
+ armnnUtils::FloatingPointConverter::ConvertFloat16To32(halfArray, numFloats, convertedBuffer.data());
+
+ for (size_t i = 0; i < numFloats; i++)
+ {
+ float expected(halfArray[i]);
+ float actual = convertedBuffer[i];
+ BOOST_CHECK_EQUAL(expected, actual);
+ }
+}
+
+BOOST_AUTO_TEST_SUITE_END()
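Note the asymmetry the two tests encode: widening fp16 to fp32 is always exact, while narrowing fp32 to fp16 generally rounds (hence the BOOST_CHECK_CLOSE tolerance above). A sketch of a lossless round trip using values that are exactly representable in fp16, assuming the armnnUtils API exercised by the tests:

    #include "FloatingPointConverter.hpp"
    #include "Half.hpp"
    #include <cassert>
    #include <vector>

    int main()
    {
        // Small powers of two are exactly representable in fp16.
        const float input[] = { 1.0f, 0.5f, 2.0f, 4.0f };
        const size_t count = sizeof(input) / sizeof(input[0]);

        std::vector<armnn::Half> asHalf(count);
        armnnUtils::FloatingPointConverter::ConvertFloat32To16(input, count, asHalf.data());

        std::vector<float> roundTripped(count, 0.0f);
        armnnUtils::FloatingPointConverter::ConvertFloat16To32(asHalf.data(), count, roundTripped.data());

        for (size_t i = 0; i < count; ++i)
        {
            assert(input[i] == roundTripped[i]); // Lossless for these values.
        }
        return 0;
    }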
diff --git a/src/armnn/test/GraphTests.cpp b/src/armnn/test/GraphTests.cpp
index 99789e4737..ccbcb8b00b 100644
--- a/src/armnn/test/GraphTests.cpp
+++ b/src/armnn/test/GraphTests.cpp
@@ -15,7 +15,7 @@
#include <boost/cast.hpp>
-/// checks that first comes before second in the order
+/// Checks that first comes before second in the order.
bool CheckOrder(const armnn::Graph& graph, const armnn::Layer* first, const armnn::Layer* second)
{
graph.Print();
@@ -69,7 +69,7 @@ BOOST_AUTO_TEST_CASE(TopologicalSort)
armnn::Layer* const layerE = GetFirstLayerWithName(graph, "layerE");
armnn::Layer* const layerD = GetFirstLayerWithName(graph, "layerD");
- // simple graph which branches and rejoins
+ // Simple graph which branches and rejoins.
// A
// / \'
// D E
@@ -92,7 +92,7 @@ BOOST_AUTO_TEST_CASE(TopologicalSort)
BOOST_TEST(CheckOrder(graph, layerB, layerC));
}
-BOOST_AUTO_TEST_CASE(InsertNewLayer)
+BOOST_AUTO_TEST_CASE(InsertNewLayerBefore)
{
armnn::Graph graph;
armnn::TensorInfo tensorInfo({ 1, 1, 1, 1 }, armnn::DataType::Float32);
@@ -128,7 +128,7 @@ BOOST_AUTO_TEST_CASE(InsertNewLayer)
layerC->GetOutputSlot(0).Connect(layerD->GetInputSlot(1));
layerD->GetOutputSlot(0).Connect(layerO->GetInputSlot(0));
- // check order is valid
+ // Checks order is valid.
BOOST_TEST(CheckOrder(graph, layerA, layerB));
BOOST_TEST(CheckOrder(graph, layerA, layerC));
BOOST_TEST(CheckOrder(graph, layerB, layerD));
@@ -147,7 +147,7 @@ BOOST_AUTO_TEST_CASE(InsertNewLayer)
armnn::Layer* const layerE = GetFirstLayerWithName(graph, "layerE");
- // check order is valid
+ // Checks order is valid.
BOOST_TEST(CheckOrder(graph, layerA, layerB));
BOOST_TEST(CheckOrder(graph, layerA, layerC));
BOOST_TEST(CheckOrder(graph, layerB, layerD));
@@ -169,7 +169,7 @@ BOOST_AUTO_TEST_CASE(InsertNewLayer)
armnn::Layer* const layerF = GetFirstLayerWithName(graph, "layerF");
- // check order is valid
+ // Checks order is valid.
BOOST_TEST(CheckOrder(graph, layerA, layerB));
BOOST_TEST(CheckOrder(graph, layerA, layerF));
BOOST_TEST(CheckOrder(graph, layerF, layerC));
@@ -178,6 +178,93 @@ BOOST_AUTO_TEST_CASE(InsertNewLayer)
BOOST_TEST(CheckOrder(graph, layerE, layerD));
}
+BOOST_AUTO_TEST_CASE(InsertNewLayerAfter)
+{
+ armnn::Graph graph;
+ armnn::TensorInfo tensorInfo({ 1, 1, 1, 1 }, armnn::DataType::Float32);
+
+ std::vector<armnn::Layer*> order;
+
+ armnn::ActivationDescriptor activationDefaults;
+ BOOST_CHECK_NO_THROW(graph.AddLayer<armnn::InputLayer>(0, "layerA"));
+ BOOST_CHECK_NO_THROW(graph.AddLayer<armnn::ActivationLayer>(activationDefaults, "layerB"));
+ BOOST_CHECK_NO_THROW(graph.AddLayer<armnn::ActivationLayer>(activationDefaults, "layerC"));
+ BOOST_CHECK_NO_THROW(graph.AddLayer<armnn::AdditionLayer>("layerD"));
+ BOOST_CHECK_NO_THROW(graph.AddLayer<armnn::OutputLayer>(0, "output"));
+
+ armnn::Layer* const layerA = GetFirstLayerWithName(graph, "layerA");
+ armnn::Layer* const layerB = GetFirstLayerWithName(graph, "layerB");
+ armnn::Layer* const layerC = GetFirstLayerWithName(graph, "layerC");
+ armnn::Layer* const layerD = GetFirstLayerWithName(graph, "layerD");
+ armnn::Layer* const layerO = GetFirstLayerWithName(graph, "output");
+
+ // A
+ // / \'
+ // B C
+ // \ /
+ // D
+ layerA->GetOutputSlot(0).SetTensorInfo(tensorInfo);
+ layerB->GetOutputSlot(0).SetTensorInfo(tensorInfo);
+ layerC->GetOutputSlot(0).SetTensorInfo(tensorInfo);
+ layerD->GetOutputSlot(0).SetTensorInfo(tensorInfo);
+
+ layerA->GetOutputSlot(0).Connect(layerB->GetInputSlot(0));
+ layerA->GetOutputSlot(0).Connect(layerC->GetInputSlot(0));
+ layerB->GetOutputSlot(0).Connect(layerD->GetInputSlot(0));
+ layerC->GetOutputSlot(0).Connect(layerD->GetInputSlot(1));
+ layerD->GetOutputSlot(0).Connect(layerO->GetInputSlot(0));
+
+ // Checks order is valid.
+ BOOST_TEST(CheckOrder(graph, layerA, layerB));
+ BOOST_TEST(CheckOrder(graph, layerA, layerC));
+ BOOST_TEST(CheckOrder(graph, layerB, layerD));
+ BOOST_TEST(CheckOrder(graph, layerC, layerD));
+
+ // A
+ // / \'
+ // B C
+ // \ |
+ // \ E
+ // \|
+ // D
+ BOOST_CHECK_NO_THROW(graph.InsertNewLayer<armnn::ActivationLayer>(layerC->GetOutputSlot(),
+ activationDefaults,
+ "layerE"));
+
+ armnn::Layer* const layerE = GetFirstLayerWithName(graph, "layerE");
+
+ // Checks order is valid.
+ BOOST_TEST(CheckOrder(graph, layerA, layerB));
+ BOOST_TEST(CheckOrder(graph, layerA, layerC));
+ BOOST_TEST(CheckOrder(graph, layerB, layerD));
+ BOOST_TEST(CheckOrder(graph, layerC, layerE));
+ BOOST_TEST(CheckOrder(graph, layerE, layerD));
+
+
+ // A
+ // |
+ // F
+ // / \'
+ // B C
+ // \ |
+ // \ E
+ // \ /
+ // D
+ BOOST_CHECK_NO_THROW(graph.InsertNewLayer<armnn::ActivationLayer>(layerA->GetOutputSlot(),
+ activationDefaults,
+ "layerF"));
+
+ armnn::Layer* const layerF = GetFirstLayerWithName(graph, "layerF");
+
+ // Checks order is valid.
+ BOOST_TEST(CheckOrder(graph, layerA, layerF));
+ BOOST_TEST(CheckOrder(graph, layerF, layerB));
+ BOOST_TEST(CheckOrder(graph, layerF, layerC));
+ BOOST_TEST(CheckOrder(graph, layerB, layerD));
+ BOOST_TEST(CheckOrder(graph, layerC, layerE));
+ BOOST_TEST(CheckOrder(graph, layerE, layerD));
+}
+
namespace
{
using Edge = std::pair<const armnn::Layer*, const armnn::Layer*>;
@@ -210,7 +297,7 @@ static void TestGraphAfterAddingCopyLayers(const armnn::Graph& graph, const armn
std::vector<Edge> origEdges = GetEdgeList(origGraph);
std::vector<Edge> newEdges = GetEdgeList(graph);
- // Adding copy layers should not produce any duplicate edges
+ // Adding copy layers should not produce any duplicate edges.
{
std::vector<Edge> sortedNewEdges = newEdges;
std::sort(sortedNewEdges.begin(), sortedNewEdges.end());
@@ -219,7 +306,7 @@ static void TestGraphAfterAddingCopyLayers(const armnn::Graph& graph, const armn
BOOST_CHECK_MESSAGE(last == sortedNewEdges.end(), "New graph contains duplicate edges!");
}
- // Each new edge must be tested
+ // Each new edge must be tested.
while (!newEdges.empty())
{
const Edge edge = std::move(newEdges.back());
@@ -251,7 +338,7 @@ static void TestGraphAfterAddingCopyLayers(const armnn::Graph& graph, const armn
BOOST_TEST((srcLayer->GetComputeDevice() == dstLayer->GetComputeDevice()));
}
- // Mark edge in original graph as observed (by deleting it)
+ // Marks edge in original graph as observed (by deleting it).
origEdges.erase(origEdges.begin() + originalEdge);
}
else
@@ -288,7 +375,7 @@ static void TestGraphAfterAddingCopyLayers(const armnn::Graph& graph, const armn
const armnn::Layer* copyLayer = srcLayerInOrigGraph ? edge.second : edge.first;
const armnn::Layer* nonCopyLayer = srcLayerInOrigGraph ? srcLayer : dstLayer;
- // Find all edges connecting the copy layer to other layers
+ // Finds all edges connecting the copy layer to other layers.
std::vector<Edge> adjEdges;
auto it = newEdges.begin();
while (it != newEdges.end())
@@ -298,7 +385,7 @@ static void TestGraphAfterAddingCopyLayers(const armnn::Graph& graph, const armn
{
adjEdges.push_back(newEdge);
- // Since the adjacent edge is immediately tested below, no need to consider it afterwards
+ // Since the adjacent edge is immediately tested below, there is no need to consider it afterwards.
it = newEdges.erase(it);
}
else
@@ -315,10 +402,10 @@ static void TestGraphAfterAddingCopyLayers(const armnn::Graph& graph, const armn
continue;
}
- // Test adjacent edges now
+ // Tests adjacent edges now.
for (const Edge& adjEdge : adjEdges)
{
- // The adjacent edge must connect the copy layer to another layer
+ // The adjacent edge must connect the copy layer to another layer.
const armnn::Layer* adjLayer = srcLayerInOrigGraph ? adjEdge.second : adjEdge.first;
if (!adjLayer)
@@ -329,10 +416,10 @@ static void TestGraphAfterAddingCopyLayers(const armnn::Graph& graph, const armn
continue;
}
- // Both layers must have different compute devices
+ // Both layers must have different compute devices.
BOOST_TEST((nonCopyLayer->GetComputeDevice() != adjLayer->GetComputeDevice()));
- // There must exist an edge connecting both layers directly in the original graph
+ // There must exist an edge connecting both layers directly in the original graph.
{
const armnn::Layer* origEdgeN1 = srcLayerInOrigGraph ? nonCopyLayer : adjLayer;
const armnn::Layer* origEdgeN2 = srcLayerInOrigGraph ? adjLayer : nonCopyLayer;
@@ -434,7 +521,7 @@ BOOST_FIXTURE_TEST_CASE(AddCopyLayersSeveralTimes, CopyLayersFixture)
{
m_Graph.AddCopyLayers();
- // Calling AddCopyLayers() several times should not change the connections
+ // Calling AddCopyLayers() several times should not change the connections.
const std::vector<Edge> edges = GetEdgeList(m_Graph);
for (int i = 0; i < 4; ++i)
{
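The rename of InsertNewLayer to InsertNewLayerBefore, together with the new InsertNewLayerAfter case, mirrors the two Graph::InsertNewLayer anchor points: an InputSlot (splice in front of its owning layer, on that single connection) and an OutputSlot (splice after the layer, inheriting all of its outgoing connections, as layerF does above). A sketch of the distinction; the InputSlot overload is inferred from the renamed test and should be treated as an assumption:

    #include "Graph.hpp"
    #include "armnn/Descriptors.hpp"

    void SketchInsertOverloads(armnn::Graph& graph, armnn::Layer* layerA, armnn::Layer* layerB)
    {
        armnn::ActivationDescriptor defaults;

        // Anchored on an input slot: A -> new -> B, affecting only that connection.
        graph.InsertNewLayer<armnn::ActivationLayer>(layerB->GetInputSlot(0), defaults, "before");

        // Anchored on an output slot: the new layer takes over every connection
        // previously made from layerA's output.
        graph.InsertNewLayer<armnn::ActivationLayer>(layerA->GetOutputSlot(0), defaults, "after");
    }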
diff --git a/src/armnn/test/InstrumentTests.cpp b/src/armnn/test/InstrumentTests.cpp
new file mode 100644
index 0000000000..a219b39b0d
--- /dev/null
+++ b/src/armnn/test/InstrumentTests.cpp
@@ -0,0 +1,62 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+#include <boost/test/unit_test.hpp>
+
+#include "WallClockTimer.hpp"
+
+#include <chrono>
+#include <thread>
+
+using namespace armnn;
+
+BOOST_AUTO_TEST_SUITE(Instruments)
+
+BOOST_AUTO_TEST_CASE(WallClockTimerInMilliseconds)
+{
+ WallClockTimer wallClockTimer;
+
+ BOOST_CHECK_EQUAL(wallClockTimer.GetName(), "WallClockTimer");
+
+ // start the timer
+ wallClockTimer.Start();
+
+ // wait for 10 milliseconds
+ std::this_thread::sleep_for(std::chrono::milliseconds(10));
+
+ // stop the timer
+ wallClockTimer.Stop();
+
+ BOOST_CHECK_EQUAL(wallClockTimer.GetMeasurements().front().m_Name, WallClockTimer::WALL_CLOCK_TIME);
+
+ // check that the WallClockTimer measurement is >= 10 milliseconds
+ BOOST_CHECK_GE(wallClockTimer.GetMeasurements().front().m_Value, std::chrono::milliseconds(10).count());
+}
+
+BOOST_AUTO_TEST_CASE(WallClockTimerInNanoseconds)
+{
+ WallClockTimer wallClockTimer;
+
+ BOOST_CHECK_EQUAL(wallClockTimer.GetName(), "WallClockTimer");
+
+ // start the timer
+ wallClockTimer.Start();
+
+ // wait for 500 nanoseconds - 0.0005 milliseconds
+ std::this_thread::sleep_for(std::chrono::nanoseconds(500));
+
+ // stop the timer
+ wallClockTimer.Stop();
+
+ BOOST_CHECK_EQUAL(wallClockTimer.GetMeasurements().front().m_Name, WallClockTimer::WALL_CLOCK_TIME);
+
+ // delta is 0.0005 milliseconds
+ const auto delta =
+ std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(std::chrono::nanoseconds(500));
+
+ // check that the WallClockTimer measurement is >= 0.0005 milliseconds
+ BOOST_CHECK_GE(wallClockTimer.GetMeasurements().front().m_Value, delta.count());
+}
+
+BOOST_AUTO_TEST_SUITE_END()
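Outside the test harness, the Instrument contract these cases exercise is used the same way: bracket the region of interest with Start()/Stop(), then read back named measurements. A minimal sketch assuming the WallClockTimer API shown above:

    #include "WallClockTimer.hpp"
    #include <chrono>
    #include <iostream>
    #include <thread>

    int main()
    {
        armnn::WallClockTimer timer;

        timer.Start();
        std::this_thread::sleep_for(std::chrono::milliseconds(5)); // Stand-in for real work.
        timer.Stop();

        // The wall clock timer reports a single measurement, in milliseconds.
        for (const armnn::Measurement& m : timer.GetMeasurements())
        {
            std::cout << m.m_Name << " = " << m.m_Value << " ms" << std::endl;
        }
        return 0;
    }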
diff --git a/src/armnn/test/JsonPrinterTests.cpp b/src/armnn/test/JsonPrinterTests.cpp
new file mode 100644
index 0000000000..28cbfd61a5
--- /dev/null
+++ b/src/armnn/test/JsonPrinterTests.cpp
@@ -0,0 +1,378 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+#include <boost/test/unit_test.hpp>
+#include <boost/algorithm/string.hpp>
+#include <boost/lexical_cast.hpp>
+#include <stack>
+#include <string>
+#include <vector>
+#include <sstream>
+
+#include "Profiling.hpp"
+#include "armnn/Descriptors.hpp"
+#include "armnn/IRuntime.hpp"
+#include "armnn/INetwork.hpp"
+#include "backends/test/ClContextControlFixture.hpp"
+#include "backends/ClWorkloadFactory.hpp"
+
+BOOST_FIXTURE_TEST_SUITE(JsonPrinterTests, ClProfilingContextControlFixture)
+
+bool AreMatchingPair(const char opening, const char closing)
+{
+ return (opening == '{' && closing == '}') || (opening == '[' && closing == ']');
+}
+
+bool AreParenthesesMatching(const std::string& exp)
+{
+ std::stack<char> expStack;
+ for (size_t i = 0; i < exp.length(); ++i)
+ {
+ if (exp[i] == '{' || exp[i] == '[')
+ {
+ expStack.push(exp[i]);
+ }
+ else if (exp[i] == '}' || exp[i] == ']')
+ {
+ if (expStack.empty() || !AreMatchingPair(expStack.top(), exp[i]))
+ {
+ return false;
+ }
+ else
+ {
+ expStack.pop();
+ }
+ }
+ }
+ return expStack.empty();
+}
+
+std::vector<double> ExtractMeasurements(const std::string& exp)
+{
+ std::vector<double> numbers;
+ bool inArray = false;
+ std::string numberString;
+ for (size_t i = 0; i < exp.size(); ++i)
+ {
+ if (exp[i] == '[')
+ {
+ inArray = true;
+ }
+ else if (exp[i] == ']' && inArray)
+ {
+ try
+ {
+ boost::trim_if(numberString, boost::is_any_of("\t,\n"));
+ numbers.push_back(std::stod(numberString));
+ }
+ catch (std::invalid_argument const& e)
+ {
+ BOOST_FAIL("Could not convert measurements to double: " + numberString);
+ }
+
+ numberString.clear();
+ inArray = false;
+ }
+ else if (exp[i] == ',' && inArray)
+ {
+ try
+ {
+ boost::trim_if(numberString, boost::is_any_of("\t,\n"));
+ numbers.push_back(std::stod(numberString));
+ }
+ catch (std::invalid_argument const& e)
+ {
+ BOOST_FAIL("Could not convert measurements to double: " + numberString);
+ }
+ numberString.clear();
+ }
+ else if (exp[i] != '[' && inArray && exp[i] != ',' && exp[i] != ' ')
+ {
+ numberString += exp[i];
+ }
+ }
+ return numbers;
+}
+
+std::vector<std::string> ExtractSections(const std::string& exp)
+{
+ std::vector<std::string> sections;
+
+ std::stack<size_t> s;
+ for (size_t i = 0; i < exp.size(); i++)
+ {
+ if (exp.at(i) == '{')
+ {
+ s.push(i);
+ }
+ else if (exp.at(i) == '}')
+ {
+ size_t from = s.top();
+ s.pop();
+ sections.push_back(exp.substr(from, i - from + 1));
+ }
+ }
+
+ return sections;
+}
+
+std::string SoftmaxProfilerTestSetupHelper(const std::vector<armnn::Compute>& backends)
+{
+ using namespace armnn;
+
+ BOOST_CHECK(!backends.empty());
+
+ ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
+
+ // Create runtime in which test will run
+ IRuntime::CreationOptions options;
+ options.m_EnableGpuProfiling = backends.front() == armnn::Compute::GpuAcc;
+ IRuntimePtr runtime(IRuntime::Create(options));
+
+ // build up the structure of the network
+ INetworkPtr net(INetwork::Create());
+
+ IConnectableLayer* input = net->AddInputLayer(0, "input");
+ IConnectableLayer* softmax = net->AddSoftmaxLayer(SoftmaxDescriptor(), "softmax");
+ IConnectableLayer* output = net->AddOutputLayer(0, "output");
+
+ input->GetOutputSlot(0).Connect(softmax->GetInputSlot(0));
+ softmax->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+ // set the tensors in the network
+ TensorInfo inputTensorInfo(TensorShape({1, 5}), DataType::QuantisedAsymm8);
+ inputTensorInfo.SetQuantizationOffset(100);
+ inputTensorInfo.SetQuantizationScale(10000.0f);
+ input->GetOutputSlot(0).SetTensorInfo(inputTensorInfo);
+
+ TensorInfo outputTensorInfo(TensorShape({1, 5}), DataType::QuantisedAsymm8);
+ outputTensorInfo.SetQuantizationOffset(0);
+ outputTensorInfo.SetQuantizationScale(1.0f / 256.0f);
+ softmax->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
+
+ // optimize the network
+ IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
+ if(!optNet)
+ {
+ BOOST_FAIL("Error occurred during Optimization, Optimize() returned nullptr.");
+ }
+ // load it into the runtime
+ NetworkId netId;
+ auto error = runtime->LoadNetwork(netId, std::move(optNet));
+ BOOST_TEST(error == Status::Success);
+
+ // create structures for input & output
+ std::vector<uint8_t> inputData
+ {
+ 1, 10, 3, 200, 5
+ // One of the inputs is sufficiently larger than the others to saturate the softmax.
+ };
+ std::vector<uint8_t> outputData(5);
+
+ armnn::InputTensors inputTensors
+ {
+ {0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())}
+ };
+ armnn::OutputTensors outputTensors
+ {
+ {0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
+ };
+
+ runtime->GetProfiler(netId)->EnableProfiling(true);
+
+ // do the inferences
+ runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
+ runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
+ runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
+
+ // retrieve the Profiler.Print() output
+ std::stringstream ss;
+ profilerManager.GetProfiler()->Print(ss);
+
+ return ss.str();
+}
+
+void SoftmaxProfilerTestValidationHelper(std::string& result, const std::string& testData)
+{
+ // ensure all measurements are greater than zero
+ std::vector<double> measurementsVector = ExtractMeasurements(result);
+ BOOST_CHECK(!measurementsVector.empty());
+
+ // check sections contain raw and unit tags
+ // first ensure Parenthesis are balanced
+ if (AreParenthesesMatching(result))
+ {
+ // remove parent sections that will not have raw or unit tag
+ std::vector<std::string> sectionVector = ExtractSections(result);
+ for (size_t i = 0; i < sectionVector.size(); ++i)
+ {
+ if (boost::contains(sectionVector[i], "\"ArmNN\":")
+ || boost::contains(sectionVector[i], "\"inference_measurements\":"))
+ {
+ sectionVector.erase(sectionVector.begin() + static_cast<int>(i));
+ }
+ }
+ BOOST_CHECK(!sectionVector.empty());
+
+ BOOST_CHECK(std::all_of(sectionVector.begin(), sectionVector.end(),
+ [](std::string i) { return boost::contains(i, "\"raw\":"); }));
+
+ BOOST_CHECK(std::all_of(sectionVector.begin(), sectionVector.end(),
+ [](std::string i) { return boost::contains(i, "\"unit\":"); }));
+ }
+
+ // remove the time measurements as they vary from test to test
+ result.erase(std::remove_if(result.begin(), result.end(),
+ [](char c) { return c == '.'; }), result.end());
+ result.erase(std::remove_if(result.begin(), result.end(), &isdigit), result.end());
+ result.erase(std::remove_if(result.begin(), result.end(),
+ [](char c) { return c == '\t'; }), result.end());
+
+ BOOST_CHECK(boost::contains(result, "ArmNN"));
+ BOOST_CHECK(boost::contains(result, "inference_measurements"));
+ BOOST_CHECK(boost::contains(result, "layer_measurements"));
+ BOOST_CHECK_EQUAL(result, testData);
+
+ // ensure no spare parenthesis present in print output
+ BOOST_CHECK(AreParenthesesMatching(result));
+}
+
+void SetupSoftmaxProfilerWithSpecifiedBackendsAndValidateJSONPrinterResult(
+ const std::vector<armnn::Compute>& backends)
+{
+ // setup the test fixture and obtain JSON Printer result
+ std::string result = SoftmaxProfilerTestSetupHelper(backends);
+
+ std::string backend = "Ref";
+ std::string changeLine31 = "\n},\n\"CopyMemGeneric_Execute\": {";
+ std::string changeLine39 = "ms\"";
+ std::string changeLine40;
+ std::string changeLine45;
+
+ switch(backends[0]) {
+ case armnn::Compute::GpuAcc: backend = "Cl";
+ changeLine31 = ",\n\"OpenClKernelTimer/: softmax_layer_max_shift_exp_sum_quantized_serial GWS[,,]\": {";
+ changeLine39 = R"(us"
+},
+"OpenClKernelTimer/: softmax_layer_norm_quantized GWS[,,]": {
+"raw": [
+,
+,
+
+],
+"unit": "us")";
+
+ changeLine40 = R"(
+},
+"CopyMemGeneric_Execute": {
+"raw": [
+,
+,
+
+],
+"unit": "ms")";
+ changeLine45 = "}\n";
+ break;
+ case armnn::Compute::CpuAcc: backend = "Neon";
+ changeLine31 = ",\n\"NeonKernelTimer/: NEFillBorderKernel\": {";
+ changeLine39 = R"(ms"
+},
+"NeonKernelTimer/: NELogitsDMaxKernel": {
+"raw": [
+,
+,
+
+],
+"unit": "ms"
+},
+"NeonKernelTimer/: NELogitsDSoftmaxKernel": {
+"raw": [
+,
+,
+
+],
+"unit": "ms")";
+ changeLine40 = R"(
+},
+"CopyMemGeneric_Execute": {
+"raw": [
+,
+,
+
+],
+"unit": "ms")";
+ changeLine45 = "}\n";
+ break;
+ default:
+ break;
+ }
+ std::string testData = R"({
+"ArmNN": {
+"inference_measurements": {
+"raw": [
+,
+,
+
+],
+"unit": "ms",
+"layer_measurements": {
+"raw": [
+,
+,
+
+],
+"unit": "ms",
+"CopyMemGeneric_Execute": {
+"raw": [
+,
+,
+
+],
+"unit": "ms"
+},
+")" + backend + R"(SoftmaxUintWorkload_Execute": {
+"raw": [
+,
+,
+
+],
+"unit": "ms")" + changeLine31 + R"(
+"raw": [
+,
+,
+
+],
+"unit": ")" + changeLine39 + R"(
+})" + changeLine40 + R"(
+}
+}
+}
+}
+)" + changeLine45 + R"()";
+
+ // validate the JSON Printer result
+ SoftmaxProfilerTestValidationHelper(result, testData);
+}
+
+BOOST_AUTO_TEST_CASE(SoftmaxProfilerJSONPrinterCpuRefTest)
+{
+ SetupSoftmaxProfilerWithSpecifiedBackendsAndValidateJSONPrinterResult({armnn::Compute::CpuRef});
+}
+
+
+#if ARMCOMPUTENEON_ENABLED
+BOOST_AUTO_TEST_CASE(SoftmaxProfilerJSONPrinterCpuAccTest)
+{
+ SetupSoftmaxProfilerWithSpecifiedBackendsAndValidateJSONPrinterResult({armnn::Compute::CpuAcc});
+}
+#endif
+
+#if ARMCOMPUTECL_ENABLED
+BOOST_AUTO_TEST_CASE(SoftmaxProfilerJSONPrinterGpuAccTest)
+{
+ SetupSoftmaxProfilerWithSpecifiedBackendsAndValidateJSONPrinterResult({armnn::Compute::GpuAcc});
+}
+#endif
+
+BOOST_AUTO_TEST_SUITE_END()
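To make the three file-local helpers concrete, here is what they yield on a toy profiler-style fragment. This is a sketch that reuses the functions defined above and would live in the same translation unit:

    void IllustrateJsonHelpers()
    {
        const std::string json = R"({"raw": [1.0, 2.0, 3.0], "unit": "ms"})";

        // Every '{' and '[' has a matching '}' or ']'.
        BOOST_CHECK(AreParenthesesMatching(json));

        // Numbers found between '[' and ']' are parsed as doubles: {1.0, 2.0, 3.0}.
        std::vector<double> raw = ExtractMeasurements(json);
        BOOST_CHECK_EQUAL(raw.size(), 3);

        // Each balanced '{...}' span is returned; here, the whole object.
        std::vector<std::string> sections = ExtractSections(json);
        BOOST_CHECK_EQUAL(sections.size(), 1);
    }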
diff --git a/src/armnn/test/NeonTimerTest.cpp b/src/armnn/test/NeonTimerTest.cpp
new file mode 100644
index 0000000000..4502756e07
--- /dev/null
+++ b/src/armnn/test/NeonTimerTest.cpp
@@ -0,0 +1,104 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "NeonTimer.hpp"
+#include "TensorHelpers.hpp"
+
+#include "armnn/ArmNN.hpp"
+#include "armnn/Tensor.hpp"
+#include "armnn/TypesUtils.hpp"
+#include "backends/CpuTensorHandle.hpp"
+#include "backends/NeonWorkloadFactory.hpp"
+#include "backends/WorkloadInfo.hpp"
+#include "backends/WorkloadFactory.hpp"
+#include "backends/test/LayerTests.hpp"
+#include "backends/test/TensorCopyUtils.hpp"
+#include "backends/test/WorkloadTestUtils.hpp"
+
+#include <boost/test/unit_test.hpp>
+#include <cstdlib>
+#include <algorithm>
+
+using namespace armnn;
+
+BOOST_AUTO_TEST_SUITE(NeonTimerInstrument)
+
+
+BOOST_AUTO_TEST_CASE(NeonTimerGetName)
+{
+ NeonTimer neonTimer;
+ BOOST_CHECK_EQUAL(neonTimer.GetName(), "NeonKernelTimer");
+}
+
+BOOST_AUTO_TEST_CASE(NeonTimerMeasure)
+{
+ NeonWorkloadFactory workloadFactory;
+
+ unsigned int inputWidth = 4000u;
+ unsigned int inputHeight = 5000u;
+ unsigned int inputChannels = 1u;
+ unsigned int inputBatchSize = 1u;
+
+ float upperBound = 1.0f;
+ float lowerBound = -1.0f;
+
+ size_t inputSize = inputWidth * inputHeight * inputChannels * inputBatchSize;
+ std::vector<float> inputData(inputSize, 0.f);
+ std::generate(inputData.begin(), inputData.end(), [](){
+ return (static_cast<float>(rand()) / static_cast<float>(RAND_MAX / 3)) + 1.f; });
+
+ unsigned int outputWidth = inputWidth;
+ unsigned int outputHeight = inputHeight;
+ unsigned int outputChannels = inputChannels;
+ unsigned int outputBatchSize = inputBatchSize;
+
+ armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth },
+ armnn::GetDataType<float>());
+
+ armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth },
+ armnn::GetDataType<float>());
+
+ LayerTestResult<float, 4> result(inputTensorInfo);
+
+ auto input = MakeTensor<float, 4>(inputTensorInfo, inputData);
+
+ std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
+ std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
+
+ // Setup bounded ReLu
+ armnn::ActivationQueueDescriptor descriptor;
+ armnn::WorkloadInfo workloadInfo;
+ AddInputToWorkload(descriptor, workloadInfo, inputTensorInfo, inputHandle.get());
+ AddOutputToWorkload(descriptor, workloadInfo, outputTensorInfo, outputHandle.get());
+
+ descriptor.m_Parameters.m_Function = armnn::ActivationFunction::BoundedReLu;
+ descriptor.m_Parameters.m_A = upperBound;
+ descriptor.m_Parameters.m_B = lowerBound;
+
+ std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateActivation(descriptor, workloadInfo);
+
+ inputHandle->Allocate();
+ outputHandle->Allocate();
+
+ CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
+
+ NeonTimer neonTimer;
+ // Start the timer.
+ neonTimer.Start();
+ // Execute the workload.
+ workload->Execute();
+ // Stop the timer.
+ neonTimer.Stop();
+
+ std::vector<Measurement> measurements = neonTimer.GetMeasurements();
+
+ BOOST_CHECK_EQUAL(measurements.size(), 2);
+ BOOST_CHECK_EQUAL(measurements[0].m_Name, "NeonKernelTimer/0: NEFillBorderKernel");
+ BOOST_CHECK(measurements[0].m_Value > 0.0);
+ BOOST_CHECK_EQUAL(measurements[1].m_Name, "NeonKernelTimer/1: NEActivationLayerKernel");
+ BOOST_CHECK(measurements[1].m_Value > 0.0);
+}
+
+BOOST_AUTO_TEST_SUITE_END()
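NeonTimer follows the same Instrument contract as WallClockTimer, but between Start() and Stop() it records one entry per Compute Library kernel that ran, which is why the test above expects exactly two measurements (border fill plus activation). The generic bracketing pattern, as a sketch with an illustrative helper name:

    #include "NeonTimer.hpp"
    #include <utility>
    #include <vector>

    // Illustrative helper: times a callable with any Instrument-style timer.
    template <typename Timer, typename Fn>
    std::vector<armnn::Measurement> MeasureWith(Fn&& work)
    {
        Timer timer;
        timer.Start();
        std::forward<Fn>(work)(); // e.g. [&]{ workload->Execute(); }
        timer.Stop();
        return timer.GetMeasurements();
    }

    // Usage: auto kernelTimes = MeasureWith<armnn::NeonTimer>([&]{ workload->Execute(); });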
diff --git a/src/armnn/test/NetworkTests.cpp b/src/armnn/test/NetworkTests.cpp
new file mode 100644
index 0000000000..66fa327221
--- /dev/null
+++ b/src/armnn/test/NetworkTests.cpp
@@ -0,0 +1,968 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+#include <boost/test/unit_test.hpp>
+
+#include "armnn/ArmNN.hpp"
+#include "Network.hpp"
+#include "Graph.hpp"
+#include "backends/RefWorkloadFactory.hpp"
+#include "backends/ClWorkloadFactory.hpp"
+#include "backends/NeonWorkloadFactory.hpp"
+
+#include "GraphUtils.hpp"
+
+namespace
+{
+
+bool AreAllLayerInputSlotsConnected(const armnn::IConnectableLayer& layer)
+{
+ bool allConnected = true;
+ for (unsigned int i = 0; i < layer.GetNumInputSlots(); ++i)
+ {
+ const bool inputConnected = layer.GetInputSlot(i).GetConnection() != nullptr;
+ allConnected &= inputConnected;
+ }
+ return allConnected;
+}
+
+}
+
+BOOST_AUTO_TEST_SUITE(Network)
+
+BOOST_AUTO_TEST_CASE(LayerGuids)
+{
+ armnn::Network net;
+ armnn::LayerGuid inputId = net.AddInputLayer(0)->GetGuid();
+ armnn::LayerGuid addId = net.AddAdditionLayer()->GetGuid();
+ armnn::LayerGuid outputId = net.AddOutputLayer(0)->GetGuid();
+
+ BOOST_TEST(inputId != addId);
+ BOOST_TEST(addId != outputId);
+ BOOST_TEST(inputId != outputId);
+}
+
+BOOST_AUTO_TEST_CASE(SerializeToDot)
+{
+ armnn::Network net;
+
+ //Defines layers.
+ auto input = net.AddInputLayer(0);
+ auto add = net.AddAdditionLayer();
+ auto output = net.AddOutputLayer(0);
+
+ // Connects layers.
+ input->GetOutputSlot(0).Connect(add->GetInputSlot(0));
+ input->GetOutputSlot(0).Connect(add->GetInputSlot(1));
+ add->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+ armnn::TensorShape shape({4});
+ armnn::TensorInfo info(shape, armnn::DataType::Float32);
+ input->GetOutputSlot(0).SetTensorInfo(info);
+ add->GetOutputSlot(0).SetTensorInfo(info);
+
+ armnn::IRuntime::CreationOptions options;
+ armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
+
+ std::vector<armnn::Compute> backends = {armnn::Compute::CpuRef};
+ armnn::IOptimizedNetworkPtr optimizedNet = armnn::Optimize(net, backends, runtime->GetDeviceSpec());
+
+ std::ostringstream ss;
+ optimizedNet->SerializeToDot(ss);
+
+ auto inputId = input->GetGuid();
+ auto addId = add->GetGuid();
+ auto outputId = output->GetGuid();
+
+ std::stringstream expected;
+ expected <<
+ "digraph Optimized {\n"
+ " node [shape=\"record\"];\n"
+ " edge [fontsize=8 fontcolor=\"blue\" fontname=\"arial-bold\"];\n"
+ " " << inputId << " [label=\"{Input}\"];\n"
+ " " << addId << " [label=\"{Addition}\"];\n"
+ " " << outputId << " [label=\"{Output}\"];\n"
+ " " << inputId << " -> " << addId << " [label=< [4] >];\n"
+ " " << inputId << " -> " << addId << " [label=< [4] >];\n"
+ " " << addId << " -> " << outputId << " [label=< [4] >];\n"
+ "}\n";
+
+ BOOST_TEST(ss.str() == expected.str());
+}
+
+BOOST_AUTO_TEST_CASE(NetworkBasic)
+{
+ armnn::Network net;
+ BOOST_TEST(net.PrintGraph() == armnn::Status::Success);
+}
+
+BOOST_AUTO_TEST_CASE(LayerNamesAreOptionalForINetwork)
+{
+ armnn::Network net;
+ armnn::INetwork& inet = net;
+ inet.AddInputLayer(0);
+ inet.AddAdditionLayer();
+ inet.AddActivationLayer(armnn::ActivationDescriptor());
+ inet.AddOutputLayer(0);
+}
+
+BOOST_AUTO_TEST_CASE(LayerNamesAreOptionalForNetwork)
+{
+ armnn::Network net;
+ net.AddInputLayer(0);
+ net.AddAdditionLayer();
+ net.AddActivationLayer(armnn::ActivationDescriptor());
+ net.AddOutputLayer(0);
+}
+
+BOOST_AUTO_TEST_CASE(NetworkModification)
+{
+ armnn::Network net;
+
+ armnn::IConnectableLayer* const inputLayer = net.AddInputLayer(0, "input layer");
+ BOOST_TEST(inputLayer);
+
+ unsigned int dims[] = { 10,1,1,1 };
+ std::vector<float> convWeightsData(10);
+ armnn::ConstTensor weights(armnn::TensorInfo(4, dims, armnn::DataType::Float32), convWeightsData);
+
+ armnn::Convolution2dDescriptor convDesc2d;
+ armnn::IConnectableLayer* const convLayer = net.AddConvolution2dLayer(convDesc2d, weights, "conv layer");
+ BOOST_TEST(convLayer);
+
+ inputLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(0));
+
+ armnn::FullyConnectedDescriptor fullyConnectedDesc;
+ armnn::IConnectableLayer* const fullyConnectedLayer = net.AddFullyConnectedLayer(fullyConnectedDesc,
+ weights,
+ "fully connected");
+ BOOST_TEST(fullyConnectedLayer);
+
+ convLayer->GetOutputSlot(0).Connect(fullyConnectedLayer->GetInputSlot(0));
+
+ armnn::Pooling2dDescriptor pooling2dDesc;
+ armnn::IConnectableLayer* const poolingLayer = net.AddPooling2dLayer(pooling2dDesc, "pooling2d");
+ BOOST_TEST(poolingLayer);
+
+ fullyConnectedLayer->GetOutputSlot(0).Connect(poolingLayer->GetInputSlot(0));
+
+ armnn::ActivationDescriptor activationDesc;
+ armnn::IConnectableLayer* const activationLayer = net.AddActivationLayer(activationDesc, "activation");
+ BOOST_TEST(activationLayer);
+
+ poolingLayer->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
+
+ armnn::NormalizationDescriptor normalizationDesc;
+ armnn::IConnectableLayer* const normalizationLayer = net.AddNormalizationLayer(normalizationDesc, "normalization");
+ BOOST_TEST(normalizationLayer);
+
+ activationLayer->GetOutputSlot(0).Connect(normalizationLayer->GetInputSlot(0));
+
+ armnn::SoftmaxDescriptor softmaxDesc;
+ armnn::IConnectableLayer* const softmaxLayer = net.AddSoftmaxLayer(softmaxDesc, "softmax");
+ BOOST_TEST(softmaxLayer);
+
+ normalizationLayer->GetOutputSlot(0).Connect(softmaxLayer->GetInputSlot(0));
+
+ armnn::BatchNormalizationDescriptor batchNormDesc;
+
+ armnn::TensorInfo tensorInfo({ 1 }, armnn::DataType::Float32);
+ std::vector<float> data(tensorInfo.GetNumBytes() / sizeof(float));
+ armnn::ConstTensor invalidTensor(tensorInfo, data);
+
+ armnn::IConnectableLayer* const batchNormalizationLayer = net.AddBatchNormalizationLayer(batchNormDesc,
+ invalidTensor,
+ invalidTensor,
+ invalidTensor,
+ invalidTensor,
+ "batch norm");
+ BOOST_TEST(batchNormalizationLayer);
+
+ softmaxLayer->GetOutputSlot(0).Connect(batchNormalizationLayer->GetInputSlot(0));
+
+ armnn::IConnectableLayer* const additionLayer = net.AddAdditionLayer("addition");
+ BOOST_TEST(additionLayer);
+
+ batchNormalizationLayer->GetOutputSlot(0).Connect(additionLayer->GetInputSlot(0));
+ batchNormalizationLayer->GetOutputSlot(0).Connect(additionLayer->GetInputSlot(1));
+
+ armnn::IConnectableLayer* const multiplicationLayer = net.AddMultiplicationLayer("multiplication");
+ BOOST_TEST(multiplicationLayer);
+
+ additionLayer->GetOutputSlot(0).Connect(multiplicationLayer->GetInputSlot(0));
+ additionLayer->GetOutputSlot(0).Connect(multiplicationLayer->GetInputSlot(1));
+
+ armnn::IConnectableLayer* const outputLayer = net.AddOutputLayer(0, "output layer");
+ BOOST_TEST(outputLayer);
+
+ multiplicationLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
+
+ //Tests that all layers are present in the graph.
+ BOOST_TEST(net.GetGraph().GetNumLayers() == 11);
+
+ //Tests that the vertices exist and have correct names.
+ BOOST_TEST(GraphHasNamedLayer(net.GetGraph(), "input layer"));
+ BOOST_TEST(GraphHasNamedLayer(net.GetGraph(), "conv layer"));
+ BOOST_TEST(GraphHasNamedLayer(net.GetGraph(), "fully connected"));
+ BOOST_TEST(GraphHasNamedLayer(net.GetGraph(), "pooling2d"));
+ BOOST_TEST(GraphHasNamedLayer(net.GetGraph(), "activation"));
+ BOOST_TEST(GraphHasNamedLayer(net.GetGraph(), "normalization"));
+ BOOST_TEST(GraphHasNamedLayer(net.GetGraph(), "softmax"));
+ BOOST_TEST(GraphHasNamedLayer(net.GetGraph(), "batch norm"));
+ BOOST_TEST(GraphHasNamedLayer(net.GetGraph(), "addition"));
+ BOOST_TEST(GraphHasNamedLayer(net.GetGraph(), "multiplication"));
+ BOOST_TEST(GraphHasNamedLayer(net.GetGraph(), "output layer"));
+
+ auto checkOneOutputToOneInputConnection = []
+ (const armnn::IConnectableLayer* const srcLayer,
+ const armnn::IConnectableLayer* const tgtLayer,
+ int expectedSrcNumInputs = 1,
+ int expectedDstNumOutputs = 1)
+ {
+ BOOST_TEST(srcLayer->GetNumInputSlots() == expectedSrcNumInputs);
+ BOOST_TEST(srcLayer->GetNumOutputSlots() == 1);
+ BOOST_TEST(tgtLayer->GetNumInputSlots() == 1);
+ BOOST_TEST(tgtLayer->GetNumOutputSlots() == expectedDstNumOutputs);
+
+ BOOST_TEST(srcLayer->GetOutputSlot(0).GetNumConnections() == 1);
+ BOOST_TEST(srcLayer->GetOutputSlot(0).GetConnection(0) == &tgtLayer->GetInputSlot(0));
+ BOOST_TEST(&srcLayer->GetOutputSlot(0) == tgtLayer->GetInputSlot(0).GetConnection());
+ };
+ auto checkOneOutputToTwoInputsConnections = []
+ (const armnn::IConnectableLayer* const srcLayer,
+ const armnn::IConnectableLayer* const tgtLayer,
+ int expectedSrcNumInputs,
+ int expectedDstNumOutputs = 1)
+ {
+ BOOST_TEST(srcLayer->GetNumInputSlots() == expectedSrcNumInputs);
+ BOOST_TEST(srcLayer->GetNumOutputSlots() == 1);
+ BOOST_TEST(tgtLayer->GetNumInputSlots() == 2);
+ BOOST_TEST(tgtLayer->GetNumOutputSlots() == expectedDstNumOutputs);
+
+ BOOST_TEST(srcLayer->GetOutputSlot(0).GetNumConnections() == 2);
+ for (unsigned int i = 0; i < srcLayer->GetOutputSlot(0).GetNumConnections(); ++i)
+ {
+ BOOST_TEST(srcLayer->GetOutputSlot(0).GetConnection(i) == &tgtLayer->GetInputSlot(i));
+ BOOST_TEST(&srcLayer->GetOutputSlot(0) == tgtLayer->GetInputSlot(i).GetConnection());
+ }
+ };
+
+ BOOST_TEST(AreAllLayerInputSlotsConnected(*convLayer));
+ BOOST_TEST(AreAllLayerInputSlotsConnected(*fullyConnectedLayer));
+ BOOST_TEST(AreAllLayerInputSlotsConnected(*poolingLayer));
+ BOOST_TEST(AreAllLayerInputSlotsConnected(*activationLayer));
+ BOOST_TEST(AreAllLayerInputSlotsConnected(*normalizationLayer));
+ BOOST_TEST(AreAllLayerInputSlotsConnected(*softmaxLayer));
+ BOOST_TEST(AreAllLayerInputSlotsConnected(*batchNormalizationLayer));
+ BOOST_TEST(AreAllLayerInputSlotsConnected(*additionLayer));
+ BOOST_TEST(AreAllLayerInputSlotsConnected(*multiplicationLayer));
+ BOOST_TEST(AreAllLayerInputSlotsConnected(*outputLayer));
+
+ // Checks connectivity.
+ checkOneOutputToOneInputConnection(inputLayer, convLayer, 0);
+ checkOneOutputToOneInputConnection(convLayer, fullyConnectedLayer);
+ checkOneOutputToOneInputConnection(fullyConnectedLayer, poolingLayer);
+ checkOneOutputToOneInputConnection(poolingLayer, activationLayer);
+ checkOneOutputToOneInputConnection(activationLayer, normalizationLayer);
+ checkOneOutputToOneInputConnection(normalizationLayer, softmaxLayer);
+ checkOneOutputToOneInputConnection(softmaxLayer, batchNormalizationLayer);
+ checkOneOutputToTwoInputsConnections(batchNormalizationLayer, additionLayer, 1);
+ checkOneOutputToTwoInputsConnections(additionLayer, multiplicationLayer, 2);
+ checkOneOutputToOneInputConnection(multiplicationLayer, outputLayer, 2, 0);
+}
+
+BOOST_AUTO_TEST_CASE(NetworkModification_SplitterMerger)
+{
+ armnn::Network net;
+
+ // Adds an input layer and an input tensor descriptor.
+ armnn::IConnectableLayer* inputLayer = net.AddInputLayer(0, "input layer");
+ BOOST_TEST(inputLayer);
+
+ // Adds a splitter layer.
+ armnn::ViewsDescriptor splitterDesc(2,4);
+
+ armnn::IConnectableLayer* splitterLayer = net.AddSplitterLayer(splitterDesc, "splitter layer");
+ BOOST_TEST(splitterLayer);
+
+ inputLayer->GetOutputSlot(0).Connect(splitterLayer->GetInputSlot(0));
+
+ // Adds a softmax layer 1.
+ armnn::SoftmaxDescriptor softmaxDescriptor;
+ armnn::IConnectableLayer* softmaxLayer1 = net.AddSoftmaxLayer(softmaxDescriptor, "softmax_1");
+ BOOST_TEST(softmaxLayer1);
+
+ splitterLayer->GetOutputSlot(0).Connect(softmaxLayer1->GetInputSlot(0));
+
+ // Adds a softmax layer 2.
+ armnn::IConnectableLayer* softmaxLayer2 = net.AddSoftmaxLayer(softmaxDescriptor, "softmax_2");
+ BOOST_TEST(softmaxLayer2);
+
+ splitterLayer->GetOutputSlot(1).Connect(softmaxLayer2->GetInputSlot(0));
+
+ // Adds a merger layer.
+ armnn::OriginsDescriptor mergerDesc(2, 4);
+
+ armnn::IConnectableLayer* mergerLayer = net.AddMergerLayer(mergerDesc, "merger layer");
+ BOOST_TEST(mergerLayer);
+
+ softmaxLayer1->GetOutputSlot(0).Connect(mergerLayer->GetInputSlot(0));
+ softmaxLayer2->GetOutputSlot(0).Connect(mergerLayer->GetInputSlot(1));
+
+ // Adds an output layer.
+ armnn::IConnectableLayer* outputLayer = net.AddOutputLayer(0, "output layer");
+ BOOST_TEST(outputLayer);
+
+ mergerLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
+
+ BOOST_TEST(splitterLayer->GetNumOutputSlots() == 2);
+ BOOST_TEST(splitterLayer->GetOutputSlot(0).GetConnection(0) == &softmaxLayer1->GetInputSlot(0));
+ BOOST_TEST(&splitterLayer->GetOutputSlot(0) == softmaxLayer1->GetInputSlot(0).GetConnection());
+ BOOST_TEST(splitterLayer->GetOutputSlot(1).GetConnection(0) == &softmaxLayer2->GetInputSlot(0));
+ BOOST_TEST(&splitterLayer->GetOutputSlot(1) == softmaxLayer2->GetInputSlot(0).GetConnection());
+
+ BOOST_TEST(mergerLayer->GetNumInputSlots() == 2);
+ BOOST_TEST(softmaxLayer1->GetOutputSlot(0).GetConnection(0) == &mergerLayer->GetInputSlot(0));
+ BOOST_TEST(&softmaxLayer1->GetOutputSlot(0) == mergerLayer->GetInputSlot(0).GetConnection());
+ BOOST_TEST(softmaxLayer2->GetOutputSlot(0).GetConnection(0) == &mergerLayer->GetInputSlot(1));
+ BOOST_TEST(&softmaxLayer2->GetOutputSlot(0) == mergerLayer->GetInputSlot(1).GetConnection());
+}
+
+BOOST_AUTO_TEST_CASE(NetworkModification_SplitterAddition)
+{
+ armnn::Network net;
+
+ // Adds an input layer and an input tensor descriptor.
+ armnn::IConnectableLayer* layer = net.AddInputLayer(0, "input layer");
+ BOOST_TEST(layer);
+
+ // Adds a splitter layer.
+ armnn::ViewsDescriptor splitterDesc(2,4);
+
+ armnn::IConnectableLayer* const splitterLayer = net.AddSplitterLayer(splitterDesc, "splitter layer");
+ BOOST_TEST(splitterLayer);
+
+ layer->GetOutputSlot(0).Connect(splitterLayer->GetInputSlot(0));
+
+ // Adds a softmax layer 1.
+ armnn::SoftmaxDescriptor softmaxDescriptor;
+ armnn::IConnectableLayer* const softmax1Layer = net.AddSoftmaxLayer(softmaxDescriptor, "softmax_1");
+ BOOST_TEST(softmax1Layer);
+
+ splitterLayer->GetOutputSlot(0).Connect(softmax1Layer->GetInputSlot(0));
+
+ // Adds a softmax layer 2.
+ armnn::IConnectableLayer* const softmax2Layer = net.AddSoftmaxLayer(softmaxDescriptor, "softmax_2");
+ BOOST_TEST(softmax2Layer);
+
+ splitterLayer->GetOutputSlot(1).Connect(softmax2Layer->GetInputSlot(0));
+
+ // Adds addition layer.
+ layer = net.AddAdditionLayer("add layer");
+ BOOST_TEST(layer);
+
+ softmax1Layer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
+ softmax2Layer->GetOutputSlot(0).Connect(layer->GetInputSlot(1));
+
+ // Adds an output layer.
+ armnn::IConnectableLayer* prevLayer = layer;
+ layer = net.AddOutputLayer(0, "output layer");
+
+ prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
+
+ BOOST_TEST(layer);
+}
+
+BOOST_AUTO_TEST_CASE(NetworkModification_SplitterMultiplication)
+{
+ armnn::Network net;
+
+ // Adds an input layer and an input tensor descriptor.
+ armnn::IConnectableLayer* layer = net.AddInputLayer(0, "input layer");
+ BOOST_TEST(layer);
+
+ // Adds a splitter layer.
+ armnn::ViewsDescriptor splitterDesc(2,4);
+ armnn::IConnectableLayer* const splitterLayer = net.AddSplitterLayer(splitterDesc, "splitter layer");
+ BOOST_TEST(splitterLayer);
+
+ layer->GetOutputSlot(0).Connect(splitterLayer->GetInputSlot(0));
+
+ // Adds a softmax layer 1.
+ armnn::SoftmaxDescriptor softmaxDescriptor;
+ armnn::IConnectableLayer* const softmax1Layer = net.AddSoftmaxLayer(softmaxDescriptor, "softmax_1");
+ BOOST_TEST(softmax1Layer);
+
+ splitterLayer->GetOutputSlot(0).Connect(softmax1Layer->GetInputSlot(0));
+
+ // Adds a softmax layer 2.
+ armnn::IConnectableLayer* const softmax2Layer = net.AddSoftmaxLayer(softmaxDescriptor, "softmax_2");
+ BOOST_TEST(softmax2Layer);
+
+ splitterLayer->GetOutputSlot(1).Connect(softmax2Layer->GetInputSlot(0));
+
+ // Adds multiplication layer.
+ layer = net.AddMultiplicationLayer("multiplication layer");
+ BOOST_TEST(layer);
+
+ softmax1Layer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
+ softmax2Layer->GetOutputSlot(0).Connect(layer->GetInputSlot(1));
+
+ // Adds an output layer.
+ armnn::IConnectableLayer* prevLayer = layer;
+ layer = net.AddOutputLayer(0, "output layer");
+ BOOST_TEST(layer);
+
+ prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
+}
+
+BOOST_AUTO_TEST_CASE(OptimizeValidateCpuRefWorkloads)
+{
+ const armnn::TensorInfo desc({3, 5}, armnn::DataType::Float32);
+
+ armnn::Network net;
+
+ armnn::NormalizationDescriptor nmDesc;
+ armnn::ActivationDescriptor acDesc;
+
+ // in
+ // |
+ // nm
+ // / |
+ // ac |
+ // \ |
+ // ml
+ // |
+ // sm
+ // |
+ // ot
+ armnn::IConnectableLayer* layer = net.AddInputLayer(0, "in");
+ layer->GetOutputSlot(0).SetTensorInfo(desc);
+
+ armnn::IConnectableLayer* const normLayer = net.AddNormalizationLayer(nmDesc, "nm");
+
+ layer->GetOutputSlot(0).Connect(normLayer->GetInputSlot(0));
+ normLayer->GetOutputSlot(0).SetTensorInfo(desc);
+
+ layer = net.AddActivationLayer(acDesc, "ac");
+
+ normLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
+ layer->GetOutputSlot(0).SetTensorInfo(desc);
+
+ armnn::IConnectableLayer* prevLayer = layer;
+ layer = net.AddMultiplicationLayer("ml");
+
+ prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
+ normLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1));
+ layer->GetOutputSlot(0).SetTensorInfo(desc);
+
+ prevLayer = layer;
+ armnn::SoftmaxDescriptor softmaxDescriptor;
+ layer = net.AddSoftmaxLayer(softmaxDescriptor, "sm");
+
+ prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
+ layer->GetOutputSlot(0).SetTensorInfo(desc);
+
+ prevLayer = layer;
+ layer = net.AddOutputLayer(0, "ot");
+
+ prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
+
+ armnn::IRuntime::CreationOptions options;
+ armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
+
+ std::vector<armnn::Compute> backends = { armnn::Compute::CpuRef };
+ armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(net, backends, runtime->GetDeviceSpec());
+ static_cast<armnn::OptimizedNetwork*>(optNet.get())->GetGraph().AllocateDynamicBuffers();
+ BOOST_CHECK(optNet);
+
+ // Validates workloads.
+ armnn::RefWorkloadFactory fact;
+ for (auto&& layer : static_cast<armnn::OptimizedNetwork*>(optNet.get())->GetGraph())
+ {
+ BOOST_CHECK_NO_THROW(
+ layer->CreateWorkload(static_cast<armnn::OptimizedNetwork*>(optNet.get())->GetGraph(), fact));
+ }
+}
+
+#if ARMCOMPUTENEON_ENABLED
+BOOST_AUTO_TEST_CASE(OptimizeValidateCpuAccDeviceSupportLayerNoFallback)
+{
+ // build up the structure of the network
+ armnn::INetworkPtr net(armnn::INetwork::Create());
+
+ armnn::IConnectableLayer* input = net->AddInputLayer(0);
+
+ armnn::IConnectableLayer* output = net->AddOutputLayer(0);
+
+ input->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+ input->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32));
+
+ armnn::IRuntime::CreationOptions options;
+ armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
+
+ std::vector<armnn::Compute> backends = { armnn::Compute::CpuAcc };
+ armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec());
+ BOOST_CHECK(optNet);
+ // Validates workloads.
+ armnn::NeonWorkloadFactory fact;
+ for (auto&& layer : static_cast<armnn::OptimizedNetwork*>(optNet.get())->GetGraph())
+ {
+ BOOST_CHECK_EQUAL(armnn::Compute::CpuAcc, layer->GetComputeDevice());
+ BOOST_CHECK_NO_THROW(
+ layer->CreateWorkload(static_cast<armnn::OptimizedNetwork*>(optNet.get())->GetGraph(), fact));
+ }
+}
+#endif // ARMCOMPUTENEON_ENABLED
+
+#if ARMCOMPUTECL_ENABLED
+BOOST_AUTO_TEST_CASE(OptimizeValidateGpuDeviceSupportLayerNoFallback)
+{
+ // Builds up the structure of the network.
+ armnn::INetworkPtr net(armnn::INetwork::Create());
+
+ armnn::IConnectableLayer* input = net->AddInputLayer(0);
+
+ armnn::IConnectableLayer* output = net->AddOutputLayer(0);
+
+ input->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+ input->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32));
+
+ armnn::IRuntime::CreationOptions options;
+ armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
+
+ std::vector<armnn::Compute> backends = { armnn::Compute::GpuAcc };
+ armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec());
+ BOOST_CHECK(optNet);
+ // Validates workloads.
+ armnn::ClWorkloadFactory fact;
+ for (auto&& layer : static_cast<armnn::OptimizedNetwork*>(optNet.get())->GetGraph())
+ {
+ BOOST_CHECK_EQUAL(armnn::Compute::GpuAcc, layer->GetComputeDevice());
+ BOOST_CHECK_NO_THROW(
+ layer->CreateWorkload(static_cast<armnn::OptimizedNetwork*>(optNet.get())->GetGraph(), fact));
+ }
+}
+#endif // ARMCOMPUTECL_ENABLED
+
+BOOST_AUTO_TEST_CASE(OptimizeValidateDeviceNonSupportLayerNoFallback)
+{
+ // Builds up the structure of the network.
+ armnn::INetworkPtr net(armnn::INetwork::Create());
+
+ armnn::IConnectableLayer* input = net->AddInputLayer(0);
+
+ // This layer configuration isn't supported by CpuAcc and isn't allowed to fall back, so Optimize will return null.
+ armnn::NormalizationDescriptor descriptor;
+ armnn::IConnectableLayer* normalize = net->AddNormalizationLayer(descriptor);
+
+ armnn::IConnectableLayer* output = net->AddOutputLayer(0);
+
+ input->GetOutputSlot(0).Connect(normalize->GetInputSlot(0));
+ normalize->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+ input->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32));
+ normalize->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32));
+
+ armnn::IRuntime::CreationOptions options;
+ armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
+
+ std::vector<armnn::Compute> backends = { armnn::Compute::CpuAcc };
+ armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec());
+ BOOST_CHECK(!optNet);
+}
+
+BOOST_AUTO_TEST_CASE(OptimizeValidateDeviceNonSupportLayerWithFallback)
+{
+ // Builds up the structure of the network.
+ armnn::INetworkPtr net(armnn::INetwork::Create());
+
+ armnn::IConnectableLayer* input = net->AddInputLayer(0);
+
+ // This layer configuration isn't supported by CpuAcc but is allowed to fall back to CpuRef.
+ armnn::NormalizationDescriptor descriptor;
+ armnn::IConnectableLayer* normalize = net->AddNormalizationLayer(descriptor);
+
+ armnn::IConnectableLayer* output = net->AddOutputLayer(0);
+
+ input->GetOutputSlot(0).Connect(normalize->GetInputSlot(0));
+ normalize->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+ input->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32));
+ normalize->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32));
+
+ armnn::IRuntime::CreationOptions options;
+ armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
+
+ std::vector<armnn::Compute> backends = { armnn::Compute::CpuAcc, armnn::Compute::CpuRef };
+ armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec());
+ BOOST_REQUIRE(optNet);
+
+ for (auto&& layer : static_cast<armnn::OptimizedNetwork*>(optNet.get())->GetGraph())
+ {
+ // If NEON is enabled, the Input and Output layers are assigned to CpuAcc,
+ // while the other layers are assigned to CpuRef.
+ // If NEON is not enabled, all layers are assigned to CpuRef.
+#if ARMCOMPUTENEON_ENABLED
+ if (layer->GetType() == armnn::LayerType::Input || layer->GetType() == armnn::LayerType::Output)
+ {
+ BOOST_CHECK_EQUAL(armnn::Compute::CpuAcc, layer->GetComputeDevice());
+ }
+ else if (layer->GetType() == armnn::LayerType::Normalization)
+ {
+ BOOST_CHECK_EQUAL(armnn::Compute::CpuRef, layer->GetComputeDevice());
+ }
+#else
+ BOOST_CHECK_EQUAL(armnn::Compute::CpuRef, layer->GetComputeDevice());
+#endif
+ }
+}
+
+BOOST_AUTO_TEST_CASE(OptimizeValidateWorkloadsUndefinedComputeDevice)
+{
+ const armnn::TensorInfo desc({3, 5}, armnn::DataType::Float32);
+
+ armnn::Network net;
+
+ armnn::NormalizationDescriptor nmDesc;
+ armnn::ActivationDescriptor acDesc;
+
+ // in
+ // |
+ // nm
+ // / |
+ // ac |
+ // \ |
+ // ml
+ // |
+ // sm
+ // |
+ // ot
+ armnn::IConnectableLayer* layer = net.AddInputLayer(0, "in");
+ layer->GetOutputSlot(0).SetTensorInfo(desc);
+
+ armnn::IConnectableLayer* const normLayer = net.AddNormalizationLayer(nmDesc, "nm");
+
+ layer->GetOutputSlot(0).Connect(normLayer->GetInputSlot(0));
+ normLayer->GetOutputSlot(0).SetTensorInfo(desc);
+
+ layer = net.AddActivationLayer(acDesc, "ac");
+
+ normLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
+ layer->GetOutputSlot(0).SetTensorInfo(desc);
+
+ armnn::IConnectableLayer* prevLayer = layer;
+ layer = net.AddMultiplicationLayer("ml");
+
+ prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
+ normLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1));
+ layer->GetOutputSlot(0).SetTensorInfo(desc);
+
+ prevLayer = layer;
+ armnn::SoftmaxDescriptor softmaxDescriptor;
+ layer = net.AddSoftmaxLayer(softmaxDescriptor, "sm");
+
+ prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
+ layer->GetOutputSlot(0).SetTensorInfo(desc);
+
+ prevLayer = layer;
+ layer = net.AddOutputLayer(0, "ot");
+
+ prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
+
+ armnn::IRuntime::CreationOptions options;
+ armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
+
+ std::vector<armnn::Compute> backends = { armnn::Compute::Undefined };
+
+ armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(net, backends, runtime->GetDeviceSpec());
+ BOOST_CHECK(!optNet);
+}
+
+BOOST_AUTO_TEST_CASE(OptimizeValidateWorkloadsUndefinedComputeDeviceWithFallback)
+{
+ const armnn::TensorInfo desc({3, 5}, armnn::DataType::Float32);
+
+ armnn::Network net;
+
+ armnn::NormalizationDescriptor nmDesc;
+ armnn::ActivationDescriptor acDesc;
+
+ // in
+ // |
+ // nm
+ // / |
+ // ac |
+ // \ |
+ // ml
+ // |
+ // sm
+ // |
+ // ot
+ armnn::IConnectableLayer* layer = net.AddInputLayer(0, "in");
+ layer->GetOutputSlot(0).SetTensorInfo(desc);
+
+ armnn::IConnectableLayer* const normLayer = net.AddNormalizationLayer(nmDesc, "nm");
+
+ layer->GetOutputSlot(0).Connect(normLayer->GetInputSlot(0));
+ normLayer->GetOutputSlot(0).SetTensorInfo(desc);
+
+ layer = net.AddActivationLayer(acDesc, "ac");
+
+ normLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
+ layer->GetOutputSlot(0).SetTensorInfo(desc);
+
+ armnn::IConnectableLayer* prevLayer = layer;
+ layer = net.AddMultiplicationLayer("ml");
+
+ prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
+ normLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1));
+ layer->GetOutputSlot(0).SetTensorInfo(desc);
+
+ prevLayer = layer;
+ armnn::SoftmaxDescriptor softmaxDescriptor;
+ layer = net.AddSoftmaxLayer(softmaxDescriptor, "sm");
+
+ prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
+ layer->GetOutputSlot(0).SetTensorInfo(desc);
+
+ prevLayer = layer;
+ layer = net.AddOutputLayer(0, "ot");
+
+ prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
+
+ armnn::IRuntime::CreationOptions options;
+ armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
+
+ std::vector<armnn::Compute> backends = { armnn::Compute::Undefined, armnn::Compute::CpuRef };
+
+ armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(net, backends, runtime->GetDeviceSpec());
+ BOOST_CHECK(optNet);
+
+ // Validates workloads.
+ armnn::RefWorkloadFactory fact;
+ for (auto&& layer : static_cast<armnn::OptimizedNetwork*>(optNet.get())->GetGraph())
+ {
+ BOOST_CHECK_EQUAL(armnn::Compute::CpuRef, layer->GetComputeDevice());
+ BOOST_CHECK_NO_THROW(
+ layer->CreateWorkload(static_cast<armnn::OptimizedNetwork*>(optNet.get())->GetGraph(), fact));
+ }
+}
+
+BOOST_AUTO_TEST_CASE(OptimizeValidateWorkloadsDuplicateComputeDeviceWithFallback)
+{
+ // Builds up the structure of the network.
+ armnn::INetworkPtr net(armnn::INetwork::Create());
+
+ armnn::IConnectableLayer* input = net->AddInputLayer(0);
+
+ // This layer configuration isn't supported by CpuAcc but is allowed to fall back to CpuRef.
+ armnn::NormalizationDescriptor descriptor;
+ armnn::IConnectableLayer* normalize = net->AddNormalizationLayer(descriptor);
+
+ armnn::IConnectableLayer* output = net->AddOutputLayer(0);
+
+ input->GetOutputSlot(0).Connect(normalize->GetInputSlot(0));
+ normalize->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+ input->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32));
+ normalize->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32));
+
+ armnn::IRuntime::CreationOptions options;
+ armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
+
+ std::vector<armnn::Compute> backends = { armnn::Compute::CpuAcc,
+ armnn::Compute::GpuAcc,
+ armnn::Compute::CpuRef };
+
+ armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec());
+ BOOST_REQUIRE(optNet);
+
+ for (auto&& layer : static_cast<armnn::OptimizedNetwork*>(optNet.get())->GetGraph())
+ {
+ // If NEON is enabled, the Input and Output layers are assigned to CpuAcc,
+ // while the other layers are assigned to CpuRef.
+ // If only CL is enabled, the Input and Output layers are assigned to GpuAcc,
+ // while the other layers are assigned to CpuRef.
+ // If neither NEON nor CL is enabled, all layers are assigned to CpuRef.
+#if ARMCOMPUTENEON_ENABLED
+ if (layer->GetType() == armnn::LayerType::Input || layer->GetType() == armnn::LayerType::Output)
+ {
+ BOOST_CHECK_EQUAL(armnn::Compute::CpuAcc, layer->GetComputeDevice());
+ }
+ else if (layer->GetType() == armnn::LayerType::Normalization)
+ {
+ BOOST_CHECK_EQUAL(armnn::Compute::CpuRef, layer->GetComputeDevice());
+ }
+#elif ARMCOMPUTECL_ENABLED
+ if (layer->GetType() == armnn::LayerType::Input || layer->GetType() == armnn::LayerType::Output)
+ {
+ BOOST_CHECK_EQUAL(armnn::Compute::GpuAcc, layer->GetComputeDevice());
+ }
+ else if (layer->GetType() == armnn::LayerType::Normalization)
+ {
+ BOOST_CHECK_EQUAL(armnn::Compute::CpuRef, layer->GetComputeDevice());
+ }
+#else
+ BOOST_CHECK_EQUAL(armnn::Compute::CpuRef, layer->GetComputeDevice());
+#endif
+ }
+}
+
+BOOST_AUTO_TEST_CASE(OptimizeValidateWorkloadsCpuRefPermuteLayer)
+{
+ // Creates the runtime in which the test will run.
+ armnn::IRuntime::CreationOptions options;
+ armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
+
+ std::vector<armnn::Compute> backends = {armnn::Compute::CpuRef};
+
+ // Builds up the structure of the network.
+ armnn::INetworkPtr net(armnn::INetwork::Create());
+
+ armnn::IConnectableLayer* input = net->AddInputLayer(0);
+
+ armnn::PermuteDescriptor descriptor({0, 2, 3, 1});
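+ // The mapping {0, 2, 3, 1} sends input dimension i to output dimension mapping[i],
+ // so the { 1, 1, 4, 4 } input below becomes a { 1, 4, 1, 4 } output.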
+ armnn::IConnectableLayer* permute = net->AddPermuteLayer(descriptor);
+
+ armnn::IConnectableLayer* output = net->AddOutputLayer(0);
+
+ input->GetOutputSlot(0).Connect(permute->GetInputSlot(0));
+ permute->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+ input->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32));
+ permute->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 4, 1, 4 }, armnn::DataType::Float32));
+
+ // Optimizes the network.
+ armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec());
+
+ for (auto&& layer : static_cast<armnn::OptimizedNetwork*>(optNet.get())->GetGraph())
+ {
+ BOOST_CHECK_EQUAL(armnn::Compute::CpuRef, layer->GetComputeDevice());
+ }
+}
+
+BOOST_AUTO_TEST_CASE(FP16TurboModeTestOnCpuRef)
+{
+ // Tests that, when the FP16 turbo mode is set, the optimizer converts the FP32 network to FP16:
+ // it adds an FP32ToFP16 conversion layer after the InputLayer and an FP16ToFP32 conversion
+ // layer before the OutputLayer, then checks whether each remaining layer is supported in FP16.
+ // Layers that are not supported in FP16 keep running in FP32, wrapped in conversion layers,
+ // and any resulting pairs of inverse conversion layers are removed by the optimizer.
+ // At the moment FloorLayer is not supported in FP16, so it rolls back to FP32 and the
+ // inverse conversion layers around it are removed by the optimizer.
+ armnn::Network net;
+
+ // Defines layers.
+ auto input = net.AddInputLayer(0);
+ auto floor = net.AddFloorLayer();
+ auto output = net.AddOutputLayer(0);
+
+ // Connects layers.
+ input->GetOutputSlot(0).Connect(floor->GetInputSlot(0));
+ floor->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+ armnn::TensorShape shape({4});
+ armnn::TensorInfo info(shape, armnn::DataType::Float32);
+ input->GetOutputSlot(0).SetTensorInfo(info);
+ floor->GetOutputSlot(0).SetTensorInfo(info);
+
+ armnn::IRuntime::CreationOptions options;
+ armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
+
+ std::vector<armnn::Compute> backends = {armnn::Compute::CpuRef};
+
+ armnn::OptimizerOptions optimizerOptions;
+ optimizerOptions.m_ReduceFp32ToFp16 = true;
+
+ armnn::IOptimizedNetworkPtr optimizedNet = armnn::Optimize(net, backends, runtime->GetDeviceSpec(),
+ optimizerOptions);
+
+ std::ostringstream ss;
+ optimizedNet->SerializeToDot(ss);
+
+ auto inputId = input->GetGuid();
+ auto floorId = floor->GetGuid();
+ auto outputId = output->GetGuid();
+
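+ // Since Floor falls back to FP32, the inserted conversion pairs cancel out and the
+ // expected graph matches the original FP32 network.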
+ std::stringstream expected;
+ expected <<
+ "digraph Optimized {\n"
+ " node [shape=\"record\"];\n"
+ " edge [fontsize=8 fontcolor=\"blue\" fontname=\"arial-bold\"];\n"
+ " " << inputId << " [label=\"{Input}\"];\n"
+ " " << floorId << " [label=\"{Floor}\"];\n"
+ " " << outputId << " [label=\"{Output}\"];\n"
+ " " << inputId << " -> " << floorId << " [label=< [4] >];\n"
+ " " << floorId << " -> " << outputId << " [label=< [4] >];\n"
+ "}\n";
+
+ BOOST_TEST(ss.str() == expected.str());
+}
+
+#if ARMCOMPUTECL_ENABLED
+BOOST_AUTO_TEST_CASE(FP16TurboModeTestOnGpuAcc)
+{
+ // Tests that, when the Fp16 turbo mode is set, the optimizer converts the Fp32 network to Fp16:
+ // it adds an Fp32ToFp16 conversion layer after the InputLayer and an Fp16ToFp32 conversion
+ // layer before the OutputLayer, then checks whether each remaining layer is supported in Fp16.
+ // Layers that are not supported in Fp16 keep running in Fp32, wrapped in conversion layers,
+ // and any resulting pairs of inverse conversion layers are removed by the optimizer.
+ // Here the bounded ReLu activation is supported in Fp16 on GpuAcc, so the optimized graph
+ // keeps both conversion layers and runs the activation in Fp16 between them.
+ armnn::Network net;
+
+ // Defines layers.
+ auto input = net.AddInputLayer(0, "input layer");
+ // ReLu1
+ armnn::ActivationDescriptor activation1Descriptor;
+ activation1Descriptor.m_Function = armnn::ActivationFunction::BoundedReLu;
+ activation1Descriptor.m_A = 1.f;
+ activation1Descriptor.m_B = -1.f;
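+ // Bounded ReLu clamps the activations to the range [m_B, m_A] = [-1, 1].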
+ auto activation = net.AddActivationLayer(activation1Descriptor, "activation layer");
+ auto output = net.AddOutputLayer(0, "output layer");
+
+ // Connects layers.
+ input->GetOutputSlot(0).Connect(activation->GetInputSlot(0));
+ activation->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+ armnn::TensorShape shape({4});
+ armnn::TensorInfo info(shape, armnn::DataType::Float32);
+ input->GetOutputSlot(0).SetTensorInfo(info);
+ activation->GetOutputSlot(0).SetTensorInfo(info);
+
+ armnn::IRuntime::CreationOptions options;
+ armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
+
+ std::vector<armnn::Compute> backends = {armnn::Compute::GpuAcc};
+
+ armnn::OptimizerOptions optimizerOptions;
+ optimizerOptions.m_ReduceFp32ToFp16 = true;
+
+ armnn::IOptimizedNetworkPtr optimizedNet = armnn::Optimize(net, backends, runtime->GetDeviceSpec(),
+ optimizerOptions);
+
+ const armnn::Graph& graph = static_cast<armnn::OptimizedNetwork*>(optimizedNet.get())->GetGraph();
+
+ // Tests that all layers are present in the graph.
+ BOOST_TEST(graph.GetNumLayers() == 5);
+
+ // Tests that the vertices exist and have correct names.
+ BOOST_TEST(GraphHasNamedLayer(graph, "input layer"));
+ BOOST_TEST(GraphHasNamedLayer(graph, "convert_fp32_to_fp16-0-input layer"));
+ BOOST_TEST(GraphHasNamedLayer(graph, "activation layer"));
+ BOOST_TEST(GraphHasNamedLayer(graph, "convert_fp16_to_fp32-0-output layer"));
+ BOOST_TEST(GraphHasNamedLayer(graph, "output layer"));
+}
+#endif
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/armnn/test/Network_test.cpp b/src/armnn/test/Network_test.cpp
deleted file mode 100644
index 057caa0505..0000000000
--- a/src/armnn/test/Network_test.cpp
+++ /dev/null
@@ -1,483 +0,0 @@
-//
-// Copyright © 2017 Arm Ltd. All rights reserved.
-// See LICENSE file in the project root for full license information.
-//
-#include <boost/test/unit_test.hpp>
-
-#include "armnn/ArmNN.hpp"
-#include "Network.hpp"
-#include "Graph.hpp"
-#include "backends/RefWorkloadFactory.hpp"
-
-#include "GraphUtils.hpp"
-
-namespace
-{
-
-bool AreAllLayerInputSlotsConnected(const armnn::IConnectableLayer& layer)
-{
- bool allConnected = true;
- for (unsigned int i = 0; i < layer.GetNumInputSlots(); ++i)
- {
- const bool inputConnected = layer.GetInputSlot(i).GetConnection() != nullptr;
- allConnected &= inputConnected;
- }
- return allConnected;
-}
-
-}
-
-BOOST_AUTO_TEST_SUITE(Network)
-
-BOOST_AUTO_TEST_CASE(LayerGuids)
-{
- armnn::Network net;
- armnn::LayerGuid inputId = net.AddInputLayer(0)->GetGuid();
- armnn::LayerGuid addId = net.AddAdditionLayer()->GetGuid();
- armnn::LayerGuid outputId = net.AddOutputLayer(0)->GetGuid();
-
- BOOST_TEST(inputId != addId);
- BOOST_TEST(addId != outputId);
- BOOST_TEST(inputId != outputId);
-}
-
-BOOST_AUTO_TEST_CASE(SerializeToDot)
-{
- armnn::Network net;
-
- //define layers
- auto input = net.AddInputLayer(0);
- auto add = net.AddAdditionLayer();
- auto output = net.AddOutputLayer(0);
-
- // connect layers
- input->GetOutputSlot(0).Connect(add->GetInputSlot(0));
- input->GetOutputSlot(0).Connect(add->GetInputSlot(1));
- add->GetOutputSlot(0).Connect(output->GetInputSlot(0));
-
- armnn::TensorShape shape({4});
- armnn::TensorInfo info(shape, armnn::DataType::Float32);
- input->GetOutputSlot(0).SetTensorInfo(info);
- add->GetOutputSlot(0).SetTensorInfo(info);
-
- armnn::DeviceSpec spec;
- spec.DefaultComputeDevice = armnn::Compute::CpuAcc;
- armnn::IOptimizedNetworkPtr optimizedNet = armnn::Optimize(net, spec);
-
- std::ostringstream ss;
- optimizedNet->SerializeToDot(ss);
-
- auto inputId = input->GetGuid();
- auto addId = add->GetGuid();
- auto outputId = output->GetGuid();
-
- std::stringstream expected;
- expected <<
- "digraph Optimized {\n"
- " node [shape=\"record\"];\n"
- " edge [fontsize=8 fontcolor=\"blue\" fontname=\"arial-bold\"];\n"
- " " << inputId << " [label=\"{Input}\"];\n"
- " " << addId << " [label=\"{Addition}\"];\n"
- " " << outputId << " [label=\"{Output}\"];\n"
- " " << inputId << " -> " << addId << " [label=< [4] >];\n"
- " " << inputId << " -> " << addId << " [label=< [4] >];\n"
- " " << addId << " -> " << outputId << " [label=< [4] >];\n"
- "}\n";
-
- BOOST_TEST(ss.str() == expected.str());
-}
-
-BOOST_AUTO_TEST_CASE(NetworkBasic)
-{
- armnn::Network net;
- BOOST_TEST(net.PrintGraph() == armnn::Status::Success);
-}
-
-BOOST_AUTO_TEST_CASE(LayerNamesAreOptionalForINetwork)
-{
- armnn::Network net;
- armnn::INetwork& inet = net;
- inet.AddInputLayer(0);
- inet.AddAdditionLayer();
- inet.AddActivationLayer(armnn::ActivationDescriptor());
- inet.AddOutputLayer(0);
-}
-
-BOOST_AUTO_TEST_CASE(LayerNamesAreOptionalForNetwork)
-{
- armnn::Network net;
- net.AddInputLayer(0);
- net.AddAdditionLayer();
- net.AddActivationLayer(armnn::ActivationDescriptor());
- net.AddOutputLayer(0);
-}
-
-BOOST_AUTO_TEST_CASE(NetworkModification)
-{
- armnn::Network net;
-
- armnn::IConnectableLayer* const inputLayer = net.AddInputLayer(0, "input layer");
- BOOST_TEST(inputLayer);
-
- unsigned int dims[] = { 10,1,1,1 };
- std::vector<float> convWeightsData(10);
- armnn::ConstTensor weights(armnn::TensorInfo(4, dims, armnn::DataType::Float32), convWeightsData);
-
- armnn::Convolution2dDescriptor convDesc2d;
- armnn::IConnectableLayer* const convLayer = net.AddConvolution2dLayer(convDesc2d, weights, "conv layer");
- BOOST_TEST(convLayer);
-
- inputLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(0));
-
- armnn::FullyConnectedDescriptor fullyConnectedDesc;
- armnn::IConnectableLayer* const fullyConnectedLayer = net.AddFullyConnectedLayer(fullyConnectedDesc,
- weights,
- "fully connected");
- BOOST_TEST(fullyConnectedLayer);
-
- convLayer->GetOutputSlot(0).Connect(fullyConnectedLayer->GetInputSlot(0));
-
- armnn::Pooling2dDescriptor pooling2dDesc;
- armnn::IConnectableLayer* const poolingLayer = net.AddPooling2dLayer(pooling2dDesc, "pooling2d");
- BOOST_TEST(poolingLayer);
-
- fullyConnectedLayer->GetOutputSlot(0).Connect(poolingLayer->GetInputSlot(0));
-
- armnn::ActivationDescriptor activationDesc;
- armnn::IConnectableLayer* const activationLayer = net.AddActivationLayer(activationDesc, "activation");
- BOOST_TEST(activationLayer);
-
- poolingLayer->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
-
- armnn::NormalizationDescriptor normalizationDesc;
- armnn::IConnectableLayer* const normalizationLayer = net.AddNormalizationLayer(normalizationDesc, "normalization");
- BOOST_TEST(normalizationLayer);
-
- activationLayer->GetOutputSlot(0).Connect(normalizationLayer->GetInputSlot(0));
-
- armnn::SoftmaxDescriptor softmaxDesc;
- armnn::IConnectableLayer* const softmaxLayer = net.AddSoftmaxLayer(softmaxDesc, "softmax");
- BOOST_TEST(softmaxLayer);
-
- normalizationLayer->GetOutputSlot(0).Connect(softmaxLayer->GetInputSlot(0));
-
- armnn::BatchNormalizationDescriptor batchNormDesc;
-
- armnn::TensorInfo tensorInfo({ 1 }, armnn::DataType::Float32);
- std::vector<float> data(tensorInfo.GetNumBytes() / sizeof(float));
- armnn::ConstTensor invalidTensor(tensorInfo, data);
-
- armnn::IConnectableLayer* const batchNormalizationLayer = net.AddBatchNormalizationLayer(batchNormDesc,
- invalidTensor,
- invalidTensor,
- invalidTensor,
- invalidTensor,
- "batch norm");
- BOOST_TEST(batchNormalizationLayer);
-
- softmaxLayer->GetOutputSlot(0).Connect(batchNormalizationLayer->GetInputSlot(0));
-
- armnn::IConnectableLayer* const additionLayer = net.AddAdditionLayer("addition");
- BOOST_TEST(additionLayer);
-
- batchNormalizationLayer->GetOutputSlot(0).Connect(additionLayer->GetInputSlot(0));
- batchNormalizationLayer->GetOutputSlot(0).Connect(additionLayer->GetInputSlot(1));
-
- armnn::IConnectableLayer* const multiplicationLayer = net.AddMultiplicationLayer("multiplication");
- BOOST_TEST(multiplicationLayer);
-
- additionLayer->GetOutputSlot(0).Connect(multiplicationLayer->GetInputSlot(0));
- additionLayer->GetOutputSlot(0).Connect(multiplicationLayer->GetInputSlot(1));
-
- armnn::IConnectableLayer* const outputLayer = net.AddOutputLayer(0, "output layer");
- BOOST_TEST(outputLayer);
-
- multiplicationLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
-
- //Test that all layers are present in the graph
- BOOST_TEST(net.GetGraph().GetNumLayers() == 11);
-
- //Test that the vertices exist and have correct names
- BOOST_TEST(GraphHasNamedLayer(net.GetGraph(), "input layer"));
- BOOST_TEST(GraphHasNamedLayer(net.GetGraph(), "conv layer"));
- BOOST_TEST(GraphHasNamedLayer(net.GetGraph(), "fully connected"));
- BOOST_TEST(GraphHasNamedLayer(net.GetGraph(), "pooling2d"));
- BOOST_TEST(GraphHasNamedLayer(net.GetGraph(), "activation"));
- BOOST_TEST(GraphHasNamedLayer(net.GetGraph(), "normalization"));
- BOOST_TEST(GraphHasNamedLayer(net.GetGraph(), "softmax"));
- BOOST_TEST(GraphHasNamedLayer(net.GetGraph(), "batch norm"));
- BOOST_TEST(GraphHasNamedLayer(net.GetGraph(), "addition"));
- BOOST_TEST(GraphHasNamedLayer(net.GetGraph(), "multiplication"));
- BOOST_TEST(GraphHasNamedLayer(net.GetGraph(), "output layer"));
-
- auto checkOneOutputToOneInputConnection = []
- (const armnn::IConnectableLayer* const srcLayer,
- const armnn::IConnectableLayer* const tgtLayer,
- int expectedSrcNumInputs = 1,
- int expectedDstNumOutputs = 1)
- {
- BOOST_TEST(srcLayer->GetNumInputSlots() == expectedSrcNumInputs);
- BOOST_TEST(srcLayer->GetNumOutputSlots() == 1);
- BOOST_TEST(tgtLayer->GetNumInputSlots() == 1);
- BOOST_TEST(tgtLayer->GetNumOutputSlots() == expectedDstNumOutputs);
-
- BOOST_TEST(srcLayer->GetOutputSlot(0).GetNumConnections() == 1);
- BOOST_TEST(srcLayer->GetOutputSlot(0).GetConnection(0) == &tgtLayer->GetInputSlot(0));
- BOOST_TEST(&srcLayer->GetOutputSlot(0) == tgtLayer->GetInputSlot(0).GetConnection());
- };
- auto checkOneOutputToTwoInputsConnections = []
- (const armnn::IConnectableLayer* const srcLayer,
- const armnn::IConnectableLayer* const tgtLayer,
- int expectedSrcNumInputs,
- int expectedDstNumOutputs = 1)
- {
- BOOST_TEST(srcLayer->GetNumInputSlots() == expectedSrcNumInputs);
- BOOST_TEST(srcLayer->GetNumOutputSlots() == 1);
- BOOST_TEST(tgtLayer->GetNumInputSlots() == 2);
- BOOST_TEST(tgtLayer->GetNumOutputSlots() == expectedDstNumOutputs);
-
- BOOST_TEST(srcLayer->GetOutputSlot(0).GetNumConnections() == 2);
- for (unsigned int i = 0; i < srcLayer->GetOutputSlot(0).GetNumConnections(); ++i)
- {
- BOOST_TEST(srcLayer->GetOutputSlot(0).GetConnection(i) == &tgtLayer->GetInputSlot(i));
- BOOST_TEST(&srcLayer->GetOutputSlot(0) == tgtLayer->GetInputSlot(i).GetConnection());
- }
- };
-
- BOOST_TEST(AreAllLayerInputSlotsConnected(*convLayer));
- BOOST_TEST(AreAllLayerInputSlotsConnected(*fullyConnectedLayer));
- BOOST_TEST(AreAllLayerInputSlotsConnected(*poolingLayer));
- BOOST_TEST(AreAllLayerInputSlotsConnected(*activationLayer));
- BOOST_TEST(AreAllLayerInputSlotsConnected(*normalizationLayer));
- BOOST_TEST(AreAllLayerInputSlotsConnected(*softmaxLayer));
- BOOST_TEST(AreAllLayerInputSlotsConnected(*batchNormalizationLayer));
- BOOST_TEST(AreAllLayerInputSlotsConnected(*additionLayer));
- BOOST_TEST(AreAllLayerInputSlotsConnected(*multiplicationLayer));
- BOOST_TEST(AreAllLayerInputSlotsConnected(*outputLayer));
-
- // Check connectivity
- checkOneOutputToOneInputConnection(inputLayer, convLayer, 0);
- checkOneOutputToOneInputConnection(convLayer, fullyConnectedLayer);
- checkOneOutputToOneInputConnection(fullyConnectedLayer, poolingLayer);
- checkOneOutputToOneInputConnection(poolingLayer, activationLayer);
- checkOneOutputToOneInputConnection(activationLayer, normalizationLayer);
- checkOneOutputToOneInputConnection(normalizationLayer, softmaxLayer);
- checkOneOutputToOneInputConnection(softmaxLayer, batchNormalizationLayer);
- checkOneOutputToTwoInputsConnections(batchNormalizationLayer, additionLayer, 1);
- checkOneOutputToTwoInputsConnections(additionLayer, multiplicationLayer, 2);
- checkOneOutputToOneInputConnection(multiplicationLayer, outputLayer, 2, 0);
-}
-
-BOOST_AUTO_TEST_CASE(NetworkModification_SplitterMerger)
-{
- armnn::Network net;
-
- // Add an input layer and an input tensor descriptor.
- armnn::IConnectableLayer* inputLayer = net.AddInputLayer(0, "input layer");
- BOOST_TEST(inputLayer);
-
- // Add a splitter layer
- armnn::ViewsDescriptor splitterDesc(2,4);
-
- armnn::IConnectableLayer* splitterLayer = net.AddSplitterLayer(splitterDesc, "splitter layer");
- BOOST_TEST(splitterLayer);
-
- inputLayer->GetOutputSlot(0).Connect(splitterLayer->GetInputSlot(0));
-
- // Add a softmax layer 1
- armnn::SoftmaxDescriptor softmaxDescriptor;
- armnn::IConnectableLayer* softmaxLayer1 = net.AddSoftmaxLayer(softmaxDescriptor, "softmax_1");
- BOOST_TEST(softmaxLayer1);
-
- splitterLayer->GetOutputSlot(0).Connect(softmaxLayer1->GetInputSlot(0));
-
- // Add a softmax layer 2
- armnn::IConnectableLayer* softmaxLayer2 = net.AddSoftmaxLayer(softmaxDescriptor, "softmax_2");
- BOOST_TEST(softmaxLayer2);
-
- splitterLayer->GetOutputSlot(1).Connect(softmaxLayer2->GetInputSlot(0));
-
- // Add a merger layer
- armnn::OriginsDescriptor mergerDesc(2, 4);
-
- armnn::IConnectableLayer* mergerLayer = net.AddMergerLayer(mergerDesc, "merger layer");
- BOOST_TEST(mergerLayer);
-
- softmaxLayer1->GetOutputSlot(0).Connect(mergerLayer->GetInputSlot(0));
- softmaxLayer2->GetOutputSlot(0).Connect(mergerLayer->GetInputSlot(1));
-
- // Add an output layer
- armnn::IConnectableLayer* outputLayer = net.AddOutputLayer(0, "output layer");
- BOOST_TEST(outputLayer);
-
- mergerLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
-
- BOOST_TEST(splitterLayer->GetNumOutputSlots() == 2);
- BOOST_TEST(splitterLayer->GetOutputSlot(0).GetConnection(0) == &softmaxLayer1->GetInputSlot(0));
- BOOST_TEST(&splitterLayer->GetOutputSlot(0) == softmaxLayer1->GetInputSlot(0).GetConnection());
- BOOST_TEST(splitterLayer->GetOutputSlot(1).GetConnection(0) == &softmaxLayer2->GetInputSlot(0));
- BOOST_TEST(&splitterLayer->GetOutputSlot(1) == softmaxLayer2->GetInputSlot(0).GetConnection());
-
- BOOST_TEST(mergerLayer->GetNumInputSlots() == 2);
- BOOST_TEST(softmaxLayer1->GetOutputSlot(0).GetConnection(0) == &mergerLayer->GetInputSlot(0));
- BOOST_TEST(&softmaxLayer1->GetOutputSlot(0) == mergerLayer->GetInputSlot(0).GetConnection());
- BOOST_TEST(softmaxLayer2->GetOutputSlot(0).GetConnection(0) == &mergerLayer->GetInputSlot(1));
- BOOST_TEST(&softmaxLayer2->GetOutputSlot(0) == mergerLayer->GetInputSlot(1).GetConnection());
-}
-
-BOOST_AUTO_TEST_CASE(NetworkModification_SplitterAddition)
-{
- armnn::Network net;
-
- // Add an input layer and an input tensor descriptor.
- armnn::IConnectableLayer* layer = net.AddInputLayer(0, "input layer");
- BOOST_TEST(layer);
-
- // Add a splitter layer
- armnn::ViewsDescriptor splitterDesc(2,4);
-
- armnn::IConnectableLayer* const splitterLayer = net.AddSplitterLayer(splitterDesc, "splitter layer");
- BOOST_TEST(splitterLayer);
-
- layer->GetOutputSlot(0).Connect(splitterLayer->GetInputSlot(0));
-
- // Add a softmax layer 1
- armnn::SoftmaxDescriptor softmaxDescriptor;
- armnn::IConnectableLayer* const softmax1Layer = net.AddSoftmaxLayer(softmaxDescriptor, "softmax_1");
- BOOST_TEST(softmax1Layer);
-
- splitterLayer->GetOutputSlot(0).Connect(softmax1Layer->GetInputSlot(0));
-
- // Add a softmax layer 2
- armnn::IConnectableLayer* const softmax2Layer = net.AddSoftmaxLayer(softmaxDescriptor, "softmax_2");
- BOOST_TEST(softmax2Layer);
-
- splitterLayer->GetOutputSlot(1).Connect(softmax2Layer->GetInputSlot(0));
-
- // Add addition layer
- layer = net.AddAdditionLayer("add layer");
- BOOST_TEST(layer);
-
- softmax1Layer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
- softmax2Layer->GetOutputSlot(0).Connect(layer->GetInputSlot(1));
-
- // Add an output layer
- armnn::IConnectableLayer* prevLayer = layer;
- layer = net.AddOutputLayer(0, "output layer");
-
- prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
-
- BOOST_TEST(layer);
-}
-
-BOOST_AUTO_TEST_CASE(NetworkModification_SplitterMultiplication)
-{
- armnn::Network net;
-
- // Add an input layer and an input tensor descriptor.
- armnn::IConnectableLayer* layer = net.AddInputLayer(0, "input layer");
- BOOST_TEST(layer);
-
- // Add a splitter layer
- armnn::ViewsDescriptor splitterDesc(2,4);
- armnn::IConnectableLayer* const splitterLayer = net.AddSplitterLayer(splitterDesc, "splitter layer");
- BOOST_TEST(splitterLayer);
-
- layer->GetOutputSlot(0).Connect(splitterLayer->GetInputSlot(0));
-
- // Add a softmax layer 1
- armnn::SoftmaxDescriptor softmaxDescriptor;
- armnn::IConnectableLayer* const softmax1Layer = net.AddSoftmaxLayer(softmaxDescriptor, "softmax_1");
- BOOST_TEST(softmax1Layer);
-
- splitterLayer->GetOutputSlot(0).Connect(softmax1Layer->GetInputSlot(0));
-
- // Add a softmax layer 2
- armnn::IConnectableLayer* const softmax2Layer = net.AddSoftmaxLayer(softmaxDescriptor, "softmax_2");
- BOOST_TEST(softmax2Layer);
-
- splitterLayer->GetOutputSlot(1).Connect(softmax2Layer->GetInputSlot(0));
-
- // Add multiplication layer
- layer = net.AddMultiplicationLayer("multiplication layer");
- BOOST_TEST(layer);
-
- softmax1Layer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
- softmax2Layer->GetOutputSlot(0).Connect(layer->GetInputSlot(1));
-
- // Add an output layer
- armnn::IConnectableLayer* prevLayer = layer;
- layer = net.AddOutputLayer(0, "output layer");
- BOOST_TEST(layer);
-
- prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
-}
-
-BOOST_AUTO_TEST_CASE(ValidateWorkloads)
-{
- const armnn::TensorInfo desc({3, 5}, armnn::DataType::Float32);
-
- armnn::Network net;
-
- armnn::NormalizationDescriptor nmDesc;
- armnn::ActivationDescriptor acDesc;
-
- // in
- // |
- // nm
- // / |
- // ac |
- // \ |
- // ml
- // |
- // sm
- // |
- // ot
- armnn::IConnectableLayer* layer = net.AddInputLayer(0, "in");
- layer->GetOutputSlot(0).SetTensorInfo(desc);
-
- armnn::IConnectableLayer* const normLayer = net.AddNormalizationLayer(nmDesc, "nm");
-
- layer->GetOutputSlot(0).Connect(normLayer->GetInputSlot(0));
- normLayer->GetOutputSlot(0).SetTensorInfo(desc);
-
- layer = net.AddActivationLayer(acDesc, "ac");
-
- normLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
- layer->GetOutputSlot(0).SetTensorInfo(desc);
-
- armnn::IConnectableLayer* prevLayer = layer;
- layer = net.AddMultiplicationLayer("ml");
-
- prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
- normLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1));
- layer->GetOutputSlot(0).SetTensorInfo(desc);
-
- prevLayer = layer;
- armnn::SoftmaxDescriptor softmaxDescriptor;
- layer = net.AddSoftmaxLayer(softmaxDescriptor, "sm");
-
- prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
- layer->GetOutputSlot(0).SetTensorInfo(desc);
-
- prevLayer = layer;
- layer = net.AddOutputLayer(0, "ot");
-
- prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
-
- armnn::DeviceSpec spec;
- spec.DefaultComputeDevice = armnn::Compute::CpuRef;
-
- armnn::IOptimizedNetworkPtr optNet = Optimize(net, spec);
- static_cast<armnn::OptimizedNetwork*>(optNet.get())->GetGraph().AllocateDynamicBuffers();
-
- // validate workloads
- armnn::RefWorkloadFactory fact;
- for (auto&& layer : static_cast<armnn::OptimizedNetwork*>(optNet.get())->GetGraph())
- {
- BOOST_CHECK_NO_THROW(
- layer->CreateWorkload(static_cast<armnn::OptimizedNetwork*>(optNet.get())->GetGraph(), fact));
- }
-}
-
-BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/armnn/test/ObservableTest.cpp b/src/armnn/test/ObservableTest.cpp
new file mode 100644
index 0000000000..6588f3469e
--- /dev/null
+++ b/src/armnn/test/ObservableTest.cpp
@@ -0,0 +1,94 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include <boost/test/unit_test.hpp>
+
+#include "Graph.hpp"
+#include "Observable.hpp"
+
+BOOST_AUTO_TEST_SUITE(Observable)
+
+BOOST_AUTO_TEST_CASE(AddedLayerObservableTest)
+{
+ armnn::Graph graph;
+
+ // Create a graph observable
+ armnn::AddedLayerObservable layerObservable(graph);
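+ // The observable registers with the graph and records every layer added from this point on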
+
+ // Add a few layers
+ auto output = graph.AddLayer<armnn::OutputLayer>(0, "output");
+ auto input = graph.InsertNewLayer<armnn::InputLayer>(output->GetInputSlot(0), 0, "input");
+
+ // Check the observable has observed the changes
+ std::list<armnn::Layer*> testLayers({ output, input });
+
+ BOOST_CHECK_EQUAL_COLLECTIONS(layerObservable.begin(), layerObservable.end(),
+ testLayers.begin(), testLayers.end());
+}
+
+BOOST_AUTO_TEST_CASE(ClearAddedLayerObservableTest)
+{
+ armnn::Graph graph;
+
+ // Create a graph observable
+ armnn::AddedLayerObservable addedLayerObservable(graph);
+
+ // Add a few layers
+ auto output = graph.AddLayer<armnn::OutputLayer>(0, "output");
+ graph.InsertNewLayer<armnn::InputLayer>(output->GetInputSlot(0), 0, "input");
+
+ addedLayerObservable.Clear();
+
+ // Check the observable has observed the changes
+ std::list<armnn::Layer*> emptyList({});
+
+ BOOST_CHECK_EQUAL_COLLECTIONS(addedLayerObservable.begin(), addedLayerObservable.end(),
+ emptyList.begin(), emptyList.end());
+}
+
+BOOST_AUTO_TEST_CASE(ErasedLayerNamesObservableTest)
+{
+ armnn::Graph graph;
+
+ // Create a graph observable
+ armnn::ErasedLayerNamesObservable erasedLayerNamesObservable(graph);
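+ // The observable records the name of every layer erased from the graph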
+
+ // Add a few layers
+ auto output = graph.AddLayer<armnn::OutputLayer>(0, "output");
+ graph.InsertNewLayer<armnn::InputLayer>(output->GetInputSlot(0), 0, "input");
+
+ graph.EraseLayer(output);
+
+ // Check the observable has observed the changes
+ std::list<std::string> testList({"output"});
+
+ BOOST_CHECK_EQUAL_COLLECTIONS(erasedLayerNamesObservable.begin(), erasedLayerNamesObservable.end(),
+ testList.begin(), testList.end());
+}
+
+BOOST_AUTO_TEST_CASE(ClearErasedLayerNamesObservableTest)
+{
+ armnn::Graph graph;
+
+ // Create a graph observable
+ armnn::ErasedLayerNamesObservable erasedLayerNamesObservable(graph);
+
+ // Add a few layers
+ auto output = graph.AddLayer<armnn::OutputLayer>(0, "output");
+ graph.InsertNewLayer<armnn::InputLayer>(output->GetInputSlot(0), 0, "input");
+
+ graph.EraseLayer(output);
+
+ erasedLayerNamesObservable.Clear();
+
+ // Check the observable has observed the changes
+ std::list<std::string> emptyList({});
+
+ BOOST_CHECK_EQUAL_COLLECTIONS(erasedLayerNamesObservable.begin(), erasedLayerNamesObservable.end(),
+ emptyList.begin(), emptyList.end());
+}
+
+BOOST_AUTO_TEST_SUITE_END()
+
diff --git a/src/armnn/test/OpenClTimerTest.cpp b/src/armnn/test/OpenClTimerTest.cpp
new file mode 100644
index 0000000000..b8dea8ebe0
--- /dev/null
+++ b/src/armnn/test/OpenClTimerTest.cpp
@@ -0,0 +1,137 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#if (defined(__aarch64__)) || (defined(__x86_64__)) // Disabled on Firefly/Armv7, where this test fails.
+
+#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "backends/ClContextControl.hpp"
+#include "backends/ClWorkloadFactory.hpp"
+#include "backends/CpuTensorHandle.hpp"
+#include <boost/format.hpp>
+#include <iostream>
+#include "OpenClTimer.hpp"
+#include "backends/test/TensorCopyUtils.hpp"
+#include "TensorHelpers.hpp"
+#include <boost/test/unit_test.hpp>
+#include "backends/WorkloadFactory.hpp"
+#include "backends/test/WorkloadTestUtils.hpp"
+
+using namespace armnn;
+
+struct OpenClFixture
+{
+ // Initialising ClContextControl to ensure OpenCL is loaded correctly for each test case.
+ // NOTE: Profiling needs to be enabled in ClContextControl to be able to obtain execution
+ // times from OpenClTimer.
+ OpenClFixture() : m_ClContextControl(nullptr, true) {}
+ ~OpenClFixture() {}
+
+ ClContextControl m_ClContextControl;
+};
+
+BOOST_FIXTURE_TEST_SUITE(OpenClTimerBatchNorm, OpenClFixture)
+using FactoryType = ClWorkloadFactory;
+
+BOOST_AUTO_TEST_CASE(OpenClTimerBatchNorm)
+{
+ ClWorkloadFactory workloadFactory;
+
+ const unsigned int width = 2;
+ const unsigned int height = 3;
+ const unsigned int channels = 2;
+ const unsigned int num = 1;
+ int32_t qOffset = 0;
+ float qScale = 0.f;
+
+ TensorInfo inputTensorInfo({num, channels, height, width}, GetDataType<float>());
+ TensorInfo outputTensorInfo({num, channels, height, width}, GetDataType<float>());
+ TensorInfo tensorInfo({channels}, GetDataType<float>());
+
+ // Set quantization parameters if the requested type is a quantized type.
+ if(IsQuantizedType<float>())
+ {
+ inputTensorInfo.SetQuantizationScale(qScale);
+ inputTensorInfo.SetQuantizationOffset(qOffset);
+ outputTensorInfo.SetQuantizationScale(qScale);
+ outputTensorInfo.SetQuantizationOffset(qOffset);
+ tensorInfo.SetQuantizationScale(qScale);
+ tensorInfo.SetQuantizationOffset(qOffset);
+ }
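+ // (For float this block is skipped, since IsQuantizedType<float>() is false.)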
+
+ auto input = MakeTensor<float, 4>(inputTensorInfo,
+ QuantizedVector<float>(qScale, qOffset,
+ {
+ 1.f, 4.f,
+ 4.f, 2.f,
+ 1.f, 6.f,
+
+ 1.f, 1.f,
+ 4.f, 1.f,
+ -2.f, 4.f
+ }));
+ // These values are defined per channel of the input.
+ auto mean = MakeTensor<float, 1>(tensorInfo, QuantizedVector<float>(qScale, qOffset, {3, -2}));
+ auto variance = MakeTensor<float, 1>(tensorInfo, QuantizedVector<float>(qScale, qOffset, {4, 9}));
+ auto beta = MakeTensor<float, 1>(tensorInfo, QuantizedVector<float>(qScale, qOffset, {3, 2}));
+ auto gamma = MakeTensor<float, 1>(tensorInfo, QuantizedVector<float>(qScale, qOffset, {2, 1}));
+
+ std::unique_ptr<ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
+ std::unique_ptr<ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
+
+ BatchNormalizationQueueDescriptor data;
+ WorkloadInfo info;
+ ScopedCpuTensorHandle meanTensor(tensorInfo);
+ ScopedCpuTensorHandle varianceTensor(tensorInfo);
+ ScopedCpuTensorHandle betaTensor(tensorInfo);
+ ScopedCpuTensorHandle gammaTensor(tensorInfo);
+
+ AllocateAndCopyDataToITensorHandle(&meanTensor, &mean[0]);
+ AllocateAndCopyDataToITensorHandle(&varianceTensor, &variance[0]);
+ AllocateAndCopyDataToITensorHandle(&betaTensor, &beta[0]);
+ AllocateAndCopyDataToITensorHandle(&gammaTensor, &gamma[0]);
+
+ AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
+ AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
+ data.m_Mean = &meanTensor;
+ data.m_Variance = &varianceTensor;
+ data.m_Beta = &betaTensor;
+ data.m_Gamma = &gammaTensor;
+ data.m_Parameters.m_Eps = 0.0f;
+
+ // For each channel:
+ // subtract the mean, divide by the standard deviation (with an epsilon to avoid div by 0),
+ // then multiply by gamma and add beta.
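+ // I.e. output = gamma * (input - mean) / sqrt(variance + eps) + beta.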
+ std::unique_ptr<IWorkload> workload = workloadFactory.CreateBatchNormalization(data, info);
+
+ inputHandle->Allocate();
+ outputHandle->Allocate();
+
+ CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
+
+ OpenClTimer openClTimer;
+
+ BOOST_CHECK_EQUAL(openClTimer.GetName(), "OpenClKernelTimer");
+
+ // Starts the timer.
+ openClTimer.Start();
+
+ // Executes the workload.
+ workload->Execute();
+
+ // Stops the timer.
+ openClTimer.Stop();
+
+ BOOST_CHECK_EQUAL(openClTimer.GetMeasurements().size(), 1);
+
+ BOOST_CHECK_EQUAL(openClTimer.GetMeasurements().front().m_Name,
+ "OpenClKernelTimer/0: batchnormalization_layer_nchw GWS[1,3,2]");
+
+ BOOST_CHECK(openClTimer.GetMeasurements().front().m_Value > 0);
+}
+
+BOOST_AUTO_TEST_SUITE_END()
+
+#endif //aarch64 or x86_64 \ No newline at end of file
diff --git a/src/armnn/test/OptimizerTests.cpp b/src/armnn/test/OptimizerTests.cpp
index da26fba76e..0c1a2619b2 100644
--- a/src/armnn/test/OptimizerTests.cpp
+++ b/src/armnn/test/OptimizerTests.cpp
@@ -7,6 +7,8 @@
#include "armnn/ArmNN.hpp"
#include "Graph.hpp"
#include "Optimizer.hpp"
+#include "backends/CpuTensorHandle.hpp"
+#include "FloatingPointConverter.hpp"
namespace
{
@@ -21,7 +23,7 @@ bool CheckSequence(const armnn::Graph::ConstIterator first, const armnn::Graph::
return (first == last);
}
-/// Check each unary function in Us evaluates true for each correspondent layer in the sequence [first, last)
+/// Checks each unary function in Us evaluates true for each correspondent layer in the sequence [first, last).
template <typename U, typename... Us>
bool CheckSequence(const armnn::Graph::ConstIterator first,
const armnn::Graph::ConstIterator last,
@@ -30,11 +32,149 @@ bool CheckSequence(const armnn::Graph::ConstIterator first,
{
return u(*first) && CheckSequence(std::next(first), last, us...);
}
+
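+// Checks that every layer of type LayerT in the graph records exactly the given related-layer names.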
+template <typename LayerT>
+bool CheckRelatedLayers(armnn::Graph& graph, const std::list<std::string>& testRelatedLayers)
+{
+ for (auto& layer : graph)
+ {
+ if (layer->GetType() == armnn::LayerEnumOf<LayerT>())
+ {
+ auto& relatedLayers = layer->GetRelatedLayerNames();
+ if(!std::equal(relatedLayers.begin(), relatedLayers.end(),
+ testRelatedLayers.begin(), testRelatedLayers.end()))
+ {
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
+
+using namespace armnn;
+
+// Connects two layers.
+void Connect(Layer* from, Layer* to, const TensorInfo& tensorInfo, unsigned int fromIndex = 0, unsigned int toIndex = 0)
+{
+ from->GetOutputSlot(fromIndex).Connect(to->GetInputSlot(toIndex));
+ from->GetOutputHandler(fromIndex).SetTensorInfo(tensorInfo);
+}
+
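+// Builds a graph containing a single LSTM layer with all of its weights allocated,
+// connected to the required input and output layers.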
+void CreateLSTMLayerHelper(Graph &graph, bool CifgEnabled)
+{
+ LstmDescriptor layerDesc;
+ layerDesc.m_ActivationFunc = 4;
+ layerDesc.m_ClippingThresCell = 0.2f;
+ layerDesc.m_ClippingThresProj = 0.4f;
+ layerDesc.m_CifgEnabled = CifgEnabled;
+ layerDesc.m_PeepholeEnabled = false;
+ layerDesc.m_ProjectionEnabled = false;
+
+ LstmLayer* const layer = graph.AddLayer<LstmLayer>(layerDesc, "layer");
+ unsigned int batchSize = 3;
+ unsigned int inputSize = 2;
+ unsigned int numUnits = 4;
+ unsigned int outputSize = 4;
+
+ layer->m_BasicParameters.m_InputToForgetWeights = std::make_unique<ScopedCpuTensorHandle>
+ (TensorInfo({ numUnits, inputSize }, DataType::Float32));
+ layer->m_BasicParameters.m_InputToCellWeights = std::make_unique<ScopedCpuTensorHandle>
+ (TensorInfo({ numUnits, inputSize }, DataType::Float32));
+ layer->m_BasicParameters.m_InputToOutputWeights = std::make_unique<ScopedCpuTensorHandle>
+ (TensorInfo({ numUnits, inputSize }, DataType::Float32));
+ layer->m_BasicParameters.m_RecurrentToForgetWeights = std::make_unique<ScopedCpuTensorHandle>
+ (TensorInfo({ numUnits, outputSize }, DataType::Float32));
+ layer->m_BasicParameters.m_RecurrentToCellWeights = std::make_unique<ScopedCpuTensorHandle>
+ (TensorInfo({ numUnits, outputSize }, DataType::Float32));
+ layer->m_BasicParameters.m_RecurrentToOutputWeights = std::make_unique<ScopedCpuTensorHandle>
+ (TensorInfo({ numUnits, outputSize }, DataType::Float32));
+ layer->m_BasicParameters.m_ForgetGateBias = std::make_unique<ScopedCpuTensorHandle>
+ (TensorInfo({ numUnits }, DataType::Float32));
+ layer->m_BasicParameters.m_CellBias = std::make_unique<ScopedCpuTensorHandle>
+ (TensorInfo({ numUnits }, DataType::Float32));
+ layer->m_BasicParameters.m_OutputGateBias = std::make_unique<ScopedCpuTensorHandle>
+ (TensorInfo({ numUnits }, DataType::Float32));
+
+ layer->m_BasicParameters.m_InputToForgetWeights->Allocate();
+ layer->m_BasicParameters.m_InputToCellWeights->Allocate();
+ layer->m_BasicParameters.m_InputToOutputWeights->Allocate();
+ layer->m_BasicParameters.m_RecurrentToForgetWeights->Allocate();
+ layer->m_BasicParameters.m_RecurrentToCellWeights->Allocate();
+ layer->m_BasicParameters.m_RecurrentToOutputWeights->Allocate();
+ layer->m_BasicParameters.m_ForgetGateBias->Allocate();
+ layer->m_BasicParameters.m_CellBias->Allocate();
+ layer->m_BasicParameters.m_OutputGateBias->Allocate();
+
+ if (!layerDesc.m_CifgEnabled)
+ {
+ layer->m_CifgParameters.m_InputToInputWeights = std::make_unique<ScopedCpuTensorHandle>
+ (TensorInfo({ numUnits, inputSize }, DataType::Float32));
+ layer->m_CifgParameters.m_RecurrentToInputWeights = std::make_unique<ScopedCpuTensorHandle>
+ (TensorInfo({ numUnits, outputSize }, DataType::Float32));
+ layer->m_CifgParameters.m_CellToInputWeights = std::make_unique<ScopedCpuTensorHandle>
+ (TensorInfo({ numUnits }, DataType::Float32));
+ layer->m_CifgParameters.m_InputGateBias = std::make_unique<ScopedCpuTensorHandle>
+ (TensorInfo({ numUnits }, DataType::Float32));
+ layer->m_CifgParameters.m_InputToInputWeights->Allocate();
+ layer->m_CifgParameters.m_RecurrentToInputWeights->Allocate();
+ layer->m_CifgParameters.m_CellToInputWeights->Allocate();
+ layer->m_CifgParameters.m_InputGateBias->Allocate();
+ }
+
+ if (layerDesc.m_ProjectionEnabled)
+ {
+ layer->m_ProjectionParameters.m_ProjectionWeights = std::make_unique<ScopedCpuTensorHandle>
+ (TensorInfo({ outputSize, numUnits }, DataType::Float32));
+ layer->m_ProjectionParameters.m_ProjectionBias = std::make_unique<ScopedCpuTensorHandle>
+ (TensorInfo({ outputSize }, DataType::Float32));
+ layer->m_ProjectionParameters.m_ProjectionWeights->Allocate();
+ layer->m_ProjectionParameters.m_ProjectionBias->Allocate();
+ }
+
+ if (layerDesc.m_PeepholeEnabled)
+ {
+ layer->m_PeepholeParameters.m_CellToForgetWeights = std::make_unique<ScopedCpuTensorHandle>
+ (TensorInfo({ numUnits }, DataType::Float32));
+ layer->m_PeepholeParameters.m_CellToOutputWeights = std::make_unique<ScopedCpuTensorHandle>
+ (TensorInfo({ numUnits }, DataType::Float32));
+ layer->m_PeepholeParameters.m_CellToForgetWeights->Allocate();
+ layer->m_PeepholeParameters.m_CellToOutputWeights->Allocate();
+ }
+
+ // Creates the input and output layers.
+ Layer* const input = graph.AddLayer<InputLayer>(0, "input");
+ Layer* const outputStateIn = graph.AddLayer<InputLayer>(1, "outputStateIn");
+ Layer* const cellStateIn = graph.AddLayer<InputLayer>(2, "cellStateIn");
+ Layer* const scratchBuffer = graph.AddLayer<OutputLayer>(0, "scratchBuffer");
+ Layer* const outputStateOut = graph.AddLayer<OutputLayer>(1, "outputStateOut");
+ Layer* const cellStateOut = graph.AddLayer<OutputLayer>(2, "cellStateOut");
+ Layer* const output = graph.AddLayer<OutputLayer>(3, "output");
+
+ // Connects up the inputs and outputs.
+ armnn::TensorInfo lstmTensorInfo1({ batchSize, inputSize }, DataType::Float32);
+ armnn::TensorInfo lstmTensorInfo2({ batchSize, numUnits}, DataType::Float32);
+ armnn::TensorInfo lstmTensorInfo3({ batchSize, outputSize }, DataType::Float32);
+ armnn::TensorInfo lstmTensorInfoScratchBuff({ batchSize, numUnits*3 }, DataType::Float32);
+ if (layerDesc.m_CifgEnabled)
+ {
+ lstmTensorInfoScratchBuff.SetShape({ batchSize, numUnits*4 });
+ }
+
+ Connect(input, layer, lstmTensorInfo1, 0, 0);
+ Connect(cellStateIn, layer, lstmTensorInfo2, 0, 1);
+ Connect(outputStateIn, layer, lstmTensorInfo3, 0, 2);
+ Connect(layer, scratchBuffer, lstmTensorInfoScratchBuff, 0, 0);
+ Connect(layer, outputStateOut, lstmTensorInfo3, 1, 0);
+ Connect(layer, cellStateOut, lstmTensorInfo2, 2, 0);
+ Connect(layer, output, lstmTensorInfo3, 3, 0);
+}
+
}
BOOST_AUTO_TEST_SUITE(Optimizer)
+using namespace armnn::optimizations;
-BOOST_AUTO_TEST_CASE(OptimizeInversePermutes)
+BOOST_AUTO_TEST_CASE(OptimizeInversePermutesTest)
{
armnn::Graph graph;
@@ -42,7 +182,7 @@ BOOST_AUTO_TEST_CASE(OptimizeInversePermutes)
graph.InsertNewLayer<armnn::InputLayer>(output->GetInputSlot(0), 0, "input");
- // Insert two permutes, one the inverse of the other
+ // Inserts two permutes, one the inverse of the other.
graph.InsertNewLayer<armnn::PermuteLayer>(output->GetInputSlot(0),
armnn::PermuteDescriptor({0, 2, 3, 1}),
"perm0231");
@@ -57,16 +197,38 @@ BOOST_AUTO_TEST_CASE(OptimizeInversePermutes)
&IsLayerOfType<armnn::PermuteLayer>,
&IsLayerOfType<armnn::OutputLayer>));
- armnn::Optimizer::Optimize(graph);
+ armnn::Optimizer::Pass(graph, armnn::MakeOptimizations(OptimizeInversePermutes()));
- // The permutes are removed
+ // The permutes are removed.
BOOST_TEST(CheckSequence(graph.cbegin(),
graph.cend(),
&IsLayerOfType<armnn::InputLayer>,
&IsLayerOfType<armnn::OutputLayer>));
}
-BOOST_AUTO_TEST_CASE(MovePermuteUp)
+BOOST_AUTO_TEST_CASE(LSTMValidateTensorShapesFromInputsCIFGDisabledTest)
+{
+ Graph graph;
+
+ // The helper function creates a graph containing an LSTM layer with the required input and output layers.
+ CreateLSTMLayerHelper(graph, false);
+
+ // This function used to call ValidateShapesFromInputs().
+ BOOST_CHECK_NO_THROW(graph.InferTensorInfos());
+}
+
+BOOST_AUTO_TEST_CASE(LSTMValidateTensorShapesFromInputsCIFGEnabledTest)
+{
+ Graph graph;
+
+ // The helper function creates a graph containing an LSTM layer with the required input and output layers.
+ CreateLSTMLayerHelper(graph, true);
+
+ // This function used to call ValidateShapesFromInputs().
+ BOOST_CHECK_NO_THROW(graph.InferTensorInfos());
+}
+
+BOOST_AUTO_TEST_CASE(MovePermuteUpTest)
{
const armnn::TensorInfo info({ 1, 5, 2, 3 }, armnn::DataType::Float32);
const armnn::TensorInfo permuted({ 1, 3, 5, 2 }, armnn::DataType::Float32);
@@ -77,12 +239,16 @@ BOOST_AUTO_TEST_CASE(MovePermuteUp)
armnn::Layer* head = graph.AddLayer<armnn::OutputLayer>(0, "output");
+ std::string permuteLayerName = "original_permute";
+
// Insert permute
head = graph.InsertNewLayer<armnn::PermuteLayer>(head->GetInputSlot(0),
- armnn::PermuteDescriptor({ 0, 2, 3, 1 }), "");
+ armnn::PermuteDescriptor({ 0, 2, 3, 1 }),
+ permuteLayerName.c_str());
+
head->GetOutputHandler().SetTensorInfo(permuted);
- // Insert layers that don't care about data format
+ // Inserts layers that don't care about data format.
head = graph.InsertNewLayer<armnn::ActivationLayer>(head->GetInputSlot(0),
armnn::ActivationDescriptor{}, "");
head->GetOutputHandler().SetTensorInfo(info);
@@ -90,7 +256,7 @@ BOOST_AUTO_TEST_CASE(MovePermuteUp)
head = graph.InsertNewLayer<armnn::AdditionLayer>(head->GetInputSlot(0), "");
head->GetOutputHandler().SetTensorInfo(info);
- // Insert input for 2nd input of Addition
+ // Inserts input for 2nd input of Addition.
graph.InsertNewLayer<armnn::InputLayer>(head->GetInputSlot(1), inputId++, "")
->GetOutputHandler().SetTensorInfo(info);
@@ -107,11 +273,11 @@ BOOST_AUTO_TEST_CASE(MovePermuteUp)
head = graph.InsertNewLayer<armnn::MultiplicationLayer>(head->GetInputSlot(0), "");
head->GetOutputHandler().SetTensorInfo(info);
- // Insert input for 2nd input of Multiplication
+ // Inserts input for 2nd input of Multiplication.
graph.InsertNewLayer<armnn::InputLayer>(head->GetInputSlot(1), inputId++, "")
->GetOutputHandler().SetTensorInfo(info);
- // Insert input
+ // Inserts input.
graph.InsertNewLayer<armnn::InputLayer>(head->GetInputSlot(0), inputId++, "")
->GetOutputHandler().SetTensorInfo(info);
@@ -129,9 +295,9 @@ BOOST_AUTO_TEST_CASE(MovePermuteUp)
&IsLayerOfType<armnn::PermuteLayer>,
&IsLayerOfType<armnn::OutputLayer>));
- armnn::Optimizer::Optimize(graph);
+ armnn::Optimizer::Pass(graph, armnn::MakeOptimizations(MovePermuteUp()));
- // The permute is moved to the top. New permutes for layers with multiple inputs
+ // The permute is moved to the top. New permutes for layers with multiple inputs.
BOOST_TEST(CheckSequence(graph.cbegin(),
graph.cend(),
&IsLayerOfType<armnn::InputLayer>,
@@ -147,12 +313,18 @@ BOOST_AUTO_TEST_CASE(MovePermuteUp)
&IsLayerOfType<armnn::AdditionLayer>,
&IsLayerOfType<armnn::ActivationLayer>,
&IsLayerOfType<armnn::OutputLayer>));
+
+ std::list<std::string> testRelatedLayers = { permuteLayerName };
+
+ BOOST_TEST(CheckRelatedLayers<armnn::PermuteLayer>(graph, testRelatedLayers));
}
-BOOST_AUTO_TEST_CASE(PermuteAsReshape)
+BOOST_AUTO_TEST_CASE(PermuteAsReshapeTest)
{
armnn::Graph graph;
+ std::string permuteLayerName = "permute";
+
const armnn::TensorInfo infoIn({ 1, 2, 3, 1 }, armnn::DataType::Float32);
const armnn::TensorInfo infoOut({ 1, 1, 2, 3 }, armnn::DataType::Float32);
@@ -161,9 +333,9 @@ BOOST_AUTO_TEST_CASE(PermuteAsReshape)
graph.InsertNewLayer<armnn::InputLayer>(output->GetInputSlot(0), 0, "input")
->GetOutputHandler().SetTensorInfo(infoIn);
- // Insert permute
+ // Inserts permute.
graph.InsertNewLayer<armnn::PermuteLayer>(output->GetInputSlot(0),
- armnn::PermuteDescriptor({ 0, 2, 3, 1 }), "")
+ armnn::PermuteDescriptor({ 0, 2, 3, 1 }), permuteLayerName.c_str())
->GetOutputHandler().SetTensorInfo(infoOut);
BOOST_TEST(CheckSequence(graph.cbegin(),
@@ -172,7 +344,7 @@ BOOST_AUTO_TEST_CASE(PermuteAsReshape)
&IsLayerOfType<armnn::PermuteLayer>,
&IsLayerOfType<armnn::OutputLayer>));
- armnn::Optimizer::Optimize(graph);
+ armnn::Optimizer::Pass(graph, armnn::MakeOptimizations(PermuteAsReshape()));
// The permute is replaced by an equivalent reshape.
@@ -189,9 +361,13 @@ BOOST_AUTO_TEST_CASE(PermuteAsReshape)
&IsLayerOfType<armnn::InputLayer>,
checkReshape,
&IsLayerOfType<armnn::OutputLayer>));
+
+ std::list<std::string> testRelatedLayers = { permuteLayerName };
+ BOOST_TEST(CheckRelatedLayers<armnn::ReshapeLayer>(graph, testRelatedLayers));
}
-BOOST_AUTO_TEST_CASE(OptimizeConsecutiveReshapes)
+BOOST_AUTO_TEST_CASE(OptimizeConsecutiveReshapesTest)
{
armnn::Graph graph;
@@ -203,16 +379,19 @@ BOOST_AUTO_TEST_CASE(OptimizeConsecutiveReshapes)
input->GetOutputHandler().SetTensorInfo(info0);
{
- // Insert two reshapes
+ // Inserts two reshapes.
const armnn::TensorInfo info1({1, 30, 1, 1}, armnn::DataType::Float32);
const armnn::TensorInfo info2({1, 2, 1, 15}, armnn::DataType::Float32);
+ std::string reshape1Name = "reshape1";
+ std::string reshape2Name = "reshape2";
+
auto reshape1 = graph.InsertNewLayer<armnn::ReshapeLayer>(output->GetInputSlot(0),
armnn::ReshapeDescriptor{ info1.GetShape() },
- "reshape1");
+ reshape1Name.c_str());
auto reshape2 = graph.InsertNewLayer<armnn::ReshapeLayer>(output->GetInputSlot(0),
armnn::ReshapeDescriptor{ info2.GetShape() },
- "reshape2");
+ reshape2Name.c_str());
reshape1->GetOutputHandler().SetTensorInfo(info1);
reshape2->GetOutputHandler().SetTensorInfo(info2);
@@ -224,7 +403,7 @@ BOOST_AUTO_TEST_CASE(OptimizeConsecutiveReshapes)
&IsLayerOfType<armnn::ReshapeLayer>,
&IsLayerOfType<armnn::OutputLayer>));
- armnn::Optimizer::Optimize(graph);
+ armnn::Optimizer::Pass(graph, armnn::MakeOptimizations(OptimizeConsecutiveReshapes()));
auto checkReshape = [&info2](const armnn::Layer* const layer) -> bool
{
@@ -234,25 +413,30 @@ BOOST_AUTO_TEST_CASE(OptimizeConsecutiveReshapes)
(reshapeLayer->GetOutputHandler().GetTensorInfo().GetShape() == info2.GetShape());
};
- // The two reshapes are replaced by a single equivalent reshape
+ // The two reshapes are replaced by a single equivalent reshape.
BOOST_TEST(CheckSequence(graph.cbegin(),
graph.cend(),
&IsLayerOfType<armnn::InputLayer>,
checkReshape,
&IsLayerOfType<armnn::OutputLayer>));
+
+ // Checks the new reshape layer has the other two reshapes as related layers.
+ std::list<std::string> testRelatedLayers = { reshape2Name, reshape1Name };
+
+ BOOST_TEST(CheckRelatedLayers<armnn::ReshapeLayer>(graph, testRelatedLayers));
}
{
- // Insert a reshape to the input shape
+ // Inserts a reshape to the input shape.
auto reshapeToIn = graph.InsertNewLayer<armnn::ReshapeLayer>(output->GetInputSlot(0),
armnn::ReshapeDescriptor{ info0.GetShape() },
"reshapeToIn");
reshapeToIn->GetOutputHandler().SetTensorInfo(info0);
- armnn::Optimizer::Optimize(graph);
+ armnn::Optimizer::Pass(graph, armnn::MakeOptimizations(OptimizeConsecutiveReshapes()));
- // The two reshapes are removed
+ // The two reshapes are removed.
BOOST_TEST(CheckSequence(graph.cbegin(),
graph.cend(),
&IsLayerOfType<armnn::InputLayer>,
@@ -260,7 +444,7 @@ BOOST_AUTO_TEST_CASE(OptimizeConsecutiveReshapes)
}
}
-BOOST_AUTO_TEST_CASE(SquashEqualSiblings)
+BOOST_AUTO_TEST_CASE(SquashEqualSiblingsTest)
{
armnn::Graph graph;
@@ -272,7 +456,7 @@ BOOST_AUTO_TEST_CASE(SquashEqualSiblings)
auto input = graph.AddLayer<armnn::InputLayer>(0, "input");
input->GetOutputSlot().SetTensorInfo(info);
- // Insert equal permutes, equal reshapes and something else
+ // Inserts equal permutes, equal reshapes and something else.
const armnn::PermuteDescriptor permDesc({ 0, 2, 3, 1 });
const armnn::ReshapeDescriptor reshapeDesc{ { 1, 3, 1, 5 } };
@@ -314,7 +498,8 @@ BOOST_AUTO_TEST_CASE(SquashEqualSiblings)
&IsLayerOfType<armnn::OutputLayer>,
&IsLayerOfType<armnn::OutputLayer>));
- armnn::Optimizer::Optimize(graph);
+ armnn::Optimizer::Pass(graph, armnn::MakeOptimizations(SquashEqualPermuteSiblings(),
+ SquashEqualReshapeSiblings()));
// The permutes and reshapes are squashed.
@@ -331,4 +516,259 @@ BOOST_AUTO_TEST_CASE(SquashEqualSiblings)
&IsLayerOfType<armnn::OutputLayer>));
}
+BOOST_AUTO_TEST_CASE(ConvertConstantsHalfToFloatTest)
+{
+ armnn::Graph graph;
+
+ const armnn::TensorInfo info({ 1,1,1,2 }, armnn::DataType::Float32);
+
+ // Create the half precision input data
+ unsigned int dims[] = { 4,1,1,1 };
+ std::vector<float> convWeightsData{1.f, 2.f, 3.f, 4.f};
+ std::vector<uint16_t> halfWeights(4);
+ armnnUtils::FloatingPointConverter::ConvertFloat32To16(convWeightsData.data(),
+ convWeightsData.size(),
+ halfWeights.data());
+ armnn::ConstTensor weights(armnn::TensorInfo(4, dims, armnn::DataType::Float16), halfWeights);
+
+ // Create the simple test network.
+ auto input = graph.AddLayer<armnn::InputLayer>(0, "input");
+ input->GetOutputSlot().SetTensorInfo(info);
+
+ auto fc = graph.AddLayer<armnn::FullyConnectedLayer>(armnn::FullyConnectedDescriptor(), "fc");
+ fc->m_Weight = std::make_unique<armnn::ScopedCpuTensorHandle>(weights);
+ fc->GetOutputSlot().SetTensorInfo(info);
+
+ auto output = graph.AddLayer<armnn::OutputLayer>(1, "output");
+
+ // Connect up the layers.
+ input->GetOutputSlot().Connect(fc->GetInputSlot(0));
+ fc->GetOutputSlot().Connect(output->GetInputSlot(0));
+
+ // Check the weight tensor is Float16 before conversion.
+ BOOST_CHECK(fc->m_Weight->GetTensorInfo().GetDataType() == armnn::DataType::Float16);
+
+ // Run the optimizer
+ armnn::Optimizer::Pass(graph, armnn::MakeOptimizations(ConvertConstantsHalfToFloat()));
+
+ // Check the weight tensor is Float32 after conversion.
+ BOOST_CHECK(fc->m_Weight->GetTensorInfo().GetDataType() == armnn::DataType::Float32);
+
+ // Check the data matches the expected float32 values.
+ float* data = fc->m_Weight->GetTensor<float>();
+ BOOST_CHECK(1.0f == data[0]);
+ BOOST_CHECK(2.0f == data[1]);
+ BOOST_CHECK(3.0f == data[2]);
+ BOOST_CHECK(4.0f == data[3]);
+}
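+
+// A hypothetical round-trip sketch (not exercised by the test above): assuming the
+// converter also exposes a mirror-image ConvertFloat16To32, the half weights could
+// be expanded back into an fp32 buffer the same way, e.g.
+//     std::vector<float> roundTrip(halfWeights.size());
+//     armnnUtils::FloatingPointConverter::ConvertFloat16To32(halfWeights.data(),
+//                                                            halfWeights.size(),
+//                                                            roundTrip.data());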
+
+BOOST_AUTO_TEST_CASE(ConvertConstantsFloatToHalfTest)
+{
+ armnn::Graph graph;
+
+ const armnn::TensorInfo info({ 1, 1, 1, 2 }, armnn::DataType::Float16);
+
+ // Create const tensor from fp32 data
+ unsigned int dims[] = { 4, 1, 1, 1 };
+ std::vector<float> floatWeights{ 1.0f, 2.0f, 3.0f, 4.0f };
+ armnn::ConstTensor weights(armnn::TensorInfo(4, dims, armnn::DataType::Float32), floatWeights);
+
+ // Create simple test network
+ auto input = graph.AddLayer<armnn::InputLayer>(0, "input");
+ input->GetOutputSlot().SetTensorInfo(info);
+
+ auto fc = graph.AddLayer<armnn::FullyConnectedLayer>(armnn::FullyConnectedDescriptor(), "fc");
+ fc->m_Weight = std::make_unique<armnn::ScopedCpuTensorHandle>(weights);
+ fc->GetOutputSlot().SetTensorInfo(info);
+
+ auto output = graph.AddLayer<armnn::OutputLayer>(1, "output");
+
+ // Connect up the layers
+ input->GetOutputSlot().Connect(fc->GetInputSlot(0));
+ fc->GetOutputSlot().Connect(output->GetInputSlot(0));
+
+ // Check tensor data type before conversion
+ BOOST_CHECK(fc->m_Weight->GetTensorInfo().GetDataType() == armnn::DataType::Float32);
+
+ // Run the optimizer
+ armnn::Optimizer::Pass(graph, armnn::MakeOptimizations(ConvertConstantsFloatToHalf()));
+
+ // Check tensor data type after conversion
+ BOOST_CHECK(fc->m_Weight->GetTensorInfo().GetDataType() == armnn::DataType::Float16);
+
+ // Check whether data matches expected fp16 data
+ Half* data = fc->m_Weight->GetTensor<Half>();
+ BOOST_CHECK(data[0] == Half(1.0f));
+ BOOST_CHECK(data[1] == Half(2.0f));
+ BOOST_CHECK(data[2] == Half(3.0f));
+ BOOST_CHECK(data[3] == Half(4.0f));
+}
+
+BOOST_AUTO_TEST_CASE(OptimizeInverseConversionsTest)
+{
+ armnn::Graph graph;
+
+ auto output = graph.AddLayer<armnn::OutputLayer>(0, "output");
+
+ graph.InsertNewLayer<armnn::InputLayer>(output->GetInputSlot(0), 0, "input");
+
+ // Fp32ToFp16 conversion followed by an inverse Fp16ToFp32 conversion
+ graph.InsertNewLayer<armnn::ConvertFp32ToFp16Layer>(output->GetInputSlot(0), "convert1");
+ graph.InsertNewLayer<armnn::ConvertFp16ToFp32Layer>(output->GetInputSlot(0), "convert2");
+
+ graph.InsertNewLayer<armnn::Convolution2dLayer>(output->GetInputSlot(0), Convolution2dDescriptor(), "conv");
+
+ // Fp16ToFp32 conversion followed by an inverse Fp32ToFp16 conversion
+ graph.InsertNewLayer<armnn::ConvertFp16ToFp32Layer>(output->GetInputSlot(0), "convert3");
+ graph.InsertNewLayer<armnn::ConvertFp32ToFp16Layer>(output->GetInputSlot(0), "convert4");
+
+ BOOST_TEST(CheckSequence(graph.cbegin(),
+ graph.cend(),
+ &IsLayerOfType<armnn::InputLayer>,
+ &IsLayerOfType<armnn::ConvertFp32ToFp16Layer>,
+ &IsLayerOfType<armnn::ConvertFp16ToFp32Layer>,
+ &IsLayerOfType<armnn::Convolution2dLayer>,
+ &IsLayerOfType<armnn::ConvertFp16ToFp32Layer>,
+ &IsLayerOfType<armnn::ConvertFp32ToFp16Layer>,
+ &IsLayerOfType<armnn::OutputLayer>));
+
+ armnn::Optimizer::Pass(graph, armnn::MakeOptimizations(OptimizeInverseConversionsFp16(),
+ OptimizeInverseConversionsFp32()));
+
+ // Check that all consecutive inverse conversions are removed
+ BOOST_TEST(CheckSequence(graph.cbegin(),
+ graph.cend(),
+ &IsLayerOfType<armnn::InputLayer>,
+ &IsLayerOfType<armnn::Convolution2dLayer>,
+ &IsLayerOfType<armnn::OutputLayer>));
+}
+
+BOOST_AUTO_TEST_CASE(InsertConvertersTest)
+{
+ const armnn::TensorInfo info({ 1, 5, 2, 3 }, armnn::DataType::Float16);
+
+ armnn::Graph graph;
+
+ armnn::LayerBindingId inputId = 0;
+
+ armnn::Layer* head = graph.AddLayer<armnn::OutputLayer>(0, "output");
+
+ head = graph.InsertNewLayer<armnn::AdditionLayer>(head->GetInputSlot(0), "");
+ head->GetOutputHandler().SetTensorInfo(info);
+
+ graph.InsertNewLayer<armnn::InputLayer>(head->GetInputSlot(1), inputId++, "")
+ ->GetOutputHandler().SetTensorInfo(info);
+
+ head = graph.InsertNewLayer<armnn::FloorLayer>(head->GetInputSlot(0), "");
+ head->GetOutputHandler().SetTensorInfo(info);
+
+ head = graph.InsertNewLayer<armnn::MemCopyLayer>(head->GetInputSlot(0), "");
+ head->GetOutputHandler().SetTensorInfo(info);
+
+ graph.InsertNewLayer<armnn::InputLayer>(head->GetInputSlot(0), inputId++, "")
+ ->GetOutputHandler().SetTensorInfo(info);
+
+ // Check graph layer sequence before inserting convert layers
+ BOOST_TEST(CheckSequence(graph.cbegin(),
+ graph.cend(),
+ &IsLayerOfType<armnn::InputLayer>,
+ &IsLayerOfType<armnn::InputLayer>,
+ &IsLayerOfType<armnn::MemCopyLayer>,
+ &IsLayerOfType<armnn::FloorLayer>,
+ &IsLayerOfType<armnn::AdditionLayer>,
+ &IsLayerOfType<armnn::OutputLayer>));
+
+ // Check layers have Float16 DataType
+ for (auto& layer : graph)
+ {
+ if (layer->GetType() == LayerType::Floor || layer->GetType() == LayerType::Addition)
+ {
+ BOOST_ASSERT(layer->GetOutputSlot(0).GetTensorInfo().GetDataType() == DataType::Float16);
+ BOOST_ASSERT(layer->GetDataType() == DataType::Float16);
+ }
+ }
+
+ // Insert convert layers either side of unsupported layer
+ for (auto& layer : graph)
+ {
+ if (layer->GetType() == LayerType::Floor || layer->GetType() == LayerType::Addition)
+ {
+ InsertConvertFp16ToFp32LayersBefore(graph, *layer);
+ InsertConvertFp32ToFp16LayersAfter(graph, *layer);
+ }
+ }
+
+ // Check layers have correct DataType after inserting convert layers
+ for (auto& layer : graph)
+ {
+ if (layer->GetType() == LayerType::Floor || layer->GetType() == LayerType::Addition)
+ {
+ BOOST_ASSERT(layer->GetOutputSlot(0).GetTensorInfo().GetDataType() == DataType::Float32);
+ BOOST_ASSERT(layer->GetDataType() == DataType::Float32);
+ }
+ else if (layer->GetType() == LayerType::ConvertFp16ToFp32)
+ {
+ BOOST_ASSERT(layer->GetOutputSlot(0).GetTensorInfo().GetDataType() == DataType::Float32);
+ BOOST_ASSERT(layer->GetDataType() == DataType::Float16);
+ }
+ else if (layer->GetType() == LayerType::ConvertFp32ToFp16)
+ {
+ BOOST_ASSERT(layer->GetOutputSlot(0).GetTensorInfo().GetDataType() == DataType::Float16);
+ BOOST_ASSERT(layer->GetDataType() == DataType::Float32);
+ }
+ }
+
+ // Check sequence of layers after inserting convert layers
+ BOOST_TEST(CheckSequence(graph.cbegin(),
+ graph.cend(),
+ &IsLayerOfType<armnn::InputLayer>,
+ &IsLayerOfType<armnn::InputLayer>,
+ &IsLayerOfType<armnn::ConvertFp16ToFp32Layer>,
+ &IsLayerOfType<armnn::MemCopyLayer>,
+ &IsLayerOfType<armnn::ConvertFp16ToFp32Layer>,
+ &IsLayerOfType<armnn::FloorLayer>,
+ &IsLayerOfType<armnn::ConvertFp32ToFp16Layer>,
+ &IsLayerOfType<armnn::ConvertFp16ToFp32Layer>,
+ &IsLayerOfType<armnn::AdditionLayer>,
+ &IsLayerOfType<armnn::ConvertFp32ToFp16Layer>,
+ &IsLayerOfType<armnn::OutputLayer>));
+}
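+
+// Note the invariant checked above: each inserted ConvertFp16ToFp32 layer still
+// reports GetDataType() == Float16 (its input type) while its output slot carries
+// Float32, and ConvertFp32ToFp16 is the mirror image.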
+
+BOOST_AUTO_TEST_CASE(Fp32NetworkToFp16OptimizationTest)
+{
+ armnn::Graph graph;
+
+ const armnn::TensorInfo infoFP32({ 2,2,1,3 }, armnn::DataType::Float32);
+
+ // Create the simple test network
+ auto input = graph.AddLayer<armnn::InputLayer>(0, "input");
+ input->GetOutputSlot().SetTensorInfo(infoFP32);
+
+ auto floor = graph.AddLayer<armnn::FloorLayer>("floor");
+ floor->GetOutputSlot().SetTensorInfo(infoFP32);
+
+ auto output = graph.AddLayer<armnn::OutputLayer>(1, "output");
+
+ // Connect up the layers
+ input->GetOutputSlot().Connect(floor->GetInputSlot(0));
+ floor->GetOutputSlot().Connect(output->GetInputSlot(0));
+
+ BOOST_TEST(CheckSequence(graph.cbegin(),
+ graph.cend(),
+ &IsLayerOfType<armnn::InputLayer>,
+ &IsLayerOfType<armnn::FloorLayer>,
+ &IsLayerOfType<armnn::OutputLayer>));
+
+ // Run the optimizer
+ armnn::Optimizer::Pass(graph, armnn::MakeOptimizations(Fp32NetworkToFp16Converter()));
+
+ BOOST_TEST(CheckSequence(graph.cbegin(),
+ graph.cend(),
+ &IsLayerOfType<armnn::InputLayer>,
+ &IsLayerOfType<armnn::ConvertFp32ToFp16Layer>,
+ &IsLayerOfType<armnn::FloorLayer>,
+ &IsLayerOfType<armnn::ConvertFp16ToFp32Layer>,
+ &IsLayerOfType<armnn::OutputLayer>));
+}
+
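+// Note: MakeOptimizations is variadic, so several of the passes exercised above can
+// be chained in a single call, e.g. (an illustrative combination, not one used by
+// these tests):
+//     armnn::Optimizer::Pass(graph, armnn::MakeOptimizations(MovePermuteUp(),
+//                                                            PermuteAsReshape(),
+//                                                            OptimizeConsecutiveReshapes()));
+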
BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/armnn/test/ProfilerTests.cpp b/src/armnn/test/ProfilerTests.cpp
new file mode 100644
index 0000000000..4450c5a08e
--- /dev/null
+++ b/src/armnn/test/ProfilerTests.cpp
@@ -0,0 +1,235 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include <boost/test/unit_test.hpp>
+#include <boost/test/output_test_stream.hpp>
+#include <boost/algorithm/string.hpp>
+
+#include <memory>
+#include <thread>
+
+#include <armnn/TypesUtils.hpp>
+#include <Profiling.hpp>
+
+namespace armnn
+{
+
+size_t GetProfilerEventSequenceSize(armnn::Profiler* profiler)
+{
+ if (!profiler)
+ {
+ return static_cast<size_t>(-1);
+ }
+
+ return profiler->m_EventSequence.size();
+}
+} // namespace armnn
+
+namespace
+{
+
+void RegisterUnregisterProfilerSingleThreadImpl()
+{
+ // Important! Regular assertions must be used in this function for testing (rather than
+ // BOOST_TEST macros); otherwise multi-threading tests would randomly fail.
+
+ // Get a reference to the profiler manager.
+ armnn::ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
+
+ // Check that there's no profiler registered for this thread.
+ assert(!profilerManager.GetProfiler());
+
+ // Create and register a profiler for this thread.
+ std::unique_ptr<armnn::Profiler> profiler = std::make_unique<armnn::Profiler>();
+ profilerManager.RegisterProfiler(profiler.get());
+
+ // Check that on a single thread we get the same profiler we registered.
+ assert(profiler.get() == profilerManager.GetProfiler());
+
+ // Destroy the profiler.
+ profiler.reset();
+
+ // Check that the profiler has been un-registered for this thread.
+ assert(!profilerManager.GetProfiler());
+}
+
+} // namespace
+
+BOOST_AUTO_TEST_SUITE(Profiler)
+
+BOOST_AUTO_TEST_CASE(EnableDisableProfiling)
+{
+ std::unique_ptr<armnn::Profiler> profiler = std::make_unique<armnn::Profiler>();
+
+ // Check that profiling is disabled by default.
+ BOOST_TEST(!profiler->IsProfilingEnabled());
+
+ // Enable profiling.
+ profiler->EnableProfiling(true);
+
+ // Check that profiling is enabled.
+ BOOST_TEST(profiler->IsProfilingEnabled());
+
+ // Disable profiling.
+ profiler->EnableProfiling(false);
+
+ // Check that profiling is disabled.
+ BOOST_TEST(!profiler->IsProfilingEnabled());
+}
+
+BOOST_AUTO_TEST_CASE(RegisterUnregisterProfilerSingleThread)
+{
+ RegisterUnregisterProfilerSingleThreadImpl();
+}
+
+BOOST_AUTO_TEST_CASE(RegisterUnregisterProfilerMultipleThreads)
+{
+ std::thread thread1([]() { RegisterUnregisterProfilerSingleThreadImpl(); });
+ std::thread thread2([]() { RegisterUnregisterProfilerSingleThreadImpl(); });
+ std::thread thread3([]() { RegisterUnregisterProfilerSingleThreadImpl(); });
+
+ thread1.join();
+ thread2.join();
+ thread3.join();
+}
+
+BOOST_AUTO_TEST_CASE(ProfilingMacros)
+{
+ // Get a reference to the profiler manager.
+ armnn::ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
+
+ { // --- No profiler ---
+
+ // Check that there's no profiler registered for this thread.
+ BOOST_TEST(!profilerManager.GetProfiler());
+
+ // Test scoped event.
+ { ARMNN_SCOPED_PROFILING_EVENT(armnn::Compute::CpuAcc, "test"); }
+
+ // Check that we still cannot get a profiler for this thread.
+ BOOST_TEST(!profilerManager.GetProfiler());
+ }
+
+ // Create and register a profiler for this thread.
+ std::unique_ptr<armnn::Profiler> profiler = std::make_unique<armnn::Profiler>();
+ profilerManager.RegisterProfiler(profiler.get());
+
+ { // --- Profiler, but profiling disabled ---
+
+ // Get current event sequence size.
+ size_t eventSequenceSizeBefore = armnn::GetProfilerEventSequenceSize(profiler.get());
+
+ // Test scoped macro.
+ { ARMNN_SCOPED_PROFILING_EVENT(armnn::Compute::CpuAcc, "test"); }
+
+ // Check that no profiling event has been added to the sequence.
+ size_t eventSequenceSizeAfter = armnn::GetProfilerEventSequenceSize(profiler.get());
+ BOOST_TEST(eventSequenceSizeBefore == eventSequenceSizeAfter);
+ }
+
+ // Enable profiling.
+ profiler->EnableProfiling(true);
+
+ { // --- Profiler, and profiling enabled ---
+
+ // Get current event sequence size.
+ size_t eventSequenceSizeBefore = armnn::GetProfilerEventSequenceSize(profiler.get());
+
+ // Test scoped macro.
+ { ARMNN_SCOPED_PROFILING_EVENT(armnn::Compute::CpuAcc, "test"); }
+
+ // Check that a profiling event has been added to the sequence.
+ size_t eventSequenceSizeAfter = armnn::GetProfilerEventSequenceSize(profiler.get());
+ BOOST_TEST(eventSequenceSizeAfter == eventSequenceSizeBefore + 1);
+ }
+
+ // Disable profiling here to not print out anything on stdout.
+ profiler->EnableProfiling(false);
+}
+
+BOOST_AUTO_TEST_CASE(RuntimeLoadNetwork)
+{
+ // Get a reference to the profiler manager.
+ armnn::ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
+
+ // Check that there's no profiler registered for this thread.
+ BOOST_TEST(!profilerManager.GetProfiler());
+
+ // Build a mock-network and load it into the runtime.
+ armnn::IRuntime::CreationOptions options;
+ armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
+ armnn::NetworkId networkIdentifier = 1;
+ armnn::INetworkPtr mockNetwork(armnn::INetwork::Create());
+ mockNetwork->AddInputLayer(0, "test layer");
+ std::vector<armnn::Compute> backends = { armnn::Compute::CpuRef };
+ runtime->LoadNetwork(networkIdentifier, armnn::Optimize(*mockNetwork, backends, runtime->GetDeviceSpec()));
+
+ // Check that now there's a profiler registered for this thread (created and registered by loading the network).
+ BOOST_TEST(profilerManager.GetProfiler());
+
+ // Unload the network.
+ runtime->UnloadNetwork(networkIdentifier);
+
+ // Check that the profiler has been un-registered for this thread.
+ BOOST_TEST(!profilerManager.GetProfiler());
+}
+
+BOOST_AUTO_TEST_CASE(WriteEventResults)
+{
+ // Get a reference to the profiler manager.
+ armnn::ProfilerManager& profileManager = armnn::ProfilerManager::GetInstance();
+
+ // Create and register a profiler for this thread.
+ std::unique_ptr<armnn::Profiler> profiler = std::make_unique<armnn::Profiler>();
+ profileManager.RegisterProfiler(profiler.get());
+
+ // Enable profiling.
+ profiler->EnableProfiling(true);
+
+ { // --- Profiler, and profiling enabled ---
+
+ // Get current event sequence size.
+ size_t eventSequenceSizeBefore = armnn::GetProfilerEventSequenceSize(profiler.get());
+
+ // Test scoped macro.
+ {
+ // Need to directly create a ScopedProfilingEvent as the one created by the macro falls out of scope
+ // immediately, causing Event::Stop() to be called straight after Event::Start(), which results in
+ // periodic test failures on the Dent and Smith HiKeys.
+ armnn::ScopedProfilingEvent testEvent(armnn::Compute::CpuAcc, "test", armnn::WallClockTimer());
+ std::this_thread::sleep_for(std::chrono::milliseconds(10));
+ }
+
+ // Check that a profiling event has been added to the sequence.
+ size_t eventSequenceSizeAfter = armnn::GetProfilerEventSequenceSize(profiler.get());
+ BOOST_TEST(eventSequenceSizeAfter == eventSequenceSizeBefore + 1);
+
+ boost::test_tools::output_test_stream output;
+ profiler->AnalyzeEventsAndWriteResults(output);
+ BOOST_TEST(!output.is_empty(false));
+
+ // output should contain event name 'test'
+ BOOST_CHECK(boost::contains(output.str(), "test"));
+
+ // output should contain headers
+ BOOST_CHECK(boost::contains(output.str(), "Event Sequence - Name"));
+ BOOST_CHECK(boost::contains(output.str(), "Event Stats - Name"));
+ BOOST_CHECK(boost::contains(output.str(), "Total"));
+ BOOST_CHECK(boost::contains(output.str(), "Device"));
+ // output should contain compute device 'CpuAcc'
+ BOOST_CHECK(boost::contains(output.str(), "CpuAcc"));
+ // output should not contain un-readable numbers in scientific notation
+ BOOST_CHECK(!(boost::contains(output.str(), "e+")));
+ // output should not contain un-readable numbers with explicit '+' signs
+ BOOST_CHECK(!(boost::contains(output.str(), "+")));
+ // output should not contain zero value
+ BOOST_CHECK(!(boost::contains(output.str(), " 0 ")));
+ }
+
+ // Disable profiling here to not print out anything on stdout.
+ profiler->EnableProfiling(false);
+}
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/armnn/test/ProfilingEventTest.cpp b/src/armnn/test/ProfilingEventTest.cpp
new file mode 100644
index 0000000000..4d0319d456
--- /dev/null
+++ b/src/armnn/test/ProfilingEventTest.cpp
@@ -0,0 +1,95 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+#include <boost/test/unit_test.hpp>
+
+#include "ProfilingEvent.hpp"
+#include "Profiling.hpp"
+#include <thread>
+
+using namespace armnn;
+
+BOOST_AUTO_TEST_SUITE(ProfilingEvent)
+
+BOOST_AUTO_TEST_CASE(ProfilingEventTest)
+{
+ // Get a reference to the profiler manager.
+ armnn::ProfilerManager& profileManager = armnn::ProfilerManager::GetInstance();
+
+ const char* eventName = "EventName";
+
+ Event::Instruments insts1;
+ insts1.emplace_back(std::make_unique<WallClockTimer>());
+ Event testEvent(eventName,
+ nullptr,
+ nullptr,
+ armnn::Compute::Undefined,
+ std::move(insts1));
+
+ BOOST_CHECK_EQUAL(testEvent.GetName(), "EventName");
+
+ // start the timer - outer
+ testEvent.Start();
+
+ // wait for 10 milliseconds
+ std::this_thread::sleep_for(std::chrono::milliseconds(10));
+
+ // stop the timer - outer
+ testEvent.Stop();
+
+ BOOST_CHECK_GE(testEvent.GetMeasurements().front().m_Value, 10.0);
+
+ // create a sub event with CpuAcc
+ Event::Instruments insts2;
+ insts2.emplace_back(std::make_unique<WallClockTimer>());
+ Event testEvent2(eventName,
+ profileManager.GetProfiler(),
+ &testEvent,
+ Compute::CpuAcc,
+ std::move(insts2));
+
+ BOOST_CHECK_EQUAL(&testEvent, testEvent2.GetParentEvent());
+ BOOST_CHECK_EQUAL(profileManager.GetProfiler(), testEvent2.GetProfiler());
+ BOOST_CHECK_EQUAL(Compute::CpuAcc, testEvent2.GetComputeDevice());
+}
+
+BOOST_AUTO_TEST_CASE(ProfilingEventTestOnGpuAcc)
+{
+ // Get a reference to the profiler manager.
+ armnn::ProfilerManager& profileManager = armnn::ProfilerManager::GetInstance();
+
+ const char* eventName = "GPUEvent";
+
+ Event::Instruments insts1;
+ insts1.emplace_back(std::make_unique<WallClockTimer>());
+ Event testEvent(eventName,
+ nullptr,
+ nullptr,
+ armnn::Compute::Undefined,
+ std::move(insts1));
+
+ BOOST_CHECK_EQUAL(testEvent.GetName(), "GPUEvent");
+
+ // start the timer - outer
+ testEvent.Start();
+
+ // wait for 10 milliseconds
+ std::this_thread::sleep_for(std::chrono::milliseconds(10));
+
+ // stop the timer - outer
+ testEvent.Stop();
+
+ BOOST_CHECK_GE(testEvent.GetMeasurements().front().m_Value, 10.0);
+
+ // create a sub event
+ Event::Instruments insts2;
+ insts2.emplace_back(std::make_unique<WallClockTimer>());
+ Event testEvent2(eventName, profileManager.GetProfiler(), &testEvent, Compute::GpuAcc, std::move(insts2));
+
+ BOOST_CHECK_EQUAL(&testEvent, testEvent2.GetParentEvent());
+ BOOST_CHECK_EQUAL(profileManager.GetProfiler(), testEvent2.GetProfiler());
+ BOOST_CHECK_EQUAL(Compute::GpuAcc, testEvent2.GetComputeDevice());
+}
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/armnn/test/RuntimeTests.cpp b/src/armnn/test/RuntimeTests.cpp
index fcb0a1e7c2..e29a1d4841 100644
--- a/src/armnn/test/RuntimeTests.cpp
+++ b/src/armnn/test/RuntimeTests.cpp
@@ -32,33 +32,46 @@ BOOST_AUTO_TEST_SUITE(Runtime)
BOOST_AUTO_TEST_CASE(RuntimeUnloadNetwork)
{
// build 2 mock-networks and load them into the runtime
- armnn::IRuntimePtr runtime(armnn::IRuntime::Create(armnn::Compute::CpuRef));
+ armnn::IRuntime::CreationOptions options;
+ armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
- // mock network 1
+ // Mock network 1.
armnn::NetworkId networkIdentifier1 = 1;
armnn::INetworkPtr mockNetwork1(armnn::INetwork::Create());
mockNetwork1->AddInputLayer(0, "test layer");
- runtime->LoadNetwork(networkIdentifier1, Optimize(*mockNetwork1, runtime->GetDeviceSpec()));
+ std::vector<armnn::Compute> backends = {armnn::Compute::CpuRef};
+ runtime->LoadNetwork(networkIdentifier1, Optimize(*mockNetwork1, backends, runtime->GetDeviceSpec()));
- // mock network 2
+ // Mock network 2.
armnn::NetworkId networkIdentifier2 = 2;
armnn::INetworkPtr mockNetwork2(armnn::INetwork::Create());
mockNetwork2->AddInputLayer(0, "test layer");
- runtime->LoadNetwork(networkIdentifier2, Optimize(*mockNetwork2, runtime->GetDeviceSpec()));
+ runtime->LoadNetwork(networkIdentifier2, Optimize(*mockNetwork2, backends, runtime->GetDeviceSpec()));
- // unload one by its networkID
+ // Unloads one by its networkID.
BOOST_TEST(runtime->UnloadNetwork(networkIdentifier1) == armnn::Status::Success);
BOOST_TEST(runtime->UnloadNetwork(networkIdentifier1) == armnn::Status::Failure);
}
// Note: in the current builds we don't do valgrind and gperftools based leak checking at the same
-// time, so in practice WITH_VALGRIND and ARMNN_LEAK_CHECKING_ENABLED are exclusive. In
-// the future the gperftools based leak checking should stay and the valgrind based should
-// be removed.
+// time, so in practice WITH_VALGRIND and ARMNN_LEAK_CHECKING_ENABLED are exclusive. The
+// Valgrind tests can stay for x86 builds, but on HiKey Valgrind is just way too slow
+// to be integrated into the CI system.
-#if ARMNN_LEAK_CHECKING_ENABLED
-void CreateAndDropDummyNetwork(armnn::Runtime & runtime)
+#ifdef ARMNN_LEAK_CHECKING_ENABLED
+
+struct DisableGlobalLeakChecking
+{
+ DisableGlobalLeakChecking()
+ {
+ ARMNN_LOCAL_LEAK_CHECKING_ONLY();
+ }
+};
+
+BOOST_GLOBAL_FIXTURE(DisableGlobalLeakChecking);
+
+void CreateAndDropDummyNetwork(const std::vector<armnn::Compute>& backends, armnn::Runtime& runtime)
{
armnn::NetworkId networkIdentifier;
{
@@ -74,12 +87,12 @@ void CreateAndDropDummyNetwork(armnn::Runtime & runtime)
input->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
layer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
- // set the tensors in the network
+ // Sets the tensors in the network.
input->GetOutputSlot(0).SetTensorInfo(inputTensorInfo);
layer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
// optimize the network
- armnn::IOptimizedNetworkPtr optNet = Optimize(*network, runtime.GetDeviceSpec());
+ armnn::IOptimizedNetworkPtr optNet = Optimize(*network, backends, runtime.GetDeviceSpec());
runtime.LoadNetwork(networkIdentifier, std::move(optNet));
}
@@ -94,10 +107,13 @@ BOOST_AUTO_TEST_CASE(RuntimeHeapMemoryUsageSanityChecks)
ARMNN_SCOPED_LEAK_CHECKER("Sanity_Check_Outer");
{
ARMNN_SCOPED_LEAK_CHECKER("Sanity_Check_Inner");
+ BOOST_TEST(ARMNN_NO_LEAKS_IN_SCOPE() == true);
std::unique_ptr<char[]> dummyAllocation(new char[1000]);
- BOOST_TEST(ARMNN_NO_LEAKS_IN_SCOPE() == false);
- BOOST_TEST(ARMNN_BYTES_LEAKED_IN_SCOPE() >= 1000);
- BOOST_TEST(ARMNN_OBJECTS_LEAKED_IN_SCOPE() >= 1);
+ BOOST_CHECK_MESSAGE(ARMNN_NO_LEAKS_IN_SCOPE() == false,
+ "A leak of 1000 bytes is expected here. "
+ "Please make sure environment variable: HEAPCHECK=draconian is set!");
+ BOOST_TEST(ARMNN_BYTES_LEAKED_IN_SCOPE() == 1000);
+ BOOST_TEST(ARMNN_OBJECTS_LEAKED_IN_SCOPE() == 1);
}
BOOST_TEST(ARMNN_NO_LEAKS_IN_SCOPE());
BOOST_TEST(ARMNN_BYTES_LEAKED_IN_SCOPE() == 0);
@@ -109,22 +125,24 @@ BOOST_AUTO_TEST_CASE(RuntimeHeapMemoryUsageSanityChecks)
BOOST_AUTO_TEST_CASE(RuntimeMemoryLeaksGpuAcc)
{
BOOST_TEST(ARMNN_LEAK_CHECKER_IS_ACTIVE());
-
- armnn::Runtime runtime(armnn::Compute::GpuAcc);
+ armnn::IRuntime::CreationOptions options;
+ armnn::Runtime runtime(options);
armnn::RuntimeLoadedNetworksReserve(&runtime);
+ std::vector<armnn::Compute> backends = {armnn::Compute::GpuAcc};
{
// Do a warmup of this so we make sure that all one-time
// initialization happens before we do the leak checking.
- CreateAndDropDummyNetwork(runtime);
+ CreateAndDropDummyNetwork(backends, runtime);
}
{
ARMNN_SCOPED_LEAK_CHECKER("LoadAndUnloadNetworkGpuAcc");
+ BOOST_TEST(ARMNN_NO_LEAKS_IN_SCOPE());
// In the second run we check for all remaining memory
// in use after the network was unloaded. If there is any
// then it will be treated as a memory leak.
- CreateAndDropDummyNetwork(runtime);
+ CreateAndDropDummyNetwork(backends, runtime);
BOOST_TEST(ARMNN_NO_LEAKS_IN_SCOPE());
BOOST_TEST(ARMNN_BYTES_LEAKED_IN_SCOPE() == 0);
BOOST_TEST(ARMNN_OBJECTS_LEAKED_IN_SCOPE() == 0);
@@ -136,22 +154,24 @@ BOOST_AUTO_TEST_CASE(RuntimeMemoryLeaksGpuAcc)
BOOST_AUTO_TEST_CASE(RuntimeMemoryLeaksCpuAcc)
{
BOOST_TEST(ARMNN_LEAK_CHECKER_IS_ACTIVE());
-
- armnn::Runtime runtime(armnn::Compute::CpuAcc);
+ armnn::IRuntime::CreationOptions options;
+ armnn::Runtime runtime(options);
armnn::RuntimeLoadedNetworksReserve(&runtime);
+ std::vector<armnn::Compute> backends = {armnn::Compute::CpuAcc};
{
// Do a warmup of this so we make sure that all one-time
// initialization happens before we do the leak checking.
- CreateAndDropDummyNetwork(runtime);
+ CreateAndDropDummyNetwork(backends, runtime);
}
{
ARMNN_SCOPED_LEAK_CHECKER("LoadAndUnloadNetworkCpuAcc");
+ BOOST_TEST(ARMNN_NO_LEAKS_IN_SCOPE());
// In the second run we check for all remaining memory
// in use after the network was unloaded. If there is any
// then it will be treated as a memory leak.
- CreateAndDropDummyNetwork(runtime);
+ CreateAndDropDummyNetwork(backends, runtime);
BOOST_TEST(ARMNN_NO_LEAKS_IN_SCOPE());
BOOST_TEST(ARMNN_BYTES_LEAKED_IN_SCOPE() == 0);
BOOST_TEST(ARMNN_OBJECTS_LEAKED_IN_SCOPE() == 0);
@@ -163,21 +183,24 @@ BOOST_AUTO_TEST_CASE(RuntimeMemoryLeaksCpuRef)
{
BOOST_TEST(ARMNN_LEAK_CHECKER_IS_ACTIVE());
- armnn::Runtime runtime(armnn::Compute::CpuRef);
+ armnn::IRuntime::CreationOptions options;
+ armnn::Runtime runtime(options);
armnn::RuntimeLoadedNetworksReserve(&runtime);
+ std::vector<armnn::Compute> backends = {armnn::Compute::CpuRef};
{
// Do a warmup of this so we make sure that all one-time
// initialization happens before we do the leak checking.
- CreateAndDropDummyNetwork(runtime);
+ CreateAndDropDummyNetwork(backends, runtime);
}
{
ARMNN_SCOPED_LEAK_CHECKER("LoadAndUnloadNetworkCpuRef");
+ BOOST_TEST(ARMNN_NO_LEAKS_IN_SCOPE());
// In the second run we check for all remaining memory
// in use after the network was unloaded. If there is any
// then it will be treated as a memory leak.
- CreateAndDropDummyNetwork(runtime);
+ CreateAndDropDummyNetwork(backends, runtime);
BOOST_TEST(ARMNN_NO_LEAKS_IN_SCOPE());
BOOST_TEST(ARMNN_BYTES_LEAKED_IN_SCOPE() == 0);
BOOST_TEST(ARMNN_OBJECTS_LEAKED_IN_SCOPE() == 0);
@@ -199,25 +222,28 @@ BOOST_AUTO_TEST_CASE(RuntimeMemoryUsage)
// A start-pointer or chain of start-pointers to the block is found. Since the block is still pointed at,
// the programmer could, at least in principle, have freed it before program exit.
- // We want to test this in case memory is not freed as early as it could have been
+ // We want to test this in case memory is not freed as early as it could have been.
unsigned long reachableBefore = 0;
unsigned long reachableAfter = 0;
- // needed as out params but we don't test them
+ // Needed as out params but we don't test them.
unsigned long dubious = 0;
unsigned long suppressed = 0;
- // ensure that runtime is large enough before checking for memory leaks
- // otherwise when loading the network it will automatically reserve memory that won't be released until destruction
+ // Ensure that runtime is large enough before checking for memory leaks.
+ // Otherwise, when loading the network, it will automatically reserve memory that won't be released
+ // until destruction.
armnn::NetworkId networkIdentifier;
- armnn::Runtime runtime(armnn::Compute::GpuAcc);
+ armnn::IRuntime::CreationOptions options;
+ armnn::Runtime runtime(options);
armnn::RuntimeLoadedNetworksReserve(&runtime);
- // check for leaks before we load the network and record them so that we can see the delta after unloading
+ // Checks for leaks before we load the network and record them so that we can see the delta after unloading.
VALGRIND_DO_QUICK_LEAK_CHECK;
VALGRIND_COUNT_LEAKS(leakedBefore, dubious, reachableBefore, suppressed);
// build a mock-network and load it into the runtime
+ std::vector<armnn::Compute> backends = {armnn::Compute::GpuAcc};
{
armnn::TensorInfo inputTensorInfo(armnn::TensorShape({ 7, 7 }), armnn::DataType::Float32);
armnn::TensorInfo outputTensorInfo(armnn::TensorShape({ 7, 7 }), armnn::DataType::Float32);
@@ -231,12 +257,12 @@ BOOST_AUTO_TEST_CASE(RuntimeMemoryUsage)
input->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
layer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
- // set the tensors in the network
+ // Sets the tensors in the network.
input->GetOutputSlot(0).SetTensorInfo(inputTensorInfo);
layer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
// optimize the network
- armnn::IOptimizedNetworkPtr optNet = Optimize(*mockNetwork, runtime.GetDeviceSpec());
+ armnn::IOptimizedNetworkPtr optNet = Optimize(*mockNetwork, backends, runtime.GetDeviceSpec());
runtime.LoadNetwork(networkIdentifier, std::move(optNet));
}
@@ -246,16 +272,16 @@ BOOST_AUTO_TEST_CASE(RuntimeMemoryUsage)
VALGRIND_DO_ADDED_LEAK_CHECK;
VALGRIND_COUNT_LEAKS(leakedAfter, dubious, reachableAfter, suppressed);
- // if we're not running under Valgrind, these vars will have been initialised to 0, so this will always pass
+ // If we're not running under Valgrind, these vars will have been initialised to 0, so this will always pass.
BOOST_TEST(leakedBefore == leakedAfter);
// Add a reasonable threshold before and after running valgrind with the ACL clear cache function.
// TODO Threshold set to 80k until the root cause of the memory leakage is found and fixed. Revert threshold
- // value to 1024 when fixed
+ // value to 1024 when fixed.
BOOST_TEST(static_cast<long>(reachableAfter) - static_cast<long>(reachableBefore) < 81920);
- // these are needed because VALGRIND_COUNT_LEAKS is a macro that assigns to the parameters
- // so they are assigned to, but still considered unused, causing a warning
+ // These are needed because VALGRIND_COUNT_LEAKS is a macro that assigns to the parameters
+ // so they are assigned to, but still considered unused, causing a warning.
boost::ignore_unused(dubious);
boost::ignore_unused(suppressed);
}
@@ -263,7 +289,7 @@ BOOST_AUTO_TEST_CASE(RuntimeMemoryUsage)
// Note: this part of the code is due to be removed when we fully trust the gperftools based results.
#ifdef WITH_VALGRIND
-// run with the following command to get all the amazing output (in the devenv/build folder) :)
+// Run with the following command to get all the amazing output (in the devenv/build folder) :)
// valgrind --leak-check=full --show-leak-kinds=all --log-file=Valgrind_Memcheck_Leak_Report.txt armnn/test/UnitTests
BOOST_AUTO_TEST_CASE(RuntimeMemoryLeak)
{
@@ -276,11 +302,11 @@ BOOST_AUTO_TEST_CASE(RuntimeMemoryLeak)
// A start-pointer or chain of start-pointers to the block is found. Since the block is still pointed at,
// the programmer could, at least in principle, have freed it before program exit.
- // We want to test this in case memory is not freed as early as it could have been
+ // We want to test this in case memory is not freed as early as it could have been.
unsigned long reachableBefore = 0;
unsigned long reachableAfter = 0;
- // needed as out params but we don't test them
+ // Needed as out params but we don't test them.
unsigned long dubious = 0;
unsigned long suppressed = 0;
@@ -288,14 +314,15 @@ BOOST_AUTO_TEST_CASE(RuntimeMemoryLeak)
// ensure that runtime is large enough before checking for memory leaks
// otherwise when loading the network it will automatically reserve memory that won't be released until destruction
- armnn::Runtime runtime(armnn::Compute::CpuRef);
+ armnn::IRuntime::CreationOptions options;
+ armnn::Runtime runtime(options);
armnn::RuntimeLoadedNetworksReserve(&runtime);
- // check for leaks before we load the network and record them so that we can see the delta after unloading
+ // Checks for leaks before we load the network and record them so that we can see the delta after unloading.
VALGRIND_DO_QUICK_LEAK_CHECK;
VALGRIND_COUNT_LEAKS(leakedBefore, dubious, reachableBefore, suppressed);
- // build a mock-network and load it into the runtime
+ // Builds a mock-network and load it into the runtime.
{
unsigned int inputShape[] = {1, 7, 1, 1};
armnn::TensorInfo inputTensorInfo(4, inputShape, armnn::DataType::Float32);
@@ -303,10 +330,9 @@ BOOST_AUTO_TEST_CASE(RuntimeMemoryLeak)
std::unique_ptr<armnn::Network> mockNetwork1 = std::make_unique<armnn::Network>();
mockNetwork1->AddInputLayer(0, "test layer");
- armnn::DeviceSpec device;
- device.DefaultComputeDevice = armnn::Compute::CpuRef;
- runtime.LoadNetwork(networkIdentifier1, Optimize(*mockNetwork1, device));
+ std::vector<armnn::Compute> backends = {armnn::Compute::CpuRef};
+ runtime.LoadNetwork(networkIdentifier1, Optimize(*mockNetwork1, backends, runtime.GetDeviceSpec()));
}
runtime.UnloadNetwork(networkIdentifier1);
@@ -314,7 +340,7 @@ BOOST_AUTO_TEST_CASE(RuntimeMemoryLeak)
VALGRIND_DO_ADDED_LEAK_CHECK;
VALGRIND_COUNT_LEAKS(leakedAfter, dubious, reachableAfter, suppressed);
- // if we're not running under Valgrind, these vars will have been initialised to 0, so this will always pass
+ // If we're not running under Valgrind, these vars will have been initialised to 0, so this will always pass.
BOOST_TEST(leakedBefore == leakedAfter);
#if defined(ARMCOMPUTECL_ENABLED)
@@ -329,11 +355,134 @@ BOOST_AUTO_TEST_CASE(RuntimeMemoryLeak)
BOOST_TEST(reachableBefore >= reachableAfter);
- // these are needed because VALGRIND_COUNT_LEAKS is a macro that assigns to the parameters
- // so they are assigned to, but still considered unused, causing a warning
+ // These are needed because VALGRIND_COUNT_LEAKS is a macro that assigns to the parameters
+ // so they are assigned to, but still considered unused, causing a warning.
boost::ignore_unused(dubious);
boost::ignore_unused(suppressed);
}
#endif
+#if ARMCOMPUTENEON_ENABLED
+BOOST_AUTO_TEST_CASE(RuntimeValidateCpuAccDeviceSupportLayerNoFallback)
+{
+ // build up the structure of the network
+ armnn::INetworkPtr net(armnn::INetwork::Create());
+
+ armnn::IConnectableLayer* input = net->AddInputLayer(0);
+
+ armnn::IConnectableLayer* output = net->AddOutputLayer(0);
+
+ input->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+ input->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32));
+
+ armnn::IRuntime::CreationOptions options;
+ armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
+
+ std::vector<armnn::Compute> backends = { armnn::Compute::CpuAcc };
+ armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec());
+ BOOST_CHECK(optNet);
+
+ // Load it into the runtime. It should succeed.
+ armnn::NetworkId netId;
+ BOOST_TEST(runtime->LoadNetwork(netId, std::move(optNet)) == armnn::Status::Success);
+}
+#endif // ARMCOMPUTENEON_ENABLED
+
+#if ARMCOMPUTECL_ENABLED
+BOOST_AUTO_TEST_CASE(RuntimeValidateGpuDeviceSupportLayerNoFallback)
+{
+ // build up the structure of the network
+ armnn::INetworkPtr net(armnn::INetwork::Create());
+
+ armnn::IConnectableLayer* input = net->AddInputLayer(0);
+
+ armnn::IConnectableLayer* output = net->AddOutputLayer(0);
+
+ input->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+ input->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32));
+
+ armnn::IRuntime::CreationOptions options;
+ armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
+
+ std::vector<armnn::Compute> backends = { armnn::Compute::GpuAcc };
+ armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec());
+ BOOST_CHECK(optNet);
+
+ // Load it into the runtime. It should succeed.
+ armnn::NetworkId netId;
+ BOOST_TEST(runtime->LoadNetwork(netId, std::move(optNet)) == armnn::Status::Success);
+}
+#endif // ARMCOMPUTECL_ENABLED
+
+BOOST_AUTO_TEST_CASE(RuntimeCpuRef)
+{
+ using namespace armnn;
+
+ // Create runtime in which test will run
+ armnn::IRuntime::CreationOptions options;
+ armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
+
+ // build up the structure of the network
+ INetworkPtr net(INetwork::Create());
+
+ IConnectableLayer* input = net->AddInputLayer(0);
+
+ // This layer configuration isn't supported by CpuAcc; it should fall back to CpuRef.
+ NormalizationDescriptor descriptor;
+ IConnectableLayer* normalize = net->AddNormalizationLayer(descriptor);
+
+ IConnectableLayer* output = net->AddOutputLayer(0);
+
+ input->GetOutputSlot(0).Connect(normalize->GetInputSlot(0));
+ normalize->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+ input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 4 }, DataType::Float32));
+ normalize->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 4 }, DataType::Float32));
+
+ // optimize the network
+ std::vector<armnn::Compute> backends = { armnn::Compute::CpuRef };
+ IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
+
+ // Load it into the runtime. It should succeed.
+ armnn::NetworkId netId;
+ BOOST_TEST(runtime->LoadNetwork(netId, std::move(optNet)) == Status::Success);
+}
+
+BOOST_AUTO_TEST_CASE(RuntimeFallbackToCpuRef)
+{
+ using namespace armnn;
+
+ // Create runtime in which test will run
+ armnn::IRuntime::CreationOptions options;
+ armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
+
+ // build up the structure of the network
+ INetworkPtr net(INetwork::Create());
+
+ IConnectableLayer* input = net->AddInputLayer(0);
+
+ // This layer configuration isn't supported by CpuAcc; it should fall back to CpuRef.
+ NormalizationDescriptor descriptor;
+ IConnectableLayer* normalize = net->AddNormalizationLayer(descriptor);
+
+ IConnectableLayer* output = net->AddOutputLayer(0);
+
+ input->GetOutputSlot(0).Connect(normalize->GetInputSlot(0));
+ normalize->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+ input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 4 }, DataType::Float32));
+ normalize->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 4 }, DataType::Float32));
+
+ // Allow fallback to CpuRef.
+ std::vector<armnn::Compute> backends = { armnn::Compute::CpuAcc, armnn::Compute::CpuRef };
+ // optimize the network
+ IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
+
+ // Load it into the runtime. It should succeed.
+ armnn::NetworkId netId;
+ BOOST_TEST(runtime->LoadNetwork(netId, std::move(optNet)) == Status::Success);
+}
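+
+// After a successful load, inference would be run against the same netId (a sketch,
+// assuming valid bindings and pre-sized buffers for this network):
+//     armnn::InputTensors inputTensors{ { 0, armnn::ConstTensor(inputInfo, inputData.data()) } };
+//     armnn::OutputTensors outputTensors{ { 0, armnn::Tensor(outputInfo, outputData.data()) } };
+//     runtime->EnqueueWorkload(netId, inputTensors, outputTensors);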
+
BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/armnn/test/TensorHelpers.hpp b/src/armnn/test/TensorHelpers.hpp
index aac4c1d15e..ec38940a44 100644
--- a/src/armnn/test/TensorHelpers.hpp
+++ b/src/armnn/test/TensorHelpers.hpp
@@ -39,7 +39,7 @@ struct SelectiveComparer<T, false>
{
static bool Compare(T a, T b)
{
- // if a or b is zero, percent_tolerance does an exact match, so compare to a small, constant tolerance instead
+ // If a or b is zero, percent_tolerance does an exact match, so compare to a small, constant tolerance instead.
if (a == 0.0f || b == 0.0f)
{
return std::abs(a - b) <= g_FloatCloseToZeroTolerance;
@@ -62,7 +62,7 @@ template <typename T, std::size_t n>
boost::test_tools::predicate_result CompareTensors(const boost::multi_array<T, n>& a,
const boost::multi_array<T, n>& b)
{
- // check they are same shape
+ // Checks they are same shape.
for (unsigned int i=0; i<n; i++)
{
if (a.shape()[i] != b.shape()[i])
@@ -77,9 +77,9 @@ boost::test_tools::predicate_result CompareTensors(const boost::multi_array<T, n
}
}
- // now compare element-wise
+ // Now compares element-wise.
- // fun iteration over n dimensions
+ // Fun iteration over n dimensions.
std::array<unsigned int, n> indices;
for (unsigned int i = 0; i < n; i++)
{
@@ -150,7 +150,7 @@ boost::test_tools::predicate_result CompareTensors(const boost::multi_array<T, n
}
-// Creates a boost::multi_array with shape defined by the given TensorInfo.
+// Creates a boost::multi_array with the shape defined by the given TensorInfo.
template <typename T, std::size_t n>
boost::multi_array<T, n> MakeTensor(const armnn::TensorInfo& tensorInfo)
{
@@ -164,7 +164,7 @@ boost::multi_array<T, n> MakeTensor(const armnn::TensorInfo& tensorInfo)
return boost::multi_array<T, n>(shape);
}
-// Creates a boost::multi_array with shape defined by the given TensorInfo and contents defined by the given vector.
+// Creates a boost::multi_array with the shape defined by the given TensorInfo and contents defined by the given vector.
template <typename T, std::size_t n>
boost::multi_array<T, n> MakeTensor(const armnn::TensorInfo& tensorInfo, const std::vector<T>& flat)
{
diff --git a/src/armnn/test/TensorTest.cpp b/src/armnn/test/TensorTest.cpp
index 2bb37f4fb8..8057d4dd7a 100644
--- a/src/armnn/test/TensorTest.cpp
+++ b/src/armnn/test/TensorTest.cpp
@@ -8,7 +8,7 @@
namespace armnn
{
-// Add unit test framework for interpreting TensorInfo type
+// Adds unit test framework for interpreting TensorInfo type.
std::ostream& boost_test_print_type(std::ostream& ostr, const TensorInfo& right)
{
ostr << "TensorInfo[ "
@@ -115,7 +115,7 @@ BOOST_AUTO_TEST_CASE(TensorVsConstTensor)
armnn::Tensor t(TensorInfo(), &mutableDatum);
armnn::ConstTensor ct(TensorInfo(), &immutableDatum);
- // Check that both Tensor and ConstTensor can be passed as a ConstTensor
+ // Checks that both Tensor and ConstTensor can be passed as a ConstTensor.
CheckTensor(t);
CheckTensor(ct);
}
@@ -136,9 +136,9 @@ BOOST_AUTO_TEST_CASE(ModifyTensorInfo)
BOOST_AUTO_TEST_CASE(TensorShapeOperatorBrackets)
{
TensorShape shape({0,1,2,3});
- // Check version of operator[] which returns an unsigned int
+ // Checks version of operator[] which returns an unsigned int.
BOOST_TEST(shape[2] == 2);
- // Check the version of operator[] which returns a reference
+ // Checks the version of operator[] which returns a reference.
shape[2] = 20;
BOOST_TEST(shape[2] == 20);
}
diff --git a/src/armnn/test/UnitTests.cpp b/src/armnn/test/UnitTests.cpp
index 0e2f99583f..203fbfe821 100644
--- a/src/armnn/test/UnitTests.cpp
+++ b/src/armnn/test/UnitTests.cpp
@@ -44,7 +44,7 @@ class SetupDebugOutput
public:
SetupDebugOutput()
{
- // Send the output to both cout (as standard) and the debug output.
+ // Sends the output to both cout (as standard) and the debug output.
m_OutputStream.push(tee(std::cout));
m_OutputStream.push(m_DebugOutputSink);
diff --git a/src/armnn/test/UnitTests.hpp b/src/armnn/test/UnitTests.hpp
index 9b750b5b33..8d5c7055e7 100644
--- a/src/armnn/test/UnitTests.hpp
+++ b/src/armnn/test/UnitTests.hpp
@@ -12,7 +12,7 @@
inline void ConfigureLoggingTest()
{
- // Configure logging for both the ARMNN library and this test program
+ // Configures logging for both the ARMNN library and this test program.
armnn::ConfigureLogging(true, true, armnn::LogSeverity::Fatal);
armnnUtils::ConfigureLogging(boost::log::core::get().get(), true, true, armnn::LogSeverity::Fatal);
}
@@ -43,9 +43,27 @@ void CompareTestResultIfSupported(const std::string& testName, const LayerTestRe
}
}
+template <typename T, std::size_t n>
+void CompareTestResultIfSupported(const std::string& testName, const std::vector<LayerTestResult<T, n>>& testResult)
+{
+ bool testNameIndicatesUnsupported = testName.find("UNSUPPORTED") != std::string::npos;
+ for (unsigned int i = 0; i < testResult.size(); ++i)
+ {
+ BOOST_CHECK_MESSAGE(testNameIndicatesUnsupported != testResult[i].supported,
+ "The test name does not match the supportedness it is reporting");
+ if (testResult[i].supported)
+ {
+ BOOST_TEST(CompareTensors(testResult[i].output, testResult[i].outputExpected));
+ }
+ }
+}
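+
+// The vector overload above is selected by normal overload resolution when a test
+// function returns std::vector<LayerTestResult<T, n>>, so RunTestFunction below
+// needs no changes for multi-result tests.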
+
template<typename FactoryType, typename TFuncPtr, typename... Args>
void RunTestFunction(const char* testName, TFuncPtr testFunction, Args... args)
{
+ std::unique_ptr<armnn::Profiler> profiler = std::make_unique<armnn::Profiler>();
+ armnn::ProfilerManager::GetInstance().RegisterProfiler(profiler.get());
+
FactoryType workloadFactory;
auto testResult = (*testFunction)(workloadFactory, args...);
CompareTestResultIfSupported(testName, testResult);
diff --git a/src/armnn/test/UtilsTests.cpp b/src/armnn/test/UtilsTests.cpp
index 11fa51626c..2268aa31e2 100644
--- a/src/armnn/test/UtilsTests.cpp
+++ b/src/armnn/test/UtilsTests.cpp
@@ -4,10 +4,14 @@
//
#include <boost/test/unit_test.hpp>
+
#include <armnn/Utils.hpp>
#include <armnn/Types.hpp>
#include <armnn/TypesUtils.hpp>
#include <armnn/Descriptors.hpp>
+#include <GraphTopologicalSort.hpp>
+#include <Graph.hpp>
+#include "TypeUtils.hpp"
BOOST_AUTO_TEST_SUITE(Utils)
@@ -55,4 +59,110 @@ BOOST_AUTO_TEST_CASE(PermuteDescriptorWithDuplicatedMappings)
BOOST_CHECK_THROW(armnn::PermuteDescriptor({ 1u, 1u, 0u }), armnn::InvalidArgumentException);
}
+BOOST_AUTO_TEST_CASE(HalfType)
+{
+ using namespace half_float::literal;
+ armnn::Half a = 1.0_h;
+
+ float b = 1.0f;
+ armnn::Half c(b);
+
+ // Test half type
+ BOOST_CHECK_EQUAL(a, b);
+ BOOST_CHECK_EQUAL(sizeof(c), 2);
+
+ // Test that the half type is a floating-point type.
+ BOOST_CHECK(std::is_floating_point<armnn::Half>::value);
+
+ // Test utility function returns correct type.
+ using ResolvedType = armnn::ResolveType<armnn::DataType::Float16>;
+ constexpr bool isHalfType = std::is_same<armnn::Half, ResolvedType>::value;
+ BOOST_CHECK(isHalfType);
+
+ armnn::DataType dt = armnn::GetDataType<armnn::Half>();
+ BOOST_CHECK(dt == armnn::DataType::Float16);
+
+ // Test utility functions return the correct size.
+ BOOST_CHECK(GetDataTypeSize(armnn::DataType::Float16) == 2);
+
+ // Test utility functions return the correct name.
+ BOOST_CHECK((GetDataTypeName(armnn::DataType::Float16) == std::string("Float16")));
+}
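+
+// ResolveType maps a DataType enum value to its C++ type at compile time, which is
+// what keeps templated layer tests type-generic. A minimal sketch (the helper below
+// is hypothetical):
+//     template<armnn::DataType DT, typename T = armnn::ResolveType<DT>>
+//     std::vector<T> MakeBuffer(size_t n) { return std::vector<T>(n); }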
+
+BOOST_AUTO_TEST_CASE(GraphTopologicalSortSimpleTest)
+{
+ std::map<int, std::vector<int>> graph;
+
+ graph[0] = {2};
+ graph[1] = {3};
+ graph[2] = {4};
+ graph[3] = {4};
+ graph[4] = {5};
+ graph[5] = {};
+
+ auto getNodeInputs = [graph](int node) -> std::vector<int>
+ {
+ return graph.find(node)->second;
+ };
+
+ std::vector<int> targetNodes = {0, 1};
+
+ std::vector<int> output;
+ bool sortCompleted = armnnUtils::GraphTopologicalSort<int>(targetNodes, getNodeInputs, output);
+
+ BOOST_TEST(sortCompleted);
+
+ std::vector<int> correctResult = {5, 4, 2, 0, 3, 1};
+ BOOST_CHECK_EQUAL_COLLECTIONS(output.begin(), output.end(), correctResult.begin(), correctResult.end());
+}
+
+BOOST_AUTO_TEST_CASE(GraphTopologicalSortVariantTest)
+{
+ std::map<int, std::vector<int>> graph;
+
+ graph[0] = {2};
+ graph[1] = {2};
+ graph[2] = {3, 4};
+ graph[3] = {5};
+ graph[4] = {5};
+ graph[5] = {6};
+ graph[6] = {};
+
+ auto getNodeInputs = [graph](int node) -> std::vector<int>
+ {
+ return graph.find(node)->second;
+ };
+
+ std::vector<int> targetNodes = {0, 1};
+
+ std::vector<int> output;
+ bool sortCompleted = armnnUtils::GraphTopologicalSort<int>(targetNodes, getNodeInputs, output);
+
+ BOOST_TEST(sortCompleted);
+
+ std::vector<int> correctResult = {6, 5, 3, 4, 2, 0, 1};
+ BOOST_CHECK_EQUAL_COLLECTIONS(output.begin(), output.end(), correctResult.begin(), correctResult.end());
+}
+
+BOOST_AUTO_TEST_CASE(CyclicalGraphTopologicalSortTest)
+{
+ std::map<int, std::vector<int>> graph;
+
+ graph[0] = {1};
+ graph[1] = {2};
+ graph[2] = {0};
+
+ auto getNodeInputs = [graph](int node) -> std::vector<int>
+ {
+ return graph.find(node)->second;
+ };
+
+ std::vector<int> targetNodes = {0};
+
+ std::vector<int> output;
+ bool sortCompleted = armnnUtils::GraphTopologicalSort<int>(targetNodes, getNodeInputs, output);
+
+ BOOST_TEST(!sortCompleted);
+}
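+
+// A false return is the cycle signal: GraphTopologicalSort stops rather than looping
+// forever, so callers must check the flag before consuming 'output'.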
+
BOOST_AUTO_TEST_SUITE_END()