diff options
Diffstat (limited to 'src/armnn/test')
23 files changed, 3706 insertions, 826 deletions
diff --git a/src/armnn/test/CreateWorkload.hpp b/src/armnn/test/CreateWorkload.hpp index c3f4b8a1bf..ee0c584b13 100644 --- a/src/armnn/test/CreateWorkload.hpp +++ b/src/armnn/test/CreateWorkload.hpp @@ -22,7 +22,7 @@ namespace using namespace std; -// Calls CreateWorkload for a layer, and checks the returned pointer is of the correct type +// Calls CreateWorkload for a layer, and checks the returned pointer is of the correct type. template<typename Workload> std::unique_ptr<Workload> MakeAndCheckWorkload(Layer& layer, Graph& graph, const IWorkloadFactory& factory) { @@ -30,18 +30,19 @@ std::unique_ptr<Workload> MakeAndCheckWorkload(Layer& layer, Graph& graph, const BOOST_TEST(workload.get() == boost::polymorphic_downcast<Workload*>(workload.get()), "Cannot convert to derived class"); std::string reasonIfUnsupported; + layer.SetComputeDevice(factory.GetCompute()); BOOST_TEST(factory.IsLayerSupported(layer, layer.GetDataType(), reasonIfUnsupported)); return std::unique_ptr<Workload>(static_cast<Workload*>(workload.release())); } -// connects two layers +// Connects two layers. void Connect(Layer* from, Layer* to, const TensorInfo& tensorInfo, unsigned int fromIndex = 0, unsigned int toIndex = 0) { from->GetOutputSlot(fromIndex).Connect(to->GetInputSlot(toIndex)); from->GetOutputHandler(fromIndex).SetTensorInfo(tensorInfo); } -// helper function to create tensor handlers for workloads, assuming they all use the same factory +// Helper function to create tensor handlers for workloads, assuming they all use the same factory. void CreateTensorHandles(armnn::Graph& graph, armnn::IWorkloadFactory& factory) { for (auto&& layer : graph.TopologicalSort()) @@ -57,11 +58,11 @@ void CreateTensorHandles(armnn::Graph& graph, armnn::IWorkloadFactory& factory) // They return the created workloads so that backend-specific checks can be performed. 
///////////////////////////////////////////////////////////////////////////////////////////// -template <typename ActivationWorkload> +template <typename ActivationWorkload, armnn::DataType DataType> std::unique_ptr<ActivationWorkload> CreateActivationWorkloadTest(armnn::IWorkloadFactory& factory, armnn::Graph& graph) { - // create the layer we're testing + // Creates the layer we're testing. ActivationDescriptor layerDesc; layerDesc.m_Function = ActivationFunction::Abs; layerDesc.m_A = 3.5f; @@ -69,19 +70,19 @@ std::unique_ptr<ActivationWorkload> CreateActivationWorkloadTest(armnn::IWorkloa ActivationLayer* const layer = graph.AddLayer<ActivationLayer>(layerDesc, "layer"); - // create extra layers + // Creates extra layers. Layer* const input = graph.AddLayer<InputLayer>(0, "input"); Layer* const output = graph.AddLayer<OutputLayer>(0, "output"); - // connect up - armnn::TensorInfo tensorInfo({1, 1}, ActivationWorkload::ms_DataType); + // Connects up. + armnn::TensorInfo tensorInfo({1, 1}, DataType); Connect(input, layer, tensorInfo); Connect(layer, output, tensorInfo); CreateTensorHandles(graph, factory); - // make the workload and check it + // Makes the workload and checks it. auto workload = MakeAndCheckWorkload<ActivationWorkload>(*layer, graph, factory); ActivationQueueDescriptor queueDescriptor = workload->GetData(); @@ -91,51 +92,51 @@ std::unique_ptr<ActivationWorkload> CreateActivationWorkloadTest(armnn::IWorkloa BOOST_TEST(queueDescriptor.m_Parameters.m_B == -10.0f); BOOST_TEST((queueDescriptor.m_Parameters.m_Function == ActivationFunction::Abs)); - // return so we can do extra, backend-specific tests + // Returns so we can do extra, backend-specific tests. 
return workload; } -template <typename AdditionWorkload> +template <typename AdditionWorkload, armnn::DataType DataType> std::unique_ptr<AdditionWorkload> CreateAdditionWorkloadTest(armnn::IWorkloadFactory& factory, armnn::Graph& graph) { - // create the layer we're testing + // Creates the layer we're testing. Layer* const layer = graph.AddLayer<AdditionLayer>("layer"); - // create extra layers + // Creates extra layers. Layer* const input1 = graph.AddLayer<InputLayer>(1, "input1"); Layer* const input2 = graph.AddLayer<InputLayer>(2, "input2"); Layer* const output = graph.AddLayer<OutputLayer>(0, "output"); - // connect up - armnn::TensorInfo tensorInfo({2, 3}, AdditionWorkload::ms_DataType); + // Connects up. + armnn::TensorInfo tensorInfo({2, 3}, DataType); Connect(input1, layer, tensorInfo, 0, 0); Connect(input2, layer, tensorInfo, 0, 1); Connect(layer, output, tensorInfo); CreateTensorHandles(graph, factory); - // make the workload and check it + // Makes the workload and checks it. auto workload = MakeAndCheckWorkload<AdditionWorkload>(*layer, graph, factory); AdditionQueueDescriptor queueDescriptor = workload->GetData(); BOOST_TEST(queueDescriptor.m_Inputs.size() == 2); BOOST_TEST(queueDescriptor.m_Outputs.size() == 1); - // return so we can do extra, backend-specific tests + // Returns so we can do extra, backend-specific tests. return workload; } -template <typename BatchNormalizationFloat32Workload> +template <typename BatchNormalizationFloat32Workload, armnn::DataType DataType> std::unique_ptr<BatchNormalizationFloat32Workload> CreateBatchNormalizationWorkloadTest( armnn::IWorkloadFactory& factory, armnn::Graph& graph) { - // create the layer we're testing + // Creates the layer we're testing. 
BatchNormalizationDescriptor layerDesc; layerDesc.m_Eps = 0.05f; BatchNormalizationLayer* const layer = graph.AddLayer<BatchNormalizationLayer>(layerDesc, "layer"); - armnn::TensorInfo weightInfo({3}, armnn::DataType::Float32); + armnn::TensorInfo weightInfo({3}, DataType); layer->m_Mean = std::make_unique<ScopedCpuTensorHandle>(weightInfo); layer->m_Variance = std::make_unique<ScopedCpuTensorHandle>(weightInfo); layer->m_Beta = std::make_unique<ScopedCpuTensorHandle>(weightInfo); @@ -145,37 +146,37 @@ std::unique_ptr<BatchNormalizationFloat32Workload> CreateBatchNormalizationWorkl layer->m_Beta->Allocate(); layer->m_Gamma->Allocate(); - // create extra layers + // Creates extra layers. Layer* const input = graph.AddLayer<InputLayer>(0, "input"); Layer* const output = graph.AddLayer<OutputLayer>(0, "output"); - // connect up - armnn::TensorInfo tensorInfo({2, 3, 1, 1}, armnn::DataType::Float32); + // Connects up. + armnn::TensorInfo tensorInfo({2, 3, 1, 1}, DataType); Connect(input, layer, tensorInfo); Connect(layer, output, tensorInfo); CreateTensorHandles(graph, factory); - // make the workload and check it + // Makes the workload and checks it. 
auto workload = MakeAndCheckWorkload<BatchNormalizationFloat32Workload>(*layer, graph, factory); BatchNormalizationQueueDescriptor queueDescriptor = workload->GetData(); BOOST_TEST(queueDescriptor.m_Parameters.m_Eps == 0.05f); BOOST_TEST(queueDescriptor.m_Inputs.size() == 1); BOOST_TEST(queueDescriptor.m_Outputs.size() == 1); - BOOST_TEST((queueDescriptor.m_Mean->GetTensorInfo() == TensorInfo({3}, DataType::Float32))); - BOOST_TEST((queueDescriptor.m_Variance->GetTensorInfo() == TensorInfo({3}, DataType::Float32))); - BOOST_TEST((queueDescriptor.m_Gamma->GetTensorInfo() == TensorInfo({3}, DataType::Float32))); - BOOST_TEST((queueDescriptor.m_Beta->GetTensorInfo() == TensorInfo({3}, DataType::Float32))); + BOOST_TEST((queueDescriptor.m_Mean->GetTensorInfo() == TensorInfo({3}, DataType))); + BOOST_TEST((queueDescriptor.m_Variance->GetTensorInfo() == TensorInfo({3}, DataType))); + BOOST_TEST((queueDescriptor.m_Gamma->GetTensorInfo() == TensorInfo({3}, DataType))); + BOOST_TEST((queueDescriptor.m_Beta->GetTensorInfo() == TensorInfo({3}, DataType))); - // return so we can do extra, backend-specific tests + // Returns so we can do extra, backend-specific tests. return workload; } -template <typename Convolution2dWorkload> +template <typename Convolution2dWorkload, armnn::DataType DataType> std::unique_ptr<Convolution2dWorkload> CreateConvolution2dWorkloadTest(armnn::IWorkloadFactory& factory, armnn::Graph& graph) { - // create the layer we're testing + // Creates the layer we're testing. 
Convolution2dDescriptor layerDesc; layerDesc.m_PadLeft = 3; layerDesc.m_PadRight = 3; @@ -187,24 +188,22 @@ std::unique_ptr<Convolution2dWorkload> CreateConvolution2dWorkloadTest(armnn::IW Convolution2dLayer* const layer = graph.AddLayer<Convolution2dLayer>(layerDesc, "layer"); - layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(TensorInfo({2, 3, 5, 3}, - Convolution2dWorkload::ms_DataType)); - layer->m_Bias = std::make_unique<ScopedCpuTensorHandle> - (TensorInfo({2}, GetBiasDataType(Convolution2dWorkload::ms_DataType))); + layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(TensorInfo({2, 3, 5, 3}, DataType)); + layer->m_Bias = std::make_unique<ScopedCpuTensorHandle>(TensorInfo({2}, GetBiasDataType(DataType))); layer->m_Weight->Allocate(); layer->m_Bias->Allocate(); - // create extra layers + // Creates extra layers. Layer* const input = graph.AddLayer<InputLayer>(0, "input"); Layer* const output = graph.AddLayer<OutputLayer>(0, "output"); - // connect up - Connect(input, layer, TensorInfo({2, 3, 8, 16}, Convolution2dWorkload::ms_DataType)); - Connect(layer, output, TensorInfo({2, 2, 2, 10}, Convolution2dWorkload::ms_DataType)); + // Connects up. + Connect(input, layer, TensorInfo({2, 3, 8, 16}, DataType)); + Connect(layer, output, TensorInfo({2, 2, 2, 10}, DataType)); CreateTensorHandles(graph, factory); - // make the workload and check it + // Makes the workload and checks it. 
auto workload = MakeAndCheckWorkload<Convolution2dWorkload>(*layer, graph, factory); Convolution2dQueueDescriptor queueDescriptor = workload->GetData(); @@ -218,20 +217,123 @@ std::unique_ptr<Convolution2dWorkload> CreateConvolution2dWorkloadTest(armnn::IW BOOST_TEST(queueDescriptor.m_Inputs.size() == 1); BOOST_TEST(queueDescriptor.m_Outputs.size() == 1); - BOOST_TEST((queueDescriptor.m_Weight->GetTensorInfo() == TensorInfo({2, 3, 5, 3}, - Convolution2dWorkload::ms_DataType))); + BOOST_TEST((queueDescriptor.m_Weight->GetTensorInfo() == TensorInfo({2, 3, 5, 3}, DataType))); BOOST_TEST((queueDescriptor.m_Bias->GetTensorInfo() == - TensorInfo({2}, GetBiasDataType(Convolution2dWorkload::ms_DataType)))); + TensorInfo({2}, GetBiasDataType(DataType)))); - // return so we can do extra, backend-specific tests + // Returns so we can do extra, backend-specific tests. return workload; } -template <typename Convolution2dWorkload> +template <typename LstmWorkload> +std::unique_ptr<LstmWorkload> CreateLstmWorkloadTest(armnn::IWorkloadFactory& factory, armnn::Graph& graph) +{ + // This parameter setting is for withCifgWithPeepholeNoProjection + LstmDescriptor layerDesc; + layerDesc.m_ActivationFunc = 4; + layerDesc.m_ClippingThresCell = 0.0f; + layerDesc.m_ClippingThresProj = 0.0f; + layerDesc.m_CifgEnabled = true; + layerDesc.m_PeepholeEnabled = true; + layerDesc.m_ProjectionEnabled = false; + + LstmLayer* const layer = graph.AddLayer<LstmLayer>(layerDesc, "layer"); + unsigned int batchSize = 2; + unsigned int inputSize = 2; + unsigned int numUnits = 4; + unsigned int outputSize = 4; + + layer->m_BasicParameters.m_InputToForgetWeights = std::make_unique<ScopedCpuTensorHandle> + (TensorInfo({ numUnits, inputSize }, DataType::Float32)); + layer->m_BasicParameters.m_InputToCellWeights = std::make_unique<ScopedCpuTensorHandle> + (TensorInfo({ numUnits, inputSize }, DataType::Float32)); + layer->m_BasicParameters.m_InputToOutputWeights = std::make_unique<ScopedCpuTensorHandle> + 
(TensorInfo({ numUnits, inputSize }, DataType::Float32)); + layer->m_BasicParameters.m_RecurrentToForgetWeights = std::make_unique<ScopedCpuTensorHandle> + (TensorInfo({ numUnits, outputSize }, DataType::Float32)); + layer->m_BasicParameters.m_RecurrentToCellWeights = std::make_unique<ScopedCpuTensorHandle> + (TensorInfo({ numUnits, outputSize }, DataType::Float32)); + layer->m_BasicParameters.m_RecurrentToOutputWeights = std::make_unique<ScopedCpuTensorHandle> + (TensorInfo({ numUnits, outputSize }, DataType::Float32)); + layer->m_BasicParameters.m_ForgetGateBias = std::make_unique<ScopedCpuTensorHandle> + (TensorInfo({ numUnits }, DataType::Float32)); + layer->m_BasicParameters.m_CellBias = std::make_unique<ScopedCpuTensorHandle> + (TensorInfo({ numUnits }, DataType::Float32)); + layer->m_BasicParameters.m_OutputGateBias = std::make_unique<ScopedCpuTensorHandle> + (TensorInfo({ numUnits }, DataType::Float32)); + + layer->m_BasicParameters.m_InputToForgetWeights->Allocate(); + layer->m_BasicParameters.m_InputToCellWeights->Allocate(); + layer->m_BasicParameters.m_InputToOutputWeights->Allocate(); + layer->m_BasicParameters.m_RecurrentToForgetWeights->Allocate(); + layer->m_BasicParameters.m_RecurrentToCellWeights->Allocate(); + layer->m_BasicParameters.m_RecurrentToOutputWeights->Allocate(); + layer->m_BasicParameters.m_ForgetGateBias->Allocate(); + layer->m_BasicParameters.m_CellBias->Allocate(); + layer->m_BasicParameters.m_OutputGateBias->Allocate(); + + + if (layerDesc.m_PeepholeEnabled) + { + layer->m_PeepholeParameters.m_CellToForgetWeights = std::make_unique<ScopedCpuTensorHandle> + (TensorInfo({ numUnits }, DataType::Float32)); + layer->m_PeepholeParameters.m_CellToOutputWeights = std::make_unique<ScopedCpuTensorHandle> + (TensorInfo({ numUnits }, DataType::Float32)); + layer->m_PeepholeParameters.m_CellToForgetWeights->Allocate(); + layer->m_PeepholeParameters.m_CellToOutputWeights->Allocate(); + } + + // create input and output layers + Layer* const 
input = graph.AddLayer<InputLayer>(0, "input"); + Layer* const outputStateIn = graph.AddLayer<InputLayer>(1, "outputStateIn"); + Layer* const cellStateIn = graph.AddLayer<InputLayer>(2, "cellStateIn"); + Layer* const scratchBuffer = graph.AddLayer<OutputLayer>(0, "scratchBuffer"); + Layer* const outputStateOut = graph.AddLayer<OutputLayer>(1, "outputStateOut"); + Layer* const cellStateOut = graph.AddLayer<OutputLayer>(2, "cellStateOut"); + Layer* const output = graph.AddLayer<OutputLayer>(3, "output"); + + // connect up + armnn::TensorInfo lstmTensorInfo1({ batchSize, inputSize }, DataType::Float32); + armnn::TensorInfo lstmTensorInfo2({ batchSize, numUnits}, DataType::Float32); + armnn::TensorInfo lstmTensorInfo3({ batchSize, outputSize }, DataType::Float32); + armnn::TensorInfo lstmTensorInfoScratchBuff({ batchSize, numUnits*3 }, DataType::Float32); + if (layerDesc.m_CifgEnabled) + { + lstmTensorInfoScratchBuff.SetShape({ batchSize, numUnits*4 }); + } + + Connect(input, layer, lstmTensorInfo1, 0, 0); + Connect(cellStateIn, layer, lstmTensorInfo2, 0, 1); + Connect(outputStateIn, layer, lstmTensorInfo3, 0, 2); + Connect(layer, scratchBuffer, lstmTensorInfoScratchBuff, 0, 0); + Connect(layer, outputStateOut, lstmTensorInfo3, 1, 0); + Connect(layer, cellStateOut, lstmTensorInfo2, 2, 0); + Connect(layer, output, lstmTensorInfo3, 3, 0); + + CreateTensorHandles(graph, factory); + + // make the workload and check it + auto workload = MakeAndCheckWorkload<LstmWorkload>(*layer, graph, factory); + LstmQueueDescriptor queueDescriptor = workload->GetData(); + BOOST_TEST(queueDescriptor.m_Parameters.m_ActivationFunc == 4); + BOOST_TEST(queueDescriptor.m_Parameters.m_ClippingThresCell == 0.0f); + BOOST_TEST(queueDescriptor.m_Parameters.m_ClippingThresProj == 0.0f); + BOOST_TEST(queueDescriptor.m_Inputs.size() == 3); + BOOST_TEST(queueDescriptor.m_Outputs.size() == 4); + + BOOST_TEST((queueDescriptor.m_InputToForgetWeights->GetTensorInfo() == TensorInfo({ numUnits, inputSize }, 
+ DataType::Float32))); + BOOST_TEST((queueDescriptor.m_OutputGateBias->GetTensorInfo() == TensorInfo({ numUnits }, + DataType::Float32))); + BOOST_TEST((queueDescriptor.m_CellBias->GetTensorInfo() == TensorInfo({ numUnits }, DataType::Float32))); + return workload; +} + +template <typename Convolution2dWorkload, armnn::DataType DataType> std::unique_ptr<Convolution2dWorkload> CreateDirectConvolution2dWorkloadTest(armnn::IWorkloadFactory& factory, armnn::Graph& graph) { - // create the layer we're testing + // Creates the layer we're testing. Convolution2dDescriptor layerDesc; layerDesc.m_PadLeft = 1; layerDesc.m_PadRight = 1; @@ -243,26 +345,25 @@ std::unique_ptr<Convolution2dWorkload> CreateDirectConvolution2dWorkloadTest(arm Convolution2dLayer* const layer = graph.AddLayer<Convolution2dLayer>(layerDesc, "layer"); - float inputsQScale = Convolution2dWorkload::ms_DataType == DataType::QuantisedAsymm8 ? 1.0f : 0.0; - float outputQScale = Convolution2dWorkload::ms_DataType == DataType::QuantisedAsymm8 ? 2.0f : 0.0; + float inputsQScale = DataType == armnn::DataType::QuantisedAsymm8 ? 1.0f : 0.0; + float outputQScale = DataType == armnn::DataType::QuantisedAsymm8 ? 2.0f : 0.0; - layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(TensorInfo({ 2, 3, 3, 3 }, - Convolution2dWorkload::ms_DataType, inputsQScale)); + layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(TensorInfo({ 2, 3, 3, 3 }, DataType, inputsQScale)); layer->m_Bias = std::make_unique<ScopedCpuTensorHandle> - (TensorInfo({2}, GetBiasDataType(Convolution2dWorkload::ms_DataType), inputsQScale)); + (TensorInfo({2}, GetBiasDataType(DataType), inputsQScale)); layer->m_Weight->Allocate(); layer->m_Bias->Allocate(); - // create extra layers + // Creates extra layers. 
Layer* const input = graph.AddLayer<InputLayer>(0, "input"); Layer* const output = graph.AddLayer<OutputLayer>(0, "output"); - // connect up - Connect(input, layer, TensorInfo({2, 3, 6, 6}, Convolution2dWorkload::ms_DataType, inputsQScale)); - Connect(layer, output, TensorInfo({2, 2, 6, 6}, Convolution2dWorkload::ms_DataType, outputQScale)); + // Connects up. + Connect(input, layer, TensorInfo({2, 3, 6, 6}, DataType, inputsQScale)); + Connect(layer, output, TensorInfo({2, 2, 6, 6}, DataType, outputQScale)); CreateTensorHandles(graph, factory); - // make the workload and check it + // Makes the workload and checks it. auto workload = MakeAndCheckWorkload<Convolution2dWorkload>(*layer, graph, factory); Convolution2dQueueDescriptor queueDescriptor = workload->GetData(); @@ -277,11 +378,11 @@ std::unique_ptr<Convolution2dWorkload> CreateDirectConvolution2dWorkloadTest(arm BOOST_TEST(queueDescriptor.m_Inputs.size() == 1); BOOST_TEST(queueDescriptor.m_Outputs.size() == 1); BOOST_TEST((queueDescriptor.m_Weight->GetTensorInfo() == TensorInfo({2, 3, 3, 3}, - Convolution2dWorkload::ms_DataType, inputsQScale))); + DataType, inputsQScale))); BOOST_TEST((queueDescriptor.m_Bias->GetTensorInfo() - == TensorInfo({2}, GetBiasDataType(Convolution2dWorkload::ms_DataType), inputsQScale))); + == TensorInfo({2}, GetBiasDataType(DataType), inputsQScale))); - // return so we can do extra, backend-specific tests + // Returns so we can do extra, backend-specific tests. return workload; } @@ -289,7 +390,7 @@ template <typename DepthwiseConvolution2dFloat32Workload> std::unique_ptr<DepthwiseConvolution2dFloat32Workload> CreateDepthwiseConvolution2dWorkloadTest( armnn::IWorkloadFactory& factory, armnn::Graph& graph) { - // create the layer we're testing + // Creates the layer we're testing. 
DepthwiseConvolution2dDescriptor layerDesc; layerDesc.m_PadLeft = 3; layerDesc.m_PadRight = 3; @@ -306,16 +407,16 @@ std::unique_ptr<DepthwiseConvolution2dFloat32Workload> CreateDepthwiseConvolutio layer->m_Weight->Allocate(); layer->m_Bias->Allocate(); - // create extra layers + // Creates extra layers. Layer* const input = graph.AddLayer<InputLayer>(0, "input"); Layer* const output = graph.AddLayer<OutputLayer>(0, "output"); - // connect up + // Connects up. Connect(input, layer, TensorInfo({2, 3, 8, 16}, armnn::DataType::Float32)); Connect(layer, output, TensorInfo({2, 9, 2, 10}, armnn::DataType::Float32)); CreateTensorHandles(graph, factory); - // make the workload and check it + // Makes the workload and checks it. auto workload = MakeAndCheckWorkload<DepthwiseConvolution2dFloat32Workload>(*layer, graph, factory); DepthwiseConvolution2dQueueDescriptor queueDescriptor = workload->GetData(); @@ -332,41 +433,39 @@ std::unique_ptr<DepthwiseConvolution2dFloat32Workload> CreateDepthwiseConvolutio BOOST_TEST((queueDescriptor.m_Weight->GetTensorInfo() == TensorInfo({3, 3, 5, 3}, DataType::Float32))); BOOST_TEST((queueDescriptor.m_Bias->GetTensorInfo() == TensorInfo({9}, DataType::Float32))); - // return so we can do extra, backend-specific tests + // Returns so we can do extra, backend-specific tests. return workload; } -template <typename FullyConnectedWorkload> +template <typename FullyConnectedWorkload, armnn::DataType DataType> std::unique_ptr<FullyConnectedWorkload> CreateFullyConnectedWorkloadTest(armnn::IWorkloadFactory& factory, armnn::Graph& graph) { - // create the layer we're testing + // Creates the layer we're testing. FullyConnectedDescriptor layerDesc; layerDesc.m_BiasEnabled = true; layerDesc.m_TransposeWeightMatrix = true; FullyConnectedLayer* const layer = graph.AddLayer<FullyConnectedLayer>(layerDesc, "layer"); - float inputsQScale = FullyConnectedWorkload::ms_DataType == DataType::QuantisedAsymm8 ? 
1.0f : 0.0; - float outputQScale = FullyConnectedWorkload::ms_DataType == DataType::QuantisedAsymm8 ? 2.0f : 0.0; + float inputsQScale = DataType == armnn::DataType::QuantisedAsymm8 ? 1.0f : 0.0; + float outputQScale = DataType == armnn::DataType::QuantisedAsymm8 ? 2.0f : 0.0; - layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(TensorInfo({7, 20}, - FullyConnectedWorkload::ms_DataType, inputsQScale, 0)); - layer->m_Bias = std::make_unique<ScopedCpuTensorHandle>(TensorInfo({7}, - GetBiasDataType(FullyConnectedWorkload::ms_DataType), inputsQScale)); + layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(TensorInfo({7, 20}, DataType, inputsQScale, 0)); + layer->m_Bias = std::make_unique<ScopedCpuTensorHandle>(TensorInfo({7}, GetBiasDataType(DataType), inputsQScale)); layer->m_Weight->Allocate(); layer->m_Bias->Allocate(); - // create extra layers + // Creates extra layers. Layer* const input = graph.AddLayer<InputLayer>(0, "input"); Layer* const output = graph.AddLayer<OutputLayer>(0, "output"); - // connect up - Connect(input, layer, TensorInfo({3, 1, 4, 5}, FullyConnectedWorkload::ms_DataType, inputsQScale)); - Connect(layer, output, TensorInfo({3, 7}, FullyConnectedWorkload::ms_DataType, outputQScale)); + // Connects up. + Connect(input, layer, TensorInfo({3, 1, 4, 5}, DataType, inputsQScale)); + Connect(layer, output, TensorInfo({3, 7}, DataType, outputQScale)); CreateTensorHandles(graph, factory); - // make the workload and check it + // Makes the workload and checks it. 
auto workload = MakeAndCheckWorkload<FullyConnectedWorkload>(*layer, graph, factory); FullyConnectedQueueDescriptor queueDescriptor = workload->GetData(); @@ -375,50 +474,48 @@ std::unique_ptr<FullyConnectedWorkload> CreateFullyConnectedWorkloadTest(armnn:: BOOST_TEST(queueDescriptor.m_Inputs.size() == 1); BOOST_TEST(queueDescriptor.m_Outputs.size() == 1); - BOOST_TEST((queueDescriptor.m_Weight->GetTensorInfo() == - TensorInfo({7, 20}, FullyConnectedWorkload::ms_DataType, inputsQScale))); - BOOST_TEST((queueDescriptor.m_Bias->GetTensorInfo() == - TensorInfo({7}, GetBiasDataType(FullyConnectedWorkload::ms_DataType), inputsQScale))); + BOOST_TEST((queueDescriptor.m_Weight->GetTensorInfo() == TensorInfo({7, 20}, DataType, inputsQScale))); + BOOST_TEST((queueDescriptor.m_Bias->GetTensorInfo() == TensorInfo({7}, GetBiasDataType(DataType), inputsQScale))); - // return so we can do extra, backend-specific tests + // Returns so we can do extra, backend-specific tests. return workload; } -template <typename MultiplicationWorkload> +template <typename MultiplicationWorkload, armnn::DataType DataType> std::unique_ptr<MultiplicationWorkload> CreateMultiplicationWorkloadTest(armnn::IWorkloadFactory& factory, armnn::Graph& graph) { - // create the layer we're testing + // Creates the layer we're testing. Layer* const layer = graph.AddLayer<MultiplicationLayer>("layer"); - // create extra layers + // Creates extra layers. Layer* const input1 = graph.AddLayer<InputLayer>(1, "input1"); Layer* const input2 = graph.AddLayer<InputLayer>(2, "input2"); Layer* const output = graph.AddLayer<OutputLayer>(0, "output"); - // connect up - armnn::TensorInfo tensorInfo({2, 3}, MultiplicationWorkload::ms_DataType); + // Connects up. 
+ armnn::TensorInfo tensorInfo({2, 3}, DataType); Connect(input1, layer, tensorInfo, 0, 0); Connect(input2, layer, tensorInfo, 0, 1); Connect(layer, output, tensorInfo); CreateTensorHandles(graph, factory); - // make the workload and check it + // Makes the workload and checks it. auto workload = MakeAndCheckWorkload<MultiplicationWorkload>(*layer, graph, factory); MultiplicationQueueDescriptor queueDescriptor = workload->GetData(); BOOST_TEST(queueDescriptor.m_Inputs.size() == 2); BOOST_TEST(queueDescriptor.m_Outputs.size() == 1); - // return so we can do extra, backend-specific tests + // Returns so we can do extra, backend-specific tests. return workload; } -template <typename NormalizationFloat32Workload> +template <typename NormalizationFloat32Workload, armnn::DataType DataType> std::unique_ptr<NormalizationFloat32Workload> CreateNormalizationWorkloadTest(armnn::IWorkloadFactory& factory, armnn::Graph& graph) { - // create the layer we're testing + // Creates the layer we're testing. NormalizationDescriptor layerDesc; layerDesc.m_NormChannelType = NormalizationAlgorithmChannel::Across; layerDesc.m_NormMethodType = NormalizationAlgorithmMethod::LocalBrightness; @@ -429,16 +526,16 @@ std::unique_ptr<NormalizationFloat32Workload> CreateNormalizationWorkloadTest(ar NormalizationLayer* layer = graph.AddLayer<NormalizationLayer>(layerDesc, "layer"); - // create extra layers + // Creates extra layers. Layer* const input = graph.AddLayer<InputLayer>(0, "input"); Layer* const output = graph.AddLayer<OutputLayer>(0, "output"); - // connect up - Connect(input, layer, TensorInfo({3, 5, 5, 1}, armnn::DataType::Float32)); - Connect(layer, output, TensorInfo({3, 5, 5, 1}, armnn::DataType::Float32)); + // Connects up. + Connect(input, layer, TensorInfo({3, 5, 5, 1}, DataType)); + Connect(layer, output, TensorInfo({3, 5, 5, 1}, DataType)); CreateTensorHandles(graph, factory); - // make the workload and check it + // Makes the workload and checks it. 
auto workload = MakeAndCheckWorkload<NormalizationFloat32Workload>(*layer, graph, factory); NormalizationQueueDescriptor queueDescriptor = workload->GetData(); @@ -452,15 +549,15 @@ std::unique_ptr<NormalizationFloat32Workload> CreateNormalizationWorkloadTest(ar BOOST_TEST(queueDescriptor.m_Inputs.size() == 1); BOOST_TEST(queueDescriptor.m_Outputs.size() == 1); - // return so we can do extra, backend-specific tests + // Returns so we can do extra, backend-specific tests. return workload; } -template <typename Pooling2dWorkload> +template <typename Pooling2dWorkload, armnn::DataType DataType> std::unique_ptr<Pooling2dWorkload> CreatePooling2dWorkloadTest(armnn::IWorkloadFactory& factory, armnn::Graph& graph) { - // create the layer we're testing + // Creates the layer we're testing. Pooling2dDescriptor layerDesc; layerDesc.m_PoolType = PoolingAlgorithm::Average; layerDesc.m_PoolWidth = 3; @@ -475,16 +572,16 @@ std::unique_ptr<Pooling2dWorkload> CreatePooling2dWorkloadTest(armnn::IWorkloadF Pooling2dLayer* const layer = graph.AddLayer<Pooling2dLayer>(layerDesc, "layer"); - // create extra layers + // Create extra layers Layer* const input = graph.AddLayer<InputLayer>(0, "input"); Layer* const output = graph.AddLayer<OutputLayer>(0, "output"); - // connect up - Connect(input, layer, TensorInfo({3, 2, 5, 5}, Pooling2dWorkload::ms_DataType)); - Connect(layer, output, TensorInfo({3, 2, 2, 4}, Pooling2dWorkload::ms_DataType)); + // Connect up + Connect(input, layer, TensorInfo({3, 2, 5, 5}, DataType)); + Connect(layer, output, TensorInfo({3, 2, 2, 4}, DataType)); CreateTensorHandles(graph, factory); - // make the workload and check it + // Make the workload and checks it auto workload = MakeAndCheckWorkload<Pooling2dWorkload>(*layer, graph, factory); Pooling2dQueueDescriptor queueDescriptor = workload->GetData(); @@ -502,70 +599,70 @@ std::unique_ptr<Pooling2dWorkload> CreatePooling2dWorkloadTest(armnn::IWorkloadF BOOST_TEST(queueDescriptor.m_Inputs.size() == 1); 
BOOST_TEST(queueDescriptor.m_Outputs.size() == 1); - // return so we can do extra, backend-specific tests + // Return so we can do extra, backend-specific tests return workload; } -template <typename SoftmaxWorkload> +template <typename SoftmaxWorkload, armnn::DataType DataType> std::unique_ptr<SoftmaxWorkload> CreateSoftmaxWorkloadTest(armnn::IWorkloadFactory& factory, armnn::Graph& graph) { - // create the layer we're testing + // Create the layer we're testing. SoftmaxDescriptor softmaxDescriptor; Layer* const layer = graph.AddLayer<SoftmaxLayer>(softmaxDescriptor, "layer"); - // create extra layers + // Create extra layers. Layer* const input = graph.AddLayer<InputLayer>(0, "input"); Layer* const output = graph.AddLayer<OutputLayer>(0, "output"); - // connect up - armnn::TensorInfo tensorInfo({4, 1}, SoftmaxWorkload::ms_DataType); + // Connect up + armnn::TensorInfo tensorInfo({4, 1}, DataType); Connect(input, layer, tensorInfo); Connect(layer, output, tensorInfo); CreateTensorHandles(graph, factory); - // make the workload and check it + // Make the workload and checks it. auto workload = MakeAndCheckWorkload<SoftmaxWorkload>(*layer, graph, factory); SoftmaxQueueDescriptor queueDescriptor = workload->GetData(); BOOST_TEST(queueDescriptor.m_Inputs.size() == 1); BOOST_TEST(queueDescriptor.m_Outputs.size() == 1); - // return so we can do extra, backend-specific tests + // Return so we can do extra, backend-specific tests. return workload; } -template<typename SplitterWorkload> +template<typename SplitterWorkload, armnn::DataType DataType> std::unique_ptr<SplitterWorkload> CreateSplitterWorkloadTest(armnn::IWorkloadFactory& factory, armnn::Graph& graph) { - // create the layer we're testing + // Create the layer we're testing. 
// NOTE: need three dimensions channels, height/y, width/x because the Compute // library restricts subtensors to have the same x and y dimensions as // their parent tensors, and therefore the origin on the x and y dimension // has to be zero for any view. So we need a third dimension to split... - // NOTE: arguments are: number of views, number of dimensions + // NOTE: arguments are: number of views, number of dimensions. ViewsDescriptor layerDesc(3, 3); - // NOTE: arguments are: view, dimension, value + // NOTE: arguments are: view, dimension, value. layerDesc.SetViewOriginCoord(0, 0, 0); layerDesc.SetViewOriginCoord(1, 0, 1); layerDesc.SetViewOriginCoord(2, 0, 3); Layer* const layer = graph.AddLayer<SplitterLayer>(layerDesc, "layer"); - // add extra layers + // Adds extra layers. Layer* const input = graph.AddLayer<InputLayer>(0, "input"); Layer* const output0 = graph.AddLayer<OutputLayer>(0, "output0"); Layer* const output1 = graph.AddLayer<OutputLayer>(1, "output1"); Layer* const output2 = graph.AddLayer<OutputLayer>(2, "output2"); - // connect up - armnn::TensorInfo tensorInfo({5, 7, 7}, SplitterWorkload::ms_DataType); + // Connects up. + armnn::TensorInfo tensorInfo({5, 7, 7}, DataType); Connect(input, layer, tensorInfo); - armnn::TensorInfo output0Info({1, 7, 7}, SplitterWorkload::ms_DataType); - armnn::TensorInfo output1Info({2, 7, 7}, SplitterWorkload::ms_DataType); - armnn::TensorInfo output2Info({2, 7, 7}, SplitterWorkload::ms_DataType); + armnn::TensorInfo output0Info({1, 7, 7}, DataType); + armnn::TensorInfo output1Info({2, 7, 7}, DataType); + armnn::TensorInfo output2Info({2, 7, 7}, DataType); Connect(layer, output0, output0Info, 0, 0); Connect(layer, output1, output1Info, 1, 0); @@ -573,7 +670,7 @@ std::unique_ptr<SplitterWorkload> CreateTensorHandles(graph, factory); - // make the workload and check it + // Makes the workload and checks it. 
auto workload = MakeAndCheckWorkload<SplitterWorkload>(*layer, graph, factory); SplitterQueueDescriptor queueDescriptor = workload->GetData(); @@ -591,24 +688,21 @@ std::unique_ptr<SplitterWorkload> BOOST_TEST(queueDescriptor.m_ViewOrigins[1].m_Origin[2] == 0); BOOST_TEST(queueDescriptor.m_ViewOrigins[2].m_Origin[2] == 0); - // return so we can do extra, backend-specific tests + // Returns so we can do extra, backend-specific tests. return workload; } -/// This function constructs a graph with both a splitter and a merger, and returns a pair of the workloads -template<typename SplitterWorkload, typename MergerWorkload> +/// This function constructs a graph with both a splitter and a merger, and returns a pair of the workloads. +template<typename SplitterWorkload, typename MergerWorkload, armnn::DataType DataType> std::pair<std::unique_ptr<SplitterWorkload>, std::unique_ptr<MergerWorkload>> CreateSplitterMergerWorkloadTest(armnn::IWorkloadFactory& factory, armnn::Graph& graph) { - static_assert(SplitterWorkload::ms_DataType == MergerWorkload::ms_DataType, - "Splitter and merger workloads must have the same data type"); + armnn::TensorInfo inputTensorInfo({ 1, 2, 100, 10 }, DataType); - armnn::TensorInfo inputTensorInfo({ 1, 2, 100, 10 }, SplitterWorkload::ms_DataType); + armnn::TensorInfo splitTensorInfo1({ 1, 1, 100, 10 }, DataType); + armnn::TensorInfo splitTensorInfo2({ 1, 1, 100, 10 }, DataType); - armnn::TensorInfo splitTensorInfo1({ 1, 1, 100, 10 }, SplitterWorkload::ms_DataType); - armnn::TensorInfo splitTensorInfo2({ 1, 1, 100, 10 }, SplitterWorkload::ms_DataType); - - //construct the graph + //Constructs the graph. Layer* const input = graph.AddLayer<InputLayer>(0, "input"); armnn::ViewsDescriptor splitterViews(2); @@ -641,12 +735,12 @@ std::pair<std::unique_ptr<SplitterWorkload>, std::unique_ptr<MergerWorkload>> Layer* const output = graph.AddLayer<OutputLayer>(0, "output"); - // add connections + // Adds connections. 
Connect(input, splitter, inputTensorInfo, 0, 0); BOOST_TEST_CHECKPOINT("connect input to splitter"); - Connect(splitter, merger, splitTensorInfo1, 0, 1); // The splitter & merger are connected up + Connect(splitter, merger, splitTensorInfo1, 0, 1); // The splitter & merger are connected up. BOOST_TEST_CHECKPOINT("connect splitter[0] to merger[1]"); - Connect(splitter, merger, splitTensorInfo2, 1, 0); // so that the outputs are flipped round + Connect(splitter, merger, splitTensorInfo2, 1, 0); // So that the outputs are flipped round. BOOST_TEST_CHECKPOINT("connect splitter[1] to merger[0]"); Connect(merger, output, inputTensorInfo, 0, 0); BOOST_TEST_CHECKPOINT("connect merger to output"); @@ -665,7 +759,7 @@ std::pair<std::unique_ptr<SplitterWorkload>, std::unique_ptr<MergerWorkload>> /// This function constructs a graph with a splitter with two outputs. Each of the outputs is then /// connected to two different activation layers -template<typename SplitterWorkload, typename ActivationWorkload> +template<typename SplitterWorkload, typename ActivationWorkload, armnn::DataType DataType> void CreateSplitterMultipleInputsOneOutputWorkloadTest(armnn::IWorkloadFactory& factory, armnn::Graph& graph, std::unique_ptr<SplitterWorkload>& wlSplitter, std::unique_ptr<ActivationWorkload>& wlActiv0_0, @@ -673,14 +767,11 @@ void CreateSplitterMultipleInputsOneOutputWorkloadTest(armnn::IWorkloadFactory& std::unique_ptr<ActivationWorkload>& wlActiv1_0, std::unique_ptr<ActivationWorkload>& wlActiv1_1) { - static_assert(SplitterWorkload::ms_DataType == ActivationWorkload::ms_DataType, - "Splitter and activation workloads must have the same data type"); - - armnn::TensorInfo inputTensorInfo ({ 1, 3, 100, 50 }, SplitterWorkload::ms_DataType); - armnn::TensorInfo splitTensorInfo1({ 1, 1, 100, 50 }, SplitterWorkload::ms_DataType); - armnn::TensorInfo splitTensorInfo2({ 1, 2, 100, 50 }, SplitterWorkload::ms_DataType); + armnn::TensorInfo inputTensorInfo ({ 1, 3, 100, 50 }, DataType); + 
armnn::TensorInfo splitTensorInfo1({ 1, 1, 100, 50 }, DataType); + armnn::TensorInfo splitTensorInfo2({ 1, 2, 100, 50 }, DataType); - //construct the graph + //Constructs the graph. Layer* const input = graph.AddLayer<InputLayer>(0, "input"); armnn::ViewsDescriptor splitterViews(2); @@ -709,7 +800,7 @@ void CreateSplitterMultipleInputsOneOutputWorkloadTest(armnn::IWorkloadFactory& Layer* const output3 = graph.AddLayer<OutputLayer>(3, "output3"); Layer* const output4 = graph.AddLayer<OutputLayer>(4, "output4"); - // add connections + // Adds connections. Connect(input, splitter, inputTensorInfo, 0, 0); Connect(splitter, activ0_0, splitTensorInfo1, 0, 0); Connect(splitter, activ0_1, splitTensorInfo1, 0, 0); @@ -737,97 +828,155 @@ void CreateSplitterMultipleInputsOneOutputWorkloadTest(armnn::IWorkloadFactory& wlActiv1_1 = std::move(workloadActiv1_1); } -template <typename ResizeBilinearWorkload> +template <typename ResizeBilinearWorkload, armnn::DataType DataType> std::unique_ptr<ResizeBilinearWorkload> CreateResizeBilinearWorkloadTest(armnn::IWorkloadFactory& factory, armnn::Graph& graph) { - // create the layer we're testing + // Creates the layer we're testing. TensorShape outputShape({ 2, 3, 2, 2 }); ResizeBilinearDescriptor resizeDesc; resizeDesc.m_TargetWidth = outputShape[3]; resizeDesc.m_TargetHeight = outputShape[2]; Layer* const layer = graph.AddLayer<ResizeBilinearLayer>(resizeDesc, "layer"); - // create extra layers + // Creates extra layers. Layer* const input = graph.AddLayer<InputLayer>(0, "input"); Layer* const output = graph.AddLayer<OutputLayer>(0, "output"); - // connect up - armnn::TensorInfo inputTensorInfo({ 2, 3, 4, 4 }, ResizeBilinearWorkload::ms_DataType); - armnn::TensorInfo outputTensorInfo(outputShape, ResizeBilinearWorkload::ms_DataType); + // Connects up. 
+ armnn::TensorInfo inputTensorInfo({ 2, 3, 4, 4 }, DataType); + armnn::TensorInfo outputTensorInfo(outputShape, DataType); Connect(input, layer, inputTensorInfo); Connect(layer, output, outputTensorInfo); CreateTensorHandles(graph, factory); - // make the workload and check it + // Makes the workload and checks it. auto workload = MakeAndCheckWorkload<ResizeBilinearWorkload>(*layer, graph, factory); ResizeBilinearQueueDescriptor queueDescriptor = workload->GetData(); BOOST_TEST(queueDescriptor.m_Inputs.size() == 1); BOOST_TEST(queueDescriptor.m_Outputs.size() == 1); - // return so we can do extra, backend-specific tests + // Returns so we can do extra, backend-specific tests. return workload; } -template <typename L2NormalizationWorkload> +template <typename L2NormalizationWorkload, armnn::DataType DataType> std::unique_ptr<L2NormalizationWorkload> CreateL2NormalizationWorkloadTest(armnn::IWorkloadFactory& factory, armnn::Graph& graph) { - // create the layer we're testing + // Creates the layer we're testing. Layer* const layer = graph.AddLayer<L2NormalizationLayer>("l2norm"); - // create extra layers + // Creates extra layers. Layer* const input = graph.AddLayer<InputLayer>(0, "input"); Layer* const output = graph.AddLayer<OutputLayer>(0, "output"); - // connect up - armnn::TensorInfo inputTensorInfo({ 5, 20, 50, 67 }, L2NormalizationWorkload::ms_DataType); - armnn::TensorInfo outputTensorInfo({ 5, 20, 50, 67 }, L2NormalizationWorkload::ms_DataType); + // Connects up. + armnn::TensorInfo inputTensorInfo({ 5, 20, 50, 67 }, DataType); + armnn::TensorInfo outputTensorInfo({ 5, 20, 50, 67 }, DataType); Connect(input, layer, inputTensorInfo); Connect(layer, output, outputTensorInfo); CreateTensorHandles(graph, factory); - // make the workload and check it + // Makes the workload and checks it. 
auto workload = MakeAndCheckWorkload<L2NormalizationWorkload>(*layer, graph, factory); L2NormalizationQueueDescriptor queueDescriptor = workload->GetData(); BOOST_TEST(queueDescriptor.m_Inputs.size() == 1); BOOST_TEST(queueDescriptor.m_Outputs.size() == 1); - // return so we can do extra, backend-specific tests + // Returns so we can do extra, backend-specific tests. return workload; } -template <typename ReshapeWorkload> +template <typename ReshapeWorkload, armnn::DataType DataType> std::unique_ptr<ReshapeWorkload> CreateReshapeWorkloadTest(armnn::IWorkloadFactory& factory, armnn::Graph& graph) { - // create the layer we're testing + // Creates the layer we're testing. TensorShape outputShape({ 1, 4 }); ReshapeDescriptor reshapeDesc; reshapeDesc.m_TargetShape = outputShape; Layer* const layer = graph.AddLayer<ReshapeLayer>(reshapeDesc, "layer"); - // create extra layers + // Creates extra layers. Layer* const input = graph.AddLayer<InputLayer>(0, "input"); Layer* const output = graph.AddLayer<OutputLayer>(0, "output"); - // connect up - armnn::TensorInfo inputTensorInfo({ 4, 1 }, ReshapeWorkload::ms_DataType); - armnn::TensorInfo outputTensorInfo(outputShape, ReshapeWorkload::ms_DataType); + // Connects up. + armnn::TensorInfo inputTensorInfo({ 4, 1 }, DataType); + armnn::TensorInfo outputTensorInfo(outputShape, DataType); Connect(input, layer, inputTensorInfo); Connect(layer, output, outputTensorInfo); CreateTensorHandles(graph, factory); - // make the workload and check it + // Makes the workload and checks it. auto workload = MakeAndCheckWorkload<ReshapeWorkload>(*layer, graph, factory); ReshapeQueueDescriptor queueDescriptor = workload->GetData(); BOOST_TEST(queueDescriptor.m_Inputs.size() == 1); BOOST_TEST(queueDescriptor.m_Outputs.size() == 1); - // return so we can do extra, backend-specific tests + // Returns so we can do extra, backend-specific tests. 
+ return workload; +} + +template <typename ConvertFp16ToFp32Float32Workload> +std::unique_ptr<ConvertFp16ToFp32Float32Workload> CreateConvertFp16ToFp32WorkloadTest( + armnn::IWorkloadFactory& factory, armnn::Graph& graph) +{ + // Creates the layer we're testing. + ConvertFp16ToFp32Layer* const layer = graph.AddLayer<ConvertFp16ToFp32Layer>("Fp16ToFp32Converter"); + + // Creates extra layers. + Layer* const input = graph.AddLayer<InputLayer>(0, "input"); + Layer* const output = graph.AddLayer<OutputLayer>(0, "output"); + + // Connects up. + armnn::TensorInfo inputTensorInfo({1, 3, 2, 3}, armnn::DataType::Float16); + armnn::TensorInfo outputTensorInfo({1, 3, 2, 3}, armnn::DataType::Float32); + Connect(input, layer, inputTensorInfo); + Connect(layer, output, outputTensorInfo); + CreateTensorHandles(graph, factory); + + // Makes the workload and checks it. + auto workload = MakeAndCheckWorkload<ConvertFp16ToFp32Float32Workload>(*layer, graph, factory); + + ConvertFp16ToFp32QueueDescriptor queueDescriptor = workload->GetData(); + BOOST_TEST(queueDescriptor.m_Inputs.size() == 1); + BOOST_TEST(queueDescriptor.m_Outputs.size() == 1); + + // Returns so we can do extra, backend-specific tests. + return workload; +} + +template <typename ConvertFp32ToFp16Float16Workload> +std::unique_ptr<ConvertFp32ToFp16Float16Workload> CreateConvertFp32ToFp16WorkloadTest( + armnn::IWorkloadFactory& factory, armnn::Graph& graph) +{ + // Creates the layer we're testing. + ConvertFp32ToFp16Layer* const layer = graph.AddLayer<ConvertFp32ToFp16Layer>("Fp32ToFp16Converter"); + + // Creates extra layers. + Layer* const input = graph.AddLayer<InputLayer>(0, "input"); + Layer* const output = graph.AddLayer<OutputLayer>(0, "output"); + + // Connects up. 
+ armnn::TensorInfo inputTensorInfo({1, 3, 2, 3}, armnn::DataType::Float32); + armnn::TensorInfo outputTensorInfo({1, 3, 2, 3}, armnn::DataType::Float16); + Connect(input, layer, inputTensorInfo); + Connect(layer, output, outputTensorInfo); + CreateTensorHandles(graph, factory); + + // Makes the workload and checks it. + auto workload = MakeAndCheckWorkload<ConvertFp32ToFp16Float16Workload>(*layer, graph, factory); + + ConvertFp32ToFp16QueueDescriptor queueDescriptor = workload->GetData(); + BOOST_TEST(queueDescriptor.m_Inputs.size() == 1); + BOOST_TEST(queueDescriptor.m_Outputs.size() == 1); + + // Returns so we can do extra, backend-specific tests. return workload; } diff --git a/src/armnn/test/CreateWorkloadClNeon.hpp b/src/armnn/test/CreateWorkloadClNeon.hpp index a41a70755f..d92111ac41 100644 --- a/src/armnn/test/CreateWorkloadClNeon.hpp +++ b/src/armnn/test/CreateWorkloadClNeon.hpp @@ -56,22 +56,21 @@ boost::test_tools::predicate_result CompareTensorHandleShape(IComputeTensorHandl return true; } -template<template <DataType> class CopyFromCpuWorkload, template <DataType> class CopyToCpuWorkload, - typename IComputeTensorHandle> +template<typename IComputeTensorHandle> void CreateMemCopyWorkloads(IWorkloadFactory& factory) { Graph graph; RefWorkloadFactory refFactory; - // create the layers we're testing + // Creates the layers we're testing. Layer* const layer1 = graph.AddLayer<MemCopyLayer>("layer1"); Layer* const layer2 = graph.AddLayer<MemCopyLayer>("layer2"); - // create extra layers + // Creates extra layers. Layer* const input = graph.AddLayer<InputLayer>(0, "input"); Layer* const output = graph.AddLayer<OutputLayer>(0, "output"); - // connect up + // Connects up. 
TensorInfo tensorInfo({2, 3}, DataType::Float32); Connect(input, layer1, tensorInfo); Connect(layer1, layer2, tensorInfo); @@ -83,8 +82,8 @@ void CreateMemCopyWorkloads(IWorkloadFactory& factory) output->CreateTensorHandles(graph, refFactory); // make the workloads and check them - auto workload1 = MakeAndCheckWorkload<CopyFromCpuWorkload<DataType::Float32>>(*layer1, graph, factory); - auto workload2 = MakeAndCheckWorkload<CopyToCpuWorkload<DataType::Float32>>(*layer2, graph, refFactory); + auto workload1 = MakeAndCheckWorkload<CopyMemGenericWorkload>(*layer1, graph, factory); + auto workload2 = MakeAndCheckWorkload<CopyMemGenericWorkload>(*layer2, graph, refFactory); MemCopyQueueDescriptor queueDescriptor1 = workload1->GetData(); BOOST_TEST(queueDescriptor1.m_Inputs.size() == 1); @@ -104,4 +103,4 @@ void CreateMemCopyWorkloads(IWorkloadFactory& factory) BOOST_TEST((outputHandle2->GetTensorInfo() == TensorInfo({2, 3}, DataType::Float32))); } -}
\ No newline at end of file +} //namespace
\ No newline at end of file diff --git a/src/armnn/test/CsvReaderTest.cpp b/src/armnn/test/CsvReaderTest.cpp new file mode 100644 index 0000000000..8df61e1fdd --- /dev/null +++ b/src/armnn/test/CsvReaderTest.cpp @@ -0,0 +1,124 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#include "CsvReader.hpp" + +#include <boost/algorithm/string.hpp> +#include <boost/test/unit_test.hpp> + +#include <iostream> +#include <string> +#include <boost/filesystem.hpp> + +using namespace armnnUtils; + +struct TestHelper { + + TestHelper() + { + BOOST_TEST_MESSAGE("setup fixture"); + } + + ~TestHelper() + { + BOOST_TEST_MESSAGE("teardown fixture"); + TearDown(); + } + + std::string CreateTempCsvFile() + { + std::string fileDir = boost::filesystem::temp_directory_path().c_str(); + boost::filesystem::path p{fileDir + "/sampleFile.csv"}; + try + { + boost::filesystem::ofstream ofs{p}; + ofs << "airplane, bicycle , bird , \"m,o,n,k,e,y\"\n"; + ofs << "banana, shoe, \"ice\""; + ofs.close(); + } catch (std::exception &e) + { + std::cerr << "Unable to write to file at location [" << p.c_str() << "] : " << e.what() << std::endl; + BOOST_TEST(false); + } + return fileDir + "/sampleFile.csv"; + } + + int CheckStringsMatch(CsvRow &row, unsigned int index, std::string expectedValue) + { + return row.values.at(index).compare(expectedValue); + } + + void TearDown() + { + RemoveCsvFile(); + } + + void RemoveCsvFile() + { + std::string fileDir = boost::filesystem::temp_directory_path().c_str(); + std::string filePath = fileDir + "/sampleFile.csv"; + try + { + boost::filesystem::remove(filePath); + } + catch (std::exception &e) + { + std::cerr << "Unable to delete file [" << filePath << "] : " << e.what() << std::endl; + BOOST_TEST(false); + } + } +}; + +BOOST_AUTO_TEST_SUITE(CsvReaderTest) + +BOOST_FIXTURE_TEST_CASE(TestParseVector, TestHelper) +{ + CsvReader reader; + std::vector<std::string> csvStrings; + 
csvStrings.reserve(2); + csvStrings.push_back("airplane, automobile , bird , \"c,a,t\""); + csvStrings.push_back("banana, shoe, \"ice\""); + + std::vector<CsvRow> row = reader.ParseVector(csvStrings); + CsvRow row1 = row[0]; + CsvRow row2 = row[1]; + + BOOST_CHECK(row.size() == 2); + + BOOST_CHECK(row1.values.size() == 4); + BOOST_CHECK(CheckStringsMatch(row1, 0, "airplane") == 0); + BOOST_CHECK(CheckStringsMatch(row1, 1, "automobile") == 0); + BOOST_CHECK(CheckStringsMatch(row1, 2, "bird") == 0); + BOOST_CHECK(CheckStringsMatch(row1, 3, "c,a,t") == 0); + + BOOST_CHECK(row2.values.size() == 3); + BOOST_CHECK(CheckStringsMatch(row2, 0, "banana") == 0); + BOOST_CHECK(CheckStringsMatch(row2, 1, "shoe") == 0); + BOOST_CHECK(CheckStringsMatch(row2, 2, "ice") == 0); +} + +BOOST_FIXTURE_TEST_CASE(TestLoadingFileFromDisk, TestHelper) +{ + CsvReader reader; + std::string theFilePath = TestHelper::CreateTempCsvFile(); + + std::vector<CsvRow> row = reader.ParseFile(theFilePath); + CsvRow row1 = row[0]; + CsvRow row2 = row[1]; + + BOOST_CHECK(row.size() == 2); + + BOOST_CHECK(row1.values.size() == 4); + BOOST_CHECK(CheckStringsMatch(row1, 0, "airplane") == 0); + BOOST_CHECK(CheckStringsMatch(row1, 1, "bicycle") == 0); + BOOST_CHECK(CheckStringsMatch(row1, 2, "bird") == 0); + BOOST_CHECK(CheckStringsMatch(row1, 3, "m,o,n,k,e,y") == 0); + + BOOST_CHECK(row2.values.size() == 3); + BOOST_CHECK(CheckStringsMatch(row2, 0, "banana") == 0); + BOOST_CHECK(CheckStringsMatch(row2, 1, "shoe") == 0); + BOOST_CHECK(CheckStringsMatch(row2, 2, "ice") == 0); +} + +BOOST_AUTO_TEST_SUITE_END()
\ No newline at end of file diff --git a/src/armnn/test/EndToEndTest.cpp b/src/armnn/test/EndToEndTest.cpp index 5ed84d22d0..4a8a0dfd81 100644 --- a/src/armnn/test/EndToEndTest.cpp +++ b/src/armnn/test/EndToEndTest.cpp @@ -11,6 +11,8 @@ #include "backends/test/QuantizeHelper.hpp" #include <boost/core/ignore_unused.hpp> +#include <set> + BOOST_AUTO_TEST_SUITE(EndToEnd) namespace @@ -47,9 +49,10 @@ BOOST_AUTO_TEST_CASE(Unsigned8) using namespace armnn; // Create runtime in which test will run - armnn::IRuntimePtr runtime(armnn::IRuntime::Create(armnn::Compute::CpuRef)); + armnn::IRuntime::CreationOptions options; + armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options)); - // build up the structure of the network + // Builds up the structure of the network. armnn::INetworkPtr net(INetwork::Create()); IConnectableLayer* input = net->AddInputLayer(0, "input"); @@ -59,7 +62,7 @@ BOOST_AUTO_TEST_CASE(Unsigned8) input->GetOutputSlot(0).Connect(softmax->GetInputSlot(0)); softmax->GetOutputSlot(0).Connect(output->GetInputSlot(0)); - // set the tensors in the network + // Sets the tensors in the network. TensorInfo inputTensorInfo(TensorShape({1, 5}), DataType::QuantisedAsymm8); inputTensorInfo.SetQuantizationOffset(100); inputTensorInfo.SetQuantizationScale(10000.0f); @@ -71,17 +74,18 @@ BOOST_AUTO_TEST_CASE(Unsigned8) softmax->GetOutputSlot(0).SetTensorInfo(outputTensorInfo); // optimize the network - IOptimizedNetworkPtr optNet = Optimize(*net, runtime->GetDeviceSpec()); + std::vector<armnn::Compute> backends = {armnn::Compute::CpuRef}; + IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec()); - // load it into the runtime + // Loads it into the runtime. NetworkId netId; auto error = runtime->LoadNetwork(netId, std::move(optNet)); BOOST_TEST(error == Status::Success); - // create structures for input & output + // Creates structures for input & output. 
std::vector<uint8_t> inputData { - 1, 10, 3, 200, 5 // some inputs - one of which is sufficiently larger than the others to saturate softmax + 1, 10, 3, 200, 5 // Some inputs - one of which is sufficiently larger than the others to saturate softmax. }; std::vector<uint8_t> outputData(5); @@ -94,19 +98,19 @@ BOOST_AUTO_TEST_CASE(Unsigned8) {0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())} }; - // do the inference + // Does the inference. runtime->EnqueueWorkload(netId, inputTensors, outputTensors); - // check the results + // Checks the results. BOOST_TEST(outputData[0] == 0); BOOST_TEST(outputData[1] == 0); BOOST_TEST(outputData[2] == 0); - BOOST_TEST(outputData[3] == 255); // softmax has been saturated + BOOST_TEST(outputData[3] == 255); // softmax has been saturated. BOOST_TEST(outputData[4] == 0); } template <typename T> -void ConstantUsageTest(armnn::Compute computeDevice, +void ConstantUsageTest(const std::vector<armnn::Compute>& computeDevice, const armnn::TensorInfo& commonTensorInfo, const std::vector<T>& inputData, const std::vector<T>& constantData, @@ -115,9 +119,10 @@ void ConstantUsageTest(armnn::Compute computeDevice, using namespace armnn; // Create runtime in which test will run - armnn::IRuntimePtr runtime(armnn::IRuntime::Create(computeDevice)); + armnn::IRuntime::CreationOptions options; + armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options)); - // build up the structure of the network + // Builds up the structure of the network. INetworkPtr net(INetwork::Create()); IConnectableLayer* input = net->AddInputLayer(0); @@ -129,19 +134,19 @@ void ConstantUsageTest(armnn::Compute computeDevice, constant->GetOutputSlot(0).Connect(add->GetInputSlot(1)); add->GetOutputSlot(0).Connect(output->GetInputSlot(0)); - // set the tensors in the network + // Sets the tensors in the network. 
input->GetOutputSlot(0).SetTensorInfo(commonTensorInfo); constant->GetOutputSlot(0).SetTensorInfo(commonTensorInfo); add->GetOutputSlot(0).SetTensorInfo(commonTensorInfo); // optimize the network - IOptimizedNetworkPtr optNet = Optimize(*net, runtime->GetDeviceSpec()); + IOptimizedNetworkPtr optNet = Optimize(*net, computeDevice, runtime->GetDeviceSpec()); - // load it into the runtime + // Loads it into the runtime. NetworkId netId; runtime->LoadNetwork(netId, std::move(optNet)); - // create structures for input & output + // Creates structures for input & output. std::vector<T> outputData(inputData.size()); InputTensors inputTensors @@ -153,26 +158,26 @@ void ConstantUsageTest(armnn::Compute computeDevice, {0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())} }; - // do the inference + // Does the inference. runtime->EnqueueWorkload(netId, inputTensors, outputTensors); - // check the results + // Checks the results. BOOST_TEST(outputData == expectedOutputData); } -static void ConstantUsageFloat32Test(armnn::Compute computeDevice) +static void ConstantUsageFloat32Test(const std::vector<armnn::Compute>& computeDevice) { const armnn::TensorInfo commonTensorInfo({ 2, 3 }, armnn::DataType::Float32); ConstantUsageTest(computeDevice, commonTensorInfo, - std::vector<float>{ 1.f, 2.f, 3.f, 4.f, 5.f, 6.f }, // input - std::vector<float>{ 6.f, 5.f, 4.f, 3.f, 2.f, 1.f }, // const input - std::vector<float>{ 7.f, 7.f, 7.f, 7.f, 7.f, 7.f } // expected output + std::vector<float>{ 1.f, 2.f, 3.f, 4.f, 5.f, 6.f }, // Input. + std::vector<float>{ 6.f, 5.f, 4.f, 3.f, 2.f, 1.f }, // Const input. + std::vector<float>{ 7.f, 7.f, 7.f, 7.f, 7.f, 7.f } // Expected output. 
); } -static void ConstantUsageUint8Test(armnn::Compute computeDevice) +static void ConstantUsageUint8Test(const std::vector<armnn::Compute>& computeDevice) { armnn::TensorInfo commonTensorInfo({ 2, 3 }, armnn::DataType::QuantisedAsymm8); @@ -184,46 +189,49 @@ static void ConstantUsageUint8Test(armnn::Compute computeDevice) ConstantUsageTest(computeDevice, commonTensorInfo, - QuantizedVector<uint8_t>(scale, offset, { 1.f, 2.f, 3.f, 4.f, 5.f, 6.f }), // input - QuantizedVector<uint8_t>(scale, offset, { 6.f, 5.f, 4.f, 3.f, 2.f, 1.f }), // const input - QuantizedVector<uint8_t>(scale, offset, { 7.f, 7.f, 7.f, 7.f, 7.f, 7.f }) // expected output + QuantizedVector<uint8_t>(scale, offset, { 1.f, 2.f, 3.f, 4.f, 5.f, 6.f }), // Input. + QuantizedVector<uint8_t>(scale, offset, { 6.f, 5.f, 4.f, 3.f, 2.f, 1.f }), // Const input. + QuantizedVector<uint8_t>(scale, offset, { 7.f, 7.f, 7.f, 7.f, 7.f, 7.f }) // Expected output. ); } BOOST_AUTO_TEST_CASE(ConstantUsage_Ref_Float32) { - ConstantUsageFloat32Test(armnn::Compute::CpuRef); + std::vector<armnn::Compute> backends = {armnn::Compute::CpuRef}; + ConstantUsageFloat32Test(backends); } #if ARMCOMPUTENEON_ENABLED BOOST_AUTO_TEST_CASE(ConstantUsage_Neon_Float32) { - ConstantUsageFloat32Test(armnn::Compute::CpuAcc); + ConstantUsageFloat32Test({armnn::Compute::CpuAcc}); } #endif #if ARMCOMPUTECL_ENABLED BOOST_AUTO_TEST_CASE(ConstantUsage_Cl_Float32) { - ConstantUsageFloat32Test(armnn::Compute::GpuAcc); + ConstantUsageFloat32Test({armnn::Compute::GpuAcc}); } #endif BOOST_AUTO_TEST_CASE(ConstantUsage_Ref_Uint8) { - ConstantUsageUint8Test(armnn::Compute::CpuRef); + std::vector<armnn::Compute> backends = {armnn::Compute::CpuRef}; + ConstantUsageUint8Test(backends); } BOOST_AUTO_TEST_CASE(TrivialAdd) { - // This test was designed to match "AddTwo" in android nn/runtime/test/TestTrivialModel.cpp + // This test was designed to match "AddTwo" in android nn/runtime/test/TestTrivialModel.cpp. 
using namespace armnn; // Create runtime in which test will run - armnn::IRuntimePtr runtime(armnn::IRuntime::Create(armnn::Compute::CpuRef)); + armnn::IRuntime::CreationOptions options; + armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options)); - // build up the structure of the network + // Builds up the structure of the network. armnn::INetworkPtr net(INetwork::Create()); IConnectableLayer* input1 = net->AddInputLayer(0); @@ -235,20 +243,21 @@ BOOST_AUTO_TEST_CASE(TrivialAdd) input2->GetOutputSlot(0).Connect(add->GetInputSlot(1)); add->GetOutputSlot(0).Connect(output->GetInputSlot(0)); - // set the tensors in the network + // Sets the tensors in the network. TensorInfo tensorInfo(TensorShape({3, 4}), DataType::Float32); input1->GetOutputSlot(0).SetTensorInfo(tensorInfo); input2->GetOutputSlot(0).SetTensorInfo(tensorInfo); add->GetOutputSlot(0).SetTensorInfo(tensorInfo); // optimize the network - IOptimizedNetworkPtr optNet = Optimize(*net, runtime->GetDeviceSpec()); + std::vector<armnn::Compute> backends = {armnn::Compute::CpuRef}; + IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec()); - // load it into the runtime + // Loads it into the runtime. NetworkId netId; runtime->LoadNetwork(netId, std::move(optNet)); - // create structures for input & output - matching android nn test + // Creates structures for input & output - matching android nn test. std::vector<float> input1Data { 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f @@ -269,10 +278,10 @@ BOOST_AUTO_TEST_CASE(TrivialAdd) {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())} }; - // do the inference + // Does the inference. 
runtime->EnqueueWorkload(netId, inputTensors, outputTensors); - // check the results + // Checks the results BOOST_TEST(outputData[0] == 101); BOOST_TEST(outputData[1] == 202); BOOST_TEST(outputData[2] == 303); @@ -292,9 +301,10 @@ BOOST_AUTO_TEST_CASE(MultipleOutputs) using namespace armnn; // Create runtime in which test will run - armnn::IRuntimePtr runtime(armnn::IRuntime::Create(armnn::Compute::CpuRef)); + armnn::IRuntime::CreationOptions options; + armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options)); - // build up the structure of the network + // Builds up the structure of the network. INetworkPtr net(INetwork::Create()); IConnectableLayer* input = net->AddInputLayer(0); @@ -331,7 +341,7 @@ BOOST_AUTO_TEST_CASE(MultipleOutputs) activation2->GetOutputSlot(0).Connect(output2->GetInputSlot(0)); activation3->GetOutputSlot(0).Connect(output3->GetInputSlot(0)); - // set the tensors in the network + // Sets the tensors in the network. TensorInfo tensorInfo(TensorShape({ 10 }), DataType::Float32); input->GetOutputSlot(0).SetTensorInfo(tensorInfo); activation1->GetOutputSlot(0).SetTensorInfo(tensorInfo); @@ -339,13 +349,14 @@ BOOST_AUTO_TEST_CASE(MultipleOutputs) activation3->GetOutputSlot(0).SetTensorInfo(tensorInfo); // optimize the network - IOptimizedNetworkPtr optNet = Optimize(*net, runtime->GetDeviceSpec()); + std::vector<armnn::Compute> backends = {armnn::Compute::CpuRef}; + IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec()); - // load it into the runtime + // Loads it into the runtime. NetworkId netId; runtime->LoadNetwork(netId, std::move(optNet)); - // create structures for input & output + // Creates structures for input & output. 
const std::vector<float> inputData{ 3.f, 5.f, 2.f, 3.f, 7.f, 0.f, -2.f, -1.f, 3.f, 3.f }; std::vector<float> output1Data(inputData.size()); @@ -363,32 +374,66 @@ BOOST_AUTO_TEST_CASE(MultipleOutputs) {2,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 2), output3Data.data())} }; - // do the inference + // Does the inference. runtime->EnqueueWorkload(netId, inputTensors, outputTensors); - // check the results + // Checks the results. BOOST_TEST(output1Data == std::vector<float>({ 1.f, 1.f, 1.f, 1.f, 1.f, 0.f, -1.f, -1.f, 1.f, 1.f })); // ReLu1 BOOST_TEST(output2Data == std::vector<float>({ 3.f, 5.f, 2.f, 3.f, 6.f, 0.f, 0.f, 0.f, 3.f, 3.f })); // ReLu6 BOOST_TEST(output3Data == std::vector<float>({ 3.f, 5.f, 2.f, 3.f, 5.f, 2.f, 2.f, 2.f, 3.f, 3.f })); // [2, 5] } #if ARMCOMPUTENEON_ENABLED +BOOST_AUTO_TEST_CASE(FallbackToCpuRef) +{ + using namespace armnn; + + // Create runtime in which test will run and allow fallback to CpuRef. + IRuntime::CreationOptions options; + IRuntimePtr runtime(IRuntime::Create(options)); + + // Builds up the structure of the network. + INetworkPtr net(INetwork::Create()); + + IConnectableLayer* input = net->AddInputLayer(0); + + // This layer configuration isn't supported by CpuAcc but we allow fallback to CpuRef so it shoud pass. + NormalizationDescriptor descriptor; + IConnectableLayer* pooling = net->AddNormalizationLayer(descriptor); + + IConnectableLayer* output = net->AddOutputLayer(0); + + input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0)); + pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0)); + + input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 4 }, DataType::Float32)); + pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 4 }, DataType::Float32)); + + // optimize the network + std::vector<Compute> backends = {Compute::CpuAcc, Compute::CpuRef}; + IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec()); + + // Load it into the runtime. It should pass. 
+ NetworkId netId; + BOOST_TEST(runtime->LoadNetwork(netId, std::move(optNet)) == Status::Success); +} +#endif // ARMCOMPUTENEON_ENABLED + BOOST_AUTO_TEST_CASE(ErrorOnLoadNetwork) { using namespace armnn; // Create runtime in which test will run // Note we don't allow falling back to CpuRef if an operation (excluding inputs, outputs, etc.) isn't supported - armnn::IRuntime::CreationOptions options(armnn::Compute::CpuAcc); - options.m_UseCpuRefAsFallback = false; - armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options)); + IRuntime::CreationOptions options; + IRuntimePtr runtime(IRuntime::Create(options)); // build up the structure of the network INetworkPtr net(INetwork::Create()); IConnectableLayer* input = net->AddInputLayer(0); - // This layer configuration isn't supported by CpuAcc and isn't allowed to fall back, so LoadNetwork will fail. + // This layer configuration isn't supported by CpuAcc and isn't allowed to fall back, so Optimize will return null. NormalizationDescriptor descriptor; IConnectableLayer* pooling = net->AddNormalizationLayer(descriptor); @@ -401,12 +446,9 @@ BOOST_AUTO_TEST_CASE(ErrorOnLoadNetwork) pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 4 }, DataType::Float32)); // optimize the network - IOptimizedNetworkPtr optNet = Optimize(*net, runtime->GetDeviceSpec()); - - // Load it into the runtime. It should fail. - NetworkId netId; - BOOST_TEST(runtime->LoadNetwork(netId, std::move(optNet)) == Status::Failure); + std::vector<Compute> backends = {Compute::CpuAcc}; + IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec()); + BOOST_CHECK(!optNet); } -#endif // ARMCOMPUTENEON_ENABLED BOOST_AUTO_TEST_SUITE_END() diff --git a/src/armnn/test/FP16SupportTest.cpp b/src/armnn/test/FP16SupportTest.cpp new file mode 100644 index 0000000000..cc3b60369c --- /dev/null +++ b/src/armnn/test/FP16SupportTest.cpp @@ -0,0 +1,114 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// See LICENSE file in the project root for full license information. +// + +#include "armnn/ArmNN.hpp" +#include "armnn/Descriptors.hpp" +#include "Graph.hpp" +#include "armnn/IRuntime.hpp" +#include "armnn/INetwork.hpp" +#include "Optimizer.hpp" +#include "backends/CpuTensorHandle.hpp" +#include "backends/test/QuantizeHelper.hpp" + +#include <boost/core/ignore_unused.hpp> +#include <boost/test/unit_test.hpp> + +#include <Half.hpp> +#include <set> + +using namespace armnn; + +BOOST_AUTO_TEST_SUITE(Fp16Support) + +BOOST_AUTO_TEST_CASE(Fp16DataTypeSupport) +{ + Graph graph; + + Layer* const inputLayer1 = graph.AddLayer<InputLayer>(1, "input1"); + Layer* const inputLayer2 = graph.AddLayer<InputLayer>(2, "input2"); + + Layer* const additionLayer = graph.AddLayer<AdditionLayer>("addition"); + Layer* const outputLayer = graph.AddLayer<armnn::OutputLayer>(0, "output"); + + TensorInfo fp16TensorInfo({1, 2, 3, 5}, armnn::DataType::Float16); + inputLayer1->GetOutputSlot(0).Connect(additionLayer->GetInputSlot(0)); + inputLayer2->GetOutputSlot(0).Connect(additionLayer->GetInputSlot(1)); + additionLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0)); + + inputLayer1->GetOutputSlot().SetTensorInfo(fp16TensorInfo); + inputLayer2->GetOutputSlot().SetTensorInfo(fp16TensorInfo); + additionLayer->GetOutputSlot().SetTensorInfo(fp16TensorInfo); + + BOOST_CHECK(inputLayer1->GetOutputSlot(0).GetTensorInfo().GetDataType() == armnn::DataType::Float16); + BOOST_CHECK(inputLayer2->GetOutputSlot(0).GetTensorInfo().GetDataType() == armnn::DataType::Float16); + BOOST_CHECK(additionLayer->GetOutputSlot(0).GetTensorInfo().GetDataType() == armnn::DataType::Float16); + +} + +BOOST_AUTO_TEST_CASE(Fp16AdditionTest) +{ + using namespace half_float::literal; + // Create runtime in which test will run + IRuntime::CreationOptions options; + IRuntimePtr runtime(IRuntime::Create(options)); + + // Builds up the structure of the network. 
+ INetworkPtr net(INetwork::Create()); + + + IConnectableLayer* inputLayer1 = net->AddInputLayer(0); + IConnectableLayer* inputLayer2 = net->AddInputLayer(1); + IConnectableLayer* additionLayer = net->AddAdditionLayer(); + IConnectableLayer* outputLayer = net->AddOutputLayer(0); + + inputLayer1->GetOutputSlot(0).Connect(additionLayer->GetInputSlot(0)); + inputLayer2->GetOutputSlot(0).Connect(additionLayer->GetInputSlot(1)); + additionLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0)); + + //change to float16 + TensorInfo fp16TensorInfo(TensorShape({4}), DataType::Float16); + inputLayer1->GetOutputSlot(0).SetTensorInfo(fp16TensorInfo); + inputLayer2->GetOutputSlot(0).SetTensorInfo(fp16TensorInfo); + additionLayer->GetOutputSlot(0).SetTensorInfo(fp16TensorInfo); + + // optimize the network + std::vector<Compute> backends = {Compute::GpuAcc}; + IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec()); + + // Loads it into the runtime. + + NetworkId netId; + runtime->LoadNetwork(netId, std::move(optNet)); + + std::vector<Half> input1Data + { + 1.0_h, 2.0_h, 3.0_h, 4.0_h + }; + + std::vector<Half> input2Data + { + 100.0_h, 200.0_h, 300.0_h, 400.0_h + }; + + InputTensors inputTensors + { + {0,ConstTensor(runtime->GetInputTensorInfo(netId, 0), input1Data.data())}, + {1,ConstTensor(runtime->GetInputTensorInfo(netId, 0), input2Data.data())} + }; + + std::vector<Half> outputData(input1Data.size()); + OutputTensors outputTensors + { + {0,Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())} + }; + + // Does the inference. + runtime->EnqueueWorkload(netId, inputTensors, outputTensors); + + // Checks the results. + BOOST_TEST(outputData == std::vector<Half>({ 101.0_h, 202.0_h, 303.0_h, 404.0_h})); // Add +} + +BOOST_AUTO_TEST_SUITE_END()
\ No newline at end of file diff --git a/src/armnn/test/FloatingPointConverterTest.cpp b/src/armnn/test/FloatingPointConverterTest.cpp new file mode 100644 index 0000000000..d936e801ef --- /dev/null +++ b/src/armnn/test/FloatingPointConverterTest.cpp @@ -0,0 +1,58 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "FloatingPointConverter.hpp" +#include "Half.hpp" + +#include <malloc.h> +#include <iostream> +#include <algorithm> + +#include <boost/test/unit_test.hpp> + +BOOST_AUTO_TEST_SUITE(TestFPConversion) + +BOOST_AUTO_TEST_CASE(TestConvertFp32ToFp16) +{ + using namespace half_float::literal; + + float floatArray[] = { 1.0f, 2.0f, 0.5f, 3.1f, 2.4f, + 5.666f, 6.444f, 7.1f, 432.121f, 12.22f }; + size_t numFloats = sizeof(floatArray) / sizeof(floatArray[0]); + std::vector<armnn::Half> convertedBuffer(numFloats, 0.0_h); + + armnnUtils::FloatingPointConverter::ConvertFloat32To16(floatArray, numFloats, convertedBuffer.data()); + + for (size_t i = 0; i < numFloats; i++) + { + armnn::Half expected(floatArray[i]); + armnn::Half actual = convertedBuffer[i]; + BOOST_CHECK_EQUAL(expected, actual); + + float convertedHalf = actual; + BOOST_CHECK_CLOSE(floatArray[i], convertedHalf, 0.07); + } +} + +BOOST_AUTO_TEST_CASE(TestConvertFp16ToFp32) +{ + using namespace half_float::literal; + + armnn::Half halfArray[] = { 1.0_h, 2.0_h, 0.5_h, 3.1_h, 2.4_h, + 5.666_h, 6.444_h, 7.1_h, 432.121_h, 12.22_h }; + size_t numFloats = sizeof(halfArray) / sizeof(halfArray[0]); + std::vector<float> convertedBuffer(numFloats, 0.0f); + + armnnUtils::FloatingPointConverter::ConvertFloat16To32(halfArray, numFloats, convertedBuffer.data()); + + for (size_t i = 0; i < numFloats; i++) + { + float expected(halfArray[i]); + float actual = convertedBuffer[i]; + BOOST_CHECK_EQUAL(expected, actual); + } +} + +BOOST_AUTO_TEST_SUITE_END() diff --git a/src/armnn/test/GraphTests.cpp b/src/armnn/test/GraphTests.cpp 
index 99789e4737..ccbcb8b00b 100644 --- a/src/armnn/test/GraphTests.cpp +++ b/src/armnn/test/GraphTests.cpp @@ -15,7 +15,7 @@ #include <boost/cast.hpp> -/// checks that first comes before second in the order +/// Checks that first comes before second in the order. bool CheckOrder(const armnn::Graph& graph, const armnn::Layer* first, const armnn::Layer* second) { graph.Print(); @@ -69,7 +69,7 @@ BOOST_AUTO_TEST_CASE(TopologicalSort) armnn::Layer* const layerE = GetFirstLayerWithName(graph, "layerE"); armnn::Layer* const layerD = GetFirstLayerWithName(graph, "layerD"); - // simple graph which branches and rejoins + // Simple graph which branches and rejoins. // A // / \' // D E @@ -92,7 +92,7 @@ BOOST_AUTO_TEST_CASE(TopologicalSort) BOOST_TEST(CheckOrder(graph, layerB, layerC)); } -BOOST_AUTO_TEST_CASE(InsertNewLayer) +BOOST_AUTO_TEST_CASE(InsertNewLayerBefore) { armnn::Graph graph; armnn::TensorInfo tensorInfo({ 1, 1, 1, 1 }, armnn::DataType::Float32); @@ -128,7 +128,7 @@ BOOST_AUTO_TEST_CASE(InsertNewLayer) layerC->GetOutputSlot(0).Connect(layerD->GetInputSlot(1)); layerD->GetOutputSlot(0).Connect(layerO->GetInputSlot(0)); - // check order is valid + // Checks order is valid. BOOST_TEST(CheckOrder(graph, layerA, layerB)); BOOST_TEST(CheckOrder(graph, layerA, layerC)); BOOST_TEST(CheckOrder(graph, layerB, layerD)); @@ -147,7 +147,7 @@ BOOST_AUTO_TEST_CASE(InsertNewLayer) armnn::Layer* const layerE = GetFirstLayerWithName(graph, "layerE"); - // check order is valid + // Checks order is valid. BOOST_TEST(CheckOrder(graph, layerA, layerB)); BOOST_TEST(CheckOrder(graph, layerA, layerC)); BOOST_TEST(CheckOrder(graph, layerB, layerD)); @@ -169,7 +169,7 @@ BOOST_AUTO_TEST_CASE(InsertNewLayer) armnn::Layer* const layerF = GetFirstLayerWithName(graph, "layerF"); - // check order is valid + // Checks order is valid. 
BOOST_TEST(CheckOrder(graph, layerA, layerB)); BOOST_TEST(CheckOrder(graph, layerA, layerF)); BOOST_TEST(CheckOrder(graph, layerF, layerC)); @@ -178,6 +178,93 @@ BOOST_AUTO_TEST_CASE(InsertNewLayer) BOOST_TEST(CheckOrder(graph, layerE, layerD)); } +BOOST_AUTO_TEST_CASE(InsertNewLayerAfter) +{ + armnn::Graph graph; + armnn::TensorInfo tensorInfo({ 1, 1, 1, 1 }, armnn::DataType::Float32); + + std::vector<armnn::Layer*> order; + + armnn::ActivationDescriptor activationDefaults; + BOOST_CHECK_NO_THROW(graph.AddLayer<armnn::InputLayer>(0, "layerA")); + BOOST_CHECK_NO_THROW(graph.AddLayer<armnn::ActivationLayer>(activationDefaults, "layerB")); + BOOST_CHECK_NO_THROW(graph.AddLayer<armnn::ActivationLayer>(activationDefaults, "layerC")); + BOOST_CHECK_NO_THROW(graph.AddLayer<armnn::AdditionLayer>("layerD")); + BOOST_CHECK_NO_THROW(graph.AddLayer<armnn::OutputLayer>(0, "output")); + + armnn::Layer* const layerA = GetFirstLayerWithName(graph, "layerA"); + armnn::Layer* const layerB = GetFirstLayerWithName(graph, "layerB"); + armnn::Layer* const layerC = GetFirstLayerWithName(graph, "layerC"); + armnn::Layer* const layerD = GetFirstLayerWithName(graph, "layerD"); + armnn::Layer* const layerO = GetFirstLayerWithName(graph, "output"); + + // A + // / \' + // B C + // \ / + // D + layerA->GetOutputSlot(0).SetTensorInfo(tensorInfo); + layerB->GetOutputSlot(0).SetTensorInfo(tensorInfo); + layerC->GetOutputSlot(0).SetTensorInfo(tensorInfo); + layerD->GetOutputSlot(0).SetTensorInfo(tensorInfo); + + layerA->GetOutputSlot(0).Connect(layerB->GetInputSlot(0)); + layerA->GetOutputSlot(0).Connect(layerC->GetInputSlot(0)); + layerB->GetOutputSlot(0).Connect(layerD->GetInputSlot(0)); + layerC->GetOutputSlot(0).Connect(layerD->GetInputSlot(1)); + layerD->GetOutputSlot(0).Connect(layerO->GetInputSlot(0)); + + // Checks order is valid. 
+ BOOST_TEST(CheckOrder(graph, layerA, layerB)); + BOOST_TEST(CheckOrder(graph, layerA, layerC)); + BOOST_TEST(CheckOrder(graph, layerB, layerD)); + BOOST_TEST(CheckOrder(graph, layerC, layerD)); + + // A + // / \' + // B C + // \ | + // \ E + // \| + // D + BOOST_CHECK_NO_THROW(graph.InsertNewLayer<armnn::ActivationLayer>(layerC->GetOutputSlot(), + activationDefaults, + "layerE")); + + armnn::Layer* const layerE = GetFirstLayerWithName(graph, "layerE"); + + // Checks order is valid. + BOOST_TEST(CheckOrder(graph, layerA, layerB)); + BOOST_TEST(CheckOrder(graph, layerA, layerC)); + BOOST_TEST(CheckOrder(graph, layerB, layerD)); + BOOST_TEST(CheckOrder(graph, layerC, layerE)); + BOOST_TEST(CheckOrder(graph, layerE, layerD)); + + + // A + // | + // F + // / \' + // B C + // \ | + // \ E + // \ / + // D + BOOST_CHECK_NO_THROW(graph.InsertNewLayer<armnn::ActivationLayer>(layerA->GetOutputSlot(), + activationDefaults, + "layerF")); + + armnn::Layer* const layerF = GetFirstLayerWithName(graph, "layerF"); + + // Checks order is valid. + BOOST_TEST(CheckOrder(graph, layerA, layerF)); + BOOST_TEST(CheckOrder(graph, layerF, layerB)); + BOOST_TEST(CheckOrder(graph, layerF, layerC)); + BOOST_TEST(CheckOrder(graph, layerB, layerD)); + BOOST_TEST(CheckOrder(graph, layerC, layerE)); + BOOST_TEST(CheckOrder(graph, layerE, layerD)); +} + namespace { using Edge = std::pair<const armnn::Layer*, const armnn::Layer*>; @@ -210,7 +297,7 @@ static void TestGraphAfterAddingCopyLayers(const armnn::Graph& graph, const armn std::vector<Edge> origEdges = GetEdgeList(origGraph); std::vector<Edge> newEdges = GetEdgeList(graph); - // Adding copy layers should not produce any duplicate edges + // Adding copy layers should not produce any duplicate edges. 
{ std::vector<Edge> sortedNewEdges = newEdges; std::sort(sortedNewEdges.begin(), sortedNewEdges.end()); @@ -219,7 +306,7 @@ static void TestGraphAfterAddingCopyLayers(const armnn::Graph& graph, const armn BOOST_CHECK_MESSAGE(last == sortedNewEdges.end(), "New graph contains duplicate edges!"); } - // Each new edge must be tested + // Each new edge must be tested. while (!newEdges.empty()) { const Edge edge = std::move(newEdges.back()); @@ -251,7 +338,7 @@ static void TestGraphAfterAddingCopyLayers(const armnn::Graph& graph, const armn BOOST_TEST((srcLayer->GetComputeDevice() == dstLayer->GetComputeDevice())); } - // Mark edge in original graph as observed (by deleting it) + // Marks edge in original graph as observed (by deleting it). origEdges.erase(origEdges.begin() + originalEdge); } else @@ -288,7 +375,7 @@ static void TestGraphAfterAddingCopyLayers(const armnn::Graph& graph, const armn const armnn::Layer* copyLayer = srcLayerInOrigGraph ? edge.second : edge.first; const armnn::Layer* nonCopyLayer = srcLayerInOrigGraph ? srcLayer : dstLayer; - // Find all edges connecting the copy layer to other layers + // Finds all edges connecting the copy layer to other layers. std::vector<Edge> adjEdges; auto it = newEdges.begin(); while (it != newEdges.end()) @@ -298,7 +385,7 @@ static void TestGraphAfterAddingCopyLayers(const armnn::Graph& graph, const armn { adjEdges.push_back(newEdge); - // Since the adjacent edge is immediately tested below, no need to consider it afterwards + // Since the adjacent edge is immediately tested below, there is no need to consider it afterwards. it = newEdges.erase(it); } else @@ -315,10 +402,10 @@ static void TestGraphAfterAddingCopyLayers(const armnn::Graph& graph, const armn continue; } - // Test adjacent edges now + // Tests adjacent edges now. for (const Edge& adjEdge : adjEdges) { - // The adjacent edge must connect the copy layer to another layer + // The adjacent edge must connect the copy layer to another layer. 
const armnn::Layer* adjLayer = srcLayerInOrigGraph ? adjEdge.second : adjEdge.first; if (!adjLayer) @@ -329,10 +416,10 @@ static void TestGraphAfterAddingCopyLayers(const armnn::Graph& graph, const armn continue; } - // Both layers must have different compute devices + // Both layers must have different compute devices. BOOST_TEST((nonCopyLayer->GetComputeDevice() != adjLayer->GetComputeDevice())); - // There must exist an edge connecting both layers directly in the original graph + // There must exist an edge connecting both layers directly in the original graph. { const armnn::Layer* origEdgeN1 = srcLayerInOrigGraph ? nonCopyLayer : adjLayer; const armnn::Layer* origEdgeN2 = srcLayerInOrigGraph ? adjLayer : nonCopyLayer; @@ -434,7 +521,7 @@ BOOST_FIXTURE_TEST_CASE(AddCopyLayersSeveralTimes, CopyLayersFixture) { m_Graph.AddCopyLayers(); - // Calling AddCopyLayers() several times should not change the connections + // Calling AddCopyLayers() several times should not change the connections. const std::vector<Edge> edges = GetEdgeList(m_Graph); for (int i = 0; i < 4; ++i) { diff --git a/src/armnn/test/InstrumentTests.cpp b/src/armnn/test/InstrumentTests.cpp new file mode 100644 index 0000000000..a219b39b0d --- /dev/null +++ b/src/armnn/test/InstrumentTests.cpp @@ -0,0 +1,62 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// +#include <boost/test/unit_test.hpp> + +#include "WallClockTimer.hpp" + +#include <chrono> +#include <thread> + +using namespace armnn; + +BOOST_AUTO_TEST_SUITE(Instruments) + +BOOST_AUTO_TEST_CASE(WallClockTimerInMilliseconds) +{ + WallClockTimer wallClockTimer; + + BOOST_CHECK_EQUAL(wallClockTimer.GetName(), "WallClockTimer"); + + // start the timer + wallClockTimer.Start(); + + // wait for 10 milliseconds + std::this_thread::sleep_for(std::chrono::milliseconds(10)); + + // stop the timer + wallClockTimer.Stop(); + + BOOST_CHECK_EQUAL(wallClockTimer.GetMeasurements().front().m_Name, WallClockTimer::WALL_CLOCK_TIME); + + // check that WallClockTimer measurement should be >= 10 milliseconds + BOOST_CHECK_GE(wallClockTimer.GetMeasurements().front().m_Value, std::chrono::milliseconds(10).count()); +} + +BOOST_AUTO_TEST_CASE(WallClockTimerInNanoseconds) +{ + WallClockTimer wallClockTimer; + + BOOST_CHECK_EQUAL(wallClockTimer.GetName(), "WallClockTimer"); + + // start the timer + wallClockTimer.Start(); + + // wait for 500 nanoseconds - 0.0005 milliseconds + std::this_thread::sleep_for(std::chrono::nanoseconds(500)); + + // stop the timer + wallClockTimer.Stop(); + + BOOST_CHECK_EQUAL(wallClockTimer.GetMeasurements().front().m_Name, WallClockTimer::WALL_CLOCK_TIME); + + // delta is 0.0005 milliseconds + const auto delta = + std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(std::chrono::nanoseconds(500)); + + // check that WallClockTimer measurement should be >= 0.0005 milliseconds + BOOST_CHECK_GE(wallClockTimer.GetMeasurements().front().m_Value, delta.count()); +} + +BOOST_AUTO_TEST_SUITE_END() diff --git a/src/armnn/test/JsonPrinterTests.cpp b/src/armnn/test/JsonPrinterTests.cpp new file mode 100644 index 0000000000..28cbfd61a5 --- /dev/null +++ b/src/armnn/test/JsonPrinterTests.cpp @@ -0,0 +1,378 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// +#include <boost/test/unit_test.hpp> +#include <boost/algorithm/string.hpp> +#include <boost/lexical_cast.hpp> +#include <stack> +#include <string> +#include <vector> +#include <sstream> + +#include "Profiling.hpp" +#include "armnn/Descriptors.hpp" +#include "armnn/IRuntime.hpp" +#include "armnn/INetwork.hpp" +#include "backends/test/ClContextControlFixture.hpp" +#include "backends/ClWorkloadFactory.hpp" + +BOOST_FIXTURE_TEST_SUITE(JsonPrinterTests, ClProfilingContextControlFixture) + +bool AreMatchingPair(const char opening, const char closing) +{ + return (opening == '{' && closing == '}') || (opening == '[' && closing == ']'); +} + +bool AreParenthesesMatching(const std::string& exp) +{ + std::stack<char> expStack; + for (size_t i = 0; i < exp.length(); ++i) + { + if (exp[i] == '{' || exp[i] == '[') + { + expStack.push(exp[i]); + } + else if (exp[i] == '}' || exp[i] == ']') + { + if (expStack.empty() || !AreMatchingPair(expStack.top(), exp[i])) + { + return false; + } + else + { + expStack.pop(); + } + } + } + return expStack.empty(); +} + +std::vector<double> ExtractMeasurements(const std::string& exp) +{ + std::vector<double> numbers; + bool inArray = false; + std::string numberString; + for (size_t i = 0; i < exp.size(); ++i) + { + if (exp[i] == '[') + { + inArray = true; + } + else if (exp[i] == ']' && inArray) + { + try + { + boost::trim_if(numberString, boost::is_any_of("\t,\n")); + numbers.push_back(std::stod(numberString)); + } + catch (std::invalid_argument const& e) + { + BOOST_FAIL("Could not convert measurements to double: " + numberString); + } + + numberString.clear(); + inArray = false; + } + else if (exp[i] == ',' && inArray) + { + try + { + boost::trim_if(numberString, boost::is_any_of("\t,\n")); + numbers.push_back(std::stod(numberString)); + } + catch (std::invalid_argument const& e) + { + BOOST_FAIL("Could not convert measurements to double: " + numberString); + } + numberString.clear(); + } + else if (exp[i] != '[' && inArray && exp[i] 
!= ',' && exp[i] != ' ') + { + numberString += exp[i]; + } + } + return numbers; +} + +std::vector<std::string> ExtractSections(const std::string& exp) +{ + std::vector<std::string> sections; + + std::stack<size_t> s; + for (size_t i = 0; i < exp.size(); i++) + { + if (exp.at(i) == '{') + { + s.push(i); + } + else if (exp.at(i) == '}') + { + size_t from = s.top(); + s.pop(); + sections.push_back(exp.substr(from, i - from + 1)); + } + } + + return sections; +} + +std::string SoftmaxProfilerTestSetupHelper(const std::vector<armnn::Compute>& backends) +{ + using namespace armnn; + + BOOST_CHECK(!backends.empty()); + + ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance(); + + // Create runtime in which test will run + IRuntime::CreationOptions options; + options.m_EnableGpuProfiling = backends.front() == armnn::Compute::GpuAcc; + IRuntimePtr runtime(IRuntime::Create(options)); + + // build up the structure of the network + INetworkPtr net(INetwork::Create()); + + IConnectableLayer* input = net->AddInputLayer(0, "input"); + IConnectableLayer* softmax = net->AddSoftmaxLayer(SoftmaxDescriptor(), "softmax"); + IConnectableLayer* output = net->AddOutputLayer(0, "output"); + + input->GetOutputSlot(0).Connect(softmax->GetInputSlot(0)); + softmax->GetOutputSlot(0).Connect(output->GetInputSlot(0)); + + // set the tensors in the network + TensorInfo inputTensorInfo(TensorShape({1, 5}), DataType::QuantisedAsymm8); + inputTensorInfo.SetQuantizationOffset(100); + inputTensorInfo.SetQuantizationScale(10000.0f); + input->GetOutputSlot(0).SetTensorInfo(inputTensorInfo); + + TensorInfo outputTensorInfo(TensorShape({1, 5}), DataType::QuantisedAsymm8); + outputTensorInfo.SetQuantizationOffset(0); + outputTensorInfo.SetQuantizationScale(1.0f / 256.0f); + softmax->GetOutputSlot(0).SetTensorInfo(outputTensorInfo); + + // optimize the network + IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec()); + if(!optNet) + { + BOOST_FAIL("Error occurred 
during Optimization, Optimize() returned nullptr."); + } + // load it into the runtime + NetworkId netId; + auto error = runtime->LoadNetwork(netId, std::move(optNet)); + BOOST_TEST(error == Status::Success); + + // create structures for input & output + std::vector<uint8_t> inputData + { + 1, 10, 3, 200, 5 + // one of inputs is sufficiently larger than the others to saturate softmax + }; + std::vector<uint8_t> outputData(5); + + armnn::InputTensors inputTensors + { + {0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())} + }; + armnn::OutputTensors outputTensors + { + {0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())} + }; + + runtime->GetProfiler(netId)->EnableProfiling(true); + + // do the inferences + runtime->EnqueueWorkload(netId, inputTensors, outputTensors); + runtime->EnqueueWorkload(netId, inputTensors, outputTensors); + runtime->EnqueueWorkload(netId, inputTensors, outputTensors); + + // retrieve the Profiler.Print() output + std::stringstream ss; + profilerManager.GetProfiler()->Print(ss); + + return ss.str(); +} + +void SoftmaxProfilerTestValidationHelper(std::string& result, const std::string& testData) +{ + // ensure all measurements are greater than zero + std::vector<double> measurementsVector = ExtractMeasurements(result); + BOOST_CHECK(!measurementsVector.empty()); + + // check sections contain raw and unit tags + // first ensure Parenthesis are balanced + if (AreParenthesesMatching(result)) + { + // remove parent sections that will not have raw or unit tag + std::vector<std::string> sectionVector = ExtractSections(result); + for (size_t i = 0; i < sectionVector.size(); ++i) + { + if (boost::contains(sectionVector[i], "\"ArmNN\":") + || boost::contains(sectionVector[i], "\"inference_measurements\":")) + { + sectionVector.erase(sectionVector.begin() + static_cast<int>(i)); + } + } + BOOST_CHECK(!sectionVector.empty()); + + BOOST_CHECK(std::all_of(sectionVector.begin(), sectionVector.end(), + 
[](std::string i) { return boost::contains(i, "\"raw\":"); })); + + BOOST_CHECK(std::all_of(sectionVector.begin(), sectionVector.end(), + [](std::string i) { return boost::contains(i, "\"unit\":"); })); + } + + // remove the time measurements as they vary from test to test + result.erase(std::remove_if (result.begin(),result.end(), + [](char c) { return c == '.'; }), result.end()); + result.erase(std::remove_if (result.begin(), result.end(), &isdigit), result.end()); + result.erase(std::remove_if (result.begin(),result.end(), + [](char c) { return c == '\t'; }), result.end()); + + BOOST_CHECK(boost::contains(result, "ArmNN")); + BOOST_CHECK(boost::contains(result, "inference_measurements")); + BOOST_CHECK(boost::contains(result, "layer_measurements")); + BOOST_CHECK_EQUAL(result, testData); + + // ensure no spare parenthesis present in print output + BOOST_CHECK(AreParenthesesMatching(result)); +} + +void SetupSoftmaxProfilerWithSpecifiedBackendsAndValidateJSONPrinterResult( + const std::vector<armnn::Compute>& backends) +{ + // setup the test fixture and obtain JSON Printer result + std::string result = SoftmaxProfilerTestSetupHelper(backends); + + std::string backend = "Ref"; + std::string changeLine31 = "\n},\n\"CopyMemGeneric_Execute\": {"; + std::string changeLine39 = "ms\""; + std::string changeLine40; + std::string changeLine45; + + switch(backends[0]) { + case armnn::Compute::GpuAcc: backend = "Cl"; + changeLine31 = ",\n\"OpenClKernelTimer/: softmax_layer_max_shift_exp_sum_quantized_serial GWS[,,]\": {"; + changeLine39 = R"(us" +}, +"OpenClKernelTimer/: softmax_layer_norm_quantized GWS[,,]": { +"raw": [ +, +, + +], +"unit": "us")"; + + changeLine40 = R"( +}, +"CopyMemGeneric_Execute": { +"raw": [ +, +, + +], +"unit": "ms")"; + changeLine45 = "}\n"; + break; + case armnn::Compute::CpuAcc: backend = "Neon"; + changeLine31 = ",\n\"NeonKernelTimer/: NEFillBorderKernel\": {"; + changeLine39 = R"(ms" +}, +"NeonKernelTimer/: NELogitsDMaxKernel": { +"raw": [ +, +, 
+ +], +"unit": "ms" +}, +"NeonKernelTimer/: NELogitsDSoftmaxKernel": { +"raw": [ +, +, + +], +"unit": "ms")"; + changeLine40 = R"( +}, +"CopyMemGeneric_Execute": { +"raw": [ +, +, + +], +"unit": "ms")"; + changeLine45 = "}\n"; + break; + default: + break; + } + std::string testData = R"({ +"ArmNN": { +"inference_measurements": { +"raw": [ +, +, + +], +"unit": "ms", +"layer_measurements": { +"raw": [ +, +, + +], +"unit": "ms", +"CopyMemGeneric_Execute": { +"raw": [ +, +, + +], +"unit": "ms" +}, +")" + backend + R"(SoftmaxUintWorkload_Execute": { +"raw": [ +, +, + +], +"unit": "ms")" + changeLine31 + R"( +"raw": [ +, +, + +], +"unit": ")" + changeLine39 + R"( +})" + changeLine40 + R"( +} +} +} +} +)" + changeLine45 + R"()"; + + // validate the JSON Printer result + SoftmaxProfilerTestValidationHelper(result, testData); +} + +BOOST_AUTO_TEST_CASE(SoftmaxProfilerJSONPrinterCpuRefTest) +{ + SetupSoftmaxProfilerWithSpecifiedBackendsAndValidateJSONPrinterResult({armnn::Compute::CpuRef}); +} + + +#if ARMCOMPUTENEON_ENABLED +BOOST_AUTO_TEST_CASE(SoftmaxProfilerJSONPrinterCpuAccTest) +{ + SetupSoftmaxProfilerWithSpecifiedBackendsAndValidateJSONPrinterResult({armnn::Compute::CpuAcc}); +} +#endif + +#if ARMCOMPUTECL_ENABLED +BOOST_AUTO_TEST_CASE(SoftmaxProfilerJSONPrinterGpuAccTest) +{ + SetupSoftmaxProfilerWithSpecifiedBackendsAndValidateJSONPrinterResult({armnn::Compute::GpuAcc}); +} +#endif + +BOOST_AUTO_TEST_SUITE_END() diff --git a/src/armnn/test/NeonTimerTest.cpp b/src/armnn/test/NeonTimerTest.cpp new file mode 100644 index 0000000000..4502756e07 --- /dev/null +++ b/src/armnn/test/NeonTimerTest.cpp @@ -0,0 +1,104 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +#include "NeonTimer.hpp" +#include "TensorHelpers.hpp" + +#include "armnn/ArmNN.hpp" +#include "armnn/Tensor.hpp" +#include "armnn/TypesUtils.hpp" +#include "backends/CpuTensorHandle.hpp" +#include "backends/NeonWorkloadFactory.hpp" +#include "backends/WorkloadInfo.hpp" +#include "backends/WorkloadFactory.hpp" +#include "backends/test/LayerTests.hpp" +#include "backends/test/TensorCopyUtils.hpp" +#include "backends/test/WorkloadTestUtils.hpp" + +#include <boost/test/unit_test.hpp> +#include <cstdlib> +#include <algorithm> + +using namespace armnn; + +BOOST_AUTO_TEST_SUITE(NeonTimerInstrument) + + +BOOST_AUTO_TEST_CASE(NeonTimerGetName) +{ + NeonTimer neonTimer; + BOOST_CHECK_EQUAL(neonTimer.GetName(), "NeonKernelTimer"); +} + +BOOST_AUTO_TEST_CASE(NeonTimerMeasure) +{ + NeonWorkloadFactory workloadFactory; + + unsigned int inputWidth = 4000u; + unsigned int inputHeight = 5000u; + unsigned int inputChannels = 1u; + unsigned int inputBatchSize = 1u; + + float upperBound = 1.0f; + float lowerBound = -1.0f; + + size_t inputSize = inputWidth * inputHeight * inputChannels * inputBatchSize; + std::vector<float> inputData(inputSize, 0.f); + std::generate(inputData.begin(), inputData.end(), [](){ + return (static_cast<float>(rand()) / static_cast<float>(RAND_MAX / 3)) + 1.f; }); + + unsigned int outputWidth = inputWidth; + unsigned int outputHeight = inputHeight; + unsigned int outputChannels = inputChannels; + unsigned int outputBatchSize = inputBatchSize; + + armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, + armnn::GetDataType<float>()); + + armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, + armnn::GetDataType<float>()); + + LayerTestResult<float, 4> result(inputTensorInfo); + + auto input = MakeTensor<float, 4>(inputTensorInfo, inputData); + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + 
std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + // Setup bounded ReLu + armnn::ActivationQueueDescriptor descriptor; + armnn::WorkloadInfo workloadInfo; + AddInputToWorkload(descriptor, workloadInfo, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(descriptor, workloadInfo, outputTensorInfo, outputHandle.get()); + + descriptor.m_Parameters.m_Function = armnn::ActivationFunction::BoundedReLu; + descriptor.m_Parameters.m_A = upperBound; + descriptor.m_Parameters.m_B = lowerBound; + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateActivation(descriptor, workloadInfo); + + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + NeonTimer neonTimer; + // Start the timer. + neonTimer.Start(); + // Execute the workload. + workload->Execute(); + // Stop the timer. + neonTimer.Stop(); + + std::vector<Measurement> measurements = neonTimer.GetMeasurements(); + + BOOST_CHECK_EQUAL(measurements.size(), 2); + BOOST_CHECK_EQUAL(measurements[0].m_Name, "NeonKernelTimer/0: NEFillBorderKernel"); + BOOST_CHECK(measurements[0].m_Value > 0.0); + BOOST_CHECK_EQUAL(measurements[1].m_Name, "NeonKernelTimer/1: NEActivationLayerKernel"); + BOOST_CHECK(measurements[1].m_Value > 0.0); +} + +BOOST_AUTO_TEST_SUITE_END() diff --git a/src/armnn/test/NetworkTests.cpp b/src/armnn/test/NetworkTests.cpp new file mode 100644 index 0000000000..66fa327221 --- /dev/null +++ b/src/armnn/test/NetworkTests.cpp @@ -0,0 +1,968 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// +#include <boost/test/unit_test.hpp> + +#include "armnn/ArmNN.hpp" +#include "Network.hpp" +#include "Graph.hpp" +#include "backends/RefWorkloadFactory.hpp" +#include "backends/ClWorkloadFactory.hpp" +#include "backends/NeonWorkloadFactory.hpp" + +#include "GraphUtils.hpp" + +namespace +{ + +bool AreAllLayerInputSlotsConnected(const armnn::IConnectableLayer& layer) +{ + bool allConnected = true; + for (unsigned int i = 0; i < layer.GetNumInputSlots(); ++i) + { + const bool inputConnected = layer.GetInputSlot(i).GetConnection() != nullptr; + allConnected &= inputConnected; + } + return allConnected; +} + +} + +BOOST_AUTO_TEST_SUITE(Network) + +BOOST_AUTO_TEST_CASE(LayerGuids) +{ + armnn::Network net; + armnn::LayerGuid inputId = net.AddInputLayer(0)->GetGuid(); + armnn::LayerGuid addId = net.AddAdditionLayer()->GetGuid(); + armnn::LayerGuid outputId = net.AddOutputLayer(0)->GetGuid(); + + BOOST_TEST(inputId != addId); + BOOST_TEST(addId != outputId); + BOOST_TEST(inputId != outputId); +} + +BOOST_AUTO_TEST_CASE(SerializeToDot) +{ + armnn::Network net; + + //Defines layers. + auto input = net.AddInputLayer(0); + auto add = net.AddAdditionLayer(); + auto output = net.AddOutputLayer(0); + + // Connects layers. 
+ input->GetOutputSlot(0).Connect(add->GetInputSlot(0)); + input->GetOutputSlot(0).Connect(add->GetInputSlot(1)); + add->GetOutputSlot(0).Connect(output->GetInputSlot(0)); + + armnn::TensorShape shape({4}); + armnn::TensorInfo info(shape, armnn::DataType::Float32); + input->GetOutputSlot(0).SetTensorInfo(info); + add->GetOutputSlot(0).SetTensorInfo(info); + + armnn::IRuntime::CreationOptions options; + armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options)); + + std::vector<armnn::Compute> backends = {armnn::Compute::CpuRef}; + armnn::IOptimizedNetworkPtr optimizedNet = armnn::Optimize(net, backends, runtime->GetDeviceSpec()); + + std::ostringstream ss; + optimizedNet->SerializeToDot(ss); + + auto inputId = input->GetGuid(); + auto addId = add->GetGuid(); + auto outputId = output->GetGuid(); + + std::stringstream expected; + expected << + "digraph Optimized {\n" + " node [shape=\"record\"];\n" + " edge [fontsize=8 fontcolor=\"blue\" fontname=\"arial-bold\"];\n" + " " << inputId << " [label=\"{Input}\"];\n" + " " << addId << " [label=\"{Addition}\"];\n" + " " << outputId << " [label=\"{Output}\"];\n" + " " << inputId << " -> " << addId << " [label=< [4] >];\n" + " " << inputId << " -> " << addId << " [label=< [4] >];\n" + " " << addId << " -> " << outputId << " [label=< [4] >];\n" + "}\n"; + + BOOST_TEST(ss.str() == expected.str()); +} + +BOOST_AUTO_TEST_CASE(NetworkBasic) +{ + armnn::Network net; + BOOST_TEST(net.PrintGraph() == armnn::Status::Success); +} + +BOOST_AUTO_TEST_CASE(LayerNamesAreOptionalForINetwork) +{ + armnn::Network net; + armnn::INetwork& inet = net; + inet.AddInputLayer(0); + inet.AddAdditionLayer(); + inet.AddActivationLayer(armnn::ActivationDescriptor()); + inet.AddOutputLayer(0); +} + +BOOST_AUTO_TEST_CASE(LayerNamesAreOptionalForNetwork) +{ + armnn::Network net; + net.AddInputLayer(0); + net.AddAdditionLayer(); + net.AddActivationLayer(armnn::ActivationDescriptor()); + net.AddOutputLayer(0); +} + 
+BOOST_AUTO_TEST_CASE(NetworkModification) +{ + armnn::Network net; + + armnn::IConnectableLayer* const inputLayer = net.AddInputLayer(0, "input layer"); + BOOST_TEST(inputLayer); + + unsigned int dims[] = { 10,1,1,1 }; + std::vector<float> convWeightsData(10); + armnn::ConstTensor weights(armnn::TensorInfo(4, dims, armnn::DataType::Float32), convWeightsData); + + armnn::Convolution2dDescriptor convDesc2d; + armnn::IConnectableLayer* const convLayer = net.AddConvolution2dLayer(convDesc2d, weights, "conv layer"); + BOOST_TEST(convLayer); + + inputLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(0)); + + armnn::FullyConnectedDescriptor fullyConnectedDesc; + armnn::IConnectableLayer* const fullyConnectedLayer = net.AddFullyConnectedLayer(fullyConnectedDesc, + weights, + "fully connected"); + BOOST_TEST(fullyConnectedLayer); + + convLayer->GetOutputSlot(0).Connect(fullyConnectedLayer->GetInputSlot(0)); + + armnn::Pooling2dDescriptor pooling2dDesc; + armnn::IConnectableLayer* const poolingLayer = net.AddPooling2dLayer(pooling2dDesc, "pooling2d"); + BOOST_TEST(poolingLayer); + + fullyConnectedLayer->GetOutputSlot(0).Connect(poolingLayer->GetInputSlot(0)); + + armnn::ActivationDescriptor activationDesc; + armnn::IConnectableLayer* const activationLayer = net.AddActivationLayer(activationDesc, "activation"); + BOOST_TEST(activationLayer); + + poolingLayer->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0)); + + armnn::NormalizationDescriptor normalizationDesc; + armnn::IConnectableLayer* const normalizationLayer = net.AddNormalizationLayer(normalizationDesc, "normalization"); + BOOST_TEST(normalizationLayer); + + activationLayer->GetOutputSlot(0).Connect(normalizationLayer->GetInputSlot(0)); + + armnn::SoftmaxDescriptor softmaxDesc; + armnn::IConnectableLayer* const softmaxLayer = net.AddSoftmaxLayer(softmaxDesc, "softmax"); + BOOST_TEST(softmaxLayer); + + normalizationLayer->GetOutputSlot(0).Connect(softmaxLayer->GetInputSlot(0)); + + 
armnn::BatchNormalizationDescriptor batchNormDesc; + + armnn::TensorInfo tensorInfo({ 1 }, armnn::DataType::Float32); + std::vector<float> data(tensorInfo.GetNumBytes() / sizeof(float)); + armnn::ConstTensor invalidTensor(tensorInfo, data); + + armnn::IConnectableLayer* const batchNormalizationLayer = net.AddBatchNormalizationLayer(batchNormDesc, + invalidTensor, + invalidTensor, + invalidTensor, + invalidTensor, + "batch norm"); + BOOST_TEST(batchNormalizationLayer); + + softmaxLayer->GetOutputSlot(0).Connect(batchNormalizationLayer->GetInputSlot(0)); + + armnn::IConnectableLayer* const additionLayer = net.AddAdditionLayer("addition"); + BOOST_TEST(additionLayer); + + batchNormalizationLayer->GetOutputSlot(0).Connect(additionLayer->GetInputSlot(0)); + batchNormalizationLayer->GetOutputSlot(0).Connect(additionLayer->GetInputSlot(1)); + + armnn::IConnectableLayer* const multiplicationLayer = net.AddMultiplicationLayer("multiplication"); + BOOST_TEST(multiplicationLayer); + + additionLayer->GetOutputSlot(0).Connect(multiplicationLayer->GetInputSlot(0)); + additionLayer->GetOutputSlot(0).Connect(multiplicationLayer->GetInputSlot(1)); + + armnn::IConnectableLayer* const outputLayer = net.AddOutputLayer(0, "output layer"); + BOOST_TEST(outputLayer); + + multiplicationLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0)); + + //Tests that all layers are present in the graph. + BOOST_TEST(net.GetGraph().GetNumLayers() == 11); + + //Tests that the vertices exist and have correct names. 
+ BOOST_TEST(GraphHasNamedLayer(net.GetGraph(), "input layer")); + BOOST_TEST(GraphHasNamedLayer(net.GetGraph(), "conv layer")); + BOOST_TEST(GraphHasNamedLayer(net.GetGraph(), "fully connected")); + BOOST_TEST(GraphHasNamedLayer(net.GetGraph(), "pooling2d")); + BOOST_TEST(GraphHasNamedLayer(net.GetGraph(), "activation")); + BOOST_TEST(GraphHasNamedLayer(net.GetGraph(), "normalization")); + BOOST_TEST(GraphHasNamedLayer(net.GetGraph(), "softmax")); + BOOST_TEST(GraphHasNamedLayer(net.GetGraph(), "batch norm")); + BOOST_TEST(GraphHasNamedLayer(net.GetGraph(), "addition")); + BOOST_TEST(GraphHasNamedLayer(net.GetGraph(), "multiplication")); + BOOST_TEST(GraphHasNamedLayer(net.GetGraph(), "output layer")); + + auto checkOneOutputToOneInputConnection = [] + (const armnn::IConnectableLayer* const srcLayer, + const armnn::IConnectableLayer* const tgtLayer, + int expectedSrcNumInputs = 1, + int expectedDstNumOutputs = 1) + { + BOOST_TEST(srcLayer->GetNumInputSlots() == expectedSrcNumInputs); + BOOST_TEST(srcLayer->GetNumOutputSlots() == 1); + BOOST_TEST(tgtLayer->GetNumInputSlots() == 1); + BOOST_TEST(tgtLayer->GetNumOutputSlots() == expectedDstNumOutputs); + + BOOST_TEST(srcLayer->GetOutputSlot(0).GetNumConnections() == 1); + BOOST_TEST(srcLayer->GetOutputSlot(0).GetConnection(0) == &tgtLayer->GetInputSlot(0)); + BOOST_TEST(&srcLayer->GetOutputSlot(0) == tgtLayer->GetInputSlot(0).GetConnection()); + }; + auto checkOneOutputToTwoInputsConnections = [] + (const armnn::IConnectableLayer* const srcLayer, + const armnn::IConnectableLayer* const tgtLayer, + int expectedSrcNumInputs, + int expectedDstNumOutputs = 1) + { + BOOST_TEST(srcLayer->GetNumInputSlots() == expectedSrcNumInputs); + BOOST_TEST(srcLayer->GetNumOutputSlots() == 1); + BOOST_TEST(tgtLayer->GetNumInputSlots() == 2); + BOOST_TEST(tgtLayer->GetNumOutputSlots() == expectedDstNumOutputs); + + BOOST_TEST(srcLayer->GetOutputSlot(0).GetNumConnections() == 2); + for (unsigned int i = 0; i < 
srcLayer->GetOutputSlot(0).GetNumConnections(); ++i) + { + BOOST_TEST(srcLayer->GetOutputSlot(0).GetConnection(i) == &tgtLayer->GetInputSlot(i)); + BOOST_TEST(&srcLayer->GetOutputSlot(0) == tgtLayer->GetInputSlot(i).GetConnection()); + } + }; + + BOOST_TEST(AreAllLayerInputSlotsConnected(*convLayer)); + BOOST_TEST(AreAllLayerInputSlotsConnected(*fullyConnectedLayer)); + BOOST_TEST(AreAllLayerInputSlotsConnected(*poolingLayer)); + BOOST_TEST(AreAllLayerInputSlotsConnected(*activationLayer)); + BOOST_TEST(AreAllLayerInputSlotsConnected(*normalizationLayer)); + BOOST_TEST(AreAllLayerInputSlotsConnected(*softmaxLayer)); + BOOST_TEST(AreAllLayerInputSlotsConnected(*batchNormalizationLayer)); + BOOST_TEST(AreAllLayerInputSlotsConnected(*additionLayer)); + BOOST_TEST(AreAllLayerInputSlotsConnected(*multiplicationLayer)); + BOOST_TEST(AreAllLayerInputSlotsConnected(*outputLayer)); + + // Checks connectivity. + checkOneOutputToOneInputConnection(inputLayer, convLayer, 0); + checkOneOutputToOneInputConnection(convLayer, fullyConnectedLayer); + checkOneOutputToOneInputConnection(fullyConnectedLayer, poolingLayer); + checkOneOutputToOneInputConnection(poolingLayer, activationLayer); + checkOneOutputToOneInputConnection(activationLayer, normalizationLayer); + checkOneOutputToOneInputConnection(normalizationLayer, softmaxLayer); + checkOneOutputToOneInputConnection(softmaxLayer, batchNormalizationLayer); + checkOneOutputToTwoInputsConnections(batchNormalizationLayer, additionLayer, 1); + checkOneOutputToTwoInputsConnections(additionLayer, multiplicationLayer, 2); + checkOneOutputToOneInputConnection(multiplicationLayer, outputLayer, 2, 0); +} + +BOOST_AUTO_TEST_CASE(NetworkModification_SplitterMerger) +{ + armnn::Network net; + + // Adds an input layer and an input tensor descriptor. + armnn::IConnectableLayer* inputLayer = net.AddInputLayer(0, "input layer"); + BOOST_TEST(inputLayer); + + // Adds a splitter layer. 
+ armnn::ViewsDescriptor splitterDesc(2,4); + + armnn::IConnectableLayer* splitterLayer = net.AddSplitterLayer(splitterDesc, "splitter layer"); + BOOST_TEST(splitterLayer); + + inputLayer->GetOutputSlot(0).Connect(splitterLayer->GetInputSlot(0)); + + // Adds a softmax layer 1. + armnn::SoftmaxDescriptor softmaxDescriptor; + armnn::IConnectableLayer* softmaxLayer1 = net.AddSoftmaxLayer(softmaxDescriptor, "softmax_1"); + BOOST_TEST(softmaxLayer1); + + splitterLayer->GetOutputSlot(0).Connect(softmaxLayer1->GetInputSlot(0)); + + // Adds a softmax layer 2. + armnn::IConnectableLayer* softmaxLayer2 = net.AddSoftmaxLayer(softmaxDescriptor, "softmax_2"); + BOOST_TEST(softmaxLayer2); + + splitterLayer->GetOutputSlot(1).Connect(softmaxLayer2->GetInputSlot(0)); + + // Adds a merger layer. + armnn::OriginsDescriptor mergerDesc(2, 4); + + armnn::IConnectableLayer* mergerLayer = net.AddMergerLayer(mergerDesc, "merger layer"); + BOOST_TEST(mergerLayer); + + softmaxLayer1->GetOutputSlot(0).Connect(mergerLayer->GetInputSlot(0)); + softmaxLayer2->GetOutputSlot(0).Connect(mergerLayer->GetInputSlot(1)); + + // Adds an output layer. 
+ armnn::IConnectableLayer* outputLayer = net.AddOutputLayer(0, "output layer"); + BOOST_TEST(outputLayer); + + mergerLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0)); + + BOOST_TEST(splitterLayer->GetNumOutputSlots() == 2); + BOOST_TEST(splitterLayer->GetOutputSlot(0).GetConnection(0) == &softmaxLayer1->GetInputSlot(0)); + BOOST_TEST(&splitterLayer->GetOutputSlot(0) == softmaxLayer1->GetInputSlot(0).GetConnection()); + BOOST_TEST(splitterLayer->GetOutputSlot(1).GetConnection(0) == &softmaxLayer2->GetInputSlot(0)); + BOOST_TEST(&splitterLayer->GetOutputSlot(1) == softmaxLayer2->GetInputSlot(0).GetConnection()); + + BOOST_TEST(mergerLayer->GetNumInputSlots() == 2); + BOOST_TEST(softmaxLayer1->GetOutputSlot(0).GetConnection(0) == &mergerLayer->GetInputSlot(0)); + BOOST_TEST(&softmaxLayer1->GetOutputSlot(0) == mergerLayer->GetInputSlot(0).GetConnection()); + BOOST_TEST(softmaxLayer2->GetOutputSlot(0).GetConnection(0) == &mergerLayer->GetInputSlot(1)); + BOOST_TEST(&softmaxLayer2->GetOutputSlot(0) == mergerLayer->GetInputSlot(1).GetConnection()); +} + +BOOST_AUTO_TEST_CASE(NetworkModification_SplitterAddition) +{ + armnn::Network net; + + // Adds an input layer and an input tensor descriptor. + armnn::IConnectableLayer* layer = net.AddInputLayer(0, "input layer"); + BOOST_TEST(layer); + + // Adds a splitter layer. + armnn::ViewsDescriptor splitterDesc(2,4); + + armnn::IConnectableLayer* const splitterLayer = net.AddSplitterLayer(splitterDesc, "splitter layer"); + BOOST_TEST(splitterLayer); + + layer->GetOutputSlot(0).Connect(splitterLayer->GetInputSlot(0)); + + // Adds a softmax layer 1. + armnn::SoftmaxDescriptor softmaxDescriptor; + armnn::IConnectableLayer* const softmax1Layer = net.AddSoftmaxLayer(softmaxDescriptor, "softmax_1"); + BOOST_TEST(softmax1Layer); + + splitterLayer->GetOutputSlot(0).Connect(softmax1Layer->GetInputSlot(0)); + + // Adds a softmax layer 2. 
+ armnn::IConnectableLayer* const softmax2Layer = net.AddSoftmaxLayer(softmaxDescriptor, "softmax_2"); + BOOST_TEST(softmax2Layer); + + splitterLayer->GetOutputSlot(1).Connect(softmax2Layer->GetInputSlot(0)); + + // Adds addition layer. + layer = net.AddAdditionLayer("add layer"); + BOOST_TEST(layer); + + softmax1Layer->GetOutputSlot(0).Connect(layer->GetInputSlot(0)); + softmax2Layer->GetOutputSlot(0).Connect(layer->GetInputSlot(1)); + + // Adds an output layer. + armnn::IConnectableLayer* prevLayer = layer; + layer = net.AddOutputLayer(0, "output layer"); + + prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0)); + + BOOST_TEST(layer); +} + +BOOST_AUTO_TEST_CASE(NetworkModification_SplitterMultiplication) +{ + armnn::Network net; + + // Adds an input layer and an input tensor descriptor. + armnn::IConnectableLayer* layer = net.AddInputLayer(0, "input layer"); + BOOST_TEST(layer); + + // Adds a splitter layer. + armnn::ViewsDescriptor splitterDesc(2,4); + armnn::IConnectableLayer* const splitterLayer = net.AddSplitterLayer(splitterDesc, "splitter layer"); + BOOST_TEST(splitterLayer); + + layer->GetOutputSlot(0).Connect(splitterLayer->GetInputSlot(0)); + + // Adds a softmax layer 1. + armnn::SoftmaxDescriptor softmaxDescriptor; + armnn::IConnectableLayer* const softmax1Layer = net.AddSoftmaxLayer(softmaxDescriptor, "softmax_1"); + BOOST_TEST(softmax1Layer); + + splitterLayer->GetOutputSlot(0).Connect(softmax1Layer->GetInputSlot(0)); + + // Adds a softmax layer 2. + armnn::IConnectableLayer* const softmax2Layer = net.AddSoftmaxLayer(softmaxDescriptor, "softmax_2"); + BOOST_TEST(softmax2Layer); + + splitterLayer->GetOutputSlot(1).Connect(softmax2Layer->GetInputSlot(0)); + + // Adds multiplication layer. + layer = net.AddMultiplicationLayer("multiplication layer"); + BOOST_TEST(layer); + + softmax1Layer->GetOutputSlot(0).Connect(layer->GetInputSlot(0)); + softmax2Layer->GetOutputSlot(0).Connect(layer->GetInputSlot(1)); + + // Adds an output layer. 
+ armnn::IConnectableLayer* prevLayer = layer; + layer = net.AddOutputLayer(0, "output layer"); + BOOST_TEST(layer); + + prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0)); +} + +BOOST_AUTO_TEST_CASE(OptimizeValidateCpuRefWorkloads) +{ + const armnn::TensorInfo desc({3, 5}, armnn::DataType::Float32); + + armnn::Network net; + + armnn::NormalizationDescriptor nmDesc; + armnn::ActivationDescriptor acDesc; + + // in + // | + // nm + // / | + // ac | + // \ | + // ml + // | + // sm + // | + // ot + armnn::IConnectableLayer* layer = net.AddInputLayer(0, "in"); + layer->GetOutputSlot(0).SetTensorInfo(desc); + + armnn::IConnectableLayer* const normLayer = net.AddNormalizationLayer(nmDesc, "nm"); + + layer->GetOutputSlot(0).Connect(normLayer->GetInputSlot(0)); + normLayer->GetOutputSlot(0).SetTensorInfo(desc); + + layer = net.AddActivationLayer(acDesc, "ac"); + + normLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0)); + layer->GetOutputSlot(0).SetTensorInfo(desc); + + armnn::IConnectableLayer* prevLayer = layer; + layer = net.AddMultiplicationLayer("ml"); + + prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0)); + normLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1)); + layer->GetOutputSlot(0).SetTensorInfo(desc); + + prevLayer = layer; + armnn::SoftmaxDescriptor softmaxDescriptor; + layer = net.AddSoftmaxLayer(softmaxDescriptor, "sm"); + + prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0)); + layer->GetOutputSlot(0).SetTensorInfo(desc); + + prevLayer = layer; + layer = net.AddOutputLayer(0, "ot"); + + prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0)); + + armnn::IRuntime::CreationOptions options; + armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options)); + + std::vector<armnn::Compute> backends = { armnn::Compute::CpuRef }; + armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(net, backends, runtime->GetDeviceSpec()); + static_cast<armnn::OptimizedNetwork*>(optNet.get())->GetGraph().AllocateDynamicBuffers(); + 
BOOST_CHECK(optNet); + + // Validates workloads. + armnn::RefWorkloadFactory fact; + for (auto&& layer : static_cast<armnn::OptimizedNetwork*>(optNet.get())->GetGraph()) + { + BOOST_CHECK_NO_THROW( + layer->CreateWorkload(static_cast<armnn::OptimizedNetwork*>(optNet.get())->GetGraph(), fact)); + } +} + +#if ARMCOMPUTENEON_ENABLED +BOOST_AUTO_TEST_CASE(OptimizeValidateCpuAccDeviceSupportLayerNoFallback) +{ + // build up the structure of the network + armnn::INetworkPtr net(armnn::INetwork::Create()); + + armnn::IConnectableLayer* input = net->AddInputLayer(0); + + armnn::IConnectableLayer* output = net->AddOutputLayer(0); + + input->GetOutputSlot(0).Connect(output->GetInputSlot(0)); + + input->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32)); + + armnn::IRuntime::CreationOptions options; + armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options)); + + std::vector<armnn::Compute> backends = { armnn::Compute::CpuAcc }; + armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec()); + BOOST_CHECK(optNet); + // validate workloads + armnn::NeonWorkloadFactory fact; + for (auto&& layer : static_cast<armnn::OptimizedNetwork*>(optNet.get())->GetGraph()) + { + BOOST_CHECK_EQUAL(armnn::Compute::CpuAcc, layer->GetComputeDevice()); + BOOST_CHECK_NO_THROW( + layer->CreateWorkload(static_cast<armnn::OptimizedNetwork*>(optNet.get())->GetGraph(), fact)); + } +} +#endif // ARMCOMPUTENEON_ENABLED + +#if ARMCOMPUTECL_ENABLED +BOOST_AUTO_TEST_CASE(OptimizeValidateGpuDeviceSupportLayerNoFallback) +{ + // build up the structure of the network + armnn::INetworkPtr net(armnn::INetwork::Create()); + + armnn::IConnectableLayer* input = net->AddInputLayer(0); + + armnn::IConnectableLayer* output = net->AddOutputLayer(0); + + input->GetOutputSlot(0).Connect(output->GetInputSlot(0)); + + input->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32)); + + 
armnn::IRuntime::CreationOptions options; + armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options)); + + std::vector<armnn::Compute> backends = { armnn::Compute::GpuAcc }; + armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec()); + BOOST_CHECK(optNet); + // validate workloads + armnn::ClWorkloadFactory fact; + for (auto&& layer : static_cast<armnn::OptimizedNetwork*>(optNet.get())->GetGraph()) + { + BOOST_CHECK_EQUAL(armnn::Compute::GpuAcc, layer->GetComputeDevice()); + BOOST_CHECK_NO_THROW( + layer->CreateWorkload(static_cast<armnn::OptimizedNetwork*>(optNet.get())->GetGraph(), fact)); + } +} +#endif // ARMCOMPUTECL_ENABLED + +BOOST_AUTO_TEST_CASE(OptimizeValidateDeviceNonSupportLayerNoFallback) +{ + // build up the structure of the network + armnn::INetworkPtr net(armnn::INetwork::Create()); + + armnn::IConnectableLayer* input = net->AddInputLayer(0); + + // This layer configuration isn't supported by CpuAcc and isn't allowed to fall back, so Optimize will return null. 
+ armnn::NormalizationDescriptor descriptor; + armnn::IConnectableLayer* normalize = net->AddNormalizationLayer(descriptor); + + armnn::IConnectableLayer* output = net->AddOutputLayer(0); + + input->GetOutputSlot(0).Connect(normalize->GetInputSlot(0)); + normalize->GetOutputSlot(0).Connect(output->GetInputSlot(0)); + + input->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32)); + normalize->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32)); + + armnn::IRuntime::CreationOptions options; + armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options)); + + std::vector<armnn::Compute> backends = { armnn::Compute::CpuAcc }; + armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec()); + BOOST_CHECK(!optNet); +} + +BOOST_AUTO_TEST_CASE(OptimizeValidateDeviceNonSupportLayerWithFallback) +{ + // build up the structure of the network + armnn::INetworkPtr net(armnn::INetwork::Create()); + + armnn::IConnectableLayer* input = net->AddInputLayer(0); + + // This layer configuration isn't supported by CpuAcc but it allows to fallback to CpuRef. 
+ armnn::NormalizationDescriptor descriptor; + armnn::IConnectableLayer* normalize = net->AddNormalizationLayer(descriptor); + + armnn::IConnectableLayer* output = net->AddOutputLayer(0); + + input->GetOutputSlot(0).Connect(normalize->GetInputSlot(0)); + normalize->GetOutputSlot(0).Connect(output->GetInputSlot(0)); + + input->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32)); + normalize->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32)); + + armnn::IRuntime::CreationOptions options; + armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options)); + + std::vector<armnn::Compute> backends = { armnn::Compute::CpuAcc, armnn::Compute::CpuRef }; + armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec()); + BOOST_REQUIRE(optNet); + + for (auto&& layer : static_cast<armnn::OptimizedNetwork*>(optNet.get())->GetGraph()) + { + // If NEON is enabled, Input and Output layers are supported by CpuAcc, + // the other layers are supported by CpuRef. + // If NEON is not enabled, all layers are supported by CpuRef. 
+#if ARMCOMPUTENEON_ENABLED + if (layer->GetType() == armnn::LayerType::Input || layer->GetType() == armnn::LayerType::Output) + { + BOOST_CHECK_EQUAL(armnn::Compute::CpuAcc, layer->GetComputeDevice()); + } + else if (layer->GetType() == armnn::LayerType::Normalization) + { + BOOST_CHECK_EQUAL(armnn::Compute::CpuRef, layer->GetComputeDevice()); + } +#else + BOOST_CHECK_EQUAL(armnn::Compute::CpuRef, layer->GetComputeDevice()); +#endif + } +} + +BOOST_AUTO_TEST_CASE(OptimizeValidateWorkloadsUndefinedComputeDevice) +{ + const armnn::TensorInfo desc({3, 5}, armnn::DataType::Float32); + + armnn::Network net; + + armnn::NormalizationDescriptor nmDesc; + armnn::ActivationDescriptor acDesc; + + // in + // | + // nm + // / | + // ac | + // \ | + // ml + // | + // sm + // | + // ot + armnn::IConnectableLayer* layer = net.AddInputLayer(0, "in"); + layer->GetOutputSlot(0).SetTensorInfo(desc); + + armnn::IConnectableLayer* const normLayer = net.AddNormalizationLayer(nmDesc, "nm"); + + layer->GetOutputSlot(0).Connect(normLayer->GetInputSlot(0)); + normLayer->GetOutputSlot(0).SetTensorInfo(desc); + + layer = net.AddActivationLayer(acDesc, "ac"); + + normLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0)); + layer->GetOutputSlot(0).SetTensorInfo(desc); + + armnn::IConnectableLayer* prevLayer = layer; + layer = net.AddMultiplicationLayer("ml"); + + prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0)); + normLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1)); + layer->GetOutputSlot(0).SetTensorInfo(desc); + + prevLayer = layer; + armnn::SoftmaxDescriptor softmaxDescriptor; + layer = net.AddSoftmaxLayer(softmaxDescriptor, "sm"); + + prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0)); + layer->GetOutputSlot(0).SetTensorInfo(desc); + + prevLayer = layer; + layer = net.AddOutputLayer(0, "ot"); + + prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0)); + + armnn::IRuntime::CreationOptions options; + armnn::IRuntimePtr 
runtime(armnn::IRuntime::Create(options)); + + std::vector<armnn::Compute> backends = { armnn::Compute::Undefined }; + + armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(net, backends, runtime->GetDeviceSpec()); + BOOST_CHECK(!optNet); + +} + +BOOST_AUTO_TEST_CASE(OptimizeValidateWorkloadsUndefinedComputeDeviceWithFallback) +{ + const armnn::TensorInfo desc({3, 5}, armnn::DataType::Float32); + + armnn::Network net; + + armnn::NormalizationDescriptor nmDesc; + armnn::ActivationDescriptor acDesc; + + // in + // | + // nm + // / | + // ac | + // \ | + // ml + // | + // sm + // | + // ot + armnn::IConnectableLayer* layer = net.AddInputLayer(0, "in"); + layer->GetOutputSlot(0).SetTensorInfo(desc); + + armnn::IConnectableLayer* const normLayer = net.AddNormalizationLayer(nmDesc, "nm"); + + layer->GetOutputSlot(0).Connect(normLayer->GetInputSlot(0)); + normLayer->GetOutputSlot(0).SetTensorInfo(desc); + + layer = net.AddActivationLayer(acDesc, "ac"); + + normLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0)); + layer->GetOutputSlot(0).SetTensorInfo(desc); + + armnn::IConnectableLayer* prevLayer = layer; + layer = net.AddMultiplicationLayer("ml"); + + prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0)); + normLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1)); + layer->GetOutputSlot(0).SetTensorInfo(desc); + + prevLayer = layer; + armnn::SoftmaxDescriptor softmaxDescriptor; + layer = net.AddSoftmaxLayer(softmaxDescriptor, "sm"); + + prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0)); + layer->GetOutputSlot(0).SetTensorInfo(desc); + + prevLayer = layer; + layer = net.AddOutputLayer(0, "ot"); + + prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0)); + + armnn::IRuntime::CreationOptions options; + armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options)); + + std::vector<armnn::Compute> backends = { armnn::Compute::Undefined, armnn::Compute::CpuRef }; + + armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(net, backends, 
runtime->GetDeviceSpec()); + BOOST_CHECK(optNet); + + // validate workloads + armnn::RefWorkloadFactory fact; + for (auto&& layer : static_cast<armnn::OptimizedNetwork*>(optNet.get())->GetGraph()) + { + BOOST_CHECK_EQUAL(armnn::Compute::CpuRef, layer->GetComputeDevice()); + BOOST_CHECK_NO_THROW( + layer->CreateWorkload(static_cast<armnn::OptimizedNetwork*>(optNet.get())->GetGraph(), fact)); + } +} +BOOST_AUTO_TEST_CASE(OptimizeValidateWorkloadsDuplicateComputeDeviceWithFallback) +{ + // build up the structure of the network + armnn::INetworkPtr net(armnn::INetwork::Create()); + + armnn::IConnectableLayer* input = net->AddInputLayer(0); + + // This layer configuration isn't supported by CpuAcc but it allows to fallback to CpuRef. + armnn::NormalizationDescriptor descriptor; + armnn::IConnectableLayer* normalize = net->AddNormalizationLayer(descriptor); + + armnn::IConnectableLayer* output = net->AddOutputLayer(0); + + input->GetOutputSlot(0).Connect(normalize->GetInputSlot(0)); + normalize->GetOutputSlot(0).Connect(output->GetInputSlot(0)); + + input->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32)); + normalize->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32)); + + armnn::IRuntime::CreationOptions options; + armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options)); + + std::vector<armnn::Compute> backends = { armnn::Compute::CpuAcc, + armnn::Compute::GpuAcc, + armnn::Compute::CpuRef }; + + armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec()); + BOOST_REQUIRE(optNet); + + for (auto&& layer : static_cast<armnn::OptimizedNetwork*>(optNet.get())->GetGraph()) + { + // If NEON is enabled, Input and Output layers are supported by CpuAcc, + // the other layers are supported by CpuRef. + // If only CL is enabled, Input and Output layers are supported by GpuAcc, + // the other layers are supported by CpuRef. 
+ // If neither NEON, nor CL is enabled, all layers are supported by CpuRef. +#if ARMCOMPUTENEON_ENABLED + if (layer->GetType() == armnn::LayerType::Input || layer->GetType() == armnn::LayerType::Output) + { + BOOST_CHECK_EQUAL(armnn::Compute::CpuAcc, layer->GetComputeDevice()); + } + else if (layer->GetType() == armnn::LayerType::Normalization) + { + BOOST_CHECK_EQUAL(armnn::Compute::CpuRef, layer->GetComputeDevice()); + } +#elif ARMCOMPUTECL_ENABLED + if (layer->GetType() == armnn::LayerType::Input || layer->GetType() == armnn::LayerType::Output) + { + BOOST_CHECK_EQUAL(armnn::Compute::GpuAcc, layer->GetComputeDevice()); + } + else if (layer->GetType() == armnn::LayerType::Normalization) + { + BOOST_CHECK_EQUAL(armnn::Compute::CpuRef, layer->GetComputeDevice()); + } +#else + BOOST_CHECK_EQUAL(armnn::Compute::CpuRef, layer->GetComputeDevice()); +#endif + } +} + +BOOST_AUTO_TEST_CASE(OptimizeValidateWorkloadsCpuRefPermuteLayer) +{ + // Create runtime in which test will run + armnn::IRuntime::CreationOptions options; + armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options)); + + std::vector<armnn::Compute> backends = {armnn::Compute::CpuRef}; + + // build up the structure of the network + armnn::INetworkPtr net(armnn::INetwork::Create()); + + armnn::IConnectableLayer* input = net->AddInputLayer(0); + + armnn::PermuteDescriptor descriptor({0, 2, 3, 1}); + armnn::IConnectableLayer* permute = net->AddPermuteLayer(descriptor); + + armnn::IConnectableLayer* output = net->AddOutputLayer(0); + + input->GetOutputSlot(0).Connect(permute->GetInputSlot(0)); + permute->GetOutputSlot(0).Connect(output->GetInputSlot(0)); + + input->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32)); + permute->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 4, 1, 4 }, armnn::DataType::Float32)); + + // optimize the network + armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec()); + + for (auto&& layer : 
static_cast<armnn::OptimizedNetwork*>(optNet.get())->GetGraph()) + { + BOOST_CHECK_EQUAL(armnn::Compute::CpuRef, layer->GetComputeDevice()); + } +} + +BOOST_AUTO_TEST_CASE(FP16TurboModeTestOnCpuRef) +{ + // Test to check when FP16 Turbo mode set + // it converts the FP32 network to FP16 Network + // add FP32ToFP16 conversion layer after the InputLayer + // add FP16ToFP32 conversion layer after the OutputLayer + // checks the other layers if they are supported in FP16 + // if they are not put the conversion layers before and after + // if they are not supported in FP16 use FP32 instead + // if there are inverse conversion layers remove them with optimization + // at the moment FloorLayer is not supported in FP16 so it rolls back to FP32 + // and inverse conversion layers are removed by the optimizer + armnn::Network net; + + // Defines layers. + auto input = net.AddInputLayer(0); + auto floor = net.AddFloorLayer(); + auto output = net.AddOutputLayer(0); + + // Connects layers. + input->GetOutputSlot(0).Connect(floor->GetInputSlot(0)); + floor->GetOutputSlot(0).Connect(output->GetInputSlot(0)); + + armnn::TensorShape shape({4}); + armnn::TensorInfo info(shape, armnn::DataType::Float32); + input->GetOutputSlot(0).SetTensorInfo(info); + floor->GetOutputSlot(0).SetTensorInfo(info); + + armnn::IRuntime::CreationOptions options; + armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options)); + + std::vector<armnn::Compute> backends = {armnn::Compute::CpuRef}; + + armnn::OptimizerOptions optimizerOptions; + optimizerOptions.m_ReduceFp32ToFp16 = true; + + armnn::IOptimizedNetworkPtr optimizedNet = armnn::Optimize(net, backends, runtime->GetDeviceSpec(), + optimizerOptions); + + std::ostringstream ss; + optimizedNet->SerializeToDot(ss); + + auto inputId = input->GetGuid(); + auto floorId = floor->GetGuid(); + auto outputId = output->GetGuid(); + + std::stringstream expected; + expected << + "digraph Optimized {\n" + " node [shape=\"record\"];\n" + " edge [fontsize=8 
fontcolor=\"blue\" fontname=\"arial-bold\"];\n" + " " << inputId << " [label=\"{Input}\"];\n" + " " << floorId << " [label=\"{Floor}\"];\n" + " " << outputId << " [label=\"{Output}\"];\n" + " " << inputId << " -> " << floorId << " [label=< [4] >];\n" + " " << floorId << " -> " << outputId << " [label=< [4] >];\n" + "}\n"; + + BOOST_TEST(ss.str() == expected.str()); +} + +#if ARMCOMPUTECL_ENABLED +BOOST_AUTO_TEST_CASE(FP16TurboModeTestOnGpuAcc) +{ + // Test to check when Fp16 Turbo mode set + // it converts the Fp32 network to Fp16 Network + // add Fp32ToFp16 conversion layer after the InputLayer + // add Fp16ToFp32 conversion layer after the OutputLayer + // checks the other layers if they are supported in Fp16 + // if they are not put the conversion layers before and after + // if they are not supported in Fp16 use Fp32 instead + // if there are inverse conversion layers remove them with optimization + // at the moment FloorLayer is not supported in Fp16 so it rolls back to Fp32 + // and inverse conversion layers are removed by the optimizer + armnn::Network net; + + // Defines layers. + auto input = net.AddInputLayer(0, "input layer"); + // ReLu1 + armnn::ActivationDescriptor activation1Descriptor; + activation1Descriptor.m_Function = armnn::ActivationFunction::BoundedReLu; + activation1Descriptor.m_A = 1.f; + activation1Descriptor.m_B = -1.f; + auto activation = net.AddActivationLayer(activation1Descriptor, "activation layer"); + auto output = net.AddOutputLayer(0, "output layer"); + + // Connects layers. 
+ input->GetOutputSlot(0).Connect(activation->GetInputSlot(0)); + activation->GetOutputSlot(0).Connect(output->GetInputSlot(0)); + + armnn::TensorShape shape({4}); + armnn::TensorInfo info(shape, armnn::DataType::Float32); + input->GetOutputSlot(0).SetTensorInfo(info); + activation->GetOutputSlot(0).SetTensorInfo(info); + + armnn::IRuntime::CreationOptions options; + armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options)); + + std::vector<armnn::Compute> backends = {armnn::Compute::GpuAcc}; + + armnn::OptimizerOptions optimizerOptions; + optimizerOptions.m_ReduceFp32ToFp16 = true; + + armnn::IOptimizedNetworkPtr optimizedNet = armnn::Optimize(net, backends, runtime->GetDeviceSpec(), + optimizerOptions); + + const armnn::Graph& graph = static_cast<armnn::OptimizedNetwork*>(optimizedNet.get())->GetGraph(); + + // Tests that all layers are present in the graph. + BOOST_TEST(graph.GetNumLayers() == 5); + + // Tests that the vertices exist and have correct names. + BOOST_TEST(GraphHasNamedLayer(graph, "input layer")); + BOOST_TEST(GraphHasNamedLayer(graph, "convert_fp32_to_fp16-0-input layer")); + BOOST_TEST(GraphHasNamedLayer(graph, "activation layer")); + BOOST_TEST(GraphHasNamedLayer(graph, "convert_fp16_to_fp32-0-output layer")); + BOOST_TEST(GraphHasNamedLayer(graph, "output layer")); +} +#endif + +BOOST_AUTO_TEST_SUITE_END() diff --git a/src/armnn/test/Network_test.cpp b/src/armnn/test/Network_test.cpp deleted file mode 100644 index 057caa0505..0000000000 --- a/src/armnn/test/Network_test.cpp +++ /dev/null @@ -1,483 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// See LICENSE file in the project root for full license information. 
-// -#include <boost/test/unit_test.hpp> - -#include "armnn/ArmNN.hpp" -#include "Network.hpp" -#include "Graph.hpp" -#include "backends/RefWorkloadFactory.hpp" - -#include "GraphUtils.hpp" - -namespace -{ - -bool AreAllLayerInputSlotsConnected(const armnn::IConnectableLayer& layer) -{ - bool allConnected = true; - for (unsigned int i = 0; i < layer.GetNumInputSlots(); ++i) - { - const bool inputConnected = layer.GetInputSlot(i).GetConnection() != nullptr; - allConnected &= inputConnected; - } - return allConnected; -} - -} - -BOOST_AUTO_TEST_SUITE(Network) - -BOOST_AUTO_TEST_CASE(LayerGuids) -{ - armnn::Network net; - armnn::LayerGuid inputId = net.AddInputLayer(0)->GetGuid(); - armnn::LayerGuid addId = net.AddAdditionLayer()->GetGuid(); - armnn::LayerGuid outputId = net.AddOutputLayer(0)->GetGuid(); - - BOOST_TEST(inputId != addId); - BOOST_TEST(addId != outputId); - BOOST_TEST(inputId != outputId); -} - -BOOST_AUTO_TEST_CASE(SerializeToDot) -{ - armnn::Network net; - - //define layers - auto input = net.AddInputLayer(0); - auto add = net.AddAdditionLayer(); - auto output = net.AddOutputLayer(0); - - // connect layers - input->GetOutputSlot(0).Connect(add->GetInputSlot(0)); - input->GetOutputSlot(0).Connect(add->GetInputSlot(1)); - add->GetOutputSlot(0).Connect(output->GetInputSlot(0)); - - armnn::TensorShape shape({4}); - armnn::TensorInfo info(shape, armnn::DataType::Float32); - input->GetOutputSlot(0).SetTensorInfo(info); - add->GetOutputSlot(0).SetTensorInfo(info); - - armnn::DeviceSpec spec; - spec.DefaultComputeDevice = armnn::Compute::CpuAcc; - armnn::IOptimizedNetworkPtr optimizedNet = armnn::Optimize(net, spec); - - std::ostringstream ss; - optimizedNet->SerializeToDot(ss); - - auto inputId = input->GetGuid(); - auto addId = add->GetGuid(); - auto outputId = output->GetGuid(); - - std::stringstream expected; - expected << - "digraph Optimized {\n" - " node [shape=\"record\"];\n" - " edge [fontsize=8 fontcolor=\"blue\" fontname=\"arial-bold\"];\n" - " " 
<< inputId << " [label=\"{Input}\"];\n" - " " << addId << " [label=\"{Addition}\"];\n" - " " << outputId << " [label=\"{Output}\"];\n" - " " << inputId << " -> " << addId << " [label=< [4] >];\n" - " " << inputId << " -> " << addId << " [label=< [4] >];\n" - " " << addId << " -> " << outputId << " [label=< [4] >];\n" - "}\n"; - - BOOST_TEST(ss.str() == expected.str()); -} - -BOOST_AUTO_TEST_CASE(NetworkBasic) -{ - armnn::Network net; - BOOST_TEST(net.PrintGraph() == armnn::Status::Success); -} - -BOOST_AUTO_TEST_CASE(LayerNamesAreOptionalForINetwork) -{ - armnn::Network net; - armnn::INetwork& inet = net; - inet.AddInputLayer(0); - inet.AddAdditionLayer(); - inet.AddActivationLayer(armnn::ActivationDescriptor()); - inet.AddOutputLayer(0); -} - -BOOST_AUTO_TEST_CASE(LayerNamesAreOptionalForNetwork) -{ - armnn::Network net; - net.AddInputLayer(0); - net.AddAdditionLayer(); - net.AddActivationLayer(armnn::ActivationDescriptor()); - net.AddOutputLayer(0); -} - -BOOST_AUTO_TEST_CASE(NetworkModification) -{ - armnn::Network net; - - armnn::IConnectableLayer* const inputLayer = net.AddInputLayer(0, "input layer"); - BOOST_TEST(inputLayer); - - unsigned int dims[] = { 10,1,1,1 }; - std::vector<float> convWeightsData(10); - armnn::ConstTensor weights(armnn::TensorInfo(4, dims, armnn::DataType::Float32), convWeightsData); - - armnn::Convolution2dDescriptor convDesc2d; - armnn::IConnectableLayer* const convLayer = net.AddConvolution2dLayer(convDesc2d, weights, "conv layer"); - BOOST_TEST(convLayer); - - inputLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(0)); - - armnn::FullyConnectedDescriptor fullyConnectedDesc; - armnn::IConnectableLayer* const fullyConnectedLayer = net.AddFullyConnectedLayer(fullyConnectedDesc, - weights, - "fully connected"); - BOOST_TEST(fullyConnectedLayer); - - convLayer->GetOutputSlot(0).Connect(fullyConnectedLayer->GetInputSlot(0)); - - armnn::Pooling2dDescriptor pooling2dDesc; - armnn::IConnectableLayer* const poolingLayer = 
net.AddPooling2dLayer(pooling2dDesc, "pooling2d"); - BOOST_TEST(poolingLayer); - - fullyConnectedLayer->GetOutputSlot(0).Connect(poolingLayer->GetInputSlot(0)); - - armnn::ActivationDescriptor activationDesc; - armnn::IConnectableLayer* const activationLayer = net.AddActivationLayer(activationDesc, "activation"); - BOOST_TEST(activationLayer); - - poolingLayer->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0)); - - armnn::NormalizationDescriptor normalizationDesc; - armnn::IConnectableLayer* const normalizationLayer = net.AddNormalizationLayer(normalizationDesc, "normalization"); - BOOST_TEST(normalizationLayer); - - activationLayer->GetOutputSlot(0).Connect(normalizationLayer->GetInputSlot(0)); - - armnn::SoftmaxDescriptor softmaxDesc; - armnn::IConnectableLayer* const softmaxLayer = net.AddSoftmaxLayer(softmaxDesc, "softmax"); - BOOST_TEST(softmaxLayer); - - normalizationLayer->GetOutputSlot(0).Connect(softmaxLayer->GetInputSlot(0)); - - armnn::BatchNormalizationDescriptor batchNormDesc; - - armnn::TensorInfo tensorInfo({ 1 }, armnn::DataType::Float32); - std::vector<float> data(tensorInfo.GetNumBytes() / sizeof(float)); - armnn::ConstTensor invalidTensor(tensorInfo, data); - - armnn::IConnectableLayer* const batchNormalizationLayer = net.AddBatchNormalizationLayer(batchNormDesc, - invalidTensor, - invalidTensor, - invalidTensor, - invalidTensor, - "batch norm"); - BOOST_TEST(batchNormalizationLayer); - - softmaxLayer->GetOutputSlot(0).Connect(batchNormalizationLayer->GetInputSlot(0)); - - armnn::IConnectableLayer* const additionLayer = net.AddAdditionLayer("addition"); - BOOST_TEST(additionLayer); - - batchNormalizationLayer->GetOutputSlot(0).Connect(additionLayer->GetInputSlot(0)); - batchNormalizationLayer->GetOutputSlot(0).Connect(additionLayer->GetInputSlot(1)); - - armnn::IConnectableLayer* const multiplicationLayer = net.AddMultiplicationLayer("multiplication"); - BOOST_TEST(multiplicationLayer); - - 
additionLayer->GetOutputSlot(0).Connect(multiplicationLayer->GetInputSlot(0)); - additionLayer->GetOutputSlot(0).Connect(multiplicationLayer->GetInputSlot(1)); - - armnn::IConnectableLayer* const outputLayer = net.AddOutputLayer(0, "output layer"); - BOOST_TEST(outputLayer); - - multiplicationLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0)); - - //Test that all layers are present in the graph - BOOST_TEST(net.GetGraph().GetNumLayers() == 11); - - //Test that the vertices exist and have correct names - BOOST_TEST(GraphHasNamedLayer(net.GetGraph(), "input layer")); - BOOST_TEST(GraphHasNamedLayer(net.GetGraph(), "conv layer")); - BOOST_TEST(GraphHasNamedLayer(net.GetGraph(), "fully connected")); - BOOST_TEST(GraphHasNamedLayer(net.GetGraph(), "pooling2d")); - BOOST_TEST(GraphHasNamedLayer(net.GetGraph(), "activation")); - BOOST_TEST(GraphHasNamedLayer(net.GetGraph(), "normalization")); - BOOST_TEST(GraphHasNamedLayer(net.GetGraph(), "softmax")); - BOOST_TEST(GraphHasNamedLayer(net.GetGraph(), "batch norm")); - BOOST_TEST(GraphHasNamedLayer(net.GetGraph(), "addition")); - BOOST_TEST(GraphHasNamedLayer(net.GetGraph(), "multiplication")); - BOOST_TEST(GraphHasNamedLayer(net.GetGraph(), "output layer")); - - auto checkOneOutputToOneInputConnection = [] - (const armnn::IConnectableLayer* const srcLayer, - const armnn::IConnectableLayer* const tgtLayer, - int expectedSrcNumInputs = 1, - int expectedDstNumOutputs = 1) - { - BOOST_TEST(srcLayer->GetNumInputSlots() == expectedSrcNumInputs); - BOOST_TEST(srcLayer->GetNumOutputSlots() == 1); - BOOST_TEST(tgtLayer->GetNumInputSlots() == 1); - BOOST_TEST(tgtLayer->GetNumOutputSlots() == expectedDstNumOutputs); - - BOOST_TEST(srcLayer->GetOutputSlot(0).GetNumConnections() == 1); - BOOST_TEST(srcLayer->GetOutputSlot(0).GetConnection(0) == &tgtLayer->GetInputSlot(0)); - BOOST_TEST(&srcLayer->GetOutputSlot(0) == tgtLayer->GetInputSlot(0).GetConnection()); - }; - auto checkOneOutputToTwoInputsConnections = [] - (const 
armnn::IConnectableLayer* const srcLayer, - const armnn::IConnectableLayer* const tgtLayer, - int expectedSrcNumInputs, - int expectedDstNumOutputs = 1) - { - BOOST_TEST(srcLayer->GetNumInputSlots() == expectedSrcNumInputs); - BOOST_TEST(srcLayer->GetNumOutputSlots() == 1); - BOOST_TEST(tgtLayer->GetNumInputSlots() == 2); - BOOST_TEST(tgtLayer->GetNumOutputSlots() == expectedDstNumOutputs); - - BOOST_TEST(srcLayer->GetOutputSlot(0).GetNumConnections() == 2); - for (unsigned int i = 0; i < srcLayer->GetOutputSlot(0).GetNumConnections(); ++i) - { - BOOST_TEST(srcLayer->GetOutputSlot(0).GetConnection(i) == &tgtLayer->GetInputSlot(i)); - BOOST_TEST(&srcLayer->GetOutputSlot(0) == tgtLayer->GetInputSlot(i).GetConnection()); - } - }; - - BOOST_TEST(AreAllLayerInputSlotsConnected(*convLayer)); - BOOST_TEST(AreAllLayerInputSlotsConnected(*fullyConnectedLayer)); - BOOST_TEST(AreAllLayerInputSlotsConnected(*poolingLayer)); - BOOST_TEST(AreAllLayerInputSlotsConnected(*activationLayer)); - BOOST_TEST(AreAllLayerInputSlotsConnected(*normalizationLayer)); - BOOST_TEST(AreAllLayerInputSlotsConnected(*softmaxLayer)); - BOOST_TEST(AreAllLayerInputSlotsConnected(*batchNormalizationLayer)); - BOOST_TEST(AreAllLayerInputSlotsConnected(*additionLayer)); - BOOST_TEST(AreAllLayerInputSlotsConnected(*multiplicationLayer)); - BOOST_TEST(AreAllLayerInputSlotsConnected(*outputLayer)); - - // Check connectivity - checkOneOutputToOneInputConnection(inputLayer, convLayer, 0); - checkOneOutputToOneInputConnection(convLayer, fullyConnectedLayer); - checkOneOutputToOneInputConnection(fullyConnectedLayer, poolingLayer); - checkOneOutputToOneInputConnection(poolingLayer, activationLayer); - checkOneOutputToOneInputConnection(activationLayer, normalizationLayer); - checkOneOutputToOneInputConnection(normalizationLayer, softmaxLayer); - checkOneOutputToOneInputConnection(softmaxLayer, batchNormalizationLayer); - checkOneOutputToTwoInputsConnections(batchNormalizationLayer, additionLayer, 1); - 
checkOneOutputToTwoInputsConnections(additionLayer, multiplicationLayer, 2); - checkOneOutputToOneInputConnection(multiplicationLayer, outputLayer, 2, 0); -} - -BOOST_AUTO_TEST_CASE(NetworkModification_SplitterMerger) -{ - armnn::Network net; - - // Add an input layer and an input tensor descriptor. - armnn::IConnectableLayer* inputLayer = net.AddInputLayer(0, "input layer"); - BOOST_TEST(inputLayer); - - // Add a splitter layer - armnn::ViewsDescriptor splitterDesc(2,4); - - armnn::IConnectableLayer* splitterLayer = net.AddSplitterLayer(splitterDesc, "splitter layer"); - BOOST_TEST(splitterLayer); - - inputLayer->GetOutputSlot(0).Connect(splitterLayer->GetInputSlot(0)); - - // Add a softmax layer 1 - armnn::SoftmaxDescriptor softmaxDescriptor; - armnn::IConnectableLayer* softmaxLayer1 = net.AddSoftmaxLayer(softmaxDescriptor, "softmax_1"); - BOOST_TEST(softmaxLayer1); - - splitterLayer->GetOutputSlot(0).Connect(softmaxLayer1->GetInputSlot(0)); - - // Add a softmax layer 2 - armnn::IConnectableLayer* softmaxLayer2 = net.AddSoftmaxLayer(softmaxDescriptor, "softmax_2"); - BOOST_TEST(softmaxLayer2); - - splitterLayer->GetOutputSlot(1).Connect(softmaxLayer2->GetInputSlot(0)); - - // Add a merger layer - armnn::OriginsDescriptor mergerDesc(2, 4); - - armnn::IConnectableLayer* mergerLayer = net.AddMergerLayer(mergerDesc, "merger layer"); - BOOST_TEST(mergerLayer); - - softmaxLayer1->GetOutputSlot(0).Connect(mergerLayer->GetInputSlot(0)); - softmaxLayer2->GetOutputSlot(0).Connect(mergerLayer->GetInputSlot(1)); - - // Add an output layer - armnn::IConnectableLayer* outputLayer = net.AddOutputLayer(0, "output layer"); - BOOST_TEST(outputLayer); - - mergerLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0)); - - BOOST_TEST(splitterLayer->GetNumOutputSlots() == 2); - BOOST_TEST(splitterLayer->GetOutputSlot(0).GetConnection(0) == &softmaxLayer1->GetInputSlot(0)); - BOOST_TEST(&splitterLayer->GetOutputSlot(0) == softmaxLayer1->GetInputSlot(0).GetConnection()); - 
BOOST_TEST(splitterLayer->GetOutputSlot(1).GetConnection(0) == &softmaxLayer2->GetInputSlot(0)); - BOOST_TEST(&splitterLayer->GetOutputSlot(1) == softmaxLayer2->GetInputSlot(0).GetConnection()); - - BOOST_TEST(mergerLayer->GetNumInputSlots() == 2); - BOOST_TEST(softmaxLayer1->GetOutputSlot(0).GetConnection(0) == &mergerLayer->GetInputSlot(0)); - BOOST_TEST(&softmaxLayer1->GetOutputSlot(0) == mergerLayer->GetInputSlot(0).GetConnection()); - BOOST_TEST(softmaxLayer2->GetOutputSlot(0).GetConnection(0) == &mergerLayer->GetInputSlot(1)); - BOOST_TEST(&softmaxLayer2->GetOutputSlot(0) == mergerLayer->GetInputSlot(1).GetConnection()); -} - -BOOST_AUTO_TEST_CASE(NetworkModification_SplitterAddition) -{ - armnn::Network net; - - // Add an input layer and an input tensor descriptor. - armnn::IConnectableLayer* layer = net.AddInputLayer(0, "input layer"); - BOOST_TEST(layer); - - // Add a splitter layer - armnn::ViewsDescriptor splitterDesc(2,4); - - armnn::IConnectableLayer* const splitterLayer = net.AddSplitterLayer(splitterDesc, "splitter layer"); - BOOST_TEST(splitterLayer); - - layer->GetOutputSlot(0).Connect(splitterLayer->GetInputSlot(0)); - - // Add a softmax layer 1 - armnn::SoftmaxDescriptor softmaxDescriptor; - armnn::IConnectableLayer* const softmax1Layer = net.AddSoftmaxLayer(softmaxDescriptor, "softmax_1"); - BOOST_TEST(softmax1Layer); - - splitterLayer->GetOutputSlot(0).Connect(softmax1Layer->GetInputSlot(0)); - - // Add a softmax layer 2 - armnn::IConnectableLayer* const softmax2Layer = net.AddSoftmaxLayer(softmaxDescriptor, "softmax_2"); - BOOST_TEST(softmax2Layer); - - splitterLayer->GetOutputSlot(1).Connect(softmax2Layer->GetInputSlot(0)); - - // Add addition layer - layer = net.AddAdditionLayer("add layer"); - BOOST_TEST(layer); - - softmax1Layer->GetOutputSlot(0).Connect(layer->GetInputSlot(0)); - softmax2Layer->GetOutputSlot(0).Connect(layer->GetInputSlot(1)); - - // Add an output layer - armnn::IConnectableLayer* prevLayer = layer; - layer = 
net.AddOutputLayer(0, "output layer"); - - prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0)); - - BOOST_TEST(layer); -} - -BOOST_AUTO_TEST_CASE(NetworkModification_SplitterMultiplication) -{ - armnn::Network net; - - // Add an input layer and an input tensor descriptor. - armnn::IConnectableLayer* layer = net.AddInputLayer(0, "input layer"); - BOOST_TEST(layer); - - // Add a splitter layer - armnn::ViewsDescriptor splitterDesc(2,4); - armnn::IConnectableLayer* const splitterLayer = net.AddSplitterLayer(splitterDesc, "splitter layer"); - BOOST_TEST(splitterLayer); - - layer->GetOutputSlot(0).Connect(splitterLayer->GetInputSlot(0)); - - // Add a softmax layer 1 - armnn::SoftmaxDescriptor softmaxDescriptor; - armnn::IConnectableLayer* const softmax1Layer = net.AddSoftmaxLayer(softmaxDescriptor, "softmax_1"); - BOOST_TEST(softmax1Layer); - - splitterLayer->GetOutputSlot(0).Connect(softmax1Layer->GetInputSlot(0)); - - // Add a softmax layer 2 - armnn::IConnectableLayer* const softmax2Layer = net.AddSoftmaxLayer(softmaxDescriptor, "softmax_2"); - BOOST_TEST(softmax2Layer); - - splitterLayer->GetOutputSlot(1).Connect(softmax2Layer->GetInputSlot(0)); - - // Add multiplication layer - layer = net.AddMultiplicationLayer("multiplication layer"); - BOOST_TEST(layer); - - softmax1Layer->GetOutputSlot(0).Connect(layer->GetInputSlot(0)); - softmax2Layer->GetOutputSlot(0).Connect(layer->GetInputSlot(1)); - - // Add an output layer - armnn::IConnectableLayer* prevLayer = layer; - layer = net.AddOutputLayer(0, "output layer"); - BOOST_TEST(layer); - - prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0)); -} - -BOOST_AUTO_TEST_CASE(ValidateWorkloads) -{ - const armnn::TensorInfo desc({3, 5}, armnn::DataType::Float32); - - armnn::Network net; - - armnn::NormalizationDescriptor nmDesc; - armnn::ActivationDescriptor acDesc; - - // in - // | - // nm - // / | - // ac | - // \ | - // ml - // | - // sm - // | - // ot - armnn::IConnectableLayer* layer = net.AddInputLayer(0, 
"in"); - layer->GetOutputSlot(0).SetTensorInfo(desc); - - armnn::IConnectableLayer* const normLayer = net.AddNormalizationLayer(nmDesc, "nm"); - - layer->GetOutputSlot(0).Connect(normLayer->GetInputSlot(0)); - normLayer->GetOutputSlot(0).SetTensorInfo(desc); - - layer = net.AddActivationLayer(acDesc, "ac"); - - normLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0)); - layer->GetOutputSlot(0).SetTensorInfo(desc); - - armnn::IConnectableLayer* prevLayer = layer; - layer = net.AddMultiplicationLayer("ml"); - - prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0)); - normLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1)); - layer->GetOutputSlot(0).SetTensorInfo(desc); - - prevLayer = layer; - armnn::SoftmaxDescriptor softmaxDescriptor; - layer = net.AddSoftmaxLayer(softmaxDescriptor, "sm"); - - prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0)); - layer->GetOutputSlot(0).SetTensorInfo(desc); - - prevLayer = layer; - layer = net.AddOutputLayer(0, "ot"); - - prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0)); - - armnn::DeviceSpec spec; - spec.DefaultComputeDevice = armnn::Compute::CpuRef; - - armnn::IOptimizedNetworkPtr optNet = Optimize(net, spec); - static_cast<armnn::OptimizedNetwork*>(optNet.get())->GetGraph().AllocateDynamicBuffers(); - - // validate workloads - armnn::RefWorkloadFactory fact; - for (auto&& layer : static_cast<armnn::OptimizedNetwork*>(optNet.get())->GetGraph()) - { - BOOST_CHECK_NO_THROW( - layer->CreateWorkload(static_cast<armnn::OptimizedNetwork*>(optNet.get())->GetGraph(), fact)); - } -} - -BOOST_AUTO_TEST_SUITE_END() diff --git a/src/armnn/test/ObservableTest.cpp b/src/armnn/test/ObservableTest.cpp new file mode 100644 index 0000000000..6588f3469e --- /dev/null +++ b/src/armnn/test/ObservableTest.cpp @@ -0,0 +1,94 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +#include <boost/test/unit_test.hpp> + +#include "Graph.hpp" +#include "Observable.hpp" + +BOOST_AUTO_TEST_SUITE(Observable) + +BOOST_AUTO_TEST_CASE(AddedLayerObservableTest) +{ + armnn::Graph graph; + + // Create a graph observable + armnn::AddedLayerObservable layerObservable(graph); + + // Add a few layers + auto output = graph.AddLayer<armnn::OutputLayer>(0, "output"); + auto input = graph.InsertNewLayer<armnn::InputLayer>(output->GetInputSlot(0), 0, "input"); + + // Check the observable has observed the changes + std::list<armnn::Layer*> testLayers({ output, input }); + + BOOST_CHECK_EQUAL_COLLECTIONS(layerObservable.begin(), layerObservable.end(), + testLayers.begin(), testLayers.end()); +} + +BOOST_AUTO_TEST_CASE(ClearAddedLayerObservableTest) +{ + armnn::Graph graph; + + // Create a graph observable + armnn::AddedLayerObservable addedLayerObservable(graph); + + // Add a few layers + auto output = graph.AddLayer<armnn::OutputLayer>(0, "output"); + graph.InsertNewLayer<armnn::InputLayer>(output->GetInputSlot(0), 0, "input"); + + addedLayerObservable.Clear(); + + // Check the observable has observed the changes + std::list<armnn::Layer*> emptyList({}); + + BOOST_CHECK_EQUAL_COLLECTIONS(addedLayerObservable.begin(), addedLayerObservable.end(), + emptyList.begin(), emptyList.end()); +} + +BOOST_AUTO_TEST_CASE(ErasedLayerNamesObservableTest) +{ + armnn::Graph graph; + + // Create a graph observable + armnn::ErasedLayerNamesObservable erasedLayerNamesObservable(graph); + + // Add a few layers + auto output = graph.AddLayer<armnn::OutputLayer>(0, "output"); + graph.InsertNewLayer<armnn::InputLayer>(output->GetInputSlot(0), 0, "input"); + + graph.EraseLayer(output); + + // Check the observable has observed the changes + std::list<std::string> testList({"output"}); + + BOOST_CHECK_EQUAL_COLLECTIONS(erasedLayerNamesObservable.begin(), erasedLayerNamesObservable.end(), + testList.begin(), testList.end()); +} + 
+BOOST_AUTO_TEST_CASE(ClearErasedLayerNamesObservableTest) +{ + armnn::Graph graph; + + // Create a graph observable + armnn::ErasedLayerNamesObservable erasedLayerNamesObservable(graph); + + // Add a few layers + auto output = graph.AddLayer<armnn::OutputLayer>(0, "output"); + graph.InsertNewLayer<armnn::InputLayer>(output->GetInputSlot(0), 0, "input"); + + graph.EraseLayer(output); + + erasedLayerNamesObservable.Clear(); + + // Check the observable has observed the changes + std::list<std::string> emptyList({}); + + BOOST_CHECK_EQUAL_COLLECTIONS(erasedLayerNamesObservable.begin(), erasedLayerNamesObservable.end(), + emptyList.begin(), emptyList.end()); +} + +BOOST_AUTO_TEST_SUITE_END() + diff --git a/src/armnn/test/OpenClTimerTest.cpp b/src/armnn/test/OpenClTimerTest.cpp new file mode 100644 index 0000000000..b8dea8ebe0 --- /dev/null +++ b/src/armnn/test/OpenClTimerTest.cpp @@ -0,0 +1,137 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#if (defined(__aarch64__)) || (defined(__x86_64__)) // disable test failing on FireFly/Armv7 + +#include "arm_compute/runtime/CL/CLScheduler.h" +#include "backends/ClContextControl.hpp" +#include "backends/ClWorkloadFactory.hpp" +#include "backends/CpuTensorHandle.hpp" +#include <boost/format.hpp> +#include <iostream> +#include "OpenClTimer.hpp" +#include "backends/test/TensorCopyUtils.hpp" +#include "TensorHelpers.hpp" +#include <boost/test/unit_test.hpp> +#include "backends/WorkloadFactory.hpp" +#include "backends/test/WorkloadTestUtils.hpp" + +using namespace armnn; + +struct OpenClFixture +{ + // Initialising ClContextControl to ensure OpenCL is loaded correctly for each test case. + // NOTE: Profiling needs to be enabled in ClContextControl to be able to obtain execution + // times from OpenClTimer. 
+ OpenClFixture() : m_ClContextControl(nullptr, true) {} + ~OpenClFixture() {} + + ClContextControl m_ClContextControl; +}; + +BOOST_FIXTURE_TEST_SUITE(OpenClTimerBatchNorm, OpenClFixture) +using FactoryType = ClWorkloadFactory; + +BOOST_AUTO_TEST_CASE(OpenClTimerBatchNorm) +{ + ClWorkloadFactory workloadFactory; + + const unsigned int width = 2; + const unsigned int height = 3; + const unsigned int channels = 2; + const unsigned int num = 1; + int32_t qOffset = 0; + float qScale = 0.f; + + TensorInfo inputTensorInfo({num, channels, height, width}, GetDataType<float>()); + TensorInfo outputTensorInfo({num, channels, height, width}, GetDataType<float>()); + TensorInfo tensorInfo({channels}, GetDataType<float>()); + + // Set quantization parameters if the requested type is a quantized type. + if(IsQuantizedType<float>()) + { + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + tensorInfo.SetQuantizationScale(qScale); + tensorInfo.SetQuantizationOffset(qOffset); + } + + auto input = MakeTensor<float, 4>(inputTensorInfo, + QuantizedVector<float>(qScale, qOffset, + { + 1.f, 4.f, + 4.f, 2.f, + 1.f, 6.f, + + 1.f, 1.f, + 4.f, 1.f, + -2.f, 4.f + })); + // these values are per-channel of the input + auto mean = MakeTensor<float, 1>(tensorInfo, QuantizedVector<float>(qScale, qOffset, {3, -2})); + auto variance = MakeTensor<float, 1>(tensorInfo, QuantizedVector<float>(qScale, qOffset, {4, 9})); + auto beta = MakeTensor<float, 1>(tensorInfo, QuantizedVector<float>(qScale, qOffset, {3, 2})); + auto gamma = MakeTensor<float, 1>(tensorInfo, QuantizedVector<float>(qScale, qOffset, {2, 1})); + + std::unique_ptr<ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + BatchNormalizationQueueDescriptor data; + 
WorkloadInfo info; + ScopedCpuTensorHandle meanTensor(tensorInfo); + ScopedCpuTensorHandle varianceTensor(tensorInfo); + ScopedCpuTensorHandle betaTensor(tensorInfo); + ScopedCpuTensorHandle gammaTensor(tensorInfo); + + AllocateAndCopyDataToITensorHandle(&meanTensor, &mean[0]); + AllocateAndCopyDataToITensorHandle(&varianceTensor, &variance[0]); + AllocateAndCopyDataToITensorHandle(&betaTensor, &beta[0]); + AllocateAndCopyDataToITensorHandle(&gammaTensor, &gamma[0]); + + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + data.m_Mean = &meanTensor; + data.m_Variance = &varianceTensor; + data.m_Beta = &betaTensor; + data.m_Gamma = &gammaTensor; + data.m_Parameters.m_Eps = 0.0f; + + // for each channel: + // substract mean, divide by standard deviation (with an epsilon to avoid div by 0) + // multiply by gamma and add beta + std::unique_ptr<IWorkload> workload = workloadFactory.CreateBatchNormalization(data, info); + + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + OpenClTimer openClTimer; + + BOOST_CHECK_EQUAL(openClTimer.GetName(), "OpenClKernelTimer"); + + //Start the timer + openClTimer.Start(); + + //Execute the workload + workload->Execute(); + + //Stop the timer + openClTimer.Stop(); + + BOOST_CHECK_EQUAL(openClTimer.GetMeasurements().size(), 1); + + BOOST_CHECK_EQUAL(openClTimer.GetMeasurements().front().m_Name, + "OpenClKernelTimer/0: batchnormalization_layer_nchw GWS[1,3,2]"); + + BOOST_CHECK(openClTimer.GetMeasurements().front().m_Value > 0); + +} + +BOOST_AUTO_TEST_SUITE_END() + +#endif //aarch64 or x86_64
\ No newline at end of file diff --git a/src/armnn/test/OptimizerTests.cpp b/src/armnn/test/OptimizerTests.cpp index da26fba76e..0c1a2619b2 100644 --- a/src/armnn/test/OptimizerTests.cpp +++ b/src/armnn/test/OptimizerTests.cpp @@ -7,6 +7,8 @@ #include "armnn/ArmNN.hpp" #include "Graph.hpp" #include "Optimizer.hpp" +#include "backends/CpuTensorHandle.hpp" +#include "FloatingPointConverter.hpp" namespace { @@ -21,7 +23,7 @@ bool CheckSequence(const armnn::Graph::ConstIterator first, const armnn::Graph:: return (first == last); } -/// Check each unary function in Us evaluates true for each correspondent layer in the sequence [first, last) +/// Checks each unary function in Us evaluates true for each correspondent layer in the sequence [first, last). template <typename U, typename... Us> bool CheckSequence(const armnn::Graph::ConstIterator first, const armnn::Graph::ConstIterator last, @@ -30,11 +32,149 @@ bool CheckSequence(const armnn::Graph::ConstIterator first, { return u(*first) && CheckSequence(std::next(first), last, us...); } + +template <typename LayerT> +bool CheckRelatedLayers(armnn::Graph& graph, const std::list<std::string>& testRelatedLayers) +{ + for (auto& layer : graph) + { + if (layer->GetType() == armnn::LayerEnumOf<LayerT>()) + { + auto& relatedLayers = layer->GetRelatedLayerNames(); + if(!std::equal(relatedLayers.begin(), relatedLayers.end(), + testRelatedLayers.begin(), testRelatedLayers.end())) + { + return false; + } + } + } + + return true; +} + +// connects two layers +using namespace armnn; +void Connect(Layer* from, Layer* to, const TensorInfo& tensorInfo, unsigned int fromIndex = 0, unsigned int toIndex = 0) +{ + from->GetOutputSlot(fromIndex).Connect(to->GetInputSlot(toIndex)); + from->GetOutputHandler(fromIndex).SetTensorInfo(tensorInfo); +} + +void CreateLSTMLayerHelper(Graph &graph, bool CifgEnabled) +{ + LstmDescriptor layerDesc; + layerDesc.m_ActivationFunc = 4; + layerDesc.m_ClippingThresCell = 0.2f; + layerDesc.m_ClippingThresProj = 
0.4f; + layerDesc.m_CifgEnabled = CifgEnabled; + layerDesc.m_PeepholeEnabled = false; + layerDesc.m_ProjectionEnabled = false; + + LstmLayer* const layer = graph.AddLayer<LstmLayer>(layerDesc, "layer"); + unsigned int batchSize = 3; + unsigned int inputSize = 2; + unsigned int numUnits = 4; + unsigned int outputSize = 4; + + layer->m_BasicParameters.m_InputToForgetWeights = std::make_unique<ScopedCpuTensorHandle> + (TensorInfo({ numUnits, inputSize }, DataType::Float32)); + layer->m_BasicParameters.m_InputToCellWeights = std::make_unique<ScopedCpuTensorHandle> + (TensorInfo({ numUnits, inputSize }, DataType::Float32)); + layer->m_BasicParameters.m_InputToOutputWeights = std::make_unique<ScopedCpuTensorHandle> + (TensorInfo({ numUnits, inputSize }, DataType::Float32)); + layer->m_BasicParameters.m_RecurrentToForgetWeights = std::make_unique<ScopedCpuTensorHandle> + (TensorInfo({ numUnits, outputSize }, DataType::Float32)); + layer->m_BasicParameters.m_RecurrentToCellWeights = std::make_unique<ScopedCpuTensorHandle> + (TensorInfo({ numUnits, outputSize }, DataType::Float32)); + layer->m_BasicParameters.m_RecurrentToOutputWeights = std::make_unique<ScopedCpuTensorHandle> + (TensorInfo({ numUnits, outputSize }, DataType::Float32)); + layer->m_BasicParameters.m_ForgetGateBias = std::make_unique<ScopedCpuTensorHandle> + (TensorInfo({ numUnits }, DataType::Float32)); + layer->m_BasicParameters.m_CellBias = std::make_unique<ScopedCpuTensorHandle> + (TensorInfo({ numUnits }, DataType::Float32)); + layer->m_BasicParameters.m_OutputGateBias = std::make_unique<ScopedCpuTensorHandle> + (TensorInfo({ numUnits }, DataType::Float32)); + + layer->m_BasicParameters.m_InputToForgetWeights->Allocate(); + layer->m_BasicParameters.m_InputToCellWeights->Allocate(); + layer->m_BasicParameters.m_InputToOutputWeights->Allocate(); + layer->m_BasicParameters.m_RecurrentToForgetWeights->Allocate(); + layer->m_BasicParameters.m_RecurrentToCellWeights->Allocate(); + 
layer->m_BasicParameters.m_RecurrentToOutputWeights->Allocate(); + layer->m_BasicParameters.m_ForgetGateBias->Allocate(); + layer->m_BasicParameters.m_CellBias->Allocate(); + layer->m_BasicParameters.m_OutputGateBias->Allocate(); + + if (!layerDesc.m_CifgEnabled) + { + layer->m_CifgParameters.m_InputToInputWeights = std::make_unique<ScopedCpuTensorHandle> + (TensorInfo({ numUnits, inputSize }, DataType::Float32)); + layer->m_CifgParameters.m_RecurrentToInputWeights = std::make_unique<ScopedCpuTensorHandle> + (TensorInfo({ numUnits, outputSize }, DataType::Float32)); + layer->m_CifgParameters.m_CellToInputWeights = std::make_unique<ScopedCpuTensorHandle> + (TensorInfo({ numUnits }, DataType::Float32)); + layer->m_CifgParameters.m_InputGateBias = std::make_unique<ScopedCpuTensorHandle> + (TensorInfo({ numUnits }, DataType::Float32)); + layer->m_CifgParameters.m_InputToInputWeights->Allocate(); + layer->m_CifgParameters.m_RecurrentToInputWeights->Allocate(); + layer->m_CifgParameters.m_CellToInputWeights->Allocate(); + layer->m_CifgParameters.m_InputGateBias->Allocate(); + } + + if (layerDesc.m_ProjectionEnabled) + { + layer->m_ProjectionParameters.m_ProjectionWeights = std::make_unique<ScopedCpuTensorHandle> + (TensorInfo({ outputSize, numUnits }, DataType::Float32)); + layer->m_ProjectionParameters.m_ProjectionBias = std::make_unique<ScopedCpuTensorHandle> + (TensorInfo({ outputSize }, DataType::Float32)); + layer->m_ProjectionParameters.m_ProjectionWeights->Allocate(); + layer->m_ProjectionParameters.m_ProjectionBias->Allocate(); + } + + if (layerDesc.m_PeepholeEnabled) + { + layer->m_PeepholeParameters.m_CellToForgetWeights = std::make_unique<ScopedCpuTensorHandle> + (TensorInfo({ numUnits }, DataType::Float32)); + layer->m_PeepholeParameters.m_CellToOutputWeights = std::make_unique<ScopedCpuTensorHandle> + (TensorInfo({ numUnits }, DataType::Float32)); + layer->m_PeepholeParameters.m_CellToForgetWeights->Allocate(); + 
layer->m_PeepholeParameters.m_CellToOutputWeights->Allocate(); + } + + // create input and output layers + Layer* const input = graph.AddLayer<InputLayer>(0, "input"); + Layer* const outputStateIn = graph.AddLayer<InputLayer>(1, "outputStateIn"); + Layer* const cellStateIn = graph.AddLayer<InputLayer>(2, "cellStateIn"); + Layer* const scratchBuffer = graph.AddLayer<OutputLayer>(0, "scratchBuffer"); + Layer* const outputStateOut = graph.AddLayer<OutputLayer>(1, "outputStateOut"); + Layer* const cellStateOut = graph.AddLayer<OutputLayer>(2, "cellStateOut"); + Layer* const output = graph.AddLayer<OutputLayer>(3, "output"); + + // connect up + armnn::TensorInfo lstmTensorInfo1({ batchSize, inputSize }, DataType::Float32); + armnn::TensorInfo lstmTensorInfo2({ batchSize, numUnits}, DataType::Float32); + armnn::TensorInfo lstmTensorInfo3({ batchSize, outputSize }, DataType::Float32); + armnn::TensorInfo lstmTensorInfoScratchBuff({ batchSize, numUnits*3 }, DataType::Float32); + if (layerDesc.m_CifgEnabled) + { + lstmTensorInfoScratchBuff.SetShape({ batchSize, numUnits*4 }); + } + + Connect(input, layer, lstmTensorInfo1, 0, 0); + Connect(cellStateIn, layer, lstmTensorInfo2, 0, 1); + Connect(outputStateIn, layer, lstmTensorInfo3, 0, 2); + Connect(layer, scratchBuffer, lstmTensorInfoScratchBuff, 0, 0); + Connect(layer, outputStateOut, lstmTensorInfo3, 1, 0); + Connect(layer, cellStateOut, lstmTensorInfo2, 2, 0); + Connect(layer, output, lstmTensorInfo3, 3, 0); +} + } BOOST_AUTO_TEST_SUITE(Optimizer) +using namespace armnn::optimizations; -BOOST_AUTO_TEST_CASE(OptimizeInversePermutes) +BOOST_AUTO_TEST_CASE(OptimizeInversePermutesTest) { armnn::Graph graph; @@ -42,7 +182,7 @@ BOOST_AUTO_TEST_CASE(OptimizeInversePermutes) graph.InsertNewLayer<armnn::InputLayer>(output->GetInputSlot(0), 0, "input"); - // Insert two permutes, one the inverse of the other + // Inserts two permutes, one the inverse of the other. 
graph.InsertNewLayer<armnn::PermuteLayer>(output->GetInputSlot(0), armnn::PermuteDescriptor({0, 2, 3, 1}), "perm0231"); @@ -57,16 +197,38 @@ BOOST_AUTO_TEST_CASE(OptimizeInversePermutes) &IsLayerOfType<armnn::PermuteLayer>, &IsLayerOfType<armnn::OutputLayer>)); - armnn::Optimizer::Optimize(graph); + armnn::Optimizer::Pass(graph, armnn::MakeOptimizations(OptimizeInversePermutes())); - // The permutes are removed + // The permutes are removed. BOOST_TEST(CheckSequence(graph.cbegin(), graph.cend(), &IsLayerOfType<armnn::InputLayer>, &IsLayerOfType<armnn::OutputLayer>)); } -BOOST_AUTO_TEST_CASE(MovePermuteUp) +BOOST_AUTO_TEST_CASE(LSTMValidateTensorShapesFromInputsCIFGDisabledTest) +{ + Graph graph; + + //Helper function creates graph containing LSTM layer with required input and output layers + CreateLSTMLayerHelper(graph, false); + + //This function used to call ValidateShapesFromInputs(); + BOOST_CHECK_NO_THROW(graph.InferTensorInfos()); +} + +BOOST_AUTO_TEST_CASE(LSTMValidateTensorShapesFromInputsCIFGEnabledTest) +{ + Graph graph; + + //Helper function creates graph containing LSTM layer with required input and output layers + CreateLSTMLayerHelper(graph, true); + + //This function used to call ValidateShapesFromInputs(); + BOOST_CHECK_NO_THROW(graph.InferTensorInfos()); +} + +BOOST_AUTO_TEST_CASE(MovePermuteUpTest) { const armnn::TensorInfo info({ 1, 5, 2, 3 }, armnn::DataType::Float32); const armnn::TensorInfo permuted({ 1, 3, 5, 2 }, armnn::DataType::Float32); @@ -77,12 +239,16 @@ BOOST_AUTO_TEST_CASE(MovePermuteUp) armnn::Layer* head = graph.AddLayer<armnn::OutputLayer>(0, "output"); + std::string permuteLayerName = "original_permute"; + // Insert permute head = graph.InsertNewLayer<armnn::PermuteLayer>(head->GetInputSlot(0), - armnn::PermuteDescriptor({ 0, 2, 3, 1 }), ""); + armnn::PermuteDescriptor({ 0, 2, 3, 1 }), + permuteLayerName.c_str()); + head->GetOutputHandler().SetTensorInfo(permuted); - // Insert layers that don't care about data format + // Inserts 
layers that don't care about data format. head = graph.InsertNewLayer<armnn::ActivationLayer>(head->GetInputSlot(0), armnn::ActivationDescriptor{}, ""); head->GetOutputHandler().SetTensorInfo(info); @@ -90,7 +256,7 @@ BOOST_AUTO_TEST_CASE(MovePermuteUp) head = graph.InsertNewLayer<armnn::AdditionLayer>(head->GetInputSlot(0), ""); head->GetOutputHandler().SetTensorInfo(info); - // Insert input for 2nd input of Addition + // Inserts input for 2nd input of Addition. graph.InsertNewLayer<armnn::InputLayer>(head->GetInputSlot(1), inputId++, "") ->GetOutputHandler().SetTensorInfo(info); @@ -107,11 +273,11 @@ BOOST_AUTO_TEST_CASE(MovePermuteUp) head = graph.InsertNewLayer<armnn::MultiplicationLayer>(head->GetInputSlot(0), ""); head->GetOutputHandler().SetTensorInfo(info); - // Insert input for 2nd input of Multiplication + // Inserts input for 2nd input of Multiplication. graph.InsertNewLayer<armnn::InputLayer>(head->GetInputSlot(1), inputId++, "") ->GetOutputHandler().SetTensorInfo(info); - // Insert input + // Inserts input. graph.InsertNewLayer<armnn::InputLayer>(head->GetInputSlot(0), inputId++, "") ->GetOutputHandler().SetTensorInfo(info); @@ -129,9 +295,9 @@ BOOST_AUTO_TEST_CASE(MovePermuteUp) &IsLayerOfType<armnn::PermuteLayer>, &IsLayerOfType<armnn::OutputLayer>)); - armnn::Optimizer::Optimize(graph); + armnn::Optimizer::Pass(graph, armnn::MakeOptimizations(MovePermuteUp())); - // The permute is moved to the top. New permutes for layers with multiple inputs + // The permute is moved to the top. New permutes for layers with multiple inputs. 
BOOST_TEST(CheckSequence(graph.cbegin(), graph.cend(), &IsLayerOfType<armnn::InputLayer>, @@ -147,12 +313,18 @@ BOOST_AUTO_TEST_CASE(MovePermuteUp) &IsLayerOfType<armnn::AdditionLayer>, &IsLayerOfType<armnn::ActivationLayer>, &IsLayerOfType<armnn::OutputLayer>)); + + std::list<std::string> testRelatedLayers = { permuteLayerName }; + + BOOST_TEST(CheckRelatedLayers<armnn::PermuteLayer>(graph, testRelatedLayers)); } -BOOST_AUTO_TEST_CASE(PermuteAsReshape) +BOOST_AUTO_TEST_CASE(PermuteAsReshapeTest) { armnn::Graph graph; + std::string permuteLayerName = "permute"; + const armnn::TensorInfo infoIn({ 1, 2, 3, 1 }, armnn::DataType::Float32); const armnn::TensorInfo infoOut({ 1, 1, 2, 3 }, armnn::DataType::Float32); @@ -161,9 +333,9 @@ BOOST_AUTO_TEST_CASE(PermuteAsReshape) graph.InsertNewLayer<armnn::InputLayer>(output->GetInputSlot(0), 0, "input") ->GetOutputHandler().SetTensorInfo(infoIn); - // Insert permute + // Inserts permute. graph.InsertNewLayer<armnn::PermuteLayer>(output->GetInputSlot(0), - armnn::PermuteDescriptor({ 0, 2, 3, 1 }), "") + armnn::PermuteDescriptor({ 0, 2, 3, 1 }), permuteLayerName.c_str()) ->GetOutputHandler().SetTensorInfo(infoOut); BOOST_TEST(CheckSequence(graph.cbegin(), @@ -172,7 +344,7 @@ BOOST_AUTO_TEST_CASE(PermuteAsReshape) &IsLayerOfType<armnn::PermuteLayer>, &IsLayerOfType<armnn::OutputLayer>)); - armnn::Optimizer::Optimize(graph); + armnn::Optimizer::Pass(graph, armnn::MakeOptimizations(PermuteAsReshape())); // The permute is replaced by an equivalent reshape. 
@@ -189,9 +361,13 @@ BOOST_AUTO_TEST_CASE(PermuteAsReshape) &IsLayerOfType<armnn::InputLayer>, checkReshape, &IsLayerOfType<armnn::OutputLayer>)); + + + std::list<std::string> testRelatedLayers = { permuteLayerName }; + BOOST_TEST(CheckRelatedLayers<armnn::ReshapeLayer>(graph, testRelatedLayers)); } -BOOST_AUTO_TEST_CASE(OptimizeConsecutiveReshapes) +BOOST_AUTO_TEST_CASE(OptimizeConsecutiveReshapesTest) { armnn::Graph graph; @@ -203,16 +379,19 @@ BOOST_AUTO_TEST_CASE(OptimizeConsecutiveReshapes) input->GetOutputHandler().SetTensorInfo(info0); { - // Insert two reshapes + // Inserts two reshapes. const armnn::TensorInfo info1({1, 30, 1, 1}, armnn::DataType::Float32); const armnn::TensorInfo info2({1, 2, 1, 15}, armnn::DataType::Float32); + std::string reshape1Name = "reshape1"; + std::string reshape2Name = "reshape2"; + auto reshape1 = graph.InsertNewLayer<armnn::ReshapeLayer>(output->GetInputSlot(0), armnn::ReshapeDescriptor{ info1.GetShape() }, - "reshape1"); + reshape1Name.c_str()); auto reshape2 = graph.InsertNewLayer<armnn::ReshapeLayer>(output->GetInputSlot(0), armnn::ReshapeDescriptor{ info2.GetShape() }, - "reshape2"); + reshape2Name.c_str()); reshape1->GetOutputHandler().SetTensorInfo(info1); reshape2->GetOutputHandler().SetTensorInfo(info2); @@ -224,7 +403,7 @@ BOOST_AUTO_TEST_CASE(OptimizeConsecutiveReshapes) &IsLayerOfType<armnn::ReshapeLayer>, &IsLayerOfType<armnn::OutputLayer>)); - armnn::Optimizer::Optimize(graph); + armnn::Optimizer::Pass(graph, armnn::MakeOptimizations(OptimizeConsecutiveReshapes())); auto checkReshape = [&info2](const armnn::Layer* const layer) -> bool { @@ -234,25 +413,30 @@ BOOST_AUTO_TEST_CASE(OptimizeConsecutiveReshapes) (reshapeLayer->GetOutputHandler().GetTensorInfo().GetShape() == info2.GetShape()); }; - // The two reshapes are replaced by a single equivalent reshape + // The two reshapes are replaced by a single equivalent reshape. 
BOOST_TEST(CheckSequence(graph.cbegin(), graph.cend(), &IsLayerOfType<armnn::InputLayer>, checkReshape, &IsLayerOfType<armnn::OutputLayer>)); + + // Check the new reshape layer has the other two reshapes as related layers + std::list<std::string> testRelatedLayers = { reshape2Name, reshape1Name }; + + BOOST_TEST(CheckRelatedLayers<armnn::ReshapeLayer>(graph, testRelatedLayers)); } { - // Insert a reshape to the input shape + // Inserts a reshape to the input shape. auto reshapeToIn = graph.InsertNewLayer<armnn::ReshapeLayer>(output->GetInputSlot(0), armnn::ReshapeDescriptor{ info0.GetShape() }, "reshapeToIn"); reshapeToIn->GetOutputHandler().SetTensorInfo(info0); - armnn::Optimizer::Optimize(graph); + armnn::Optimizer::Pass(graph, armnn::MakeOptimizations(OptimizeConsecutiveReshapes())); - // The two reshapes are removed + // The two reshapes are removed. BOOST_TEST(CheckSequence(graph.cbegin(), graph.cend(), &IsLayerOfType<armnn::InputLayer>, @@ -260,7 +444,7 @@ BOOST_AUTO_TEST_CASE(OptimizeConsecutiveReshapes) } } -BOOST_AUTO_TEST_CASE(SquashEqualSiblings) +BOOST_AUTO_TEST_CASE(SquashEqualSiblingsTest) { armnn::Graph graph; @@ -272,7 +456,7 @@ BOOST_AUTO_TEST_CASE(SquashEqualSiblings) auto input = graph.AddLayer<armnn::InputLayer>(0, "input"); input->GetOutputSlot().SetTensorInfo(info); - // Insert equal permutes, equal reshapes and something else + // Inserts equal permutes, equal reshapes and something else. const armnn::PermuteDescriptor permDesc({ 0, 2, 3, 1 }); const armnn::ReshapeDescriptor reshapeDesc{ { 1, 3, 1, 5 } }; @@ -314,7 +498,8 @@ BOOST_AUTO_TEST_CASE(SquashEqualSiblings) &IsLayerOfType<armnn::OutputLayer>, &IsLayerOfType<armnn::OutputLayer>)); - armnn::Optimizer::Optimize(graph); + armnn::Optimizer::Pass(graph, armnn::MakeOptimizations(SquashEqualPermuteSiblings(), + SquashEqualReshapeSiblings())); // The permutes and reshapes are squashed. 
@@ -331,4 +516,259 @@ BOOST_AUTO_TEST_CASE(SquashEqualSiblings) &IsLayerOfType<armnn::OutputLayer>)); } +BOOST_AUTO_TEST_CASE(ConvertConstantsHalfToFloatTest) +{ + armnn::Graph graph; + + const armnn::TensorInfo info({ 1,1,1,2 }, armnn::DataType::Float32); + + // Create the half precision input data + unsigned int dims[] = { 4,1,1,1 }; + std::vector<float> convWeightsData{1.f, 2.f, 3.f, 4.f}; + std::vector<uint16_t> halfWeights(4); + armnnUtils::FloatingPointConverter::ConvertFloat32To16(convWeightsData.data(), + convWeightsData.size(), + halfWeights.data()); + armnn::ConstTensor weights(armnn::TensorInfo(4, dims, armnn::DataType::Float16), halfWeights); + + //Create the simple test network + auto input = graph.AddLayer<armnn::InputLayer>(0, "input"); + input->GetOutputSlot().SetTensorInfo(info); + + auto fc = graph.AddLayer<armnn::FullyConnectedLayer>(armnn::FullyConnectedDescriptor(), "fc"); + fc->m_Weight = std::make_unique<armnn::ScopedCpuTensorHandle>(weights); + fc->GetOutputSlot().SetTensorInfo(info); + + auto output = graph.AddLayer<armnn::OutputLayer>(1, "output"); + + //Connect up the layers + input->GetOutputSlot().Connect(fc->GetInputSlot(0)); + fc->GetOutputSlot().Connect(output->GetInputSlot(0)); + + //Test the tensor info is correct. + BOOST_CHECK(fc->m_Weight->GetTensorInfo().GetDataType() == armnn::DataType::Float16); + + // Run the optimizer + armnn::Optimizer::Pass(graph, armnn::MakeOptimizations(ConvertConstantsHalfToFloat())); + + //Test the tensor info is correct. 
+ BOOST_CHECK(fc->m_Weight->GetTensorInfo().GetDataType() == armnn::DataType::Float32); + + // Now test the data matches float32 data + float* data = fc->m_Weight->GetTensor<float>(); + BOOST_CHECK(1.0f == data[0]); + BOOST_CHECK(2.0f == data[1]); + BOOST_CHECK(3.0f == data[2]); + BOOST_CHECK(4.0f == data[3]); +} + +BOOST_AUTO_TEST_CASE(ConvertConstantsFloatToHalfTest) +{ + armnn::Graph graph; + + const armnn::TensorInfo info({ 1, 1, 1, 2 }, armnn::DataType::Float16); + + // Create const tensor from fp32 data + unsigned int dims[] = { 4, 1, 1, 1 }; + std::vector<float> floatWeights{ 1.0f, 2.0f, 3.0f, 4.0f }; + armnn::ConstTensor weights(armnn::TensorInfo(4, dims, armnn::DataType::Float32), floatWeights); + + // Create simple test network + auto input = graph.AddLayer<armnn::InputLayer>(0, "input"); + input->GetOutputSlot().SetTensorInfo(info); + + auto fc = graph.AddLayer<armnn::FullyConnectedLayer>(armnn::FullyConnectedDescriptor(), "fc"); + fc->m_Weight = std::make_unique<armnn::ScopedCpuTensorHandle>(weights); + fc->GetOutputSlot().SetTensorInfo(info); + + auto output = graph.AddLayer<armnn::OutputLayer>(1, "output"); + + // Connect up the layers + input->GetOutputSlot().Connect(fc->GetInputSlot(0)); + fc->GetOutputSlot().Connect(output->GetInputSlot(0)); + + // Check tensor data type before conversion + BOOST_CHECK(fc->m_Weight->GetTensorInfo().GetDataType() == armnn::DataType::Float32); + + // Run the optimizer + armnn::Optimizer::Pass(graph, armnn::MakeOptimizations(ConvertConstantsFloatToHalf())); + + // Check tensor data type after conversion + BOOST_CHECK(fc->m_Weight->GetTensorInfo().GetDataType() == armnn::DataType::Float16); + + // Check whether data matches expected fp16 data + Half* data = fc->m_Weight->GetTensor<Half>(); + BOOST_CHECK(data[0] == Half(1.0f)); + BOOST_CHECK(data[1] == Half(2.0f)); + BOOST_CHECK(data[2] == Half(3.0f)); + BOOST_CHECK(data[3] == Half(4.0f)); +} + +BOOST_AUTO_TEST_CASE(OptimizeInverseConversionsTest) +{ + armnn::Graph 
graph; + + auto output = graph.AddLayer<armnn::OutputLayer>(0, "output"); + + graph.InsertNewLayer<armnn::InputLayer>(output->GetInputSlot(0), 0, "input"); + + // Fp32ToFp16 conversion followed by an inverse Fp16ToFp32 conversion + graph.InsertNewLayer<armnn::ConvertFp32ToFp16Layer>(output->GetInputSlot(0), "convert1"); + graph.InsertNewLayer<armnn::ConvertFp16ToFp32Layer>(output->GetInputSlot(0), "convert2"); + + graph.InsertNewLayer<armnn::Convolution2dLayer>(output->GetInputSlot(0), Convolution2dDescriptor(), "conv"); + + // Fp16ToFp32 conversion followed by an inverse Fp32ToFp16 conversion + graph.InsertNewLayer<armnn::ConvertFp16ToFp32Layer>(output->GetInputSlot(0), "convert3"); + graph.InsertNewLayer<armnn::ConvertFp32ToFp16Layer>(output->GetInputSlot(0), "convert4"); + + BOOST_TEST(CheckSequence(graph.cbegin(), + graph.cend(), + &IsLayerOfType<armnn::InputLayer>, + &IsLayerOfType<armnn::ConvertFp32ToFp16Layer>, + &IsLayerOfType<armnn::ConvertFp16ToFp32Layer>, + &IsLayerOfType<armnn::Convolution2dLayer>, + &IsLayerOfType<armnn::ConvertFp16ToFp32Layer>, + &IsLayerOfType<armnn::ConvertFp32ToFp16Layer>, + &IsLayerOfType<armnn::OutputLayer>)); + + armnn::Optimizer::Pass(graph, armnn::MakeOptimizations(OptimizeInverseConversionsFp16(), + OptimizeInverseConversionsFp32())); + + // Check that all consecutive inverse conversions are removed + BOOST_TEST(CheckSequence(graph.cbegin(), + graph.cend(), + &IsLayerOfType<armnn::InputLayer>, + &IsLayerOfType<armnn::Convolution2dLayer>, + &IsLayerOfType<armnn::OutputLayer>)); +} + +BOOST_AUTO_TEST_CASE(InsertConvertersTest) +{ + const armnn::TensorInfo info({ 1, 5, 2, 3 }, armnn::DataType::Float16); + + armnn::Graph graph; + + armnn::LayerBindingId inputId = 0; + + armnn::Layer* head = graph.AddLayer<armnn::OutputLayer>(0, "output"); + + head = graph.InsertNewLayer<armnn::AdditionLayer>(head->GetInputSlot(0), ""); + head->GetOutputHandler().SetTensorInfo(info); + + 
graph.InsertNewLayer<armnn::InputLayer>(head->GetInputSlot(1), inputId++, "") + ->GetOutputHandler().SetTensorInfo(info); + + head = graph.InsertNewLayer<armnn::FloorLayer>(head->GetInputSlot(0), ""); + head->GetOutputHandler().SetTensorInfo(info); + + head = graph.InsertNewLayer<armnn::MemCopyLayer>(head->GetInputSlot(0), ""); + head->GetOutputHandler().SetTensorInfo(info); + + graph.InsertNewLayer<armnn::InputLayer>(head->GetInputSlot(0), inputId++, "") + ->GetOutputHandler().SetTensorInfo(info); + + // Check graph layer sequence before inserting convert layers + BOOST_TEST(CheckSequence(graph.cbegin(), + graph.cend(), + &IsLayerOfType<armnn::InputLayer>, + &IsLayerOfType<armnn::InputLayer>, + &IsLayerOfType<armnn::MemCopyLayer>, + &IsLayerOfType<armnn::FloorLayer>, + &IsLayerOfType<armnn::AdditionLayer>, + &IsLayerOfType<armnn::OutputLayer>)); + + // Check layers have Float16 DataType + for (auto& layer : graph) + { + if(layer->GetType()==LayerType::Floor || layer->GetType() == LayerType::Addition) + { + BOOST_ASSERT(layer->GetOutputSlot(0).GetTensorInfo().GetDataType() == DataType::Float16); + BOOST_ASSERT(layer->GetDataType() == DataType::Float16); + } + } + + // Insert convert layers either side of unsupported layer + for (auto& layer : graph) + { + if(layer->GetType()==LayerType::Floor || layer->GetType() == LayerType::Addition) + { + InsertConvertFp16ToFp32LayersBefore(graph, *layer); + InsertConvertFp32ToFp16LayersAfter(graph, *layer); + } + } + + // Check layers have correct DataType after inserting convert layers + for (auto& layer : graph) + { + if (layer->GetType()==LayerType::Floor || layer->GetType() == LayerType::Addition) + { + BOOST_ASSERT(layer->GetOutputSlot(0).GetTensorInfo().GetDataType() == DataType::Float32); + BOOST_ASSERT(layer->GetDataType() == DataType::Float32); + } + else if (layer->GetType() == LayerType::ConvertFp16ToFp32) + { + BOOST_ASSERT(layer->GetOutputSlot(0).GetTensorInfo().GetDataType() == DataType::Float32); + 
BOOST_ASSERT(layer->GetDataType() == DataType::Float16); + } + else if (layer->GetType() == LayerType::ConvertFp32ToFp16) + { + BOOST_ASSERT(layer->GetOutputSlot(0).GetTensorInfo().GetDataType() == DataType::Float16); + BOOST_ASSERT(layer->GetDataType() == DataType::Float32); + } + } + + // Check sequence of layers after inserting convert layers + BOOST_TEST(CheckSequence(graph.cbegin(), + graph.cend(), + &IsLayerOfType<armnn::InputLayer>, + &IsLayerOfType<armnn::InputLayer>, + &IsLayerOfType<armnn::ConvertFp16ToFp32Layer>, + &IsLayerOfType<armnn::MemCopyLayer>, + &IsLayerOfType<armnn::ConvertFp16ToFp32Layer>, + &IsLayerOfType<armnn::FloorLayer>, + &IsLayerOfType<armnn::ConvertFp32ToFp16Layer>, + &IsLayerOfType<armnn::ConvertFp16ToFp32Layer>, + &IsLayerOfType<armnn::AdditionLayer>, + &IsLayerOfType<armnn::ConvertFp32ToFp16Layer>, + &IsLayerOfType<armnn::OutputLayer>)); +} + +BOOST_AUTO_TEST_CASE(Fp32NetworkToFp16OptimizationTest) +{ + armnn::Graph graph; + + const armnn::TensorInfo infoFP32({ 2,2,1,3 }, armnn::DataType::Float32); + + // Create the simple test network + auto input = graph.AddLayer<armnn::InputLayer>(0, "input"); + input->GetOutputSlot().SetTensorInfo(infoFP32); + + auto floor = graph.AddLayer<armnn::FloorLayer>("floor"); + floor->GetOutputSlot().SetTensorInfo(infoFP32); + + auto output = graph.AddLayer<armnn::OutputLayer>(1, "output"); + + // Connect up the layers + input->GetOutputSlot().Connect(floor->GetInputSlot(0)); + floor->GetOutputSlot().Connect(output->GetInputSlot(0)); + + BOOST_TEST(CheckSequence(graph.cbegin(), + graph.cend(), + &IsLayerOfType<armnn::InputLayer>, + &IsLayerOfType<armnn::FloorLayer>, + &IsLayerOfType<armnn::OutputLayer>)); + + // Run the optimizer + armnn::Optimizer::Pass(graph, armnn::MakeOptimizations(Fp32NetworkToFp16Converter())); + + BOOST_TEST(CheckSequence(graph.cbegin(), + graph.cend(), + &IsLayerOfType<armnn::InputLayer>, + &IsLayerOfType<armnn::ConvertFp32ToFp16Layer>, + &IsLayerOfType<armnn::FloorLayer>, + 
&IsLayerOfType<armnn::ConvertFp16ToFp32Layer>, + &IsLayerOfType<armnn::OutputLayer>)); +} + BOOST_AUTO_TEST_SUITE_END() diff --git a/src/armnn/test/ProfilerTests.cpp b/src/armnn/test/ProfilerTests.cpp new file mode 100644 index 0000000000..4450c5a08e --- /dev/null +++ b/src/armnn/test/ProfilerTests.cpp @@ -0,0 +1,235 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include <boost/test/unit_test.hpp> +#include <boost/test/output_test_stream.hpp> +#include <boost/algorithm/string.hpp> + +#include <memory> +#include <thread> + +#include <armnn/TypesUtils.hpp> +#include <Profiling.hpp> + +namespace armnn +{ + +size_t GetProfilerEventSequenceSize(armnn::Profiler* profiler) +{ + if (!profiler) + { + return static_cast<size_t>(-1); + } + + return profiler->m_EventSequence.size(); +} +} // namespace armnn + +namespace +{ + +void RegisterUnregisterProfilerSingleThreadImpl() +{ + // Important! Regular assertions must be used in this function for testing (rather than + // BOOST_TEST macros) otherwise multi-threading tests would randomly fail. + + // Get a reference to the profiler manager. + armnn::ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance(); + + // Check that there's no profiler registered for this thread. + assert(!profilerManager.GetProfiler()); + + // Create and register a profiler for this thread. + std::unique_ptr<armnn::Profiler> profiler = std::make_unique<armnn::Profiler>(); + profilerManager.RegisterProfiler(profiler.get()); + + // Check that on a single thread we get the same profiler we registered. + assert(profiler.get() == profilerManager.GetProfiler()); + + // Destroy the profiler. + profiler.reset(); + + // Check that the profiler has been un-registered for this thread. 
+ assert(!profilerManager.GetProfiler()); +} + +} // namespace + +BOOST_AUTO_TEST_SUITE(Profiler) + +BOOST_AUTO_TEST_CASE(EnableDisableProfiling) +{ + std::unique_ptr<armnn::Profiler> profiler = std::make_unique<armnn::Profiler>(); + + // Check that profiling is disabled by default. + BOOST_TEST(!profiler->IsProfilingEnabled()); + + // Enable profiling. + profiler->EnableProfiling(true); + + // Check that profiling is enabled. + BOOST_TEST(profiler->IsProfilingEnabled()); + + // Disable profiling. + profiler->EnableProfiling(false); + + // Check that profiling is disabled. + BOOST_TEST(!profiler->IsProfilingEnabled()); +} + +BOOST_AUTO_TEST_CASE(RegisterUnregisterProfilerSingleThread) +{ + RegisterUnregisterProfilerSingleThreadImpl(); +} + +BOOST_AUTO_TEST_CASE(RegisterUnregisterProfilerMultipleThreads) +{ + std::thread thread1([]() { RegisterUnregisterProfilerSingleThreadImpl(); }); + std::thread thread2([]() { RegisterUnregisterProfilerSingleThreadImpl(); }); + std::thread thread3([]() { RegisterUnregisterProfilerSingleThreadImpl(); }); + + thread1.join(); + thread2.join(); + thread3.join(); +} + +BOOST_AUTO_TEST_CASE(ProfilingMacros) +{ + // Get a reference to the profiler manager. + armnn::ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance(); + + { // --- No profiler --- + + // Check that there's no profiler registered for this thread. + BOOST_TEST(!profilerManager.GetProfiler()); + + // Test scoped event. + { ARMNN_SCOPED_PROFILING_EVENT(armnn::Compute::CpuAcc, "test"); } + + // Check that we still cannot get a profiler for this thread. + BOOST_TEST(!profilerManager.GetProfiler()); + } + + // Create and register a profiler for this thread. + std::unique_ptr<armnn::Profiler> profiler = std::make_unique<armnn::Profiler>(); + profilerManager.RegisterProfiler(profiler.get()); + + { // --- Profiler, but profiling disabled --- + + // Get current event sequence size. 
+ size_t eventSequenceSizeBefore = armnn::GetProfilerEventSequenceSize(profiler.get()); + + // Test scoped macro. + { ARMNN_SCOPED_PROFILING_EVENT(armnn::Compute::CpuAcc, "test"); } + + // Check that no profiling event has been added to the sequence. + size_t eventSequenceSizeAfter = armnn::GetProfilerEventSequenceSize(profiler.get()); + BOOST_TEST(eventSequenceSizeBefore == eventSequenceSizeAfter); + } + + // Enable profiling. + profiler->EnableProfiling(true); + + { // --- Profiler, and profiling enabled --- + + // Get current event sequence size. + size_t eventSequenceSizeBefore = armnn::GetProfilerEventSequenceSize(profiler.get()); + + // Test scoped macro. + { ARMNN_SCOPED_PROFILING_EVENT(armnn::Compute::CpuAcc, "test"); } + + // Check that a profiling event has been added to the sequence. + size_t eventSequenceSizeAfter = armnn::GetProfilerEventSequenceSize(profiler.get()); + BOOST_TEST(eventSequenceSizeAfter == eventSequenceSizeBefore + 1); + } + + // Disable profiling here to not print out anything on stdout. + profiler->EnableProfiling(false); +} + +BOOST_AUTO_TEST_CASE(RuntimeLoadNetwork) +{ + // Get a reference to the profiler manager. + armnn::ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance(); + + // Check that there's no profiler registered for this thread. + BOOST_TEST(!profilerManager.GetProfiler()); + + // Build a mock-network and load it into the runtime. + armnn::IRuntime::CreationOptions options; + armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options)); + armnn::NetworkId networkIdentifier = 1; + armnn::INetworkPtr mockNetwork(armnn::INetwork::Create()); + mockNetwork->AddInputLayer(0, "test layer"); + std::vector<armnn::Compute> backends = { armnn::Compute::CpuRef }; + runtime->LoadNetwork(networkIdentifier, armnn::Optimize(*mockNetwork, backends, runtime->GetDeviceSpec())); + + // Check that now there's a profiler registered for this thread (created and registered by the loading the network). 
+ BOOST_TEST(profilerManager.GetProfiler()); + + // Unload the network. + runtime->UnloadNetwork(networkIdentifier); + + // Check that the profiler has been un-registered for this thread. + BOOST_TEST(!profilerManager.GetProfiler()); +} + +BOOST_AUTO_TEST_CASE(WriteEventResults) +{ + // Get a reference to the profiler manager. + armnn::ProfilerManager& profileManager = armnn::ProfilerManager::GetInstance(); + + // Create and register a profiler for this thread. + std::unique_ptr<armnn::Profiler> profiler = std::make_unique<armnn::Profiler>(); + profileManager.RegisterProfiler(profiler.get()); + + // Enable profiling. + profiler->EnableProfiling(true); + + { // --- Profiler, and profiling enabled --- + + // Get current event sequence size. + size_t eventSequenceSizeBefore = armnn::GetProfilerEventSequenceSize(profiler.get()); + + // Test scoped macro. + { + // Need to directly create a ScopedProfilingEvent as the one created by the macro falls out of scope + // immediately causing the Event.Stop() function method to be called immediately after the Event.Start() + // function resulting in periodic test failures on the Dent and Smith HiKeys + armnn::ScopedProfilingEvent testEvent(armnn::Compute::CpuAcc, "test", armnn::WallClockTimer()); + std::this_thread::sleep_for(std::chrono::milliseconds(10)); + } + + // Check that a profiling event has been added to the sequence. 
+ size_t eventSequenceSizeAfter = armnn::GetProfilerEventSequenceSize(profiler.get()); + BOOST_TEST(eventSequenceSizeAfter == eventSequenceSizeBefore + 1); + + boost::test_tools::output_test_stream output; + profiler->AnalyzeEventsAndWriteResults(output); + BOOST_TEST(!output.is_empty(false)); + + // output should contain event name 'test' + BOOST_CHECK(boost::contains(output.str(), "test")); + + // output should contain headers + BOOST_CHECK(boost::contains(output.str(), "Event Sequence - Name")); + BOOST_CHECK(boost::contains(output.str(), "Event Stats - Name")); + BOOST_CHECK(boost::contains(output.str(), "Total")); + BOOST_CHECK(boost::contains(output.str(), "Device")); + // output should contain compute device 'CpuAcc' + BOOST_CHECK(boost::contains(output.str(), "CpuAcc")); + // output should not contain un-readable numbers + BOOST_CHECK(!(boost::contains(output.str(), "e+"))); + // output should not contain un-readable numbers + BOOST_CHECK(!(boost::contains(output.str(), "+"))); + // output should not contain zero value + BOOST_CHECK(!(boost::contains(output.str(), " 0 "))); + } + + // Disable profiling here to not print out anything on stdout. + profiler->EnableProfiling(false); +} + +BOOST_AUTO_TEST_SUITE_END() diff --git a/src/armnn/test/ProfilingEventTest.cpp b/src/armnn/test/ProfilingEventTest.cpp new file mode 100644 index 0000000000..4d0319d456 --- /dev/null +++ b/src/armnn/test/ProfilingEventTest.cpp @@ -0,0 +1,95 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#include <boost/test/unit_test.hpp> + +#include "ProfilingEvent.hpp" +#include "Profiling.hpp" +#include <thread> + +using namespace armnn; + +BOOST_AUTO_TEST_SUITE(ProfilingEvent) + +BOOST_AUTO_TEST_CASE(ProfilingEventTest) +{ + // Get a reference to the profiler manager. 
+ armnn::ProfilerManager& profileManager = armnn::ProfilerManager::GetInstance(); + + const char* eventName = "EventName"; + + Event::Instruments insts1; + insts1.emplace_back(std::make_unique<WallClockTimer>()); + Event testEvent(eventName, + nullptr, + nullptr, + armnn::Compute::Undefined, + std::move(insts1)); + + BOOST_CHECK_EQUAL(testEvent.GetName(), "EventName"); + + // start the timer - outer + testEvent.Start(); + + // wait for 10 milliseconds + std::this_thread::sleep_for(std::chrono::milliseconds(10)); + + // stop the timer - outer + testEvent.Stop(); + + BOOST_CHECK_GE(testEvent.GetMeasurements().front().m_Value, 10.0); + + // create a sub event with CpuAcc + Event::Instruments insts2; + insts2.emplace_back(std::make_unique<WallClockTimer>()); + Event testEvent2(eventName, + profileManager.GetProfiler(), + &testEvent, + Compute::CpuAcc, + std::move(insts2)); + + BOOST_CHECK_EQUAL(&testEvent, testEvent2.GetParentEvent()); + BOOST_CHECK_EQUAL(profileManager.GetProfiler(), testEvent2.GetProfiler()); + BOOST_CHECK_EQUAL(Compute::CpuAcc, testEvent2.GetComputeDevice()); +} + +BOOST_AUTO_TEST_CASE(ProfilingEventTestOnGpuAcc) +{ + // Get a reference to the profiler manager. 
+ armnn::ProfilerManager& profileManager = armnn::ProfilerManager::GetInstance(); + + const char* eventName = "GPUEvent"; + + Event::Instruments insts1; + insts1.emplace_back(std::make_unique<WallClockTimer>()); + Event testEvent(eventName, + nullptr, + nullptr, + armnn::Compute::Undefined, + std::move(insts1)); + + BOOST_CHECK_EQUAL(testEvent.GetName(), "GPUEvent"); + + // start the timer - outer + testEvent.Start(); + + // wait for 10 milliseconds + std::this_thread::sleep_for(std::chrono::milliseconds(10)); + + // stop the timer - outer + testEvent.Stop(); + + BOOST_CHECK_GE(testEvent.GetMeasurements().front().m_Value, 10.0); + + // create a sub event + Event::Instruments insts2; + insts2.emplace_back(std::make_unique<WallClockTimer>()); + Event testEvent2(eventName, profileManager.GetProfiler(), &testEvent, Compute::GpuAcc, std::move(insts2)); + + BOOST_CHECK_EQUAL(&testEvent, testEvent2.GetParentEvent()); + BOOST_CHECK_EQUAL(profileManager.GetProfiler(), testEvent2.GetProfiler()); + BOOST_CHECK_EQUAL(Compute::GpuAcc, testEvent2.GetComputeDevice()); +} + +BOOST_AUTO_TEST_SUITE_END() diff --git a/src/armnn/test/RuntimeTests.cpp b/src/armnn/test/RuntimeTests.cpp index fcb0a1e7c2..e29a1d4841 100644 --- a/src/armnn/test/RuntimeTests.cpp +++ b/src/armnn/test/RuntimeTests.cpp @@ -32,33 +32,46 @@ BOOST_AUTO_TEST_SUITE(Runtime) BOOST_AUTO_TEST_CASE(RuntimeUnloadNetwork) { // build 2 mock-networks and load them into the runtime - armnn::IRuntimePtr runtime(armnn::IRuntime::Create(armnn::Compute::CpuRef)); + armnn::IRuntime::CreationOptions options; + armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options)); - // mock network 1 + // Mock network 1. 
armnn::NetworkId networkIdentifier1 = 1; armnn::INetworkPtr mockNetwork1(armnn::INetwork::Create()); mockNetwork1->AddInputLayer(0, "test layer"); - runtime->LoadNetwork(networkIdentifier1, Optimize(*mockNetwork1, runtime->GetDeviceSpec())); + std::vector<armnn::Compute> backends = {armnn::Compute::CpuRef}; + runtime->LoadNetwork(networkIdentifier1, Optimize(*mockNetwork1, backends, runtime->GetDeviceSpec())); - // mock network 2 + // Mock network 2. armnn::NetworkId networkIdentifier2 = 2; armnn::INetworkPtr mockNetwork2(armnn::INetwork::Create()); mockNetwork2->AddInputLayer(0, "test layer"); - runtime->LoadNetwork(networkIdentifier2, Optimize(*mockNetwork2, runtime->GetDeviceSpec())); + runtime->LoadNetwork(networkIdentifier2, Optimize(*mockNetwork2, backends, runtime->GetDeviceSpec())); - // unload one by its networkID + // Unloads one by its networkID. BOOST_TEST(runtime->UnloadNetwork(networkIdentifier1) == armnn::Status::Success); BOOST_TEST(runtime->UnloadNetwork(networkIdentifier1) == armnn::Status::Failure); } // Note: the current builds we don't do valgrind and gperftools based leak checking at the same -// time, so in practice WITH_VALGRIND and ARMNN_LEAK_CHECKING_ENABLED are exclusive. In -// the future the gperftools based leak checking should stay and the valgrind based should -// be removed. +// time, so in practice WITH_VALGRIND and ARMNN_LEAK_CHECKING_ENABLED are exclusive. The +// valgrind tests can stay for x86 builds, but on hikey Valgrind is just way too slow +// to be integrated into the CI system. 
-#if ARMNN_LEAK_CHECKING_ENABLED -void CreateAndDropDummyNetwork(armnn::Runtime & runtime) +#ifdef ARMNN_LEAK_CHECKING_ENABLED + +struct DisableGlobalLeakChecking +{ + DisableGlobalLeakChecking() + { + ARMNN_LOCAL_LEAK_CHECKING_ONLY(); + } +}; + +BOOST_GLOBAL_FIXTURE(DisableGlobalLeakChecking); + +void CreateAndDropDummyNetwork(const std::vector<armnn::Compute>& backends, armnn::Runtime& runtime) { armnn::NetworkId networkIdentifier; { @@ -74,12 +87,12 @@ void CreateAndDropDummyNetwork(armnn::Runtime & runtime) input->GetOutputSlot(0).Connect(layer->GetInputSlot(0)); layer->GetOutputSlot(0).Connect(output->GetInputSlot(0)); - // set the tensors in the network + // Sets the tensors in the network. input->GetOutputSlot(0).SetTensorInfo(inputTensorInfo); layer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo); // optimize the network - armnn::IOptimizedNetworkPtr optNet = Optimize(*network, runtime.GetDeviceSpec()); + armnn::IOptimizedNetworkPtr optNet = Optimize(*network, backends, runtime.GetDeviceSpec()); runtime.LoadNetwork(networkIdentifier, std::move(optNet)); } @@ -94,10 +107,13 @@ BOOST_AUTO_TEST_CASE(RuntimeHeapMemoryUsageSanityChecks) ARMNN_SCOPED_LEAK_CHECKER("Sanity_Check_Outer"); { ARMNN_SCOPED_LEAK_CHECKER("Sanity_Check_Inner"); + BOOST_TEST(ARMNN_NO_LEAKS_IN_SCOPE() == true); std::unique_ptr<char[]> dummyAllocation(new char[1000]); - BOOST_TEST(ARMNN_NO_LEAKS_IN_SCOPE() == false); - BOOST_TEST(ARMNN_BYTES_LEAKED_IN_SCOPE() >= 1000); - BOOST_TEST(ARMNN_OBJECTS_LEAKED_IN_SCOPE() >= 1); + BOOST_CHECK_MESSAGE(ARMNN_NO_LEAKS_IN_SCOPE() == false, + "A leak of 1000 bytes is expected here. 
" + "Please make sure environment variable: HEAPCHECK=draconian is set!"); + BOOST_TEST(ARMNN_BYTES_LEAKED_IN_SCOPE() == 1000); + BOOST_TEST(ARMNN_OBJECTS_LEAKED_IN_SCOPE() == 1); } BOOST_TEST(ARMNN_NO_LEAKS_IN_SCOPE()); BOOST_TEST(ARMNN_BYTES_LEAKED_IN_SCOPE() == 0); @@ -109,22 +125,24 @@ BOOST_AUTO_TEST_CASE(RuntimeHeapMemoryUsageSanityChecks) BOOST_AUTO_TEST_CASE(RuntimeMemoryLeaksGpuAcc) { BOOST_TEST(ARMNN_LEAK_CHECKER_IS_ACTIVE()); - - armnn::Runtime runtime(armnn::Compute::GpuAcc); + armnn::IRuntime::CreationOptions options; + armnn::Runtime runtime(options); armnn::RuntimeLoadedNetworksReserve(&runtime); + std::vector<armnn::Compute> backends = {armnn::Compute::GpuAcc}; { // Do a warmup of this so we make sure that all one-time // initialization happens before we do the leak checking. - CreateAndDropDummyNetwork(runtime); + CreateAndDropDummyNetwork(backends, runtime); } { ARMNN_SCOPED_LEAK_CHECKER("LoadAndUnloadNetworkGpuAcc"); + BOOST_TEST(ARMNN_NO_LEAKS_IN_SCOPE()); // In the second run we check for all remaining memory // in use after the network was unloaded. If there is any // then it will be treated as a memory leak. - CreateAndDropDummyNetwork(runtime); + CreateAndDropDummyNetwork(backends, runtime); BOOST_TEST(ARMNN_NO_LEAKS_IN_SCOPE()); BOOST_TEST(ARMNN_BYTES_LEAKED_IN_SCOPE() == 0); BOOST_TEST(ARMNN_OBJECTS_LEAKED_IN_SCOPE() == 0); @@ -136,22 +154,24 @@ BOOST_AUTO_TEST_CASE(RuntimeMemoryLeaksGpuAcc) BOOST_AUTO_TEST_CASE(RuntimeMemoryLeaksCpuAcc) { BOOST_TEST(ARMNN_LEAK_CHECKER_IS_ACTIVE()); - - armnn::Runtime runtime(armnn::Compute::CpuAcc); + armnn::IRuntime::CreationOptions options; + armnn::Runtime runtime(options); armnn::RuntimeLoadedNetworksReserve(&runtime); + std::vector<armnn::Compute> backends = {armnn::Compute::CpuAcc}; { // Do a warmup of this so we make sure that all one-time // initialization happens before we do the leak checking. 
- CreateAndDropDummyNetwork(runtime); + CreateAndDropDummyNetwork(backends, runtime); } { ARMNN_SCOPED_LEAK_CHECKER("LoadAndUnloadNetworkCpuAcc"); + BOOST_TEST(ARMNN_NO_LEAKS_IN_SCOPE()); // In the second run we check for all remaining memory // in use after the network was unloaded. If there is any // then it will be treated as a memory leak. - CreateAndDropDummyNetwork(runtime); + CreateAndDropDummyNetwork(backends, runtime); BOOST_TEST(ARMNN_NO_LEAKS_IN_SCOPE()); BOOST_TEST(ARMNN_BYTES_LEAKED_IN_SCOPE() == 0); BOOST_TEST(ARMNN_OBJECTS_LEAKED_IN_SCOPE() == 0); @@ -163,21 +183,24 @@ BOOST_AUTO_TEST_CASE(RuntimeMemoryLeaksCpuRef) { BOOST_TEST(ARMNN_LEAK_CHECKER_IS_ACTIVE()); - armnn::Runtime runtime(armnn::Compute::CpuRef); + armnn::IRuntime::CreationOptions options; + armnn::Runtime runtime(options); armnn::RuntimeLoadedNetworksReserve(&runtime); + std::vector<armnn::Compute> backends = {armnn::Compute::CpuRef}; { // Do a warmup of this so we make sure that all one-time // initialization happens before we do the leak checking. - CreateAndDropDummyNetwork(runtime); + CreateAndDropDummyNetwork(backends, runtime); } { ARMNN_SCOPED_LEAK_CHECKER("LoadAndUnloadNetworkCpuRef"); + BOOST_TEST(ARMNN_NO_LEAKS_IN_SCOPE()); // In the second run we check for all remaining memory // in use after the network was unloaded. If there is any // then it will be treated as a memory leak. - CreateAndDropDummyNetwork(runtime); + CreateAndDropDummyNetwork(backends, runtime); BOOST_TEST(ARMNN_NO_LEAKS_IN_SCOPE()); BOOST_TEST(ARMNN_BYTES_LEAKED_IN_SCOPE() == 0); BOOST_TEST(ARMNN_OBJECTS_LEAKED_IN_SCOPE() == 0); @@ -199,25 +222,28 @@ BOOST_AUTO_TEST_CASE(RuntimeMemoryUsage) // A start-pointer or chain of start-pointers to the block is found. Since the block is still pointed at, // the programmer could, at least in principle, have freed it before program exit. 
- // We want to test this in case memory is not freed as early as it could have been + // We want to test this in case memory is not freed as early as it could have been. unsigned long reachableBefore = 0; unsigned long reachableAfter = 0; - // needed as out params but we don't test them + // Needed as out params but we don't test them. unsigned long dubious = 0; unsigned long suppressed = 0; - // ensure that runtime is large enough before checking for memory leaks - // otherwise when loading the network it will automatically reserve memory that won't be released until destruction + // Ensure that runtime is large enough before checking for memory leaks. + // Otherwise, when loading the network, it will automatically reserve memory that won't be released + // until destruction. armnn::NetworkId networkIdentifier; - armnn::Runtime runtime(armnn::Compute::GpuAcc); + armnn::IRuntime::CreationOptions options; + armnn::Runtime runtime(options); armnn::RuntimeLoadedNetworksReserve(&runtime); - // check for leaks before we load the network and record them so that we can see the delta after unloading + // Checks for leaks before we load the network and record them so that we can see the delta after unloading. VALGRIND_DO_QUICK_LEAK_CHECK; VALGRIND_COUNT_LEAKS(leakedBefore, dubious, reachableBefore, suppressed); // build a mock-network and load it into the runtime + std::vector<armnn::Compute> backends = {armnn::Compute::GpuAcc}; { armnn::TensorInfo inputTensorInfo(armnn::TensorShape({ 7, 7 }), armnn::DataType::Float32); armnn::TensorInfo outputTensorInfo(armnn::TensorShape({ 7, 7 }), armnn::DataType::Float32); @@ -231,12 +257,12 @@ BOOST_AUTO_TEST_CASE(RuntimeMemoryUsage) input->GetOutputSlot(0).Connect(layer->GetInputSlot(0)); layer->GetOutputSlot(0).Connect(output->GetInputSlot(0)); - // set the tensors in the network + // Sets the tensors in the network. 
input->GetOutputSlot(0).SetTensorInfo(inputTensorInfo); layer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo); // optimize the network - armnn::IOptimizedNetworkPtr optNet = Optimize(*mockNetwork, runtime.GetDeviceSpec()); + armnn::IOptimizedNetworkPtr optNet = Optimize(*mockNetwork, backends, runtime.GetDeviceSpec()); runtime.LoadNetwork(networkIdentifier, std::move(optNet)); } @@ -246,16 +272,16 @@ BOOST_AUTO_TEST_CASE(RuntimeMemoryUsage) VALGRIND_DO_ADDED_LEAK_CHECK; VALGRIND_COUNT_LEAKS(leakedAfter, dubious, reachableAfter, suppressed); - // if we're not running under Valgrind, these vars will have been initialised to 0, so this will always pass + // If we're not running under Valgrind, these vars will have been initialised to 0, so this will always pass. BOOST_TEST(leakedBefore == leakedAfter); // Add resonable threshold after and before running valgrind with the ACL clear cache function. // TODO Threshold set to 80k until the root cause of the memory leakage is found and fixed. Revert threshold - // value to 1024 when fixed + // value to 1024 when fixed. BOOST_TEST(static_cast<long>(reachableAfter) - static_cast<long>(reachableBefore) < 81920); - // these are needed because VALGRIND_COUNT_LEAKS is a macro that assigns to the parameters - // so they are assigned to, but still considered unused, causing a warning + // These are needed because VALGRIND_COUNT_LEAKS is a macro that assigns to the parameters + // so they are assigned to, but still considered unused, causing a warning. boost::ignore_unused(dubious); boost::ignore_unused(suppressed); } @@ -263,7 +289,7 @@ BOOST_AUTO_TEST_CASE(RuntimeMemoryUsage) // Note: this part of the code is due to be removed when we fully trust the gperftools based results. 
#ifdef WITH_VALGRIND -// run with the following command to get all the amazing output (in the devenv/build folder) :) +// Run with the following command to get all the amazing output (in the devenv/build folder) :) // valgrind --leak-check=full --show-leak-kinds=all --log-file=Valgrind_Memcheck_Leak_Report.txt armnn/test/UnitTests BOOST_AUTO_TEST_CASE(RuntimeMemoryLeak) { @@ -276,11 +302,11 @@ BOOST_AUTO_TEST_CASE(RuntimeMemoryLeak) // A start-pointer or chain of start-pointers to the block is found. Since the block is still pointed at, // the programmer could, at least in principle, have freed it before program exit. - // We want to test this in case memory is not freed as early as it could have been + // We want to test this in case memory is not freed as early as it could have been. unsigned long reachableBefore = 0; unsigned long reachableAfter = 0; - // needed as out params but we don't test them + // Needed as out params but we don't test them. unsigned long dubious = 0; unsigned long suppressed = 0; @@ -288,14 +314,15 @@ BOOST_AUTO_TEST_CASE(RuntimeMemoryLeak) // ensure that runtime is large enough before checking for memory leaks // otherwise when loading the network it will automatically reserve memory that won't be released until destruction - armnn::Runtime runtime(armnn::Compute::CpuRef); + armnn::IRuntime::CreationOptions options; + armnn::Runtime runtime(options); armnn::RuntimeLoadedNetworksReserve(&runtime); - // check for leaks before we load the network and record them so that we can see the delta after unloading + // Checks for leaks before we load the network and record them so that we can see the delta after unloading. VALGRIND_DO_QUICK_LEAK_CHECK; VALGRIND_COUNT_LEAKS(leakedBefore, dubious, reachableBefore, suppressed); - // build a mock-network and load it into the runtime + // Builds a mock-network and load it into the runtime. 
{ unsigned int inputShape[] = {1, 7, 1, 1}; armnn::TensorInfo inputTensorInfo(4, inputShape, armnn::DataType::Float32); @@ -303,10 +330,9 @@ BOOST_AUTO_TEST_CASE(RuntimeMemoryLeak) std::unique_ptr<armnn::Network> mockNetwork1 = std::make_unique<armnn::Network>(); mockNetwork1->AddInputLayer(0, "test layer"); - armnn::DeviceSpec device; - device.DefaultComputeDevice = armnn::Compute::CpuRef; - runtime.LoadNetwork(networkIdentifier1, Optimize(*mockNetwork1, device)); + std::vector<armnn::Compute> backends = {armnn::Compute::CpuRef}; + runtime.LoadNetwork(networkIdentifier1, Optimize(*mockNetwork1, backends, runtime.GetDeviceSpec())); } runtime.UnloadNetwork(networkIdentifier1); @@ -314,7 +340,7 @@ BOOST_AUTO_TEST_CASE(RuntimeMemoryLeak) VALGRIND_DO_ADDED_LEAK_CHECK; VALGRIND_COUNT_LEAKS(leakedAfter, dubious, reachableAfter, suppressed); - // if we're not running under Valgrind, these vars will have been initialised to 0, so this will always pass + // If we're not running under Valgrind, these vars will have been initialised to 0, so this will always pass. BOOST_TEST(leakedBefore == leakedAfter); #if defined(ARMCOMPUTECL_ENABLED) @@ -329,11 +355,134 @@ BOOST_AUTO_TEST_CASE(RuntimeMemoryLeak) BOOST_TEST(reachableBefore >= reachableAfter); - // these are needed because VALGRIND_COUNT_LEAKS is a macro that assigns to the parameters - // so they are assigned to, but still considered unused, causing a warning + // These are needed because VALGRIND_COUNT_LEAKS is a macro that assigns to the parameters + // so they are assigned to, but still considered unused, causing a warning. 
boost::ignore_unused(dubious); boost::ignore_unused(suppressed); } #endif +#if ARMCOMPUTENEON_ENABLED +BOOST_AUTO_TEST_CASE(RuntimeValidateCpuAccDeviceSupportLayerNoFallback) +{ + // build up the structure of the network + armnn::INetworkPtr net(armnn::INetwork::Create()); + + armnn::IConnectableLayer* input = net->AddInputLayer(0); + + armnn::IConnectableLayer* output = net->AddOutputLayer(0); + + input->GetOutputSlot(0).Connect(output->GetInputSlot(0)); + + input->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32)); + + armnn::IRuntime::CreationOptions options; + armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options)); + + std::vector<armnn::Compute> backends = { armnn::Compute::CpuAcc }; + armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec()); + BOOST_CHECK(optNet); + + // Load it into the runtime. It should success. + armnn::NetworkId netId; + BOOST_TEST(runtime->LoadNetwork(netId, std::move(optNet)) == armnn::Status::Success); +} +#endif // ARMCOMPUTENEON_ENABLED + +#if ARMCOMPUTECL_ENABLED +BOOST_AUTO_TEST_CASE(RuntimeValidateGpuDeviceSupportLayerNoFallback) +{ + // build up the structure of the network + armnn::INetworkPtr net(armnn::INetwork::Create()); + + armnn::IConnectableLayer* input = net->AddInputLayer(0); + + armnn::IConnectableLayer* output = net->AddOutputLayer(0); + + input->GetOutputSlot(0).Connect(output->GetInputSlot(0)); + + input->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32)); + + armnn::IRuntime::CreationOptions options; + armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options)); + + std::vector<armnn::Compute> backends = { armnn::Compute::GpuAcc }; + armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec()); + BOOST_CHECK(optNet); + + // Load it into the runtime. It should success. 
+ armnn::NetworkId netId; + BOOST_TEST(runtime->LoadNetwork(netId, std::move(optNet)) == armnn::Status::Success); +} +#endif // ARMCOMPUTECL_ENABLED + +BOOST_AUTO_TEST_CASE(RuntimeCpuRef) +{ + using namespace armnn; + + // Create runtime in which test will run + armnn::IRuntime::CreationOptions options; + armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options)); + + // build up the structure of the network + INetworkPtr net(INetwork::Create()); + + IConnectableLayer* input = net->AddInputLayer(0); + + // This layer configuration isn't supported by CpuAcc, should be fall back to CpuRef. + NormalizationDescriptor descriptor; + IConnectableLayer* normalize = net->AddNormalizationLayer(descriptor); + + IConnectableLayer* output = net->AddOutputLayer(0); + + input->GetOutputSlot(0).Connect(normalize->GetInputSlot(0)); + normalize->GetOutputSlot(0).Connect(output->GetInputSlot(0)); + + input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 4 }, DataType::Float32)); + normalize->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 4 }, DataType::Float32)); + + // optimize the network + std::vector<armnn::Compute> backends = { armnn::Compute::CpuRef }; + IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec()); + + // Load it into the runtime. It should success. + armnn::NetworkId netId; + BOOST_TEST(runtime->LoadNetwork(netId, std::move(optNet)) == Status::Success); +} + +BOOST_AUTO_TEST_CASE(RuntimeFallbackToCpuRef) +{ + using namespace armnn; + + // Create runtime in which test will run + armnn::IRuntime::CreationOptions options; + armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options)); + + // build up the structure of the network + INetworkPtr net(INetwork::Create()); + + IConnectableLayer* input = net->AddInputLayer(0); + + // This layer configuration isn't supported by CpuAcc, should be fall back to CpuRef. 
+ NormalizationDescriptor descriptor; + IConnectableLayer* normalize = net->AddNormalizationLayer(descriptor); + + IConnectableLayer* output = net->AddOutputLayer(0); + + input->GetOutputSlot(0).Connect(normalize->GetInputSlot(0)); + normalize->GetOutputSlot(0).Connect(output->GetInputSlot(0)); + + input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 4 }, DataType::Float32)); + normalize->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 4 }, DataType::Float32)); + + // Allow fallback to CpuRef. + std::vector<armnn::Compute> backends = { armnn::Compute::CpuAcc, armnn::Compute::CpuRef }; + // optimize the network + IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec()); + + // Load it into the runtime. It should succeed. + armnn::NetworkId netId; + BOOST_TEST(runtime->LoadNetwork(netId, std::move(optNet)) == Status::Success); +} + BOOST_AUTO_TEST_SUITE_END() diff --git a/src/armnn/test/TensorHelpers.hpp b/src/armnn/test/TensorHelpers.hpp index aac4c1d15e..ec38940a44 100644 --- a/src/armnn/test/TensorHelpers.hpp +++ b/src/armnn/test/TensorHelpers.hpp @@ -39,7 +39,7 @@ struct SelectiveComparer<T, false> { static bool Compare(T a, T b) { - // if a or b is zero, percent_tolerance does an exact match, so compare to a small, constant tolerance instead + // If a or b is zero, percent_tolerance does an exact match, so compare to a small, constant tolerance instead. if (a == 0.0f || b == 0.0f) { return std::abs(a - b) <= g_FloatCloseToZeroTolerance; @@ -62,7 +62,7 @@ template <typename T, std::size_t n> boost::test_tools::predicate_result CompareTensors(const boost::multi_array<T, n>& a, const boost::multi_array<T, n>& b) { - // check they are same shape + // Checks they are same shape. for (unsigned int i=0; i<n; i++) { if (a.shape()[i] != b.shape()[i]) @@ -77,9 +77,9 @@ boost::test_tools::predicate_result CompareTensors(const boost::multi_array<T, n } } - // now compare element-wise + // Now compares element-wise. 
- // fun iteration over n dimensions + // Fun iteration over n dimensions. std::array<unsigned int, n> indices; for (unsigned int i = 0; i < n; i++) { @@ -150,7 +150,7 @@ boost::test_tools::predicate_result CompareTensors(const boost::multi_array<T, n } -// Creates a boost::multi_array with shape defined by the given TensorInfo. +// Creates a boost::multi_array with the shape defined by the given TensorInfo. template <typename T, std::size_t n> boost::multi_array<T, n> MakeTensor(const armnn::TensorInfo& tensorInfo) { @@ -164,7 +164,7 @@ boost::multi_array<T, n> MakeTensor(const armnn::TensorInfo& tensorInfo) return boost::multi_array<T, n>(shape); } -// Creates a boost::multi_array with shape defined by the given TensorInfo and contents defined by the given vector. +// Creates a boost::multi_array with the shape defined by the given TensorInfo and contents defined by the given vector. template <typename T, std::size_t n> boost::multi_array<T, n> MakeTensor(const armnn::TensorInfo& tensorInfo, const std::vector<T>& flat) { diff --git a/src/armnn/test/TensorTest.cpp b/src/armnn/test/TensorTest.cpp index 2bb37f4fb8..8057d4dd7a 100644 --- a/src/armnn/test/TensorTest.cpp +++ b/src/armnn/test/TensorTest.cpp @@ -8,7 +8,7 @@ namespace armnn { -// Add unit test framework for interpreting TensorInfo type +// Adds unit test framework for interpreting TensorInfo type. std::ostream& boost_test_print_type(std::ostream& ostr, const TensorInfo& right) { ostr << "TensorInfo[ " @@ -115,7 +115,7 @@ BOOST_AUTO_TEST_CASE(TensorVsConstTensor) armnn::Tensor t(TensorInfo(), &mutableDatum); armnn::ConstTensor ct(TensorInfo(), &immutableDatum); - // Check that both Tensor and ConstTensor can be passed as a ConstTensor + // Checks that both Tensor and ConstTensor can be passed as a ConstTensor. 
CheckTensor(t); CheckTensor(ct); } @@ -136,9 +136,9 @@ BOOST_AUTO_TEST_CASE(ModifyTensorInfo) BOOST_AUTO_TEST_CASE(TensorShapeOperatorBrackets) { TensorShape shape({0,1,2,3}); - // Check version of operator[] which returns an unsigned int + // Checks version of operator[] which returns an unsigned int. BOOST_TEST(shape[2] == 2); - // Check the version of operator[] which returns a reference + // Checks the version of operator[] which returns a reference. shape[2] = 20; BOOST_TEST(shape[2] == 20); } diff --git a/src/armnn/test/UnitTests.cpp b/src/armnn/test/UnitTests.cpp index 0e2f99583f..203fbfe821 100644 --- a/src/armnn/test/UnitTests.cpp +++ b/src/armnn/test/UnitTests.cpp @@ -44,7 +44,7 @@ class SetupDebugOutput public: SetupDebugOutput() { - // Send the output to both cout (as standard) and the debug output. + // Sends the output to both cout (as standard) and the debug output. m_OutputStream.push(tee(std::cout)); m_OutputStream.push(m_DebugOutputSink); diff --git a/src/armnn/test/UnitTests.hpp b/src/armnn/test/UnitTests.hpp index 9b750b5b33..8d5c7055e7 100644 --- a/src/armnn/test/UnitTests.hpp +++ b/src/armnn/test/UnitTests.hpp @@ -12,7 +12,7 @@ inline void ConfigureLoggingTest() { - // Configure logging for both the ARMNN library and this test program + // Configures logging for both the ARMNN library and this test program. 
armnn::ConfigureLogging(true, true, armnn::LogSeverity::Fatal); armnnUtils::ConfigureLogging(boost::log::core::get().get(), true, true, armnn::LogSeverity::Fatal); } @@ -43,9 +43,27 @@ void CompareTestResultIfSupported(const std::string& testName, const LayerTestRe } } +template <typename T, std::size_t n> +void CompareTestResultIfSupported(const std::string& testName, const std::vector<LayerTestResult<T, n>>& testResult) +{ + bool testNameIndicatesUnsupported = testName.find("UNSUPPORTED") != std::string::npos; + for (unsigned int i = 0; i < testResult.size(); ++i) + { + BOOST_CHECK_MESSAGE(testNameIndicatesUnsupported != testResult[i].supported, + "The test name does not match the supportedness it is reporting"); + if (testResult[i].supported) + { + BOOST_TEST(CompareTensors(testResult[i].output, testResult[i].outputExpected)); + } + } +} + template<typename FactoryType, typename TFuncPtr, typename... Args> void RunTestFunction(const char* testName, TFuncPtr testFunction, Args... args) { + std::unique_ptr<armnn::Profiler> profiler = std::make_unique<armnn::Profiler>(); + armnn::ProfilerManager::GetInstance().RegisterProfiler(profiler.get()); + FactoryType workloadFactory; auto testResult = (*testFunction)(workloadFactory, args...); CompareTestResultIfSupported(testName, testResult); diff --git a/src/armnn/test/UtilsTests.cpp b/src/armnn/test/UtilsTests.cpp index 11fa51626c..2268aa31e2 100644 --- a/src/armnn/test/UtilsTests.cpp +++ b/src/armnn/test/UtilsTests.cpp @@ -4,10 +4,14 @@ // #include <boost/test/unit_test.hpp> + #include <armnn/Utils.hpp> #include <armnn/Types.hpp> #include <armnn/TypesUtils.hpp> #include <armnn/Descriptors.hpp> +#include <GraphTopologicalSort.hpp> +#include <Graph.hpp> +#include "TypeUtils.hpp" BOOST_AUTO_TEST_SUITE(Utils) @@ -55,4 +59,110 @@ BOOST_AUTO_TEST_CASE(PermuteDescriptorWithDuplicatedMappings) BOOST_CHECK_THROW(armnn::PermuteDescriptor({ 1u, 1u, 0u }), armnn::InvalidArgumentException); } +BOOST_AUTO_TEST_CASE(HalfType) +{ + 
using namespace half_float::literal; + armnn::Half a = 1.0_h; + + float b = 1.0f; + armnn::Half c(b); + + // Test half type + BOOST_CHECK_EQUAL(a, b); + BOOST_CHECK_EQUAL(sizeof(c), 2); + + // Test half type is floating point type + BOOST_CHECK(std::is_floating_point<armnn::Half>::value); + + // Test utility function returns correct type. + using ResolvedType = armnn::ResolveType<armnn::DataType::Float16>; + constexpr bool isHalfType = std::is_same<armnn::Half, ResolvedType>::value; + BOOST_CHECK(isHalfType); + + armnn::DataType dt = armnn::GetDataType<armnn::Half>(); + BOOST_CHECK(dt == armnn::DataType::Float16); + + //Test utility functions return correct size + BOOST_CHECK(GetDataTypeSize(armnn::DataType::Float16) == 2); + + //Test utility functions return correct name + BOOST_CHECK((GetDataTypeName(armnn::DataType::Float16) == std::string("Float16"))); +} + +BOOST_AUTO_TEST_CASE(GraphTopologicalSortSimpleTest) +{ + std::map<int, std::vector<int>> graph; + + graph[0] = {2}; + graph[1] = {3}; + graph[2] = {4}; + graph[3] = {4}; + graph[4] = {5}; + graph[5] = {}; + + auto getNodeInputs = [graph](int node) -> std::vector<int> + { + return graph.find(node)->second; + }; + + std::vector<int> targetNodes = {0, 1}; + + std::vector<int> output; + bool sortCompleted = armnnUtils::GraphTopologicalSort<int>(targetNodes, getNodeInputs, output); + + BOOST_TEST(sortCompleted); + + std::vector<int> correctResult = {5, 4, 2, 0, 3, 1}; + BOOST_CHECK_EQUAL_COLLECTIONS(output.begin(), output.end(), correctResult.begin(), correctResult.end()); +} + +BOOST_AUTO_TEST_CASE(GraphTopologicalSortVariantTest) +{ + std::map<int, std::vector<int>> graph; + + graph[0] = {2}; + graph[1] = {2}; + graph[2] = {3, 4}; + graph[3] = {5}; + graph[4] = {5}; + graph[5] = {6}; + graph[6] = {}; + + auto getNodeInputs = [graph](int node) -> std::vector<int> + { + return graph.find(node)->second; + }; + + std::vector<int> targetNodes = {0, 1}; + + std::vector<int> output; + bool sortCompleted = 
armnnUtils::GraphTopologicalSort<int>(targetNodes, getNodeInputs, output); + + BOOST_TEST(sortCompleted); + + std::vector<int> correctResult = {6, 5, 3, 4, 2, 0, 1}; + BOOST_CHECK_EQUAL_COLLECTIONS(output.begin(), output.end(), correctResult.begin(), correctResult.end()); +} + +BOOST_AUTO_TEST_CASE(CyclicalGraphTopologicalSortTest) +{ + std::map<int, std::vector<int>> graph; + + graph[0] = {1}; + graph[1] = {2}; + graph[2] = {0}; + + auto getNodeInputs = [graph](int node) -> std::vector<int> + { + return graph.find(node)->second; + }; + + std::vector<int> targetNodes = {0}; + + std::vector<int> output; + bool sortCompleted = armnnUtils::GraphTopologicalSort<int>(targetNodes, getNodeInputs, output); + + BOOST_TEST(!sortCompleted); +} + BOOST_AUTO_TEST_SUITE_END() |