diff options
Diffstat (limited to 'src/armnn/test/OptimizerTests.cpp')
-rw-r--r-- | src/armnn/test/OptimizerTests.cpp | 498 |
1 files changed, 469 insertions, 29 deletions
diff --git a/src/armnn/test/OptimizerTests.cpp b/src/armnn/test/OptimizerTests.cpp index da26fba76e..0c1a2619b2 100644 --- a/src/armnn/test/OptimizerTests.cpp +++ b/src/armnn/test/OptimizerTests.cpp @@ -7,6 +7,8 @@ #include "armnn/ArmNN.hpp" #include "Graph.hpp" #include "Optimizer.hpp" +#include "backends/CpuTensorHandle.hpp" +#include "FloatingPointConverter.hpp" namespace { @@ -21,7 +23,7 @@ bool CheckSequence(const armnn::Graph::ConstIterator first, const armnn::Graph:: return (first == last); } -/// Check each unary function in Us evaluates true for each correspondent layer in the sequence [first, last) +/// Checks each unary function in Us evaluates true for each correspondent layer in the sequence [first, last). template <typename U, typename... Us> bool CheckSequence(const armnn::Graph::ConstIterator first, const armnn::Graph::ConstIterator last, @@ -30,11 +32,149 @@ bool CheckSequence(const armnn::Graph::ConstIterator first, { return u(*first) && CheckSequence(std::next(first), last, us...); } + +template <typename LayerT> +bool CheckRelatedLayers(armnn::Graph& graph, const std::list<std::string>& testRelatedLayers) +{ + for (auto& layer : graph) + { + if (layer->GetType() == armnn::LayerEnumOf<LayerT>()) + { + auto& relatedLayers = layer->GetRelatedLayerNames(); + if(!std::equal(relatedLayers.begin(), relatedLayers.end(), + testRelatedLayers.begin(), testRelatedLayers.end())) + { + return false; + } + } + } + + return true; +} + +// connects two layers +using namespace armnn; +void Connect(Layer* from, Layer* to, const TensorInfo& tensorInfo, unsigned int fromIndex = 0, unsigned int toIndex = 0) +{ + from->GetOutputSlot(fromIndex).Connect(to->GetInputSlot(toIndex)); + from->GetOutputHandler(fromIndex).SetTensorInfo(tensorInfo); +} + +void CreateLSTMLayerHelper(Graph &graph, bool CifgEnabled) +{ + LstmDescriptor layerDesc; + layerDesc.m_ActivationFunc = 4; + layerDesc.m_ClippingThresCell = 0.2f; + layerDesc.m_ClippingThresProj = 0.4f; + layerDesc.m_CifgEnabled = CifgEnabled; + layerDesc.m_PeepholeEnabled = false; + layerDesc.m_ProjectionEnabled = false; + + LstmLayer* const layer = graph.AddLayer<LstmLayer>(layerDesc, "layer"); + unsigned int batchSize = 3; + unsigned int inputSize = 2; + unsigned int numUnits = 4; + unsigned int outputSize = 4; + + layer->m_BasicParameters.m_InputToForgetWeights = std::make_unique<ScopedCpuTensorHandle> + (TensorInfo({ numUnits, inputSize }, DataType::Float32)); + layer->m_BasicParameters.m_InputToCellWeights = std::make_unique<ScopedCpuTensorHandle> + (TensorInfo({ numUnits, inputSize }, DataType::Float32)); + layer->m_BasicParameters.m_InputToOutputWeights = std::make_unique<ScopedCpuTensorHandle> + (TensorInfo({ numUnits, inputSize }, DataType::Float32)); + layer->m_BasicParameters.m_RecurrentToForgetWeights = std::make_unique<ScopedCpuTensorHandle> + (TensorInfo({ numUnits, outputSize }, DataType::Float32)); + layer->m_BasicParameters.m_RecurrentToCellWeights = std::make_unique<ScopedCpuTensorHandle> + (TensorInfo({ numUnits, outputSize }, DataType::Float32)); + layer->m_BasicParameters.m_RecurrentToOutputWeights = std::make_unique<ScopedCpuTensorHandle> + (TensorInfo({ numUnits, outputSize }, DataType::Float32)); + layer->m_BasicParameters.m_ForgetGateBias = std::make_unique<ScopedCpuTensorHandle> + (TensorInfo({ numUnits }, DataType::Float32)); + layer->m_BasicParameters.m_CellBias = std::make_unique<ScopedCpuTensorHandle> + (TensorInfo({ numUnits }, DataType::Float32)); + layer->m_BasicParameters.m_OutputGateBias = std::make_unique<ScopedCpuTensorHandle> + (TensorInfo({ numUnits }, DataType::Float32)); + + layer->m_BasicParameters.m_InputToForgetWeights->Allocate(); + layer->m_BasicParameters.m_InputToCellWeights->Allocate(); + layer->m_BasicParameters.m_InputToOutputWeights->Allocate(); + layer->m_BasicParameters.m_RecurrentToForgetWeights->Allocate(); + layer->m_BasicParameters.m_RecurrentToCellWeights->Allocate(); + layer->m_BasicParameters.m_RecurrentToOutputWeights->Allocate(); + layer->m_BasicParameters.m_ForgetGateBias->Allocate(); + layer->m_BasicParameters.m_CellBias->Allocate(); + layer->m_BasicParameters.m_OutputGateBias->Allocate(); + + if (!layerDesc.m_CifgEnabled) + { + layer->m_CifgParameters.m_InputToInputWeights = std::make_unique<ScopedCpuTensorHandle> + (TensorInfo({ numUnits, inputSize }, DataType::Float32)); + layer->m_CifgParameters.m_RecurrentToInputWeights = std::make_unique<ScopedCpuTensorHandle> + (TensorInfo({ numUnits, outputSize }, DataType::Float32)); + layer->m_CifgParameters.m_CellToInputWeights = std::make_unique<ScopedCpuTensorHandle> + (TensorInfo({ numUnits }, DataType::Float32)); + layer->m_CifgParameters.m_InputGateBias = std::make_unique<ScopedCpuTensorHandle> + (TensorInfo({ numUnits }, DataType::Float32)); + layer->m_CifgParameters.m_InputToInputWeights->Allocate(); + layer->m_CifgParameters.m_RecurrentToInputWeights->Allocate(); + layer->m_CifgParameters.m_CellToInputWeights->Allocate(); + layer->m_CifgParameters.m_InputGateBias->Allocate(); + } + + if (layerDesc.m_ProjectionEnabled) + { + layer->m_ProjectionParameters.m_ProjectionWeights = std::make_unique<ScopedCpuTensorHandle> + (TensorInfo({ outputSize, numUnits }, DataType::Float32)); + layer->m_ProjectionParameters.m_ProjectionBias = std::make_unique<ScopedCpuTensorHandle> + (TensorInfo({ outputSize }, DataType::Float32)); + layer->m_ProjectionParameters.m_ProjectionWeights->Allocate(); + layer->m_ProjectionParameters.m_ProjectionBias->Allocate(); + } + + if (layerDesc.m_PeepholeEnabled) + { + layer->m_PeepholeParameters.m_CellToForgetWeights = std::make_unique<ScopedCpuTensorHandle> + (TensorInfo({ numUnits }, DataType::Float32)); + layer->m_PeepholeParameters.m_CellToOutputWeights = std::make_unique<ScopedCpuTensorHandle> + (TensorInfo({ numUnits }, DataType::Float32)); + layer->m_PeepholeParameters.m_CellToForgetWeights->Allocate(); + layer->m_PeepholeParameters.m_CellToOutputWeights->Allocate(); + } + + // create input and output layers + Layer* const input = graph.AddLayer<InputLayer>(0, "input"); + Layer* const outputStateIn = graph.AddLayer<InputLayer>(1, "outputStateIn"); + Layer* const cellStateIn = graph.AddLayer<InputLayer>(2, "cellStateIn"); + Layer* const scratchBuffer = graph.AddLayer<OutputLayer>(0, "scratchBuffer"); + Layer* const outputStateOut = graph.AddLayer<OutputLayer>(1, "outputStateOut"); + Layer* const cellStateOut = graph.AddLayer<OutputLayer>(2, "cellStateOut"); + Layer* const output = graph.AddLayer<OutputLayer>(3, "output"); + + // connect up + armnn::TensorInfo lstmTensorInfo1({ batchSize, inputSize }, DataType::Float32); + armnn::TensorInfo lstmTensorInfo2({ batchSize, numUnits}, DataType::Float32); + armnn::TensorInfo lstmTensorInfo3({ batchSize, outputSize }, DataType::Float32); + armnn::TensorInfo lstmTensorInfoScratchBuff({ batchSize, numUnits*3 }, DataType::Float32); + if (layerDesc.m_CifgEnabled) + { + lstmTensorInfoScratchBuff.SetShape({ batchSize, numUnits*4 }); + } + + Connect(input, layer, lstmTensorInfo1, 0, 0); + Connect(cellStateIn, layer, lstmTensorInfo2, 0, 1); + Connect(outputStateIn, layer, lstmTensorInfo3, 0, 2); + Connect(layer, scratchBuffer, lstmTensorInfoScratchBuff, 0, 0); + Connect(layer, outputStateOut, lstmTensorInfo3, 1, 0); + Connect(layer, cellStateOut, lstmTensorInfo2, 2, 0); + Connect(layer, output, lstmTensorInfo3, 3, 0); +} + } BOOST_AUTO_TEST_SUITE(Optimizer) +using namespace armnn::optimizations; -BOOST_AUTO_TEST_CASE(OptimizeInversePermutes) +BOOST_AUTO_TEST_CASE(OptimizeInversePermutesTest) { armnn::Graph graph; @@ -42,7 +182,7 @@ BOOST_AUTO_TEST_CASE(OptimizeInversePermutes) graph.InsertNewLayer<armnn::InputLayer>(output->GetInputSlot(0), 0, "input"); - // Insert two permutes, one the inverse of the other + // Inserts two permutes, one the inverse of the other. graph.InsertNewLayer<armnn::PermuteLayer>(output->GetInputSlot(0), armnn::PermuteDescriptor({0, 2, 3, 1}), "perm0231"); @@ -57,16 +197,38 @@ BOOST_AUTO_TEST_CASE(OptimizeInversePermutes) &IsLayerOfType<armnn::PermuteLayer>, &IsLayerOfType<armnn::OutputLayer>)); - armnn::Optimizer::Optimize(graph); + armnn::Optimizer::Pass(graph, armnn::MakeOptimizations(OptimizeInversePermutes())); - // The permutes are removed + // The permutes are removed. BOOST_TEST(CheckSequence(graph.cbegin(), graph.cend(), &IsLayerOfType<armnn::InputLayer>, &IsLayerOfType<armnn::OutputLayer>)); } -BOOST_AUTO_TEST_CASE(MovePermuteUp) +BOOST_AUTO_TEST_CASE(LSTMValidateTensorShapesFromInputsCIFGDisabledTest) +{ + Graph graph; + + //Helper function creates graph containing LSTM layer with required input and output layers + CreateLSTMLayerHelper(graph, false); + + //This function used to call ValidateShapesFromInputs(); + BOOST_CHECK_NO_THROW(graph.InferTensorInfos()); +} + +BOOST_AUTO_TEST_CASE(LSTMValidateTensorShapesFromInputsCIFGEnabledTest) +{ + Graph graph; + + //Helper function creates graph containing LSTM layer with required input and output layers + CreateLSTMLayerHelper(graph, true); + + //This function used to call ValidateShapesFromInputs(); + BOOST_CHECK_NO_THROW(graph.InferTensorInfos()); +} + +BOOST_AUTO_TEST_CASE(MovePermuteUpTest) { const armnn::TensorInfo info({ 1, 5, 2, 3 }, armnn::DataType::Float32); const armnn::TensorInfo permuted({ 1, 3, 5, 2 }, armnn::DataType::Float32); @@ -77,12 +239,16 @@ BOOST_AUTO_TEST_CASE(MovePermuteUp) armnn::Layer* head = graph.AddLayer<armnn::OutputLayer>(0, "output"); + std::string permuteLayerName = "original_permute"; + // Insert permute head = graph.InsertNewLayer<armnn::PermuteLayer>(head->GetInputSlot(0), - armnn::PermuteDescriptor({ 0, 2, 3, 1 }), ""); + armnn::PermuteDescriptor({ 0, 2, 3, 1 }), + permuteLayerName.c_str()); + head->GetOutputHandler().SetTensorInfo(permuted); - // Insert layers that don't care about data format + // Inserts layers that don't care about data format. head = graph.InsertNewLayer<armnn::ActivationLayer>(head->GetInputSlot(0), armnn::ActivationDescriptor{}, ""); head->GetOutputHandler().SetTensorInfo(info); @@ -90,7 +256,7 @@ BOOST_AUTO_TEST_CASE(MovePermuteUp) head = graph.InsertNewLayer<armnn::AdditionLayer>(head->GetInputSlot(0), ""); head->GetOutputHandler().SetTensorInfo(info); - // Insert input for 2nd input of Addition + // Inserts input for 2nd input of Addition. graph.InsertNewLayer<armnn::InputLayer>(head->GetInputSlot(1), inputId++, "") ->GetOutputHandler().SetTensorInfo(info); @@ -107,11 +273,11 @@ BOOST_AUTO_TEST_CASE(MovePermuteUp) head = graph.InsertNewLayer<armnn::MultiplicationLayer>(head->GetInputSlot(0), ""); head->GetOutputHandler().SetTensorInfo(info); - // Insert input for 2nd input of Multiplication + // Inserts input for 2nd input of Multiplication. graph.InsertNewLayer<armnn::InputLayer>(head->GetInputSlot(1), inputId++, "") ->GetOutputHandler().SetTensorInfo(info); - // Insert input + // Inserts input. graph.InsertNewLayer<armnn::InputLayer>(head->GetInputSlot(0), inputId++, "") ->GetOutputHandler().SetTensorInfo(info); @@ -129,9 +295,9 @@ BOOST_AUTO_TEST_CASE(MovePermuteUp) &IsLayerOfType<armnn::PermuteLayer>, &IsLayerOfType<armnn::OutputLayer>)); - armnn::Optimizer::Optimize(graph); + armnn::Optimizer::Pass(graph, armnn::MakeOptimizations(MovePermuteUp())); - // The permute is moved to the top. New permutes for layers with multiple inputs + // The permute is moved to the top. New permutes for layers with multiple inputs. BOOST_TEST(CheckSequence(graph.cbegin(), graph.cend(), &IsLayerOfType<armnn::InputLayer>, @@ -147,12 +313,18 @@ BOOST_AUTO_TEST_CASE(MovePermuteUp) &IsLayerOfType<armnn::AdditionLayer>, &IsLayerOfType<armnn::ActivationLayer>, &IsLayerOfType<armnn::OutputLayer>)); + + std::list<std::string> testRelatedLayers = { permuteLayerName }; + + BOOST_TEST(CheckRelatedLayers<armnn::PermuteLayer>(graph, testRelatedLayers)); } -BOOST_AUTO_TEST_CASE(PermuteAsReshape) +BOOST_AUTO_TEST_CASE(PermuteAsReshapeTest) { armnn::Graph graph; + std::string permuteLayerName = "permute"; + const armnn::TensorInfo infoIn({ 1, 2, 3, 1 }, armnn::DataType::Float32); const armnn::TensorInfo infoOut({ 1, 1, 2, 3 }, armnn::DataType::Float32); @@ -161,9 +333,9 @@ BOOST_AUTO_TEST_CASE(PermuteAsReshape) graph.InsertNewLayer<armnn::InputLayer>(output->GetInputSlot(0), 0, "input") ->GetOutputHandler().SetTensorInfo(infoIn); - // Insert permute + // Inserts permute. graph.InsertNewLayer<armnn::PermuteLayer>(output->GetInputSlot(0), - armnn::PermuteDescriptor({ 0, 2, 3, 1 }), "") + armnn::PermuteDescriptor({ 0, 2, 3, 1 }), permuteLayerName.c_str()) ->GetOutputHandler().SetTensorInfo(infoOut); BOOST_TEST(CheckSequence(graph.cbegin(), @@ -172,7 +344,7 @@ BOOST_AUTO_TEST_CASE(PermuteAsReshape) &IsLayerOfType<armnn::PermuteLayer>, &IsLayerOfType<armnn::OutputLayer>)); - armnn::Optimizer::Optimize(graph); + armnn::Optimizer::Pass(graph, armnn::MakeOptimizations(PermuteAsReshape())); // The permute is replaced by an equivalent reshape. @@ -189,9 +361,13 @@ BOOST_AUTO_TEST_CASE(PermuteAsReshape) &IsLayerOfType<armnn::InputLayer>, checkReshape, &IsLayerOfType<armnn::OutputLayer>)); + + + std::list<std::string> testRelatedLayers = { permuteLayerName }; + BOOST_TEST(CheckRelatedLayers<armnn::ReshapeLayer>(graph, testRelatedLayers)); } -BOOST_AUTO_TEST_CASE(OptimizeConsecutiveReshapes) +BOOST_AUTO_TEST_CASE(OptimizeConsecutiveReshapesTest) { armnn::Graph graph; @@ -203,16 +379,19 @@ BOOST_AUTO_TEST_CASE(OptimizeConsecutiveReshapes) input->GetOutputHandler().SetTensorInfo(info0); { - // Insert two reshapes + // Inserts two reshapes. const armnn::TensorInfo info1({1, 30, 1, 1}, armnn::DataType::Float32); const armnn::TensorInfo info2({1, 2, 1, 15}, armnn::DataType::Float32); + std::string reshape1Name = "reshape1"; + std::string reshape2Name = "reshape2"; + auto reshape1 = graph.InsertNewLayer<armnn::ReshapeLayer>(output->GetInputSlot(0), armnn::ReshapeDescriptor{ info1.GetShape() }, - "reshape1"); + reshape1Name.c_str()); auto reshape2 = graph.InsertNewLayer<armnn::ReshapeLayer>(output->GetInputSlot(0), armnn::ReshapeDescriptor{ info2.GetShape() }, - "reshape2"); + reshape2Name.c_str()); reshape1->GetOutputHandler().SetTensorInfo(info1); reshape2->GetOutputHandler().SetTensorInfo(info2); @@ -224,7 +403,7 @@ BOOST_AUTO_TEST_CASE(OptimizeConsecutiveReshapes) &IsLayerOfType<armnn::ReshapeLayer>, &IsLayerOfType<armnn::OutputLayer>)); - armnn::Optimizer::Optimize(graph); + armnn::Optimizer::Pass(graph, armnn::MakeOptimizations(OptimizeConsecutiveReshapes())); auto checkReshape = [&info2](const armnn::Layer* const layer) -> bool { @@ -234,25 +413,30 @@ BOOST_AUTO_TEST_CASE(OptimizeConsecutiveReshapes) (reshapeLayer->GetOutputHandler().GetTensorInfo().GetShape() == info2.GetShape()); }; - // The two reshapes are replaced by a single equivalent reshape + // The two reshapes are replaced by a single equivalent reshape. BOOST_TEST(CheckSequence(graph.cbegin(), graph.cend(), &IsLayerOfType<armnn::InputLayer>, checkReshape, &IsLayerOfType<armnn::OutputLayer>)); + + // Check the new reshape layer has the other two reshapes as related layers + std::list<std::string> testRelatedLayers = { reshape2Name, reshape1Name }; + + BOOST_TEST(CheckRelatedLayers<armnn::ReshapeLayer>(graph, testRelatedLayers)); } { - // Insert a reshape to the input shape + // Inserts a reshape to the input shape. auto reshapeToIn = graph.InsertNewLayer<armnn::ReshapeLayer>(output->GetInputSlot(0), armnn::ReshapeDescriptor{ info0.GetShape() }, "reshapeToIn"); reshapeToIn->GetOutputHandler().SetTensorInfo(info0); - armnn::Optimizer::Optimize(graph); + armnn::Optimizer::Pass(graph, armnn::MakeOptimizations(OptimizeConsecutiveReshapes())); - // The two reshapes are removed + // The two reshapes are removed. BOOST_TEST(CheckSequence(graph.cbegin(), graph.cend(), &IsLayerOfType<armnn::InputLayer>, @@ -260,7 +444,7 @@ BOOST_AUTO_TEST_CASE(OptimizeConsecutiveReshapes) } } -BOOST_AUTO_TEST_CASE(SquashEqualSiblings) +BOOST_AUTO_TEST_CASE(SquashEqualSiblingsTest) { armnn::Graph graph; @@ -272,7 +456,7 @@ BOOST_AUTO_TEST_CASE(SquashEqualSiblings) auto input = graph.AddLayer<armnn::InputLayer>(0, "input"); input->GetOutputSlot().SetTensorInfo(info); - // Insert equal permutes, equal reshapes and something else + // Inserts equal permutes, equal reshapes and something else. const armnn::PermuteDescriptor permDesc({ 0, 2, 3, 1 }); const armnn::ReshapeDescriptor reshapeDesc{ { 1, 3, 1, 5 } }; @@ -314,7 +498,8 @@ BOOST_AUTO_TEST_CASE(SquashEqualSiblings) &IsLayerOfType<armnn::OutputLayer>, &IsLayerOfType<armnn::OutputLayer>)); - armnn::Optimizer::Optimize(graph); + armnn::Optimizer::Pass(graph, armnn::MakeOptimizations(SquashEqualPermuteSiblings(), + SquashEqualReshapeSiblings())); // The permutes and reshapes are squashed. @@ -331,4 +516,259 @@ BOOST_AUTO_TEST_CASE(SquashEqualSiblings) &IsLayerOfType<armnn::OutputLayer>)); } +BOOST_AUTO_TEST_CASE(ConvertConstantsHalfToFloatTest) +{ + armnn::Graph graph; + + const armnn::TensorInfo info({ 1,1,1,2 }, armnn::DataType::Float32); + + // Create the half precision input data + unsigned int dims[] = { 4,1,1,1 }; + std::vector<float> convWeightsData{1.f, 2.f, 3.f, 4.f}; + std::vector<uint16_t> halfWeights(4); + armnnUtils::FloatingPointConverter::ConvertFloat32To16(convWeightsData.data(), + convWeightsData.size(), + halfWeights.data()); + armnn::ConstTensor weights(armnn::TensorInfo(4, dims, armnn::DataType::Float16), halfWeights); + + //Create the simple test network + auto input = graph.AddLayer<armnn::InputLayer>(0, "input"); + input->GetOutputSlot().SetTensorInfo(info); + + auto fc = graph.AddLayer<armnn::FullyConnectedLayer>(armnn::FullyConnectedDescriptor(), "fc"); + fc->m_Weight = std::make_unique<armnn::ScopedCpuTensorHandle>(weights); + fc->GetOutputSlot().SetTensorInfo(info); + + auto output = graph.AddLayer<armnn::OutputLayer>(1, "output"); + + //Connect up the layers + input->GetOutputSlot().Connect(fc->GetInputSlot(0)); + fc->GetOutputSlot().Connect(output->GetInputSlot(0)); + + //Test the tensor info is correct. + BOOST_CHECK(fc->m_Weight->GetTensorInfo().GetDataType() == armnn::DataType::Float16); + + // Run the optimizer + armnn::Optimizer::Pass(graph, armnn::MakeOptimizations(ConvertConstantsHalfToFloat())); + + //Test the tensor info is correct. + BOOST_CHECK(fc->m_Weight->GetTensorInfo().GetDataType() == armnn::DataType::Float32); + + // Now test the data matches float32 data + float* data = fc->m_Weight->GetTensor<float>(); + BOOST_CHECK(1.0f == data[0]); + BOOST_CHECK(2.0f == data[1]); + BOOST_CHECK(3.0f == data[2]); + BOOST_CHECK(4.0f == data[3]); +} + +BOOST_AUTO_TEST_CASE(ConvertConstantsFloatToHalfTest) +{ + armnn::Graph graph; + + const armnn::TensorInfo info({ 1, 1, 1, 2 }, armnn::DataType::Float16); + + // Create const tensor from fp32 data + unsigned int dims[] = { 4, 1, 1, 1 }; + std::vector<float> floatWeights{ 1.0f, 2.0f, 3.0f, 4.0f }; + armnn::ConstTensor weights(armnn::TensorInfo(4, dims, armnn::DataType::Float32), floatWeights); + + // Create simple test network + auto input = graph.AddLayer<armnn::InputLayer>(0, "input"); + input->GetOutputSlot().SetTensorInfo(info); + + auto fc = graph.AddLayer<armnn::FullyConnectedLayer>(armnn::FullyConnectedDescriptor(), "fc"); + fc->m_Weight = std::make_unique<armnn::ScopedCpuTensorHandle>(weights); + fc->GetOutputSlot().SetTensorInfo(info); + + auto output = graph.AddLayer<armnn::OutputLayer>(1, "output"); + + // Connect up the layers + input->GetOutputSlot().Connect(fc->GetInputSlot(0)); + fc->GetOutputSlot().Connect(output->GetInputSlot(0)); + + // Check tensor data type before conversion + BOOST_CHECK(fc->m_Weight->GetTensorInfo().GetDataType() == armnn::DataType::Float32); + + // Run the optimizer + armnn::Optimizer::Pass(graph, armnn::MakeOptimizations(ConvertConstantsFloatToHalf())); + + // Check tensor data type after conversion + BOOST_CHECK(fc->m_Weight->GetTensorInfo().GetDataType() == armnn::DataType::Float16); + + // Check whether data matches expected fp16 data + Half* data = fc->m_Weight->GetTensor<Half>(); + BOOST_CHECK(data[0] == Half(1.0f)); + BOOST_CHECK(data[1] == Half(2.0f)); + BOOST_CHECK(data[2] == Half(3.0f)); + BOOST_CHECK(data[3] == Half(4.0f)); +} + +BOOST_AUTO_TEST_CASE(OptimizeInverseConversionsTest) +{ + armnn::Graph graph; + + auto output = graph.AddLayer<armnn::OutputLayer>(0, "output"); + + graph.InsertNewLayer<armnn::InputLayer>(output->GetInputSlot(0), 0, "input"); + + // Fp32ToFp16 conversion followed by an inverse Fp16ToFp32 conversion + graph.InsertNewLayer<armnn::ConvertFp32ToFp16Layer>(output->GetInputSlot(0), "convert1"); + graph.InsertNewLayer<armnn::ConvertFp16ToFp32Layer>(output->GetInputSlot(0), "convert2"); + + graph.InsertNewLayer<armnn::Convolution2dLayer>(output->GetInputSlot(0), Convolution2dDescriptor(), "conv"); + + // Fp16ToFp32 conversion followed by an inverse Fp32ToFp16 conversion + graph.InsertNewLayer<armnn::ConvertFp16ToFp32Layer>(output->GetInputSlot(0), "convert3"); + graph.InsertNewLayer<armnn::ConvertFp32ToFp16Layer>(output->GetInputSlot(0), "convert4"); + + BOOST_TEST(CheckSequence(graph.cbegin(), + graph.cend(), + &IsLayerOfType<armnn::InputLayer>, + &IsLayerOfType<armnn::ConvertFp32ToFp16Layer>, + &IsLayerOfType<armnn::ConvertFp16ToFp32Layer>, + &IsLayerOfType<armnn::Convolution2dLayer>, + &IsLayerOfType<armnn::ConvertFp16ToFp32Layer>, + &IsLayerOfType<armnn::ConvertFp32ToFp16Layer>, + &IsLayerOfType<armnn::OutputLayer>)); + + armnn::Optimizer::Pass(graph, armnn::MakeOptimizations(OptimizeInverseConversionsFp16(), + OptimizeInverseConversionsFp32())); + + // Check that all consecutive inverse conversions are removed + BOOST_TEST(CheckSequence(graph.cbegin(), + graph.cend(), + &IsLayerOfType<armnn::InputLayer>, + &IsLayerOfType<armnn::Convolution2dLayer>, + &IsLayerOfType<armnn::OutputLayer>)); +} + +BOOST_AUTO_TEST_CASE(InsertConvertersTest) +{ + const armnn::TensorInfo info({ 1, 5, 2, 3 }, armnn::DataType::Float16); + + armnn::Graph graph; + + armnn::LayerBindingId inputId = 0; + + armnn::Layer* head = graph.AddLayer<armnn::OutputLayer>(0, "output"); + + head = graph.InsertNewLayer<armnn::AdditionLayer>(head->GetInputSlot(0), ""); + head->GetOutputHandler().SetTensorInfo(info); + + graph.InsertNewLayer<armnn::InputLayer>(head->GetInputSlot(1), inputId++, "") + ->GetOutputHandler().SetTensorInfo(info); + + head = graph.InsertNewLayer<armnn::FloorLayer>(head->GetInputSlot(0), ""); + head->GetOutputHandler().SetTensorInfo(info); + + head = graph.InsertNewLayer<armnn::MemCopyLayer>(head->GetInputSlot(0), ""); + head->GetOutputHandler().SetTensorInfo(info); + + graph.InsertNewLayer<armnn::InputLayer>(head->GetInputSlot(0), inputId++, "") + ->GetOutputHandler().SetTensorInfo(info); + + // Check graph layer sequence before inserting convert layers + BOOST_TEST(CheckSequence(graph.cbegin(), + graph.cend(), + &IsLayerOfType<armnn::InputLayer>, + &IsLayerOfType<armnn::InputLayer>, + &IsLayerOfType<armnn::MemCopyLayer>, + &IsLayerOfType<armnn::FloorLayer>, + &IsLayerOfType<armnn::AdditionLayer>, + &IsLayerOfType<armnn::OutputLayer>)); + + // Check layers have Float16 DataType + for (auto& layer : graph) + { + if(layer->GetType()==LayerType::Floor || layer->GetType() == LayerType::Addition) + { + BOOST_ASSERT(layer->GetOutputSlot(0).GetTensorInfo().GetDataType() == DataType::Float16); + BOOST_ASSERT(layer->GetDataType() == DataType::Float16); + } + } + + // Insert convert layers either side of unsupported layer + for (auto& layer : graph) + { + if(layer->GetType()==LayerType::Floor || layer->GetType() == LayerType::Addition) + { + InsertConvertFp16ToFp32LayersBefore(graph, *layer); + InsertConvertFp32ToFp16LayersAfter(graph, *layer); + } + } + + // Check layers have correct DataType after inserting convert layers + for (auto& layer : graph) + { + if (layer->GetType()==LayerType::Floor || layer->GetType() == LayerType::Addition) + { + BOOST_ASSERT(layer->GetOutputSlot(0).GetTensorInfo().GetDataType() == DataType::Float32); + BOOST_ASSERT(layer->GetDataType() == DataType::Float32); + } + else if (layer->GetType() == LayerType::ConvertFp16ToFp32) + { + BOOST_ASSERT(layer->GetOutputSlot(0).GetTensorInfo().GetDataType() == DataType::Float32); + BOOST_ASSERT(layer->GetDataType() == DataType::Float16); + } + else if (layer->GetType() == LayerType::ConvertFp32ToFp16) + { + BOOST_ASSERT(layer->GetOutputSlot(0).GetTensorInfo().GetDataType() == DataType::Float16); + BOOST_ASSERT(layer->GetDataType() == DataType::Float32); + } + } + + // Check sequence of layers after inserting convert layers + BOOST_TEST(CheckSequence(graph.cbegin(), + graph.cend(), + &IsLayerOfType<armnn::InputLayer>, + &IsLayerOfType<armnn::InputLayer>, + &IsLayerOfType<armnn::ConvertFp16ToFp32Layer>, + &IsLayerOfType<armnn::MemCopyLayer>, + &IsLayerOfType<armnn::ConvertFp16ToFp32Layer>, + &IsLayerOfType<armnn::FloorLayer>, + &IsLayerOfType<armnn::ConvertFp32ToFp16Layer>, + &IsLayerOfType<armnn::ConvertFp16ToFp32Layer>, + &IsLayerOfType<armnn::AdditionLayer>, + &IsLayerOfType<armnn::ConvertFp32ToFp16Layer>, + &IsLayerOfType<armnn::OutputLayer>)); +} + +BOOST_AUTO_TEST_CASE(Fp32NetworkToFp16OptimizationTest) +{ + armnn::Graph graph; + + const armnn::TensorInfo infoFP32({ 2,2,1,3 }, armnn::DataType::Float32); + + // Create the simple test network + auto input = graph.AddLayer<armnn::InputLayer>(0, "input"); + input->GetOutputSlot().SetTensorInfo(infoFP32); + + auto floor = graph.AddLayer<armnn::FloorLayer>("floor"); + floor->GetOutputSlot().SetTensorInfo(infoFP32); + + auto output = graph.AddLayer<armnn::OutputLayer>(1, "output"); + + // Connect up the layers + input->GetOutputSlot().Connect(floor->GetInputSlot(0)); + floor->GetOutputSlot().Connect(output->GetInputSlot(0)); + + BOOST_TEST(CheckSequence(graph.cbegin(), + graph.cend(), + &IsLayerOfType<armnn::InputLayer>, + &IsLayerOfType<armnn::FloorLayer>, + &IsLayerOfType<armnn::OutputLayer>)); + + // Run the optimizer + armnn::Optimizer::Pass(graph, armnn::MakeOptimizations(Fp32NetworkToFp16Converter())); + + BOOST_TEST(CheckSequence(graph.cbegin(), + graph.cend(), + &IsLayerOfType<armnn::InputLayer>, + &IsLayerOfType<armnn::ConvertFp32ToFp16Layer>, + &IsLayerOfType<armnn::FloorLayer>, + &IsLayerOfType<armnn::ConvertFp16ToFp32Layer>, + &IsLayerOfType<armnn::OutputLayer>)); +} + BOOST_AUTO_TEST_SUITE_END() |