From 6fff4f4c1ce780ff90163e3a0352d7da204f2b1b Mon Sep 17 00:00:00 2001 From: Teresa Charlin Date: Sat, 31 Oct 2020 13:21:01 +0000 Subject: IVGCVSW-5476 Fix Fuse_batchNorm_into_Conv2D_Float32_Test * failing with no backends provided Signed-off-by: Teresa Charlin Change-Id: I55ebfc52268ad667e495831c64977338d003db99 --- CMakeLists.txt | 1 + src/armnn/test/OptimizerTests.cpp | 169 +-------------------- .../test/optimizations/FuseBatchNormTests.cpp | 163 ++++++++++++++++++++ 3 files changed, 166 insertions(+), 167 deletions(-) create mode 100644 src/armnn/test/optimizations/FuseBatchNormTests.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index a6d88e096c..c2e394dae7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -645,6 +645,7 @@ if(BUILD_UNIT_TESTS) src/armnn/test/optimizations/ConvertConstantsHalfToFloatTests.cpp src/armnn/test/optimizations/Fp32NetworkToBf16ConverterTests.cpp src/armnn/test/optimizations/Fp32NetworkToFp16ConverterTests.cpp + src/armnn/test/optimizations/FuseBatchNormTests.cpp src/armnn/test/optimizations/InsertDebugLayerTests.cpp src/armnn/test/optimizations/MovePermuteUpTests.cpp src/armnn/test/optimizations/MoveTransposeUpTests.cpp diff --git a/src/armnn/test/OptimizerTests.cpp b/src/armnn/test/OptimizerTests.cpp index 879905bda8..8845dae6c8 100644 --- a/src/armnn/test/OptimizerTests.cpp +++ b/src/armnn/test/OptimizerTests.cpp @@ -798,6 +798,7 @@ BOOST_AUTO_TEST_CASE(BackendHintTest) } } +// Tests that OptimizeForExclusiveConnections works, fusing when needed, using BatchNorm fusing as example BOOST_AUTO_TEST_CASE(OptimizeForExclusiveConnections_fuse_Test) { using namespace armnn; @@ -882,6 +883,7 @@ BOOST_AUTO_TEST_CASE(OptimizeForExclusiveConnections_fuse_Test) &IsLayerOfType)); } +// Tests that OptimizeForExclusiveConnections works, not fusing when not needed, using BatchNorm fusing as example BOOST_AUTO_TEST_CASE(OptimizeForExclusiveConnections_notFuse_Test) { // Define the network @@ -921,171 +923,4 @@ 
BOOST_AUTO_TEST_CASE(OptimizeForExclusiveConnections_notFuse_Test) &IsLayerOfType, &IsLayerOfType)); } - -BOOST_AUTO_TEST_CASE(Fuse_batchNorm_into_Conv2D_Float32_Test) -{ - using namespace armnn; - - // Define layers information - Convolution2dDescriptor convolution2dDescriptor; - convolution2dDescriptor.m_BiasEnabled = false; - convolution2dDescriptor.m_DataLayout = DataLayout::NHWC; - convolution2dDescriptor.m_StrideX = 1; - convolution2dDescriptor.m_StrideY = 1; - BatchNormalizationDescriptor batchNormDescriptor; - batchNormDescriptor.m_DataLayout = DataLayout::NHWC; - - const unsigned int inputDimensionSizes[] = {1, 4, 4, 3}; // NHWCin - const unsigned int weightsDimensionSizes[] = {4, 2, 2, 3}; // CoutHWCin - const unsigned int outputDimensionSizes[] = {1, 3, 3, 4}; // NHWCout - const unsigned int outputChannelSize[] = {outputDimensionSizes[3]}; // Cout - - TensorInfo inputInfo (4, inputDimensionSizes, DataType::Float32); - TensorInfo outputInfo(4, outputDimensionSizes, DataType::Float32); - - std::vector weightsVector = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, - 11, 12, 13, 14, 15, 16, 17, 18, 19, 110, 111, 112, - 21, 22, 23, 24, 25, 26, 27, 28, 29, 210, 211, 212, - 31, 32, 33, 34, 35, 36, 37, 38, 39, 310, 311, 312}; - TensorInfo weightsInfo(4, weightsDimensionSizes, DataType::Float32); - ConstTensor weights (weightsInfo, weightsVector); - std::vector biasVector = {3.3f, 3.2f, 3.1f, 3.0f}; - TensorInfo biasInfo(1, outputChannelSize, DataType::Float32); - ConstTensor bias (biasInfo, biasVector); - Optional optionalBias = Optional(bias); - - std::vector betaVector = {0.0f, 0.2f, 0.3f, 0.4f}; - std::vector gammaVector = {0.5f, 0.6f, 0.7f, 0.8f}; - std::vector meanVector = {0.1f, 0.2f, 0.3f, 0.4f}; - std::vector varianceVector = {1.0f, 1.1f, 1.2f, 1.3f}; - ConstTensor beta (TensorInfo(1, outputChannelSize, DataType::Float32), betaVector); - ConstTensor gamma (TensorInfo(1, outputChannelSize, DataType::Float32), gammaVector); - ConstTensor mean (TensorInfo(1, 
outputChannelSize, DataType::Float32), meanVector); - ConstTensor variance(TensorInfo(1, outputChannelSize, DataType::Float32), varianceVector); - - auto inputSize = inputDimensionSizes[0]*inputDimensionSizes[1]*inputDimensionSizes[2]*inputDimensionSizes[3]; - auto outputSize = outputDimensionSizes[0]*outputDimensionSizes[1]*outputDimensionSizes[2]*outputDimensionSizes[3]; - - // FIRST NETWORK: Fused - - // Construct ArmNN network - NetworkId networkIdentifier; - INetworkPtr network = INetwork::Create(); - IConnectableLayer *inputLayer = network->AddInputLayer(0); - IConnectableLayer *convLayer = network->AddConvolution2dLayer(convolution2dDescriptor, - weights, - optionalBias, - "convolution"); - IConnectableLayer *batchNormLayer = network->AddBatchNormalizationLayer(batchNormDescriptor, - mean, - variance, - beta, - gamma, - "batchNorm"); - IConnectableLayer *outputLayer = network->AddOutputLayer(0); - - inputLayer ->GetOutputSlot(0).Connect(convLayer ->GetInputSlot(0)); - convLayer ->GetOutputSlot(0).Connect(batchNormLayer->GetInputSlot(0)); - batchNormLayer ->GetOutputSlot(0).Connect(outputLayer ->GetInputSlot(0)); - - // Create ArmNN runtime - IRuntime::CreationOptions options; // default options - IRuntimePtr run = IRuntime::Create(options); - - //Set the tensors in the network. - inputLayer ->GetOutputSlot(0).SetTensorInfo(inputInfo); - convLayer ->GetOutputSlot(0).SetTensorInfo(outputInfo); - batchNormLayer ->GetOutputSlot(0).SetTensorInfo(outputInfo); - - // Optimise ArmNN network - IOptimizedNetworkPtr optNet = Optimize(*network, {Compute::CpuRef}, run->GetDeviceSpec()); - if (!optNet) - { - // This shouldn't happen for this simple sample, with reference backend. - // But in general usage Optimize could fail if the hardware at runtime cannot - // support the model that has been provided. - std::cerr << "Error: Failed to optimise the input network." 
<< std::endl; - } - - // Load graph into runtime - run->LoadNetwork(networkIdentifier, std::move(optNet)); - - //Creates structures for inputs and outputs. - std::vector inputData(inputSize, 128); - std::vector outputData(outputSize); - - InputTensors inputTensors {{0, ConstTensor(run->GetInputTensorInfo (networkIdentifier, 0), inputData.data())}}; - OutputTensors outputTensors{{0, Tensor(run->GetOutputTensorInfo(networkIdentifier, 0), outputData.data())}}; - - - // Execute network - run->EnqueueWorkload(networkIdentifier, inputTensors, outputTensors); - - // SECOND NETWORK: NotFused - - // Construct ArmNN network - NetworkId networkIdentifierNotFused; - INetworkPtr networkNotFused = INetwork::Create(); - IConnectableLayer *inputLayerNotFused = networkNotFused->AddInputLayer(0); - IConnectableLayer *convLayerNotFused = networkNotFused->AddConvolution2dLayer(convolution2dDescriptor, - weights, - optionalBias, - "convolution"); - IConnectableLayer *batchNormLayerNotFused = networkNotFused->AddBatchNormalizationLayer(batchNormDescriptor, - mean, - variance, - beta, - gamma, - "batchNorm"); - IConnectableLayer *outputLayerNotFused = networkNotFused->AddOutputLayer(0); - IConnectableLayer *output2LayerNotFused = networkNotFused->AddOutputLayer(1); - - - inputLayerNotFused ->GetOutputSlot(0).Connect(convLayerNotFused ->GetInputSlot(0)); - convLayerNotFused ->GetOutputSlot(0).Connect(batchNormLayerNotFused->GetInputSlot(0)); - batchNormLayerNotFused ->GetOutputSlot(0).Connect(outputLayerNotFused ->GetInputSlot(0)); - convLayerNotFused ->GetOutputSlot(0).Connect(output2LayerNotFused ->GetInputSlot(0)); - - // Create ArmNN runtime - IRuntimePtr runNotFused = IRuntime::Create(options); - - //Set the tensors in the network. 
- inputLayerNotFused ->GetOutputSlot(0).SetTensorInfo(inputInfo); - convLayerNotFused ->GetOutputSlot(0).SetTensorInfo(outputInfo); - batchNormLayerNotFused ->GetOutputSlot(0).SetTensorInfo(outputInfo); - - // Optimise ArmNN network - IOptimizedNetworkPtr optNetNotFused = Optimize(*networkNotFused, {Compute::CpuRef}, runNotFused->GetDeviceSpec()); - if (!optNetNotFused) - { - // This shouldn't happen for this simple sample, with reference backend. - // But in general usage Optimize could fail if the hardware at runtime cannot - // support the model that has been provided. - std::cerr << "Error: Failed to optimise the input network." << std::endl; - } - - // Load graph into runtime - runNotFused->LoadNetwork(networkIdentifierNotFused, std::move(optNetNotFused)); - - //Creates structures for inputs and outputs. - std::vector inputDataNotFused(inputSize, 128); - std::vector outputDataNotFused(outputSize); - std::vector outputData2NotFused(outputSize); - - InputTensors inputTensorsNotFused{ - {0, ConstTensor(runNotFused->GetInputTensorInfo(networkIdentifierNotFused, 0), inputDataNotFused.data())}}; - OutputTensors outputTensorsNotFused{ - {0, Tensor(runNotFused->GetOutputTensorInfo(networkIdentifierNotFused, 0), outputDataNotFused.data())}, - {1, Tensor(runNotFused->GetOutputTensorInfo(networkIdentifierNotFused, 1), outputData2NotFused.data())}}; - - // Execute network - runNotFused->EnqueueWorkload(networkIdentifierNotFused, inputTensorsNotFused, outputTensorsNotFused); - - // Check the output of the fused-convolution matches with the output of the batchNormm in the "NotFused" network - for (unsigned int n = 0; n < outputData.size(); ++n) - { - BOOST_CHECK_CLOSE(outputData[n], outputDataNotFused[n], 0.001); - } -} - BOOST_AUTO_TEST_SUITE_END() diff --git a/src/armnn/test/optimizations/FuseBatchNormTests.cpp b/src/armnn/test/optimizations/FuseBatchNormTests.cpp new file mode 100644 index 0000000000..74cb8f96b7 --- /dev/null +++ 
b/src/armnn/test/optimizations/FuseBatchNormTests.cpp @@ -0,0 +1,163 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "LayersFwd.hpp" +#include + +BOOST_AUTO_TEST_SUITE(Optimizer) +using namespace armnn; + +// This unit test needs the reference backend, it's not available if the reference backend is not built +#if defined(ARMNNREF_ENABLED) +BOOST_AUTO_TEST_CASE(Fuse_batchNorm_into_Conv2D_Float32_Test) +{ + // Define layers information + Convolution2dDescriptor convolution2dDescriptor; + convolution2dDescriptor.m_BiasEnabled = false; + convolution2dDescriptor.m_DataLayout = DataLayout::NHWC; + convolution2dDescriptor.m_StrideX = 1; + convolution2dDescriptor.m_StrideY = 1; + BatchNormalizationDescriptor batchNormDescriptor; + batchNormDescriptor.m_DataLayout = DataLayout::NHWC; + + const unsigned int inputDimensionSizes[] = {1, 4, 4, 3}; // NHWCin + const unsigned int weightsDimensionSizes[] = {4, 2, 2, 3}; // CoutHWCin + const unsigned int outputDimensionSizes[] = {1, 3, 3, 4}; // NHWCout + const unsigned int outputChannelSize[] = {outputDimensionSizes[3]}; // Cout + + TensorInfo inputInfo (4, inputDimensionSizes, DataType::Float32); + TensorInfo outputInfo(4, outputDimensionSizes, DataType::Float32); + + std::vector weightsVector = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, + 11, 12, 13, 14, 15, 16, 17, 18, 19, 110, 111, 112, + 21, 22, 23, 24, 25, 26, 27, 28, 29, 210, 211, 212, + 31, 32, 33, 34, 35, 36, 37, 38, 39, 310, 311, 312}; + TensorInfo weightsInfo(4, weightsDimensionSizes, DataType::Float32); + ConstTensor weights (weightsInfo, weightsVector); + std::vector biasVector = {3.3f, 3.2f, 3.1f, 3.0f}; + TensorInfo biasInfo(1, outputChannelSize, DataType::Float32); + ConstTensor bias (biasInfo, biasVector); + Optional optionalBias = Optional(bias); + + std::vector betaVector = {0.0f, 0.2f, 0.3f, 0.4f}; + std::vector gammaVector = {0.5f, 0.6f, 0.7f, 0.8f}; + std::vector meanVector = {0.1f, 
0.2f, 0.3f, 0.4f}; + std::vector varianceVector = {1.0f, 1.1f, 1.2f, 1.3f}; + ConstTensor beta (TensorInfo(1, outputChannelSize, DataType::Float32), betaVector); + ConstTensor gamma (TensorInfo(1, outputChannelSize, DataType::Float32), gammaVector); + ConstTensor mean (TensorInfo(1, outputChannelSize, DataType::Float32), meanVector); + ConstTensor variance(TensorInfo(1, outputChannelSize, DataType::Float32), varianceVector); + + auto inputSize = inputDimensionSizes[0]*inputDimensionSizes[1]*inputDimensionSizes[2]*inputDimensionSizes[3]; + auto outputSize = outputDimensionSizes[0]*outputDimensionSizes[1]*outputDimensionSizes[2]*outputDimensionSizes[3]; + + // FIRST NETWORK: Fused + + // Construct ArmNN network + NetworkId networkIdentifier; + INetworkPtr network = INetwork::Create(); + IConnectableLayer *inputLayer = network->AddInputLayer(0); + IConnectableLayer *convLayer = network->AddConvolution2dLayer(convolution2dDescriptor, + weights, + optionalBias, + "convolution"); + IConnectableLayer *batchNormLayer = network->AddBatchNormalizationLayer(batchNormDescriptor, + mean, + variance, + beta, + gamma, + "batchNorm"); + IConnectableLayer *outputLayer = network->AddOutputLayer(0); + + inputLayer ->GetOutputSlot(0).Connect(convLayer ->GetInputSlot(0)); + convLayer ->GetOutputSlot(0).Connect(batchNormLayer->GetInputSlot(0)); + batchNormLayer ->GetOutputSlot(0).Connect(outputLayer ->GetInputSlot(0)); + + //Set the tensors in the network. 
+ inputLayer ->GetOutputSlot(0).SetTensorInfo(inputInfo); + convLayer ->GetOutputSlot(0).SetTensorInfo(outputInfo); + batchNormLayer ->GetOutputSlot(0).SetTensorInfo(outputInfo); + + // Create ArmNN runtime + IRuntime::CreationOptions options; // default options + IRuntimePtr run = IRuntime::Create(options); + + // Optimise ArmNN network + IOptimizedNetworkPtr optNet = Optimize(*network, {Compute::CpuRef}, run->GetDeviceSpec()); + + // Load graph into runtime + BOOST_TEST(run->LoadNetwork(networkIdentifier, std::move(optNet)) == Status::Success); + + //Creates structures for inputs and outputs. + std::vector inputData(inputSize, 128); + std::vector outputData(outputSize); + + InputTensors inputTensors {{0, ConstTensor(run->GetInputTensorInfo (networkIdentifier, 0), inputData.data())}}; + OutputTensors outputTensors{{0, Tensor(run->GetOutputTensorInfo(networkIdentifier, 0), outputData.data())}}; + + // Execute network + run->EnqueueWorkload(networkIdentifier, inputTensors, outputTensors); + + // SECOND NETWORK: NotFused + + // Construct ArmNN network + NetworkId networkIdentifierNotFused; + INetworkPtr networkNotFused = INetwork::Create(); + IConnectableLayer *inputLayerNotFused = networkNotFused->AddInputLayer(0); + IConnectableLayer *convLayerNotFused = networkNotFused->AddConvolution2dLayer(convolution2dDescriptor, + weights, + optionalBias, + "convolution"); + IConnectableLayer *batchNormLayerNotFused = networkNotFused->AddBatchNormalizationLayer(batchNormDescriptor, + mean, + variance, + beta, + gamma, + "batchNorm"); + IConnectableLayer *outputLayerNotFused = networkNotFused->AddOutputLayer(0); + IConnectableLayer *output2LayerNotFused = networkNotFused->AddOutputLayer(1); + + inputLayerNotFused ->GetOutputSlot(0).Connect(convLayerNotFused ->GetInputSlot(0)); + convLayerNotFused ->GetOutputSlot(0).Connect(batchNormLayerNotFused->GetInputSlot(0)); + batchNormLayerNotFused ->GetOutputSlot(0).Connect(outputLayerNotFused ->GetInputSlot(0)); + convLayerNotFused 
->GetOutputSlot(0).Connect(output2LayerNotFused ->GetInputSlot(0)); + + //Set the tensors in the network. + inputLayerNotFused ->GetOutputSlot(0).SetTensorInfo(inputInfo); + convLayerNotFused ->GetOutputSlot(0).SetTensorInfo(outputInfo); + batchNormLayerNotFused ->GetOutputSlot(0).SetTensorInfo(outputInfo); + + // Create ArmNN runtime + IRuntimePtr runNotFused = IRuntime::Create(options); + + // Optimise ArmNN network + IOptimizedNetworkPtr optNetNotFused = Optimize(*networkNotFused, {Compute::CpuRef}, runNotFused->GetDeviceSpec()); + + // Load graph into runtime + BOOST_TEST(runNotFused->LoadNetwork(networkIdentifierNotFused, std::move(optNetNotFused)) == Status::Success); + + //Creates structures for inputs and outputs. + std::vector inputDataNotFused(inputSize, 128); + std::vector outputDataNotFused(outputSize); + std::vector outputData2NotFused(outputSize); + + InputTensors inputTensorsNotFused{ + {0, ConstTensor(runNotFused->GetInputTensorInfo(networkIdentifierNotFused, 0), inputDataNotFused.data())}}; + OutputTensors outputTensorsNotFused{ + {0, Tensor(runNotFused->GetOutputTensorInfo(networkIdentifierNotFused, 0), outputDataNotFused.data())}, + {1, Tensor(runNotFused->GetOutputTensorInfo(networkIdentifierNotFused, 1), outputData2NotFused.data())}}; + + // Execute network + runNotFused->EnqueueWorkload(networkIdentifierNotFused, inputTensorsNotFused, outputTensorsNotFused); + + // Check that the output of the fused convolution matches the output of the batchNorm in the "NotFused" network + for (unsigned int n = 0; n < outputData.size(); ++n) + { + BOOST_CHECK_CLOSE(outputData[n], outputDataNotFused[n], 0.001); + } +} +#endif + +BOOST_AUTO_TEST_SUITE_END() -- cgit v1.2.1