aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTeresa Charlin <teresa.charlinreyes@arm.com>2020-10-31 13:21:01 +0000
committerJim Flynn <jim.flynn@arm.com>2020-11-02 09:49:19 +0000
commit6fff4f4c1ce780ff90163e3a0352d7da204f2b1b (patch)
tree4a85ab37207d66db8ffab6d837faf73d1c2a440b
parent1973b09c042e954765def837927eb1bb77248aed (diff)
downloadarmnn-6fff4f4c1ce780ff90163e3a0352d7da204f2b1b.tar.gz
IVGCVSW-5476 Fix Fuse_batchNorm_into_Conv2D_Float32_Test
* failing with no backends provided
Signed-off-by: Teresa Charlin <teresa.charlinreyes@arm.com>
Change-Id: I55ebfc52268ad667e495831c64977338d003db99
-rw-r--r--CMakeLists.txt1
-rw-r--r--src/armnn/test/OptimizerTests.cpp169
-rw-r--r--src/armnn/test/optimizations/FuseBatchNormTests.cpp163
3 files changed, 166 insertions, 167 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index a6d88e096c..c2e394dae7 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -645,6 +645,7 @@ if(BUILD_UNIT_TESTS)
src/armnn/test/optimizations/ConvertConstantsHalfToFloatTests.cpp
src/armnn/test/optimizations/Fp32NetworkToBf16ConverterTests.cpp
src/armnn/test/optimizations/Fp32NetworkToFp16ConverterTests.cpp
+ src/armnn/test/optimizations/FuseBatchNormTests.cpp
src/armnn/test/optimizations/InsertDebugLayerTests.cpp
src/armnn/test/optimizations/MovePermuteUpTests.cpp
src/armnn/test/optimizations/MoveTransposeUpTests.cpp
diff --git a/src/armnn/test/OptimizerTests.cpp b/src/armnn/test/OptimizerTests.cpp
index 879905bda8..8845dae6c8 100644
--- a/src/armnn/test/OptimizerTests.cpp
+++ b/src/armnn/test/OptimizerTests.cpp
@@ -798,6 +798,7 @@ BOOST_AUTO_TEST_CASE(BackendHintTest)
}
}
+// Tests that OptimizeForExclusiveConnections works, fusing when needed, using BatchNorm fusing as an example
BOOST_AUTO_TEST_CASE(OptimizeForExclusiveConnections_fuse_Test)
{
using namespace armnn;
@@ -882,6 +883,7 @@ BOOST_AUTO_TEST_CASE(OptimizeForExclusiveConnections_fuse_Test)
&IsLayerOfType<OutputLayer>));
}
+// Tests that OptimizeForExclusiveConnections works, not fusing when not needed, using BatchNorm fusing as an example
BOOST_AUTO_TEST_CASE(OptimizeForExclusiveConnections_notFuse_Test)
{
// Define the network
@@ -921,171 +923,4 @@ BOOST_AUTO_TEST_CASE(OptimizeForExclusiveConnections_notFuse_Test)
&IsLayerOfType<armnn::OutputLayer>,
&IsLayerOfType<armnn::OutputLayer>));
}
-
-BOOST_AUTO_TEST_CASE(Fuse_batchNorm_into_Conv2D_Float32_Test)
-{
- using namespace armnn;
-
- // Define layers information
- Convolution2dDescriptor convolution2dDescriptor;
- convolution2dDescriptor.m_BiasEnabled = false;
- convolution2dDescriptor.m_DataLayout = DataLayout::NHWC;
- convolution2dDescriptor.m_StrideX = 1;
- convolution2dDescriptor.m_StrideY = 1;
- BatchNormalizationDescriptor batchNormDescriptor;
- batchNormDescriptor.m_DataLayout = DataLayout::NHWC;
-
- const unsigned int inputDimensionSizes[] = {1, 4, 4, 3}; // NHWCin
- const unsigned int weightsDimensionSizes[] = {4, 2, 2, 3}; // CoutHWCin
- const unsigned int outputDimensionSizes[] = {1, 3, 3, 4}; // NHWCout
- const unsigned int outputChannelSize[] = {outputDimensionSizes[3]}; // Cout
-
- TensorInfo inputInfo (4, inputDimensionSizes, DataType::Float32);
- TensorInfo outputInfo(4, outputDimensionSizes, DataType::Float32);
-
- std::vector<float> weightsVector = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
- 11, 12, 13, 14, 15, 16, 17, 18, 19, 110, 111, 112,
- 21, 22, 23, 24, 25, 26, 27, 28, 29, 210, 211, 212,
- 31, 32, 33, 34, 35, 36, 37, 38, 39, 310, 311, 312};
- TensorInfo weightsInfo(4, weightsDimensionSizes, DataType::Float32);
- ConstTensor weights (weightsInfo, weightsVector);
- std::vector<float> biasVector = {3.3f, 3.2f, 3.1f, 3.0f};
- TensorInfo biasInfo(1, outputChannelSize, DataType::Float32);
- ConstTensor bias (biasInfo, biasVector);
- Optional<ConstTensor> optionalBias = Optional<ConstTensor>(bias);
-
- std::vector<float> betaVector = {0.0f, 0.2f, 0.3f, 0.4f};
- std::vector<float> gammaVector = {0.5f, 0.6f, 0.7f, 0.8f};
- std::vector<float> meanVector = {0.1f, 0.2f, 0.3f, 0.4f};
- std::vector<float> varianceVector = {1.0f, 1.1f, 1.2f, 1.3f};
- ConstTensor beta (TensorInfo(1, outputChannelSize, DataType::Float32), betaVector);
- ConstTensor gamma (TensorInfo(1, outputChannelSize, DataType::Float32), gammaVector);
- ConstTensor mean (TensorInfo(1, outputChannelSize, DataType::Float32), meanVector);
- ConstTensor variance(TensorInfo(1, outputChannelSize, DataType::Float32), varianceVector);
-
- auto inputSize = inputDimensionSizes[0]*inputDimensionSizes[1]*inputDimensionSizes[2]*inputDimensionSizes[3];
- auto outputSize = outputDimensionSizes[0]*outputDimensionSizes[1]*outputDimensionSizes[2]*outputDimensionSizes[3];
-
- // FIRST NETWORK: Fused
-
- // Construct ArmNN network
- NetworkId networkIdentifier;
- INetworkPtr network = INetwork::Create();
- IConnectableLayer *inputLayer = network->AddInputLayer(0);
- IConnectableLayer *convLayer = network->AddConvolution2dLayer(convolution2dDescriptor,
- weights,
- optionalBias,
- "convolution");
- IConnectableLayer *batchNormLayer = network->AddBatchNormalizationLayer(batchNormDescriptor,
- mean,
- variance,
- beta,
- gamma,
- "batchNorm");
- IConnectableLayer *outputLayer = network->AddOutputLayer(0);
-
- inputLayer ->GetOutputSlot(0).Connect(convLayer ->GetInputSlot(0));
- convLayer ->GetOutputSlot(0).Connect(batchNormLayer->GetInputSlot(0));
- batchNormLayer ->GetOutputSlot(0).Connect(outputLayer ->GetInputSlot(0));
-
- // Create ArmNN runtime
- IRuntime::CreationOptions options; // default options
- IRuntimePtr run = IRuntime::Create(options);
-
- //Set the tensors in the network.
- inputLayer ->GetOutputSlot(0).SetTensorInfo(inputInfo);
- convLayer ->GetOutputSlot(0).SetTensorInfo(outputInfo);
- batchNormLayer ->GetOutputSlot(0).SetTensorInfo(outputInfo);
-
- // Optimise ArmNN network
- IOptimizedNetworkPtr optNet = Optimize(*network, {Compute::CpuRef}, run->GetDeviceSpec());
- if (!optNet)
- {
- // This shouldn't happen for this simple sample, with reference backend.
- // But in general usage Optimize could fail if the hardware at runtime cannot
- // support the model that has been provided.
- std::cerr << "Error: Failed to optimise the input network." << std::endl;
- }
-
- // Load graph into runtime
- run->LoadNetwork(networkIdentifier, std::move(optNet));
-
- //Creates structures for inputs and outputs.
- std::vector<float> inputData(inputSize, 128);
- std::vector<float> outputData(outputSize);
-
- InputTensors inputTensors {{0, ConstTensor(run->GetInputTensorInfo (networkIdentifier, 0), inputData.data())}};
- OutputTensors outputTensors{{0, Tensor(run->GetOutputTensorInfo(networkIdentifier, 0), outputData.data())}};
-
-
- // Execute network
- run->EnqueueWorkload(networkIdentifier, inputTensors, outputTensors);
-
- // SECOND NETWORK: NotFused
-
- // Construct ArmNN network
- NetworkId networkIdentifierNotFused;
- INetworkPtr networkNotFused = INetwork::Create();
- IConnectableLayer *inputLayerNotFused = networkNotFused->AddInputLayer(0);
- IConnectableLayer *convLayerNotFused = networkNotFused->AddConvolution2dLayer(convolution2dDescriptor,
- weights,
- optionalBias,
- "convolution");
- IConnectableLayer *batchNormLayerNotFused = networkNotFused->AddBatchNormalizationLayer(batchNormDescriptor,
- mean,
- variance,
- beta,
- gamma,
- "batchNorm");
- IConnectableLayer *outputLayerNotFused = networkNotFused->AddOutputLayer(0);
- IConnectableLayer *output2LayerNotFused = networkNotFused->AddOutputLayer(1);
-
-
- inputLayerNotFused ->GetOutputSlot(0).Connect(convLayerNotFused ->GetInputSlot(0));
- convLayerNotFused ->GetOutputSlot(0).Connect(batchNormLayerNotFused->GetInputSlot(0));
- batchNormLayerNotFused ->GetOutputSlot(0).Connect(outputLayerNotFused ->GetInputSlot(0));
- convLayerNotFused ->GetOutputSlot(0).Connect(output2LayerNotFused ->GetInputSlot(0));
-
- // Create ArmNN runtime
- IRuntimePtr runNotFused = IRuntime::Create(options);
-
- //Set the tensors in the network.
- inputLayerNotFused ->GetOutputSlot(0).SetTensorInfo(inputInfo);
- convLayerNotFused ->GetOutputSlot(0).SetTensorInfo(outputInfo);
- batchNormLayerNotFused ->GetOutputSlot(0).SetTensorInfo(outputInfo);
-
- // Optimise ArmNN network
- IOptimizedNetworkPtr optNetNotFused = Optimize(*networkNotFused, {Compute::CpuRef}, runNotFused->GetDeviceSpec());
- if (!optNetNotFused)
- {
- // This shouldn't happen for this simple sample, with reference backend.
- // But in general usage Optimize could fail if the hardware at runtime cannot
- // support the model that has been provided.
- std::cerr << "Error: Failed to optimise the input network." << std::endl;
- }
-
- // Load graph into runtime
- runNotFused->LoadNetwork(networkIdentifierNotFused, std::move(optNetNotFused));
-
- //Creates structures for inputs and outputs.
- std::vector<float> inputDataNotFused(inputSize, 128);
- std::vector<float> outputDataNotFused(outputSize);
- std::vector<float> outputData2NotFused(outputSize);
-
- InputTensors inputTensorsNotFused{
- {0, ConstTensor(runNotFused->GetInputTensorInfo(networkIdentifierNotFused, 0), inputDataNotFused.data())}};
- OutputTensors outputTensorsNotFused{
- {0, Tensor(runNotFused->GetOutputTensorInfo(networkIdentifierNotFused, 0), outputDataNotFused.data())},
- {1, Tensor(runNotFused->GetOutputTensorInfo(networkIdentifierNotFused, 1), outputData2NotFused.data())}};
-
- // Execute network
- runNotFused->EnqueueWorkload(networkIdentifierNotFused, inputTensorsNotFused, outputTensorsNotFused);
-
- // Check the output of the fused-convolution matches with the output of the batchNormm in the "NotFused" network
- for (unsigned int n = 0; n < outputData.size(); ++n)
- {
- BOOST_CHECK_CLOSE(outputData[n], outputDataNotFused[n], 0.001);
- }
-}
-
BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/armnn/test/optimizations/FuseBatchNormTests.cpp b/src/armnn/test/optimizations/FuseBatchNormTests.cpp
new file mode 100644
index 0000000000..74cb8f96b7
--- /dev/null
+++ b/src/armnn/test/optimizations/FuseBatchNormTests.cpp
@@ -0,0 +1,163 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "LayersFwd.hpp"
+#include <boost/test/unit_test.hpp>
+
+BOOST_AUTO_TEST_SUITE(Optimizer)
+using namespace armnn;
+
+// This unit test needs the reference backend, so it is only compiled when the reference backend is built
+#if defined(ARMNNREF_ENABLED)
+BOOST_AUTO_TEST_CASE(Fuse_batchNorm_into_Conv2D_Float32_Test) // Runs Conv2D -> BatchNorm on CpuRef twice and checks both networks give the same batchNorm output
+{
+ // Layer descriptors: NHWC data layout, stride 1 in both X and Y
+ Convolution2dDescriptor convolution2dDescriptor;
+ convolution2dDescriptor.m_BiasEnabled = false; // NOTE(review): bias is disabled here although a bias tensor is still passed to AddConvolution2dLayer below - confirm this is intended
+ convolution2dDescriptor.m_DataLayout = DataLayout::NHWC;
+ convolution2dDescriptor.m_StrideX = 1;
+ convolution2dDescriptor.m_StrideY = 1;
+ BatchNormalizationDescriptor batchNormDescriptor;
+ batchNormDescriptor.m_DataLayout = DataLayout::NHWC;
+
+ const unsigned int inputDimensionSizes[] = {1, 4, 4, 3}; // NHWCin
+ const unsigned int weightsDimensionSizes[] = {4, 2, 2, 3}; // CoutHWCin
+ const unsigned int outputDimensionSizes[] = {1, 3, 3, 4}; // NHWCout
+ const unsigned int outputChannelSize[] = {outputDimensionSizes[3]}; // Cout
+
+ TensorInfo inputInfo (4, inputDimensionSizes, DataType::Float32);
+ TensorInfo outputInfo(4, outputDimensionSizes, DataType::Float32);
+
+ std::vector<float> weightsVector = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
+ 11, 12, 13, 14, 15, 16, 17, 18, 19, 110, 111, 112,
+ 21, 22, 23, 24, 25, 26, 27, 28, 29, 210, 211, 212,
+ 31, 32, 33, 34, 35, 36, 37, 38, 39, 310, 311, 312};
+ TensorInfo weightsInfo(4, weightsDimensionSizes, DataType::Float32);
+ ConstTensor weights (weightsInfo, weightsVector);
+ std::vector<float> biasVector = {3.3f, 3.2f, 3.1f, 3.0f};
+ TensorInfo biasInfo(1, outputChannelSize, DataType::Float32);
+ ConstTensor bias (biasInfo, biasVector);
+ Optional<ConstTensor> optionalBias = Optional<ConstTensor>(bias);
+
+ std::vector<float> betaVector = {0.0f, 0.2f, 0.3f, 0.4f};
+ std::vector<float> gammaVector = {0.5f, 0.6f, 0.7f, 0.8f};
+ std::vector<float> meanVector = {0.1f, 0.2f, 0.3f, 0.4f};
+ std::vector<float> varianceVector = {1.0f, 1.1f, 1.2f, 1.3f};
+ ConstTensor beta (TensorInfo(1, outputChannelSize, DataType::Float32), betaVector);
+ ConstTensor gamma (TensorInfo(1, outputChannelSize, DataType::Float32), gammaVector);
+ ConstTensor mean (TensorInfo(1, outputChannelSize, DataType::Float32), meanVector);
+ ConstTensor variance(TensorInfo(1, outputChannelSize, DataType::Float32), varianceVector);
+
+ auto inputSize = inputDimensionSizes[0]*inputDimensionSizes[1]*inputDimensionSizes[2]*inputDimensionSizes[3];
+ auto outputSize = outputDimensionSizes[0]*outputDimensionSizes[1]*outputDimensionSizes[2]*outputDimensionSizes[3];
+
+ // FIRST NETWORK: Fused (the convolution output feeds only the batchNorm layer)
+
+ // Construct ArmNN network: input -> convolution -> batchNorm -> output
+ NetworkId networkIdentifier;
+ INetworkPtr network = INetwork::Create();
+ IConnectableLayer *inputLayer = network->AddInputLayer(0);
+ IConnectableLayer *convLayer = network->AddConvolution2dLayer(convolution2dDescriptor,
+ weights,
+ optionalBias,
+ "convolution");
+ IConnectableLayer *batchNormLayer = network->AddBatchNormalizationLayer(batchNormDescriptor,
+ mean,
+ variance,
+ beta,
+ gamma,
+ "batchNorm");
+ IConnectableLayer *outputLayer = network->AddOutputLayer(0);
+
+ inputLayer ->GetOutputSlot(0).Connect(convLayer ->GetInputSlot(0));
+ convLayer ->GetOutputSlot(0).Connect(batchNormLayer->GetInputSlot(0));
+ batchNormLayer ->GetOutputSlot(0).Connect(outputLayer ->GetInputSlot(0));
+
+ // Set the tensor info on the output slot of each layer.
+ inputLayer ->GetOutputSlot(0).SetTensorInfo(inputInfo);
+ convLayer ->GetOutputSlot(0).SetTensorInfo(outputInfo);
+ batchNormLayer ->GetOutputSlot(0).SetTensorInfo(outputInfo);
+
+ // Create ArmNN runtime
+ IRuntime::CreationOptions options; // default options
+ IRuntimePtr run = IRuntime::Create(options);
+
+ // Optimise ArmNN network for the reference backend (CpuRef)
+ IOptimizedNetworkPtr optNet = Optimize(*network, {Compute::CpuRef}, run->GetDeviceSpec()); // NOTE(review): optNet is not checked before being moved into LoadNetwork
+
+ // Load graph into runtime; any status other than Success is reported as a test failure
+ BOOST_TEST(run->LoadNetwork(networkIdentifier, std::move(optNet)) == Status::Success);
+
+ // Create the input buffer (every element set to 128) and the output buffer.
+ std::vector<float> inputData(inputSize, 128);
+ std::vector<float> outputData(outputSize);
+
+ InputTensors inputTensors {{0, ConstTensor(run->GetInputTensorInfo (networkIdentifier, 0), inputData.data())}};
+ OutputTensors outputTensors{{0, Tensor(run->GetOutputTensorInfo(networkIdentifier, 0), outputData.data())}};
+
+ // Execute network (NOTE(review): the result of EnqueueWorkload, if any, is not inspected)
+ run->EnqueueWorkload(networkIdentifier, inputTensors, outputTensors);
+
+ // SECOND NETWORK: NotFused (the convolution output also feeds a second output layer)
+
+ // Construct ArmNN network: same layers as above plus an extra output layer with id 1
+ NetworkId networkIdentifierNotFused;
+ INetworkPtr networkNotFused = INetwork::Create();
+ IConnectableLayer *inputLayerNotFused = networkNotFused->AddInputLayer(0);
+ IConnectableLayer *convLayerNotFused = networkNotFused->AddConvolution2dLayer(convolution2dDescriptor,
+ weights,
+ optionalBias,
+ "convolution");
+ IConnectableLayer *batchNormLayerNotFused = networkNotFused->AddBatchNormalizationLayer(batchNormDescriptor,
+ mean,
+ variance,
+ beta,
+ gamma,
+ "batchNorm");
+ IConnectableLayer *outputLayerNotFused = networkNotFused->AddOutputLayer(0);
+ IConnectableLayer *output2LayerNotFused = networkNotFused->AddOutputLayer(1);
+
+ inputLayerNotFused ->GetOutputSlot(0).Connect(convLayerNotFused ->GetInputSlot(0));
+ convLayerNotFused ->GetOutputSlot(0).Connect(batchNormLayerNotFused->GetInputSlot(0));
+ batchNormLayerNotFused ->GetOutputSlot(0).Connect(outputLayerNotFused ->GetInputSlot(0));
+ convLayerNotFused ->GetOutputSlot(0).Connect(output2LayerNotFused ->GetInputSlot(0));
+
+ // Set the tensor info on the output slot of each layer.
+ inputLayerNotFused ->GetOutputSlot(0).SetTensorInfo(inputInfo);
+ convLayerNotFused ->GetOutputSlot(0).SetTensorInfo(outputInfo);
+ batchNormLayerNotFused ->GetOutputSlot(0).SetTensorInfo(outputInfo);
+
+ // Create a separate ArmNN runtime, reusing the same default options
+ IRuntimePtr runNotFused = IRuntime::Create(options);
+
+ // Optimise ArmNN network for the reference backend (CpuRef)
+ IOptimizedNetworkPtr optNetNotFused = Optimize(*networkNotFused, {Compute::CpuRef}, runNotFused->GetDeviceSpec());
+
+ // Load graph into runtime; any status other than Success is reported as a test failure
+ BOOST_TEST(runNotFused->LoadNetwork(networkIdentifierNotFused, std::move(optNetNotFused)) == Status::Success);
+
+ // Create the input buffer (every element set to 128) and one buffer per output layer.
+ std::vector<float> inputDataNotFused(inputSize, 128);
+ std::vector<float> outputDataNotFused(outputSize);
+ std::vector<float> outputData2NotFused(outputSize); // bound to output id 1 (the convolution output); not compared below
+
+ InputTensors inputTensorsNotFused{
+ {0, ConstTensor(runNotFused->GetInputTensorInfo(networkIdentifierNotFused, 0), inputDataNotFused.data())}};
+ OutputTensors outputTensorsNotFused{
+ {0, Tensor(runNotFused->GetOutputTensorInfo(networkIdentifierNotFused, 0), outputDataNotFused.data())},
+ {1, Tensor(runNotFused->GetOutputTensorInfo(networkIdentifierNotFused, 1), outputData2NotFused.data())}};
+
+ // Execute network (NOTE(review): the result of EnqueueWorkload, if any, is not inspected)
+ runNotFused->EnqueueWorkload(networkIdentifierNotFused, inputTensorsNotFused, outputTensorsNotFused);
+
+ // Check that the output of the first network matches the batchNorm output (output id 0) of the "NotFused" network
+ for (unsigned int n = 0; n < outputData.size(); ++n)
+ {
+ BOOST_CHECK_CLOSE(outputData[n], outputDataNotFused[n], 0.001); // the third argument of BOOST_CHECK_CLOSE is a tolerance in percent
+ }
+}
+#endif
+
+BOOST_AUTO_TEST_SUITE_END()