author    Mike Kelly <mike.kelly@arm.com>  2020-11-05 15:44:56 +0000
committer Jim Flynn <jim.flynn@arm.com>  2020-11-08 22:50:50 +0000
commit    90231b8c9f680d323e4b93dcd0820a47925e6d24 (patch)
tree      c34ace59ad3dd4757650a7b5d8f21a996c31cc88
parent    82490eee66b733ef29b3697b21e10a4c8be95233 (diff)
download  armnn-90231b8c9f680d323e4b93dcd0820a47925e6d24.tar.gz
IVGCVSW-5315 Create FuseBatchNorm class
Signed-off-by: Teresa Charlin <teresa.charlinreyes@arm.com>
Signed-off-by: Mike Kelly <mike.kelly@arm.com>
Change-Id: Id0625c58dbeea79874bf986b70d136ed9390bf83
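For reference, the algebra the pass relies on (an editorial sketch, not part of the commit): with per-channel batch-norm parameters gamma, beta, mean mu and variance sigma^2, a batch normalisation following a convolution W * x + b folds into the convolution itself:

\[
\mathrm{BN}(W \ast x + b)
= \gamma \cdot \frac{(W \ast x + b) - \mu}{\sqrt{\sigma^2 + \epsilon}} + \beta
= W' \ast x + b',
\qquad
W' = \frac{\gamma}{\sqrt{\sigma^2 + \epsilon}}\,W,
\quad
b' = \frac{\gamma\,(b - \mu)}{\sqrt{\sigma^2 + \epsilon}} + \beta,
\]

applied per output channel. These are exactly the fusedWeights and fusedBias computations in FuseBatchNorm.hpp below.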
-rw-r--r--  src/armnn/Network.cpp                                 5
-rw-r--r--  src/armnn/optimizations/FuseBatchNorm.hpp           125
-rw-r--r--  src/armnn/test/OptimizerTests.cpp                    69
-rw-r--r--  src/armnn/test/optimizations/FuseBatchNormTests.cpp 326
4 files changed, 354 insertions(+), 171 deletions(-)
diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp
index 6578b8445f..347e39b4c8 100644
--- a/src/armnn/Network.cpp
+++ b/src/armnn/Network.cpp
@@ -1055,7 +1055,10 @@ IOptimizedNetworkPtr Optimize(const INetwork& inNetwork,
FoldPadIntoConvolution2d(),
PermuteAndBatchToSpaceAsDepthToSpace(),
TransposeAndBatchToSpaceAsDepthToSpace(),
- FuseBatchNormIntoConvolution2D()));
+ FuseBatchNormIntoConvolution2DFloat32(),
+ FuseBatchNormIntoConvolution2DFloat16(),
+ FuseBatchNormIntoDepthwiseConvolution2DFloat32(),
+ FuseBatchNormIntoDepthwiseConvolution2DFloat16()));
// If Fp32 to Fp16 optimization is set convert Fp32 network to Fp16
if (options.m_ReduceFp32ToFp16)
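As context for the hunk above (an editorial sketch, not part of the patch): the four pass objects can also be run directly on a graph via Optimizer::Pass, exactly as the unit tests below do. Include paths are approximate.

// Sketch: running the new fusion passes standalone on an armnn::Graph.
#include <Optimizer.hpp>            // armnn::Optimizer, armnn::MakeOptimizations
#include <optimizations/All.hpp>    // the Fuse* pass definitions

void RunBatchNormFusion(armnn::Graph& graph)
{
    using namespace armnn::optimizations;
    // Each pass matches an exclusive Convolution2d/DepthwiseConvolution2d ->
    // BatchNormalization connection of one data type and replaces the pair
    // with a single convolution holding the fused weights and bias.
    armnn::Optimizer::Pass(graph, armnn::MakeOptimizations(
        FuseBatchNormIntoConvolution2DFloat32(),
        FuseBatchNormIntoConvolution2DFloat16(),
        FuseBatchNormIntoDepthwiseConvolution2DFloat32(),
        FuseBatchNormIntoDepthwiseConvolution2DFloat16()));
}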
diff --git a/src/armnn/optimizations/FuseBatchNorm.hpp b/src/armnn/optimizations/FuseBatchNorm.hpp
index e8e8c5d77f..9d25379930 100644
--- a/src/armnn/optimizations/FuseBatchNorm.hpp
+++ b/src/armnn/optimizations/FuseBatchNorm.hpp
@@ -7,13 +7,15 @@
#include "Optimization.hpp"
#include <armnnUtils/DataLayoutIndexed.hpp>
+#include <ResolveType.hpp>
namespace armnn
{
namespace optimizations
{
-template <typename ConvLayer>
+template <typename ConvLayer, armnn::DataType ArmnnType,
+ typename T = armnn::ResolveType<ArmnnType>>
class FuseBatchNorm
{
public:
@@ -27,10 +29,12 @@ public:
Layer& base = connection.GetConnectedOutputSlot()->GetOwningLayer();
Layer& child = connection.GetOwningLayer();
- ARMNN_ASSERT(base.GetType() == LayerType::Convolution2d);
+ bool depthwise = (base.GetType() == LayerType::DepthwiseConvolution2d);
+
+ ARMNN_ASSERT(base.GetType() == LayerType::Convolution2d || depthwise);
ARMNN_ASSERT(child.GetType() == LayerType::BatchNormalization);
- if (base.GetDataType() == DataType::Float32 && child.GetDataType() == DataType::Float32)
+ if (base.GetDataType() == ArmnnType && child.GetDataType() == ArmnnType)
{
OutputSlot* parentOut = base.GetInputSlot(0).GetConnectedOutputSlot();
auto convLayer = PolymorphicDowncast<ConvLayer*>(&base);
@@ -47,58 +51,92 @@ public:
ConstTensor varTensor(batchNormLayer->m_Variance->GetTensorInfo(), batchNormLayer->m_Variance->Map(true));
auto convDescriptor = convLayer->GetParameters();
- ConstTensor weightsTensor(convLayer->m_Weight->GetTensorInfo(), convLayer->m_Weight->Map(true));
+ auto weightsInfo(convLayer->m_Weight->GetTensorInfo());
+ ConstTensor weightsTensor(weightsInfo, convLayer->m_Weight->Map(true));
armnnUtils::DataLayoutIndexed dataLayout(convDescriptor.m_DataLayout);
- auto weightsShape = convLayer->m_Weight->GetTensorInfo().GetShape();
- const unsigned int outputChannels = weightsShape[0];
- const unsigned int inputChannels = weightsShape[dataLayout.GetChannelsIndex()];
- const unsigned int weightsHeight = weightsShape[dataLayout.GetHeightIndex()];
- const unsigned int weightsWidth = weightsShape[dataLayout.GetWidthIndex()];
-
- const auto* weightsBuffer = static_cast<const float*>(weightsTensor.GetMemoryArea());
- const auto* betaBuffer = static_cast<const float*>(betaTensor.GetMemoryArea());
- const auto* gammaBuffer = static_cast<const float*>(gammaTensor.GetMemoryArea());
- const auto* meanBuffer = static_cast<const float*>(meanTensor.GetMemoryArea());
- const auto* varBuffer = static_cast<const float*>(varTensor.GetMemoryArea());
-
- std::vector<float> weightsVector (weightsBuffer, weightsBuffer + weightsTensor.GetNumElements());
- std::vector<float> betaVector (betaBuffer, betaBuffer + betaTensor.GetNumElements());
- std::vector<float> gammaVector (gammaBuffer, gammaBuffer + gammaTensor.GetNumElements());
- std::vector<float> meanVector (meanBuffer, meanBuffer + meanTensor.GetNumElements());
- std::vector<float> varianceVector(varBuffer, varBuffer + varTensor.GetNumElements());
+ auto weightsShape = weightsInfo.GetShape();
+ const unsigned int depthMultiplier = depthwise ? weightsShape[0] : 1;
+ const unsigned int inputChannels = depthwise ? weightsShape[1] :
+ weightsShape[dataLayout.GetChannelsIndex()];
+ const unsigned int outputChannels = depthwise ? inputChannels * depthMultiplier : weightsShape[0];
+ const unsigned int weightsHeight = depthwise ? weightsShape[2] :
+ weightsShape[dataLayout.GetHeightIndex()];
+ const unsigned int weightsWidth = depthwise ? weightsShape[3] :
+ weightsShape[dataLayout.GetWidthIndex()];
+
+ const auto* weightsBuffer = static_cast<const T*>(weightsTensor.GetMemoryArea());
+ const auto* betaBuffer = static_cast<const T*>(betaTensor.GetMemoryArea());
+ const auto* gammaBuffer = static_cast<const T*>(gammaTensor.GetMemoryArea());
+ const auto* meanBuffer = static_cast<const T*>(meanTensor.GetMemoryArea());
+ const auto* varBuffer = static_cast<const T*>(varTensor.GetMemoryArea());
+
+ std::vector<T> weightsVector (weightsBuffer, weightsBuffer + weightsTensor.GetNumElements());
+ std::vector<T> betaVector (betaBuffer, betaBuffer + betaTensor.GetNumElements());
+ std::vector<T> gammaVector (gammaBuffer, gammaBuffer + gammaTensor.GetNumElements());
+ std::vector<T> meanVector (meanBuffer, meanBuffer + meanTensor.GetNumElements());
+ std::vector<T> varianceVector(varBuffer, varBuffer + varTensor.GetNumElements());
// fusedWeights = (gamma * weights) / sqrt(variance + epsilon)
- std::vector<float> fusedWeightsVector(weightsVector.size());
+ std::vector<T> fusedWeightsVector(weightsVector.size());
+ unsigned int depthwiseMultiplierIdx = 0;
- unsigned int i = 0;
- for (unsigned int cOut = 0; cOut < outputChannels; ++cOut)
+ for (unsigned int cInput = 0; cInput < inputChannels; ++cInput)
{
- auto mult = gammaVector[cOut] / sqrtf (varianceVector[cOut] + epsilon);
- for (unsigned int cInput = 0; cInput < inputChannels; ++cInput)
+ for (unsigned int cOut = 0; cOut < outputChannels; ++cOut)
{
+ T mult = gammaVector[cOut] / static_cast<T>(sqrtf(varianceVector[cOut] + epsilon));
+
+ if (depthwise)
+ {
+ cInput = cOut / depthMultiplier;
+ depthwiseMultiplierIdx = cOut % depthMultiplier;
+ }
+
for (unsigned int h = 0; h < weightsHeight; ++h)
{
for (unsigned int w = 0; w < weightsWidth; ++w)
{
- fusedWeightsVector[i] = mult * weightsVector[i];
- i++;
+ unsigned int weightsIdx = 0;
+
+ if (depthwise)
+ {
+ weightsIdx = depthwiseMultiplierIdx * weightsWidth * weightsHeight * inputChannels +
+ cInput * weightsWidth * weightsHeight +
+ h * weightsWidth +
+ w;
+ }
+ else if (convDescriptor.m_DataLayout == DataLayout::NHWC)
+ {
+ weightsIdx = cOut * weightsHeight * weightsWidth * inputChannels +
+ h * weightsWidth * inputChannels +
+ w * inputChannels +
+ cInput;
+ }
+ else
+ {
+ weightsIdx = cOut * weightsWidth * weightsHeight * inputChannels +
+ cInput * weightsWidth * weightsHeight +
+ h * weightsWidth +
+ w;
+ }
+ fusedWeightsVector[weightsIdx] = mult * weightsVector[weightsIdx];
}
}
}
}
- ConstTensor fusedWeightsTensor(convLayer->m_Weight->GetTensorInfo(), fusedWeightsVector);
+ ConstTensor fusedWeightsTensor(weightsInfo, fusedWeightsVector);
// fusedBias = (gamma * (bias - mean)) / sqrt(variance + epsilon) + beta
- std::vector<float> fusedBiasVector(outputChannels);
+ std::vector<T> fusedBiasVector(outputChannels);
if (convDescriptor.m_BiasEnabled)
{
ARMNN_ASSERT_MSG(convLayer->m_Bias != nullptr,
"FuseBatchNorm: Bias data should not be null if bias is enabled.");
ConstTensor biasTensor(convLayer->m_Bias->GetTensorInfo(), convLayer->m_Bias->Map(true));
- const auto* biasBuffer = static_cast<const float*>(biasTensor.GetMemoryArea());
- std::vector<float> biasVector(biasBuffer, biasBuffer + biasTensor.GetNumElements());
+ const auto* biasBuffer = static_cast<const T*>(biasTensor.GetMemoryArea());
+ std::vector<T> biasVector(biasBuffer, biasBuffer + biasTensor.GetNumElements());
for (unsigned int cOut = 0; cOut < outputChannels; ++cOut)
{
@@ -109,7 +147,7 @@ public:
else
{
convDescriptor.m_BiasEnabled = true;
- std::vector<float> biasVector(outputChannels, 0);
+ std::vector<T> biasVector(outputChannels, T(0));
for (unsigned int cOut = 0; cOut < outputChannels; ++cOut)
{
@@ -117,7 +155,7 @@ public:
sqrtf(varianceVector[cOut] + epsilon)) + betaVector[cOut];
}
}
- ConstTensor fusedBiasTensor(TensorInfo({outputChannels}, DataType::Float32), fusedBiasVector);
+ ConstTensor fusedBiasTensor(TensorInfo({outputChannels}, ArmnnType), fusedBiasVector);
// Insert the new convolution layer that has batch norm parameters fused into
const std::string name = std::string("fused-") + child.GetName() + std::string("-into-") + base.GetName();
@@ -143,10 +181,25 @@ protected:
~FuseBatchNorm() = default;
};
-using FuseBatchNormIntoConvolution2D =
+using FuseBatchNormIntoConvolution2DFloat32 =
OptimizeForExclusiveConnection<Convolution2dLayer,
BatchNormalizationLayer,
- FuseBatchNorm<Convolution2dLayer>>;
+ FuseBatchNorm<Convolution2dLayer, armnn::DataType::Float32>>;
+
+using FuseBatchNormIntoConvolution2DFloat16 =
+ OptimizeForExclusiveConnection<Convolution2dLayer,
+ BatchNormalizationLayer,
+ FuseBatchNorm<Convolution2dLayer, armnn::DataType::Float16>>;
+
+using FuseBatchNormIntoDepthwiseConvolution2DFloat32 =
+ OptimizeForExclusiveConnection<DepthwiseConvolution2dLayer,
+ BatchNormalizationLayer,
+ FuseBatchNorm<DepthwiseConvolution2dLayer, armnn::DataType::Float32>>;
+
+using FuseBatchNormIntoDepthwiseConvolution2DFloat16 =
+ OptimizeForExclusiveConnection<DepthwiseConvolution2dLayer,
+ BatchNormalizationLayer,
+ FuseBatchNorm<DepthwiseConvolution2dLayer, armnn::DataType::Float16>>;
} // namespace optimizations
} // namespace armnn
\ No newline at end of file
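The subtlest part of the pass above is the flat-index arithmetic over the three supported weight layouts. A self-contained sketch (editorial, not ArmNN code) that reproduces it, so the bounds can be sanity-checked in isolation:

#include <cassert>

// Flat index of the weight tap (cOut, cInput, h, w) for the three layouts
// handled by FuseBatchNorm: depthwise [M, Cin, H, W], NHWC [Cout, H, W, Cin]
// and NCHW [Cout, Cin, H, W].
unsigned int WeightsIdx(bool depthwise, bool nhwc,
                        unsigned int depthMultiplier, unsigned int inputChannels,
                        unsigned int weightsHeight, unsigned int weightsWidth,
                        unsigned int cOut, unsigned int cInput,
                        unsigned int h, unsigned int w)
{
    if (depthwise)
    {
        // Output channel cOut decomposes into (multiplier m, input channel).
        unsigned int m = cOut % depthMultiplier;
        cInput         = cOut / depthMultiplier;
        return m * weightsHeight * weightsWidth * inputChannels +
               cInput * weightsHeight * weightsWidth +
               h * weightsWidth + w;
    }
    if (nhwc)
    {
        return cOut * weightsHeight * weightsWidth * inputChannels +
               h * weightsWidth * inputChannels +
               w * inputChannels + cInput;
    }
    // NCHW
    return cOut * weightsHeight * weightsWidth * inputChannels +
           cInput * weightsHeight * weightsWidth +
           h * weightsWidth + w;
}

int main()
{
    // The depthwise shape used by the tests below: M=4, Cin=3, H=W=2,
    // i.e. 12 output channels over a 48-element weights buffer.
    for (unsigned int cOut = 0; cOut < 12; ++cOut)
        for (unsigned int h = 0; h < 2; ++h)
            for (unsigned int w = 0; w < 2; ++w)
                assert(WeightsIdx(true, false, 4, 3, 2, 2, cOut, 0, h, w) < 48);
    return 0;
}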
diff --git a/src/armnn/test/OptimizerTests.cpp b/src/armnn/test/OptimizerTests.cpp
index 8845dae6c8..0179589bf4 100644
--- a/src/armnn/test/OptimizerTests.cpp
+++ b/src/armnn/test/OptimizerTests.cpp
@@ -255,8 +255,6 @@ BOOST_AUTO_TEST_CASE(InsertConvertersTest)
&IsLayerOfType<armnn::OutputLayer>));
}
-
-
void CreateConvolution2dGraph(Graph &graph, const unsigned int* inputShape,
const unsigned int* weightsShape, const unsigned int* outputShape,
DataLayout dataLayout = DataLayout::NCHW)
@@ -308,8 +306,8 @@ BOOST_AUTO_TEST_CASE(Conv2dValidateTensorShapesFromInputsNhwc)
}
void CreateDepthwiseConvolution2dGraph(Graph &graph, const unsigned int* inputShape,
- const unsigned int* weightsShape, const unsigned int* outputShape,
- DataLayout dataLayout = DataLayout::NCHW)
+ const unsigned int* weightsShape, const unsigned int* outputShape,
+ DataLayout dataLayout = DataLayout::NCHW)
{
armnn::TensorInfo inputInfo(4, inputShape, DataType::Float32);
armnn::TensorInfo outputInfo(4, outputShape, DataType::Float32);
@@ -357,7 +355,7 @@ BOOST_AUTO_TEST_CASE(DepthwiseConv2dValidateTensorShapesFromInputsNhwc)
BOOST_CHECK_NO_THROW(graph.InferTensorInfos());
}
-void CreatePooling2dGraph(Graph &graph, const unsigned int* inputShape, const unsigned int* outputShape,
+void CreatePooling2dGraph(Graph& graph, const unsigned int* inputShape, const unsigned int* outputShape,
DataLayout dataLayout = DataLayout::NCHW)
{
armnn::TensorInfo inputInfo(4, inputShape, DataType::Float32);
@@ -405,7 +403,7 @@ BOOST_AUTO_TEST_CASE(Pooling2dValidateTensorShapesFromInputsNhwc)
BOOST_CHECK_NO_THROW(graph.InferTensorInfos());
}
-void CreateResizeBilinearGraph(Graph &graph, const unsigned int* inputShape, const unsigned int* outputShape,
+void CreateResizeBilinearGraph(Graph& graph, const unsigned int* inputShape, const unsigned int* outputShape,
DataLayout dataLayout = DataLayout::NCHW)
{
TensorInfo inputInfo(4, inputShape, DataType::Float32);
@@ -448,7 +446,6 @@ BOOST_AUTO_TEST_CASE(ResizeBilinearValidateTensorShapesFromInputsNhwc)
BOOST_CHECK_NO_THROW(graph.InferTensorInfos());
}
-
void CreateGatherGraph(Graph& graph, const armnn::TensorInfo& paramsInfo, const armnn::TensorInfo& indicesInfo,
const armnn::TensorInfo& outputInfo)
{
@@ -547,7 +544,6 @@ BOOST_AUTO_TEST_CASE(FoldPadLayerIntoConvolution2dLayer)
const unsigned int weightsShape[] = { 1, 2, 3, 3 };
const unsigned int outputShape[] = { 1, 2, 1, 1 };
-
armnn::TensorInfo inputInfo(4, inputShape, DataType::Float32);
armnn::TensorInfo paddedInfo(4, paddedShape, DataType::Float32);
armnn::TensorInfo outputInfo(4, outputShape, DataType::Float32);
@@ -628,9 +624,6 @@ BOOST_AUTO_TEST_CASE(FoldPadLayerIntoConvolution2dLayer)
&IsLayerOfType<armnn::OutputLayer>));
}
-
-
-
class MockLayerSupport : public LayerSupportBase {
public:
bool IsInputSupported(const TensorInfo& /*input*/,
@@ -686,7 +679,6 @@ public:
};
};
-
BOOST_AUTO_TEST_CASE(BackendHintTest)
{
class TestBackendAssignment : public LayerVisitorBase<VisitorNoThrowPolicy>
@@ -764,7 +756,6 @@ BOOST_AUTO_TEST_CASE(BackendHintTest)
input->GetOutputSlot(0).Connect(act->GetInputSlot(0));
act->GetOutputSlot(0).Connect(output->GetInputSlot(0));
-
auto optNet = IOptimizedNetworkPtr(new OptimizedNetwork(std::move(graph)), &IOptimizedNetwork::Destroy);
OptimizedNetwork* optNetObjPtr = PolymorphicDowncast<OptimizedNetwork*>(optNet.get());
@@ -772,7 +763,6 @@ BOOST_AUTO_TEST_CASE(BackendHintTest)
// Get the optimized graph
Graph& optGraph = optNetObjPtr->GetGraph();
-
std::vector<BackendId> prefs{"MockBackend", "CustomBackend"};
BackendIdSet availableBackends = {"CustomBackend", "MockBackend"};
@@ -799,13 +789,13 @@ BOOST_AUTO_TEST_CASE(BackendHintTest)
}
// Tests that OptimizeForExclusiveConnections works, fusing when needed, using BatchNorm fusing as example
-BOOST_AUTO_TEST_CASE(OptimizeForExclusiveConnections_fuse_Test)
+BOOST_AUTO_TEST_CASE(OptimizeForExclusiveConnectionsFuseTest)
{
using namespace armnn;
// Define layers information
Convolution2dDescriptor convolution2dDescriptor;
convolution2dDescriptor.m_BiasEnabled = false;
- convolution2dDescriptor.m_DataLayout = DataLayout::NHWC;
+ convolution2dDescriptor.m_DataLayout = DataLayout::NHWC;
BatchNormalizationDescriptor batchNormDescriptor;
batchNormDescriptor.m_DataLayout = DataLayout::NHWC;
@@ -814,32 +804,31 @@ BOOST_AUTO_TEST_CASE(OptimizeForExclusiveConnections_fuse_Test)
const unsigned int outputDimensionSizes[] = {1, 3, 3, 1}; // NHWCout
const unsigned int outputChannelSize[] = {outputDimensionSizes[3]}; // Cout
- TensorInfo inputInfo (4, inputDimensionSizes, DataType::Float32);
+ TensorInfo inputInfo(4, inputDimensionSizes, DataType::Float32);
TensorInfo outputInfo(4, outputDimensionSizes, DataType::Float32);
- std::vector<float> weightsVector = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
- ConstTensor weights (TensorInfo(4, weightsDimensionSizes, DataType::Float32), weightsVector);
-
+ std::vector<float> weightsVector = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
+ ConstTensor weights(TensorInfo(4, weightsDimensionSizes, DataType::Float32), weightsVector);
std::vector<float> betaVector = {0.1f};
std::vector<float> gammaVector = {0.5f};
std::vector<float> meanVector = {0};
std::vector<float> varianceVector = {1};
- ConstTensor beta (TensorInfo(1, outputChannelSize, DataType::Float32), betaVector);
- ConstTensor gamma (TensorInfo(1, outputChannelSize, DataType::Float32), gammaVector);
- ConstTensor mean (TensorInfo(1, outputChannelSize, DataType::Float32), meanVector);
- ConstTensor variance(TensorInfo(1, outputChannelSize, DataType::Float32), varianceVector);
+ ConstTensor beta(TensorInfo(1, outputChannelSize, DataType::Float32), betaVector);
+ ConstTensor gamma(TensorInfo(1, outputChannelSize, DataType::Float32), gammaVector);
+ ConstTensor mean(TensorInfo(1, outputChannelSize, DataType::Float32), meanVector);
+ ConstTensor variance(TensorInfo(1, outputChannelSize, DataType::Float32), varianceVector);
// Define the network
Graph graph;
- auto input = graph.AddLayer<InputLayer>(0, "input");
- auto conv = graph.AddLayer<Convolution2dLayer>(convolution2dDescriptor, "convolution");
- auto batchNorm = graph.AddLayer<BatchNormalizationLayer>(batchNormDescriptor, "batchNorm");
- auto output = graph.AddLayer<OutputLayer>(0, "output");
+ auto input = graph.AddLayer<InputLayer>(0, "input");
+ auto conv = graph.AddLayer<Convolution2dLayer>(convolution2dDescriptor, "convolution");
+ auto batchNorm = graph.AddLayer<BatchNormalizationLayer>(batchNormDescriptor, "batchNorm");
+ auto output = graph.AddLayer<OutputLayer>(0, "output");
// Set layer information
- input ->GetOutputSlot().SetTensorInfo(inputInfo);
- conv ->GetOutputSlot().SetTensorInfo(outputInfo);
+ input->GetOutputSlot().SetTensorInfo(inputInfo);
+ conv->GetOutputSlot().SetTensorInfo(outputInfo);
batchNorm->GetOutputSlot().SetTensorInfo(outputInfo);
conv ->m_Weight = std::make_unique<ScopedCpuTensorHandle>(weights);
batchNorm->m_Beta = std::make_unique<ScopedCpuTensorHandle>(beta);
@@ -849,8 +838,8 @@ BOOST_AUTO_TEST_CASE(OptimizeForExclusiveConnections_fuse_Test)
if (convolution2dDescriptor.m_BiasEnabled)
{
std::vector<float> biasVector = {11};
- ConstTensor bias (TensorInfo(1, outputChannelSize, DataType::Float32), biasVector);
- conv->m_Bias = std::make_unique<ScopedCpuTensorHandle>(bias);
+ ConstTensor bias(TensorInfo(1, outputChannelSize, DataType::Float32), biasVector);
+ conv->m_Bias = std::make_unique<ScopedCpuTensorHandle>(bias);
}
// Connect layers
@@ -867,12 +856,12 @@ BOOST_AUTO_TEST_CASE(OptimizeForExclusiveConnections_fuse_Test)
&IsLayerOfType<OutputLayer>));
// Optimize graph
- armnn::Optimizer::Pass(graph, MakeOptimizations(FuseBatchNormIntoConvolution2D()));
+ armnn::Optimizer::Pass(graph, MakeOptimizations(FuseBatchNormIntoConvolution2DFloat32()));
- auto checkFusedConv2d = [ ](const armnn::Layer* const layer) -> bool
+ auto checkFusedConv2d = [](const armnn::Layer* const layer) -> bool
{
return IsLayerOfType<armnn::Convolution2dLayer>(layer) &&
- (layer->GetNameStr() == "fused-batchNorm-into-convolution");
+ (layer->GetNameStr() == "fused-batchNorm-into-convolution");
};
BOOST_CHECK(3 == graph.GetNumLayers());
@@ -884,11 +873,11 @@ BOOST_AUTO_TEST_CASE(OptimizeForExclusiveConnections_fuse_Test)
}
// Tests that OptimizeForExclusiveConnections works, not fusing when not needed, using BatchNorm fusing as example
-BOOST_AUTO_TEST_CASE(OptimizeForExclusiveConnections_notFuse_Test)
+BOOST_AUTO_TEST_CASE(OptimizeForExclusiveConnectionsWithoutFuseTest)
{
// Define the network
- Graph graph;
- Convolution2dDescriptor convolution2dDescriptor;
+ Graph graph;
+ Convolution2dDescriptor convolution2dDescriptor;
BatchNormalizationDescriptor batchNormDescriptor;
auto input = graph.AddLayer<InputLayer>(0, "input");
@@ -912,7 +901,7 @@ BOOST_AUTO_TEST_CASE(OptimizeForExclusiveConnections_notFuse_Test)
&IsLayerOfType<armnn::OutputLayer>,
&IsLayerOfType<armnn::OutputLayer>));
// Optimize graph
- armnn::Optimizer::Pass(graph, armnn::MakeOptimizations(FuseBatchNormIntoConvolution2D()));
+ armnn::Optimizer::Pass(graph, armnn::MakeOptimizations(FuseBatchNormIntoConvolution2DFloat32()));
BOOST_CHECK(5 == graph.GetNumLayers());
BOOST_TEST(CheckSequence(graph.cbegin(),
@@ -923,4 +912,4 @@ BOOST_AUTO_TEST_CASE(OptimizeForExclusiveConnections_notFuse_Test)
&IsLayerOfType<armnn::OutputLayer>,
&IsLayerOfType<armnn::OutputLayer>));
}
-BOOST_AUTO_TEST_SUITE_END()
+BOOST_AUTO_TEST_SUITE_END()
\ No newline at end of file
diff --git a/src/armnn/test/optimizations/FuseBatchNormTests.cpp b/src/armnn/test/optimizations/FuseBatchNormTests.cpp
index 74cb8f96b7..bf47c577a4 100644
--- a/src/armnn/test/optimizations/FuseBatchNormTests.cpp
+++ b/src/armnn/test/optimizations/FuseBatchNormTests.cpp
@@ -4,17 +4,79 @@
//
#include "LayersFwd.hpp"
+
+#include <Network.hpp>
+#include <ResolveType.hpp>
+#include <armnn/INetwork.hpp>
+#include <test/TestUtils.hpp>
+
#include <boost/test/unit_test.hpp>
-BOOST_AUTO_TEST_SUITE(Optimizer)
using namespace armnn;
-// This unit test needs the reference backend, it's not available if the reference backend is not built
-#if defined(ARMNNREF_ENABLED)
-BOOST_AUTO_TEST_CASE(Fuse_batchNorm_into_Conv2D_Float32_Test)
+BOOST_AUTO_TEST_SUITE(Optimizer)
+
+namespace
+{
+
+class Conv2dTest
+{
+public:
+ using ConvDescriptorType = armnn::Convolution2dDescriptor;
+ using ConvLayerType = armnn::Convolution2dLayer;
+
+ static IConnectableLayer *AddConvolution(INetwork *network,
+ const Convolution2dDescriptor &descriptor,
+ const ConstTensor &weights,
+ const Optional<ConstTensor> &biases,
+ const char *name)
+ {
+ return network->AddConvolution2dLayer(descriptor, weights, biases, name);
+ }
+};
+
+class DepthwiseConv2dTest
+{
+public:
+ using ConvDescriptorType = armnn::DepthwiseConvolution2dDescriptor;
+ using ConvLayerType = armnn::DepthwiseConvolution2dLayer;
+
+ static IConnectableLayer *AddConvolution(INetwork *network,
+ const DepthwiseConvolution2dDescriptor &descriptor,
+ const ConstTensor &weights,
+ const Optional<ConstTensor> &biases,
+ const char *name)
+ {
+ return network->AddDepthwiseConvolution2dLayer(descriptor, weights, biases, name);
+ }
+};
+
+template<typename T>
+std::vector<T> GetVector(unsigned int size, float initial, float increment)
+{
+ // Fills { initial, initial + increment, initial + 2*increment, ... }.
+ std::vector<T> vector(size);
+
+ for (unsigned int i = 0; i < size; ++i)
+ {
+ vector[i] = T(initial + (increment * static_cast<float>(i)));
+ }
+ return vector;
+}
+
+} // namespace
+
+template <typename Conv2dTest,
+ armnn::DataType ArmnnType,
+ typename ConvDescriptorType = typename Conv2dTest::ConvDescriptorType,
+ typename T = armnn::ResolveType<ArmnnType>>
+INetworkPtr CreateNetwork(bool depthwise, bool preventFusing)
{
// Define layers information
- Convolution2dDescriptor convolution2dDescriptor;
+ ConvDescriptorType convolution2dDescriptor;
convolution2dDescriptor.m_BiasEnabled = false;
convolution2dDescriptor.m_DataLayout = DataLayout::NHWC;
convolution2dDescriptor.m_StrideX = 1;
@@ -22,127 +84,181 @@ BOOST_AUTO_TEST_CASE(Fuse_batchNorm_into_Conv2D_Float32_Test)
BatchNormalizationDescriptor batchNormDescriptor;
batchNormDescriptor.m_DataLayout = DataLayout::NHWC;
- const unsigned int inputDimensionSizes[] = {1, 4, 4, 3}; // NHWCin
- const unsigned int weightsDimensionSizes[] = {4, 2, 2, 3}; // CoutHWCin
- const unsigned int outputDimensionSizes[] = {1, 3, 3, 4}; // NHWCout
- const unsigned int outputChannelSize[] = {outputDimensionSizes[3]}; // Cout
-
- TensorInfo inputInfo (4, inputDimensionSizes, DataType::Float32);
- TensorInfo outputInfo(4, outputDimensionSizes, DataType::Float32);
-
- std::vector<float> weightsVector = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
- 11, 12, 13, 14, 15, 16, 17, 18, 19, 110, 111, 112,
- 21, 22, 23, 24, 25, 26, 27, 28, 29, 210, 211, 212,
- 31, 32, 33, 34, 35, 36, 37, 38, 39, 310, 311, 312};
- TensorInfo weightsInfo(4, weightsDimensionSizes, DataType::Float32);
- ConstTensor weights (weightsInfo, weightsVector);
- std::vector<float> biasVector = {3.3f, 3.2f, 3.1f, 3.0f};
- TensorInfo biasInfo(1, outputChannelSize, DataType::Float32);
- ConstTensor bias (biasInfo, biasVector);
- Optional<ConstTensor> optionalBias = Optional<ConstTensor>(bias);
+ const unsigned int inputDimensionSizes[] = {1, 4, 4, 3}; // NHWCin
+ unsigned int weightsDimensionSizes[] = {4, 2, 2, 3}; // CoutHWCin
+ unsigned int outputDimensionSizes[] = {1, 3, 3, 4}; // NHWCout
- std::vector<float> betaVector = {0.0f, 0.2f, 0.3f, 0.4f};
- std::vector<float> gammaVector = {0.5f, 0.6f, 0.7f, 0.8f};
- std::vector<float> meanVector = {0.1f, 0.2f, 0.3f, 0.4f};
- std::vector<float> varianceVector = {1.0f, 1.1f, 1.2f, 1.3f};
- ConstTensor beta (TensorInfo(1, outputChannelSize, DataType::Float32), betaVector);
- ConstTensor gamma (TensorInfo(1, outputChannelSize, DataType::Float32), gammaVector);
- ConstTensor mean (TensorInfo(1, outputChannelSize, DataType::Float32), meanVector);
- ConstTensor variance(TensorInfo(1, outputChannelSize, DataType::Float32), varianceVector);
+ if (depthwise)
+ {
+ // M Cin H W
+ weightsDimensionSizes[0] = 4;
+ weightsDimensionSizes[1] = 3;
+ weightsDimensionSizes[2] = 2;
+ weightsDimensionSizes[3] = 2;
+ outputDimensionSizes[3] = weightsDimensionSizes[0] * weightsDimensionSizes[1];
+ }
+ const unsigned int outputChannelSize[] = {outputDimensionSizes[3]}; // Cout
- auto inputSize = inputDimensionSizes[0]*inputDimensionSizes[1]*inputDimensionSizes[2]*inputDimensionSizes[3];
- auto outputSize = outputDimensionSizes[0]*outputDimensionSizes[1]*outputDimensionSizes[2]*outputDimensionSizes[3];
+ TensorInfo inputInfo(4, inputDimensionSizes, ArmnnType);
+ TensorInfo outputInfo(4, outputDimensionSizes, ArmnnType);
- // FIRST NETWORK: Fused
+ std::vector<int> weightsIntVector = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
+ 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
+ 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
+ 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42};
+ std::vector<T> weightsVector(begin(weightsIntVector), end(weightsIntVector));
+ TensorInfo weightsInfo(4, weightsDimensionSizes, ArmnnType);
+ ConstTensor weights(weightsInfo, weightsVector);
- // Construct ArmNN network
- NetworkId networkIdentifier;
+ std::vector<T> biasVector = GetVector<T>(outputDimensionSizes[3], 3.3f, 0.1f);
+ TensorInfo biasInfo(1, outputChannelSize, ArmnnType);
+ ConstTensor bias(biasInfo, biasVector);
+ Optional<ConstTensor> optionalBias = Optional<ConstTensor>(bias);
+
+ std::vector<T> betaVector = GetVector<T>(outputDimensionSizes[3], 0.0f, 0.2f);
+ std::vector<T> gammaVector = GetVector<T>(outputDimensionSizes[3], 0.5f, 0.1f);
+ std::vector<T> meanVector = GetVector<T>(outputDimensionSizes[3], 0.1f, 0.1f);
+ std::vector<T> varianceVector = GetVector<T>(outputDimensionSizes[3], 1.0f, 0.1f);
+
+ ConstTensor beta (TensorInfo(1, outputChannelSize, ArmnnType), betaVector);
+ ConstTensor gamma (TensorInfo(1, outputChannelSize, ArmnnType), gammaVector);
+ ConstTensor mean (TensorInfo(1, outputChannelSize, ArmnnType), meanVector);
+ ConstTensor variance(TensorInfo(1, outputChannelSize, ArmnnType), varianceVector);
+
+ // Create a network
INetworkPtr network = INetwork::Create();
- IConnectableLayer *inputLayer = network->AddInputLayer(0);
- IConnectableLayer *convLayer = network->AddConvolution2dLayer(convolution2dDescriptor,
- weights,
- optionalBias,
- "convolution");
- IConnectableLayer *batchNormLayer = network->AddBatchNormalizationLayer(batchNormDescriptor,
+
+ IConnectableLayer* inputLayer = network->AddInputLayer(0);
+
+ IConnectableLayer* convLayer = Conv2dTest::AddConvolution(network.get(),
+ convolution2dDescriptor,
+ weights,
+ optionalBias,
+ "convolution");
+
+ IConnectableLayer* batchNormLayer = network->AddBatchNormalizationLayer(batchNormDescriptor,
mean,
variance,
beta,
gamma,
"batchNorm");
- IConnectableLayer *outputLayer = network->AddOutputLayer(0);
- inputLayer ->GetOutputSlot(0).Connect(convLayer ->GetInputSlot(0));
- convLayer ->GetOutputSlot(0).Connect(batchNormLayer->GetInputSlot(0));
- batchNormLayer ->GetOutputSlot(0).Connect(outputLayer ->GetInputSlot(0));
+ IConnectableLayer* outputLayer = network->AddOutputLayer(0);
+ IConnectableLayer* output2Layer = nullptr;
+
+ if (preventFusing)
+ {
+ output2Layer = network->AddOutputLayer(1);
+ }
- //Set the tensors in the network.
- inputLayer ->GetOutputSlot(0).SetTensorInfo(inputInfo);
- convLayer ->GetOutputSlot(0).SetTensorInfo(outputInfo);
- batchNormLayer ->GetOutputSlot(0).SetTensorInfo(outputInfo);
+ // Set layer information
+ inputLayer ->GetOutputSlot(0).SetTensorInfo(inputInfo);
+ convLayer ->GetOutputSlot(0).SetTensorInfo(outputInfo);
+ batchNormLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);
+
+ // Connect layers
+ inputLayer ->GetOutputSlot(0).Connect(convLayer->GetInputSlot(0));
+ convLayer ->GetOutputSlot(0).Connect(batchNormLayer->GetInputSlot(0));
+ batchNormLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
+
+ if (preventFusing)
+ {
+ convLayer ->GetOutputSlot(0).Connect(output2Layer->GetInputSlot(0));
+ }
+
+ return network;
+}
+
+template <typename Conv2dTest,
+ armnn::DataType ArmnnType,
+ typename ConvDescriptorType = typename Conv2dTest::ConvDescriptorType,
+ typename ConvLayerType = typename Conv2dTest::ConvLayerType,
+ typename T = armnn::ResolveType<ArmnnType>>
+void FuseBatchNormIntoConvTest(bool depthwise, float tolerance, armnn::Compute backendId)
+{
+ // FIRST NETWORK: Fused
+ // Construct ArmNN network
+ INetworkPtr networkFused = CreateNetwork<Conv2dTest, ArmnnType>(depthwise, false);
// Create ArmNN runtime
- IRuntime::CreationOptions options; // default options
- IRuntimePtr run = IRuntime::Create(options);
+ IRuntimePtr run = IRuntime::Create(IRuntime::CreationOptions()); // default options
// Optimise ArmNN network
- IOptimizedNetworkPtr optNet = Optimize(*network, {Compute::CpuRef}, run->GetDeviceSpec());
+ IOptimizedNetworkPtr optNetFused = Optimize(*networkFused, {backendId}, run->GetDeviceSpec());
- // Load graph into runtime
- BOOST_TEST(run->LoadNetwork(networkIdentifier, std::move(optNet)) == Status::Success);
+ Graph graphFused = PolymorphicDowncast<OptimizedNetwork*>(optNetFused.get())->GetGraph();
+
+ auto checkFusedConv2d = [](const armnn::Layer* const layer) -> bool
+ {
+ return IsLayerOfType<ConvLayerType>(layer) &&
+ (layer->GetNameStr() == "fused-batchNorm-into-convolution");
+ };
+
+ BOOST_CHECK(3 == graphFused.GetNumLayers());
+ BOOST_TEST(CheckSequence(graphFused.cbegin(),
+ graphFused.cend(),
+ &IsLayerOfType<InputLayer>,
+ checkFusedConv2d,
+ &IsLayerOfType<OutputLayer>));
+
+ // Load network into runtime
+ NetworkId networkIdentifier;
+ BOOST_TEST(run->LoadNetwork(networkIdentifier, std::move(optNetFused)) == Status::Success);
// Create structures for inputs and outputs.
- std::vector<float> inputData(inputSize, 128);
- std::vector<float> outputData(outputSize);
+ std::vector<T> inputDataFused = GetVector<T>(48, 1.0f, 0.1f);
+
+ std::vector<T> outputDataFused(36);
- InputTensors inputTensors {{0, ConstTensor(run->GetInputTensorInfo (networkIdentifier, 0), inputData.data())}};
- OutputTensors outputTensors{{0, Tensor(run->GetOutputTensorInfo(networkIdentifier, 0), outputData.data())}};
+ if (depthwise)
+ {
+ outputDataFused.resize(108);
+ }
+
+ InputTensors inputTensorsFused {
+ {0, ConstTensor(run->GetInputTensorInfo (networkIdentifier, 0), inputDataFused.data())}};
+ OutputTensors outputTensorsFused{
+ {0, Tensor(run->GetOutputTensorInfo(networkIdentifier, 0), outputDataFused.data())}};
// Execute network
- run->EnqueueWorkload(networkIdentifier, inputTensors, outputTensors);
+ run->EnqueueWorkload(networkIdentifier, inputTensorsFused, outputTensorsFused);
// SECOND NETWORK: NotFused
-
// Construct ArmNN network
- NetworkId networkIdentifierNotFused;
- INetworkPtr networkNotFused = INetwork::Create();
- IConnectableLayer *inputLayerNotFused = networkNotFused->AddInputLayer(0);
- IConnectableLayer *convLayerNotFused = networkNotFused->AddConvolution2dLayer(convolution2dDescriptor,
- weights,
- optionalBias,
- "convolution");
- IConnectableLayer *batchNormLayerNotFused = networkNotFused->AddBatchNormalizationLayer(batchNormDescriptor,
- mean,
- variance,
- beta,
- gamma,
- "batchNorm");
- IConnectableLayer *outputLayerNotFused = networkNotFused->AddOutputLayer(0);
- IConnectableLayer *output2LayerNotFused = networkNotFused->AddOutputLayer(1);
-
- inputLayerNotFused ->GetOutputSlot(0).Connect(convLayerNotFused ->GetInputSlot(0));
- convLayerNotFused ->GetOutputSlot(0).Connect(batchNormLayerNotFused->GetInputSlot(0));
- batchNormLayerNotFused ->GetOutputSlot(0).Connect(outputLayerNotFused ->GetInputSlot(0));
- convLayerNotFused ->GetOutputSlot(0).Connect(output2LayerNotFused ->GetInputSlot(0));
-
- //Set the tensors in the network.
- inputLayerNotFused ->GetOutputSlot(0).SetTensorInfo(inputInfo);
- convLayerNotFused ->GetOutputSlot(0).SetTensorInfo(outputInfo);
- batchNormLayerNotFused ->GetOutputSlot(0).SetTensorInfo(outputInfo);
+ INetworkPtr networkNotFused = CreateNetwork<Conv2dTest, ArmnnType>(depthwise, true);
// Create ArmNN runtime
- IRuntimePtr runNotFused = IRuntime::Create(options);
+ IRuntimePtr runNotFused = IRuntime::Create(IRuntime::CreationOptions()); // default options
// Optimise ArmNN network
- IOptimizedNetworkPtr optNetNotFused = Optimize(*networkNotFused, {Compute::CpuRef}, runNotFused->GetDeviceSpec());
+ IOptimizedNetworkPtr optNetNotFused = Optimize(*networkNotFused, {backendId}, runNotFused->GetDeviceSpec());
- // Load graph into runtime
+ Graph graphNotFused = PolymorphicDowncast<OptimizedNetwork*>(optNetNotFused.get())->GetGraph();
+
+ BOOST_CHECK(5 == graphNotFused.GetNumLayers());
+ BOOST_TEST(CheckSequence(graphNotFused.cbegin(),
+ graphNotFused.cend(),
+ &IsLayerOfType<armnn::InputLayer>,
+ &IsLayerOfType<ConvLayerType>,
+ &IsLayerOfType<armnn::BatchNormalizationLayer>,
+ &IsLayerOfType<armnn::OutputLayer>,
+ &IsLayerOfType<armnn::OutputLayer>));
+
+ // Load network into runtime
+ NetworkId networkIdentifierNotFused;
BOOST_TEST(runNotFused->LoadNetwork(networkIdentifierNotFused, std::move(optNetNotFused)) == Status::Success);
// Create structures for inputs and outputs.
- std::vector<float> inputDataNotFused(inputSize, 128);
- std::vector<float> outputDataNotFused(outputSize);
- std::vector<float> outputData2NotFused(outputSize);
+ std::vector<T> inputDataNotFused = GetVector<T>(48, 1.0f, 0.1f);
+ std::vector<T> outputDataNotFused(36);
+ std::vector<T> outputData2NotFused(36);
+
+ if (depthwise)
+ {
+ outputDataNotFused.resize(108);
+ outputData2NotFused.resize(108);
+ }
InputTensors inputTensorsNotFused{
{0, ConstTensor(runNotFused->GetInputTensorInfo(networkIdentifierNotFused, 0), inputDataNotFused.data())}};
OutputTensors outputTensorsNotFused{
@@ -153,11 +269,33 @@ BOOST_AUTO_TEST_CASE(Fuse_batchNorm_into_Conv2D_Float32_Test)
runNotFused->EnqueueWorkload(networkIdentifierNotFused, inputTensorsNotFused, outputTensorsNotFused);
// Check that the output of the fused convolution matches the output of the batchNorm in the "NotFused" network
- for (unsigned int n = 0; n < outputData.size(); ++n)
+ for (unsigned int n = 0; n < outputDataFused.size(); ++n)
{
- BOOST_CHECK_CLOSE(outputData[n], outputDataNotFused[n], 0.001);
+ BOOST_CHECK_CLOSE(outputDataFused[n], outputDataNotFused[n], T(tolerance));
}
}
+
+// This unit test needs the reference backend; it is not available if the reference backend is not built
+#if defined(ARMNNREF_ENABLED)
+BOOST_AUTO_TEST_CASE(FuseBatchNormIntoConv2DFloat32Test)
+{
+ FuseBatchNormIntoConvTest<Conv2dTest, DataType::Float32>(false, 0.0001f, armnn::Compute::CpuRef);
+}
+
+BOOST_AUTO_TEST_CASE(FuseBatchNormIntoConv2DFloat16Test)
+{
+ FuseBatchNormIntoConvTest<Conv2dTest, DataType::Float16>(false, 0.1f, armnn::Compute::CpuRef);
+}
+
+BOOST_AUTO_TEST_CASE(FuseBatchNormIntoDepthwiseConv2DFloat32Test)
+{
+ FuseBatchNormIntoConvTest<DepthwiseConv2dTest, DataType::Float32>(true, 0.0001f, armnn::Compute::CpuRef);
+}
+
+BOOST_AUTO_TEST_CASE(FuseBatchNormIntoDepthwiseConv2DFloat16Test)
+{
+ FuseBatchNormIntoConvTest<DepthwiseConv2dTest, DataType::Float16>(true, 0.1f, armnn::Compute::CpuRef);
+}
#endif
BOOST_AUTO_TEST_SUITE_END()
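Since FuseBatchNormIntoConvTest is parameterised on the backend, the same fused-vs-unfused comparison could be pointed at other backends where built; a hypothetical extra case (editorial sketch, CpuAcc chosen purely for illustration and not added by this patch):

BOOST_AUTO_TEST_CASE(FuseBatchNormIntoConv2DFloat32CpuAccTest)
{
    // Hypothetical: run the identical check on the NEON (CpuAcc) backend.
    FuseBatchNormIntoConvTest<Conv2dTest, DataType::Float32>(false, 0.0001f, armnn::Compute::CpuAcc);
}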