From 90231b8c9f680d323e4b93dcd0820a47925e6d24 Mon Sep 17 00:00:00 2001
From: Mike Kelly
Date: Thu, 5 Nov 2020 15:44:56 +0000
Subject: IVGCVSW-5315 Create FuseBatchNorm class

Signed-off-by: Teresa Charlin
Signed-off-by: Mike Kelly
Change-Id: Id0625c58dbeea79874bf986b70d136ed9390bf83
---
 src/armnn/Network.cpp                         |   5 +-
 src/armnn/optimizations/FuseBatchNorm.hpp     | 125 +++++---
 src/armnn/test/OptimizerTests.cpp             |  69 ++---
 .../test/optimizations/FuseBatchNormTests.cpp | 326 +++++++++++++++------
 4 files changed, 354 insertions(+), 171 deletions(-)

diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp
index 6578b8445f..347e39b4c8 100644
--- a/src/armnn/Network.cpp
+++ b/src/armnn/Network.cpp
@@ -1055,7 +1055,10 @@ IOptimizedNetworkPtr Optimize(const INetwork& inNetwork,
                                                 FoldPadIntoConvolution2d(),
                                                 PermuteAndBatchToSpaceAsDepthToSpace(),
                                                 TransposeAndBatchToSpaceAsDepthToSpace(),
-                                                FuseBatchNormIntoConvolution2D()));
+                                                FuseBatchNormIntoConvolution2DFloat32(),
+                                                FuseBatchNormIntoConvolution2DFloat16(),
+                                                FuseBatchNormIntoDepthwiseConvolution2DFloat32(),
+                                                FuseBatchNormIntoDepthwiseConvolution2DFloat16()));
 
     // If Fp32 to Fp16 optimization is set convert Fp32 network to Fp16
     if (options.m_ReduceFp32ToFp16)
diff --git a/src/armnn/optimizations/FuseBatchNorm.hpp b/src/armnn/optimizations/FuseBatchNorm.hpp
index e8e8c5d77f..9d25379930 100644
--- a/src/armnn/optimizations/FuseBatchNorm.hpp
+++ b/src/armnn/optimizations/FuseBatchNorm.hpp
@@ -7,13 +7,15 @@
 #include "Optimization.hpp"
 
 #include <armnnUtils/DataLayoutIndexed.hpp>
+#include <ResolveType.hpp>
 
 namespace armnn
 {
 namespace optimizations
 {
 
-template <typename ConvLayer>
+template <typename ConvLayer, armnn::DataType ArmnnType,
+          typename T = armnn::ResolveType<ArmnnType>>
 class FuseBatchNorm
 {
 public:
@@ -27,10 +29,12 @@ public:
         Layer& base  = connection.GetConnectedOutputSlot()->GetOwningLayer();
         Layer& child = connection.GetOwningLayer();
 
-        ARMNN_ASSERT(base.GetType() == LayerType::Convolution2d);
+        bool depthwise = (base.GetType() == LayerType::DepthwiseConvolution2d);
+
+        ARMNN_ASSERT(base.GetType() == LayerType::Convolution2d || depthwise);
         ARMNN_ASSERT(child.GetType() == LayerType::BatchNormalization);
 
-        if (base.GetDataType() == DataType::Float32 && child.GetDataType() == DataType::Float32)
+        if (base.GetDataType() == ArmnnType && child.GetDataType() == ArmnnType)
         {
             OutputSlot* parentOut = base.GetInputSlot(0).GetConnectedOutputSlot();
             auto convLayer = PolymorphicDowncast<ConvLayer*>(&base);
@@ -47,58 +51,92 @@ public:
             ConstTensor varTensor(batchNormLayer->m_Variance->GetTensorInfo(), batchNormLayer->m_Variance->Map(true));
 
             auto convDescriptor = convLayer->GetParameters();
-            ConstTensor weightsTensor(convLayer->m_Weight->GetTensorInfo(), convLayer->m_Weight->Map(true));
+            auto weightsInfo(convLayer->m_Weight->GetTensorInfo());
+            ConstTensor weightsTensor(weightsInfo, convLayer->m_Weight->Map(true));
 
             armnnUtils::DataLayoutIndexed dataLayout(convDescriptor.m_DataLayout);
-            auto weightsShape = convLayer->m_Weight->GetTensorInfo().GetShape();
-            const unsigned int outputChannels = weightsShape[0];
-            const unsigned int inputChannels  = weightsShape[dataLayout.GetChannelsIndex()];
-            const unsigned int weightsHeight  = weightsShape[dataLayout.GetHeightIndex()];
-            const unsigned int weightsWidth   = weightsShape[dataLayout.GetWidthIndex()];
-
-            const auto* weightsBuffer = static_cast<const float*>(weightsTensor.GetMemoryArea());
-            const auto* betaBuffer    = static_cast<const float*>(betaTensor.GetMemoryArea());
-            const auto* gammaBuffer   = static_cast<const float*>(gammaTensor.GetMemoryArea());
-            const auto* meanBuffer    = static_cast<const float*>(meanTensor.GetMemoryArea());
-            const auto* varBuffer     = static_cast<const float*>(varTensor.GetMemoryArea());
-
-            std::vector<float> weightsVector (weightsBuffer, weightsBuffer + weightsTensor.GetNumElements());
-            std::vector<float> betaVector    (betaBuffer, betaBuffer + betaTensor.GetNumElements());
-            std::vector<float> gammaVector   (gammaBuffer, gammaBuffer + gammaTensor.GetNumElements());
-            std::vector<float> meanVector    (meanBuffer, meanBuffer + meanTensor.GetNumElements());
-            std::vector<float> varianceVector(varBuffer, varBuffer + varTensor.GetNumElements());
+            auto weightsShape = weightsInfo.GetShape();
+            const unsigned int depthMultiplier = depthwise ? weightsShape[0] : 1;
+            const unsigned int inputChannels   = depthwise ? weightsShape[1] :
+                                                             weightsShape[dataLayout.GetChannelsIndex()];
+            const unsigned int outputChannels  = depthwise ? inputChannels * depthMultiplier : weightsShape[0];
+            const unsigned int weightsHeight   = depthwise ? weightsShape[2] :
+                                                             weightsShape[dataLayout.GetHeightIndex()];
+            const unsigned int weightsWidth    = depthwise ? weightsShape[3] :
+                                                             weightsShape[dataLayout.GetWidthIndex()];
+
+            const auto* weightsBuffer = static_cast<const T*>(weightsTensor.GetMemoryArea());
+            const auto* betaBuffer    = static_cast<const T*>(betaTensor.GetMemoryArea());
+            const auto* gammaBuffer   = static_cast<const T*>(gammaTensor.GetMemoryArea());
+            const auto* meanBuffer    = static_cast<const T*>(meanTensor.GetMemoryArea());
+            const auto* varBuffer     = static_cast<const T*>(varTensor.GetMemoryArea());
+
+            std::vector<T> weightsVector (weightsBuffer, weightsBuffer + weightsTensor.GetNumElements());
+            std::vector<T> betaVector    (betaBuffer, betaBuffer + betaTensor.GetNumElements());
+            std::vector<T> gammaVector   (gammaBuffer, gammaBuffer + gammaTensor.GetNumElements());
+            std::vector<T> meanVector    (meanBuffer, meanBuffer + meanTensor.GetNumElements());
+            std::vector<T> varianceVector(varBuffer, varBuffer + varTensor.GetNumElements());
 
             // fusedWeights = (gamma * weights) / sqrt(variance + epsilon)
-            std::vector<float> fusedWeightsVector(weightsVector.size());
+            std::vector<T> fusedWeightsVector(weightsVector.size());
+            unsigned int depthwiseMultiplierIdx = 0;
 
-            unsigned int i = 0;
-            for (unsigned int cOut = 0; cOut < outputChannels; ++cOut)
+            for (unsigned int cInput = 0; cInput < inputChannels; ++cInput)
             {
-                auto mult = gammaVector[cOut] / sqrtf (varianceVector[cOut] + epsilon);
-                for (unsigned int cInput = 0; cInput < inputChannels; ++cInput)
+                for (unsigned int cOut = 0; cOut < outputChannels; ++cOut)
                 {
+                    T mult = gammaVector[cOut] / static_cast<T>(sqrtf(varianceVector[cOut] + epsilon));
+
+                    if (depthwise)
+                    {
+                        cInput = cOut / depthMultiplier;
+                        depthwiseMultiplierIdx = cOut % depthMultiplier;
+                    }
+
                     for (unsigned int h = 0; h < weightsHeight; ++h)
                     {
                         for (unsigned int w = 0; w < weightsWidth; ++w)
                         {
-                            fusedWeightsVector[i] = mult * weightsVector[i];
-                            i++;
+                            unsigned int weightsIdx = 0;
+
+                            if (depthwise)
+                            {
+                                weightsIdx = depthwiseMultiplierIdx * weightsWidth * weightsHeight * inputChannels +
+                                             cInput * weightsWidth * weightsHeight +
+                                             h * weightsWidth +
+                                             w;
+                            }
+                            else if (convDescriptor.m_DataLayout == DataLayout::NHWC)
+                            {
+                                weightsIdx = cOut * weightsHeight * weightsWidth * inputChannels +
+                                             h * weightsWidth * inputChannels +
+                                             w * inputChannels +
+                                             cInput;
+                            }
+                            else
+                            {
+                                weightsIdx = cOut * weightsWidth * weightsHeight * inputChannels +
+                                             cInput * weightsWidth * weightsHeight +
+                                             h * weightsWidth +
+                                             w;
+                            }
+                            fusedWeightsVector[weightsIdx] = mult * weightsVector[weightsIdx];
                         }
                     }
                 }
             }
-            ConstTensor fusedWeightsTensor(convLayer->m_Weight->GetTensorInfo(), fusedWeightsVector);
+            ConstTensor fusedWeightsTensor(weightsInfo, fusedWeightsVector);
 
             // fusedBias = (gamma * (bias - mean)) / sqrt(variance + epsilon) + beta
-            std::vector<float> fusedBiasVector(outputChannels);
+            std::vector<T> fusedBiasVector(outputChannels);
             if (convDescriptor.m_BiasEnabled)
             {
                 ARMNN_ASSERT_MSG(convLayer->m_Bias != nullptr,
                                  "FuseBatchNorm: Bias data should not be null if bias is enabled.");
                 ConstTensor biasTensor(convLayer->m_Bias->GetTensorInfo(), convLayer->m_Bias->Map(true));
-                const auto* biasBuffer = static_cast<const float*>(biasTensor.GetMemoryArea());
-                std::vector<float> biasVector(biasBuffer, biasBuffer + biasTensor.GetNumElements());
+                const auto* biasBuffer = static_cast<const T*>(biasTensor.GetMemoryArea());
+                std::vector<T> biasVector(biasBuffer, biasBuffer + biasTensor.GetNumElements());
 
                 for (unsigned int cOut = 0; cOut < outputChannels; ++cOut)
                 {
@@ -109,7 +147,7 @@ public:
             else
             {
                 convDescriptor.m_BiasEnabled = true;
-                std::vector<float> biasVector(outputChannels, 0);
+                std::vector<T> biasVector(outputChannels, T(0));
 
                 for (unsigned int cOut = 0; cOut < outputChannels; ++cOut)
                 {
@@ -117,7 +155,7 @@ public:
                                            sqrtf(varianceVector[cOut] + epsilon)) + betaVector[cOut];
                 }
             }
-            ConstTensor fusedBiasTensor(TensorInfo({outputChannels}, DataType::Float32), fusedBiasVector);
+            ConstTensor fusedBiasTensor(TensorInfo({outputChannels}, ArmnnType), fusedBiasVector);
 
             // Insert the new convolution layer that has batch norm parameters fused into
             const std::string name = std::string("fused-") + child.GetName() + std::string("-into-") + base.GetName();
@@ -143,10 +181,25 @@ protected:
     ~FuseBatchNorm() = default;
 };
 
-using FuseBatchNormIntoConvolution2D =
+using FuseBatchNormIntoConvolution2DFloat32 =
         OptimizeForExclusiveConnection<Convolution2dLayer,
                                        BatchNormalizationLayer,
-                                       FuseBatchNorm<Convolution2dLayer>>;
+                                       FuseBatchNorm<Convolution2dLayer, armnn::DataType::Float32>>;
+
+using FuseBatchNormIntoConvolution2DFloat16 =
+        OptimizeForExclusiveConnection<Convolution2dLayer,
+                                       BatchNormalizationLayer,
+                                       FuseBatchNorm<Convolution2dLayer, armnn::DataType::Float16>>;
+
+using FuseBatchNormIntoDepthwiseConvolution2DFloat32 =
+        OptimizeForExclusiveConnection<DepthwiseConvolution2dLayer,
+                                       BatchNormalizationLayer,
+                                       FuseBatchNorm<DepthwiseConvolution2dLayer, armnn::DataType::Float32>>;
+
+using FuseBatchNormIntoDepthwiseConvolution2DFloat16 =
+        OptimizeForExclusiveConnection<DepthwiseConvolution2dLayer,
+                                       BatchNormalizationLayer,
+                                       FuseBatchNorm<DepthwiseConvolution2dLayer, armnn::DataType::Float16>>;
 
 } // namespace optimizations
 } // namespace armnn
\ No newline at end of file
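For reference, the fusion implemented in Run() above follows from substituting the convolution output into the batch-normalization definition. Per output channel $c$, with $z = W \ast x + b$ and batch-norm parameters $\gamma, \beta, \mu, \sigma^2$:

$$\mathrm{BN}(z)_c = \gamma_c \,\frac{z_c - \mu_c}{\sqrt{\sigma_c^2 + \epsilon}} + \beta_c,$$

which is itself a convolution with the fused parameters the pass computes:

$$W'_c = \frac{\gamma_c}{\sqrt{\sigma_c^2 + \epsilon}}\, W_c, \qquad b'_c = \frac{\gamma_c\,(b_c - \mu_c)}{\sqrt{\sigma_c^2 + \epsilon}} + \beta_c.$$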
diff --git a/src/armnn/test/OptimizerTests.cpp b/src/armnn/test/OptimizerTests.cpp
index 8845dae6c8..0179589bf4 100644
--- a/src/armnn/test/OptimizerTests.cpp
+++ b/src/armnn/test/OptimizerTests.cpp
@@ -255,8 +255,6 @@ BOOST_AUTO_TEST_CASE(InsertConvertersTest)
                              &IsLayerOfType<armnn::OutputLayer>));
 }
 
-
-
 void CreateConvolution2dGraph(Graph &graph, const unsigned int* inputShape,
                               const unsigned int* weightsShape, const unsigned int* outputShape,
                               DataLayout dataLayout = DataLayout::NCHW)
@@ -308,8 +306,8 @@ BOOST_AUTO_TEST_CASE(Conv2dValidateTensorShapesFromInputsNhwc)
 }
 
 void CreateDepthwiseConvolution2dGraph(Graph &graph, const unsigned int* inputShape,
-    const unsigned int* weightsShape, const unsigned int* outputShape,
-    DataLayout dataLayout = DataLayout::NCHW)
+                                       const unsigned int* weightsShape, const unsigned int* outputShape,
+                                       DataLayout dataLayout = DataLayout::NCHW)
 {
     armnn::TensorInfo inputInfo(4, inputShape, DataType::Float32);
     armnn::TensorInfo outputInfo(4, outputShape, DataType::Float32);
@@ -357,7 +355,7 @@ BOOST_AUTO_TEST_CASE(DepthwiseConv2dValidateTensorShapesFromInputsNhwc)
     BOOST_CHECK_NO_THROW(graph.InferTensorInfos());
 }
 
-void CreatePooling2dGraph(Graph &graph, const unsigned int* inputShape, const unsigned int* outputShape,
+void CreatePooling2dGraph(Graph& graph, const unsigned int* inputShape, const unsigned int* outputShape,
                           DataLayout dataLayout = DataLayout::NCHW)
 {
     armnn::TensorInfo inputInfo(4, inputShape, DataType::Float32);
@@ -405,7 +403,7 @@ BOOST_AUTO_TEST_CASE(Pooling2dValidateTensorShapesFromInputsNhwc)
     BOOST_CHECK_NO_THROW(graph.InferTensorInfos());
 }
 
-void CreateResizeBilinearGraph(Graph &graph, const unsigned int* inputShape, const unsigned int* outputShape,
+void CreateResizeBilinearGraph(Graph& graph, const unsigned int* inputShape, const unsigned int* outputShape,
                                DataLayout dataLayout = DataLayout::NCHW)
 {
     TensorInfo inputInfo(4, inputShape, DataType::Float32);
@@ -448,7 +446,6 @@ BOOST_AUTO_TEST_CASE(ResizeBilinearValidateTensorShapesFromInputsNhwc)
     BOOST_CHECK_NO_THROW(graph.InferTensorInfos());
 }
 
-
 void CreateGatherGraph(Graph& graph, const armnn::TensorInfo& paramsInfo, const armnn::TensorInfo& indicesInfo,
                        const armnn::TensorInfo& outputInfo)
 {
@@ -547,7 +544,6 @@ BOOST_AUTO_TEST_CASE(FoldPadLayerIntoConvolution2dLayer)
     const unsigned int weightsShape[] = { 1, 2, 3, 3 };
     const unsigned int outputShape[] = { 1, 2, 1, 1 };
 
-
     armnn::TensorInfo inputInfo(4, inputShape, DataType::Float32);
     armnn::TensorInfo paddedInfo(4, paddedShape, DataType::Float32);
     armnn::TensorInfo outputInfo(4, outputShape, DataType::Float32);
@@ -628,9 +624,6 @@ BOOST_AUTO_TEST_CASE(FoldPadLayerIntoConvolution2dLayer)
                              &IsLayerOfType<armnn::OutputLayer>));
 }
 
-
-
-
 class MockLayerSupport : public LayerSupportBase {
 public:
     bool IsInputSupported(const TensorInfo& /*input*/,
@@ -686,7 +679,6 @@ public:
     };
 };
 
-
 BOOST_AUTO_TEST_CASE(BackendHintTest)
 {
     class TestBackendAssignment : public LayerVisitorBase<VisitorNoThrowPolicy>
@@ -764,7 +756,6 @@ BOOST_AUTO_TEST_CASE(BackendHintTest)
     input->GetOutputSlot(0).Connect(act->GetInputSlot(0));
     act->GetOutputSlot(0).Connect(output->GetInputSlot(0));
 
-
     auto optNet = IOptimizedNetworkPtr(new OptimizedNetwork(std::move(graph)), &IOptimizedNetwork::Destroy);
 
     OptimizedNetwork* optNetObjPtr = PolymorphicDowncast<OptimizedNetwork*>(optNet.get());
@@ -772,7 +763,6 @@ BOOST_AUTO_TEST_CASE(BackendHintTest)
     // Get the optimized graph
     Graph& optGraph = optNetObjPtr->GetGraph();
 
-
     std::vector<BackendId> prefs{"MockBackend", "CustomBackend"};
 
     BackendIdSet availableBackends = {"CustomBackend", "MockBackend"};
@@ -799,13 +789,13 @@ BOOST_AUTO_TEST_CASE(BackendHintTest)
 }
 
 // Tests that OptimizeForExclusiveConnections works, fusing when needed, using BatchNorm fusing as example
-BOOST_AUTO_TEST_CASE(OptimizeForExclusiveConnections_fuse_Test)
+BOOST_AUTO_TEST_CASE(OptimizeForExclusiveConnectionsFuseTest)
 {
     using namespace armnn;
     // Define layers information
     Convolution2dDescriptor convolution2dDescriptor;
     convolution2dDescriptor.m_BiasEnabled = false;
-    convolution2dDescriptor.m_DataLayout = DataLayout::NHWC;
+    convolution2dDescriptor.m_DataLayout  = DataLayout::NHWC;
     BatchNormalizationDescriptor batchNormDescriptor;
     batchNormDescriptor.m_DataLayout = DataLayout::NHWC;
 
@@ -814,32 +804,31 @@ BOOST_AUTO_TEST_CASE(OptimizeForExclusiveConnectionsFuseTest)
     const unsigned int outputDimensionSizes[]  = {1, 3, 3, 1};               // NHWCout
     const unsigned int outputChannelSize[]     = {outputDimensionSizes[3]};  // Cout
 
-    TensorInfo inputInfo (4, inputDimensionSizes, DataType::Float32);
+    TensorInfo inputInfo(4, inputDimensionSizes, DataType::Float32);
     TensorInfo outputInfo(4, outputDimensionSizes, DataType::Float32);
 
-    std::vector<float> weightsVector = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
-    ConstTensor weights (TensorInfo(4, weightsDimensionSizes, DataType::Float32), weightsVector);
-
+    std::vector<float> weightsVector = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
+    ConstTensor weights(TensorInfo(4, weightsDimensionSizes, DataType::Float32), weightsVector);
     std::vector<float> betaVector     = {0.1f};
     std::vector<float> gammaVector    = {0.5f};
     std::vector<float> meanVector     = {0};
     std::vector<float> varianceVector = {1};
-    ConstTensor beta    (TensorInfo(1, outputChannelSize, DataType::Float32), betaVector);
-    ConstTensor gamma   (TensorInfo(1, outputChannelSize, DataType::Float32), gammaVector);
-    ConstTensor mean    (TensorInfo(1, outputChannelSize, DataType::Float32), meanVector);
-    ConstTensor variance(TensorInfo(1, outputChannelSize, DataType::Float32), varianceVector);
+    ConstTensor beta(TensorInfo(1, outputChannelSize, DataType::Float32), betaVector);
+    ConstTensor gamma(TensorInfo(1, outputChannelSize, DataType::Float32), gammaVector);
+    ConstTensor mean(TensorInfo(1, outputChannelSize, DataType::Float32), meanVector);
+    ConstTensor variance(TensorInfo(1, outputChannelSize, DataType::Float32), varianceVector);
 
     // Define the network
     Graph graph;
-    auto input     = graph.AddLayer<InputLayer>(0, "input");
-    auto conv      = graph.AddLayer<Convolution2dLayer>(convolution2dDescriptor, "convolution");
-    auto batchNorm = graph.AddLayer<BatchNormalizationLayer>(batchNormDescriptor, "batchNorm");
-    auto output    = graph.AddLayer<OutputLayer>(0, "output");
+    auto input     = graph.AddLayer<InputLayer>(0, "input");
+    auto conv      = graph.AddLayer<Convolution2dLayer>(convolution2dDescriptor, "convolution");
+    auto batchNorm = graph.AddLayer<BatchNormalizationLayer>(batchNormDescriptor, "batchNorm");
+    auto output    = graph.AddLayer<OutputLayer>(0, "output");
 
     // Set layer information
-    input    ->GetOutputSlot().SetTensorInfo(inputInfo);
-    conv     ->GetOutputSlot().SetTensorInfo(outputInfo);
+    input->GetOutputSlot().SetTensorInfo(inputInfo);
+    conv->GetOutputSlot().SetTensorInfo(outputInfo);
     batchNorm->GetOutputSlot().SetTensorInfo(outputInfo);
     conv     ->m_Weight = std::make_unique<ScopedCpuTensorHandle>(weights);
     batchNorm->m_Beta   = std::make_unique<ScopedCpuTensorHandle>(beta);
@@ -849,8 +838,8 @@ BOOST_AUTO_TEST_CASE(OptimizeForExclusiveConnectionsFuseTest)
     if (convolution2dDescriptor.m_BiasEnabled)
     {
         std::vector<float> biasVector = {11};
-        ConstTensor bias (TensorInfo(1, outputChannelSize, DataType::Float32), biasVector);
-        conv->m_Bias = std::make_unique<ScopedCpuTensorHandle> (bias);
+        ConstTensor bias(TensorInfo(1, outputChannelSize, DataType::Float32), biasVector);
+        conv->m_Bias = std::make_unique<ScopedCpuTensorHandle>(bias);
     }
 
     // Connect layers
@@ -867,12 +856,12 @@ BOOST_AUTO_TEST_CASE(OptimizeForExclusiveConnectionsFuseTest)
                              &IsLayerOfType<armnn::OutputLayer>));
 
     // Optimize graph
-    armnn::Optimizer::Pass(graph, MakeOptimizations(FuseBatchNormIntoConvolution2D()));
+    armnn::Optimizer::Pass(graph, MakeOptimizations(FuseBatchNormIntoConvolution2DFloat32()));
 
-    auto checkFusedConv2d = [ ](const armnn::Layer* const layer) -> bool
+    auto checkFusedConv2d = [](const armnn::Layer* const layer) -> bool
     {
         return IsLayerOfType<armnn::Convolution2dLayer>(layer) &&
-            (layer->GetNameStr() == "fused-batchNorm-into-convolution");
+               (layer->GetNameStr() == "fused-batchNorm-into-convolution");
     };
 
     BOOST_CHECK(3 == graph.GetNumLayers());
     BOOST_TEST(CheckSequence(graph.cbegin(),
@@ -884,11 +873,11 @@ BOOST_AUTO_TEST_CASE(OptimizeForExclusiveConnectionsFuseTest)
 }
 
 // Tests that OptimizeForExclusiveConnections works, not fusing when not needed, using BatchNorm fusing as example
-BOOST_AUTO_TEST_CASE(OptimizeForExclusiveConnections_notFuse_Test)
+BOOST_AUTO_TEST_CASE(OptimizeForExclusiveConnectionsWithoutFuseTest)
 {
     // Define the network
-    Graph graph;
-    Convolution2dDescriptor convolution2dDescriptor;
+    Graph                        graph;
+    Convolution2dDescriptor      convolution2dDescriptor;
     BatchNormalizationDescriptor batchNormDescriptor;
 
     auto input = graph.AddLayer<InputLayer>(0, "input");
@@ -912,7 +901,7 @@ BOOST_AUTO_TEST_CASE(OptimizeForExclusiveConnectionsWithoutFuseTest)
                              &IsLayerOfType<armnn::OutputLayer>,
                              &IsLayerOfType<armnn::OutputLayer>));
     // Optimize graph
-    armnn::Optimizer::Pass(graph, armnn::MakeOptimizations(FuseBatchNormIntoConvolution2D()));
+    armnn::Optimizer::Pass(graph, armnn::MakeOptimizations(FuseBatchNormIntoConvolution2DFloat32()));
 
     BOOST_CHECK(5 == graph.GetNumLayers());
     BOOST_TEST(CheckSequence(graph.cbegin(),
@@ -923,4 +912,4 @@ BOOST_AUTO_TEST_CASE(OptimizeForExclusiveConnectionsWithoutFuseTest)
                              &IsLayerOfType<armnn::OutputLayer>,
                              &IsLayerOfType<armnn::OutputLayer>));
 }
-BOOST_AUTO_TEST_SUITE_END()
+BOOST_AUTO_TEST_SUITE_END()
\ No newline at end of file
diff --git a/src/armnn/test/optimizations/FuseBatchNormTests.cpp b/src/armnn/test/optimizations/FuseBatchNormTests.cpp
index 74cb8f96b7..bf47c577a4 100644
--- a/src/armnn/test/optimizations/FuseBatchNormTests.cpp
+++ b/src/armnn/test/optimizations/FuseBatchNormTests.cpp
@@ -4,17 +4,79 @@
 //
 
 #include "LayersFwd.hpp"
+
+#include <Network.hpp>
+#include <ResolveType.hpp>
+#include <armnn/INetwork.hpp>
+#include <test/TestUtils.hpp>
+
 #include <boost/test/unit_test.hpp>
 
-BOOST_AUTO_TEST_SUITE(Optimizer)
 using namespace armnn;
 
-// This unit test needs the reference backend, it's not available if the reference backend is not built
-#if defined(ARMNNREF_ENABLED)
-BOOST_AUTO_TEST_CASE(Fuse_batchNorm_into_Conv2D_Float32_Test)
+BOOST_AUTO_TEST_SUITE(Optimizer)
+
+namespace
+{
+
+class Conv2dTest
+{
+public:
+    using ConvDescriptorType = armnn::Convolution2dDescriptor;
+    using ConvLayerType      = armnn::Convolution2dLayer;
+
+    static IConnectableLayer* AddConvolution(INetwork* network,
+                                             const Convolution2dDescriptor& descriptor,
+                                             const ConstTensor& weights,
+                                             const Optional<ConstTensor>& biases,
+                                             const char* name)
+    {
+        return network->AddConvolution2dLayer(descriptor, weights, biases, name);
+    }
+};
+
+class DepthwiseConv2dTest
+{
+public:
+    using ConvDescriptorType = armnn::DepthwiseConvolution2dDescriptor;
+    using ConvLayerType      = armnn::DepthwiseConvolution2dLayer;
+
+    static IConnectableLayer* AddConvolution(INetwork* network,
+                                             const DepthwiseConvolution2dDescriptor& descriptor,
+                                             const ConstTensor& weights,
+                                             const Optional<ConstTensor>& biases,
+                                             const char* name)
+    {
+        return network->AddDepthwiseConvolution2dLayer(descriptor, weights, biases, name);
+    }
+};
+
+template <typename T>
+std::vector<T> GetVector(unsigned int size, float initial, float increment)
+{
+    std::vector<float> typeVector(size, initial);
+    std::vector<T>     vector(size);
+
+    if (size > 1)
+    {
+        for (unsigned int i = 0; i < size; ++i)
+        {
+            vector[i] = T(initial + (increment * static_cast<float>(i)));
+        }
+    }
+    return vector;
+}
+
+} // namespace
+
+template <typename Conv2dTest,
+          armnn::DataType ArmnnType,
+          typename ConvDescriptorType = typename Conv2dTest::ConvDescriptorType,
+          typename T = armnn::ResolveType<ArmnnType>>
+INetworkPtr CreateNetwork(bool depthwise, bool preventFusing)
 {
     // Define layers information
-    Convolution2dDescriptor convolution2dDescriptor;
+    ConvDescriptorType convolution2dDescriptor;
     convolution2dDescriptor.m_BiasEnabled = false;
     convolution2dDescriptor.m_DataLayout = DataLayout::NHWC;
     convolution2dDescriptor.m_StrideX = 1;
@@ -22,127 +84,181 @@ BOOST_AUTO_TEST_CASE(Fuse_batchNorm_into_Conv2D_Float32_Test)
     BatchNormalizationDescriptor batchNormDescriptor;
     batchNormDescriptor.m_DataLayout = DataLayout::NHWC;
 
-    const unsigned int inputDimensionSizes[]   = {1, 4, 4, 3};  // NHWCin
-    const unsigned int weightsDimensionSizes[] = {4, 2, 2, 3};  // CoutHWCin
-    const unsigned int outputDimensionSizes[]  = {1, 3, 3, 4};  // NHWCout
-    const unsigned int outputChannelSize[]     = {outputDimensionSizes[3]};  // Cout
-
-    TensorInfo inputInfo (4, inputDimensionSizes, DataType::Float32);
-    TensorInfo outputInfo(4, outputDimensionSizes, DataType::Float32);
-
-    std::vector<float> weightsVector = { 1,  2,  3,  4,   5,  6,  7,  8,   9,  10,  11,  12,
-                                        11, 12, 13, 14,  15, 16, 17, 18,  19, 110, 111, 112,
-                                        21, 22, 23, 24,  25, 26, 27, 28,  29, 210, 211, 212,
-                                        31, 32, 33, 34,  35, 36, 37, 38,  39, 310, 311, 312};
-    TensorInfo weightsInfo(4, weightsDimensionSizes, DataType::Float32);
-    ConstTensor weights (weightsInfo, weightsVector);
-    std::vector<float> biasVector = {3.3f, 3.2f, 3.1f, 3.0f};
-    TensorInfo biasInfo(1, outputChannelSize, DataType::Float32);
-    ConstTensor bias (biasInfo, biasVector);
-    Optional<ConstTensor> optionalBias = Optional<ConstTensor>(bias);
+    const unsigned int inputDimensionSizes[] = {1, 4, 4, 3};  // NHWCin
+    unsigned int weightsDimensionSizes[]     = {4, 2, 2, 3};  // CoutHWCin
+    unsigned int outputDimensionSizes[]      = {1, 3, 3, 4};  // NHWCout
 
-    std::vector<float> betaVector     = {0.0f, 0.2f, 0.3f, 0.4f};
-    std::vector<float> gammaVector    = {0.5f, 0.6f, 0.7f, 0.8f};
-    std::vector<float> meanVector     = {0.1f, 0.2f, 0.3f, 0.4f};
-    std::vector<float> varianceVector = {1.0f, 1.1f, 1.2f, 1.3f};
-    ConstTensor beta    (TensorInfo(1, outputChannelSize, DataType::Float32), betaVector);
-    ConstTensor gamma   (TensorInfo(1, outputChannelSize, DataType::Float32), gammaVector);
-    ConstTensor mean    (TensorInfo(1, outputChannelSize, DataType::Float32), meanVector);
-    ConstTensor variance(TensorInfo(1, outputChannelSize, DataType::Float32), varianceVector);
+    if (depthwise)
+    {
+        // M  Cin  H  W
+        weightsDimensionSizes[0] = 4;
+        weightsDimensionSizes[1] = 3;
+        weightsDimensionSizes[2] = 2;
+        weightsDimensionSizes[3] = 2;
+        outputDimensionSizes[3]  = weightsDimensionSizes[0] * weightsDimensionSizes[1];
+    }
+    const unsigned int outputChannelSize[] = {outputDimensionSizes[3]};  // Cout
 
-    auto inputSize  = inputDimensionSizes[0]*inputDimensionSizes[1]*inputDimensionSizes[2]*inputDimensionSizes[3];
-    auto outputSize = outputDimensionSizes[0]*outputDimensionSizes[1]*outputDimensionSizes[2]*outputDimensionSizes[3];
+    TensorInfo inputInfo(4, inputDimensionSizes, ArmnnType);
+    TensorInfo outputInfo(4, outputDimensionSizes, ArmnnType);
 
-    // FIRST NETWORK: Fused
+    std::vector<int> weightsIntVector = { 1,  2,  3,  4,   5,  6,  7,  8,   9, 10, 11, 12,
+                                         11, 12, 13, 14,  15, 16, 17, 18,  19, 20, 21, 22,
+                                         21, 22, 23, 24,  25, 26, 27, 28,  29, 30, 31, 32,
+                                         31, 32, 33, 34,  35, 36, 37, 38,  39, 40, 41, 42};
+    std::vector<T> weightsVector(begin(weightsIntVector), end(weightsIntVector));
+    TensorInfo weightsInfo(4, weightsDimensionSizes, ArmnnType);
+    ConstTensor weights(weightsInfo, weightsVector);
 
-    // Construct ArmNN network
-    NetworkId networkIdentifier;
+    std::vector<T> biasVector = GetVector<T>(outputDimensionSizes[3], 3.3f, 0.1f);
+    TensorInfo biasInfo(1, outputChannelSize, ArmnnType);
+    ConstTensor bias(biasInfo, biasVector);
+    Optional<ConstTensor> optionalBias = Optional<ConstTensor>(bias);
+
+    std::vector<T> betaVector     = GetVector<T>(outputDimensionSizes[3], 0.0f, 0.2f);
+    std::vector<T> gammaVector    = GetVector<T>(outputDimensionSizes[3], 0.5f, 0.1f);
+    std::vector<T> meanVector     = GetVector<T>(outputDimensionSizes[3], 0.1f, 0.1f);
+    std::vector<T> varianceVector = GetVector<T>(outputDimensionSizes[3], 1.0f, 0.1f);
+
+    ConstTensor beta    (TensorInfo(1, outputChannelSize, ArmnnType), betaVector);
+    ConstTensor gamma   (TensorInfo(1, outputChannelSize, ArmnnType), gammaVector);
+    ConstTensor mean    (TensorInfo(1, outputChannelSize, ArmnnType), meanVector);
+    ConstTensor variance(TensorInfo(1, outputChannelSize, ArmnnType), varianceVector);
+
+    // Create a network
     INetworkPtr network = INetwork::Create();
-    IConnectableLayer *inputLayer     = network->AddInputLayer(0);
-    IConnectableLayer *convLayer      = network->AddConvolution2dLayer(convolution2dDescriptor,
-                                                                       weights,
-                                                                       optionalBias,
-                                                                       "convolution");
-    IConnectableLayer *batchNormLayer = network->AddBatchNormalizationLayer(batchNormDescriptor,
+
+    IConnectableLayer* inputLayer = network->AddInputLayer(0);
+
+    IConnectableLayer* convLayer = Conv2dTest::AddConvolution(network.get(),
+                                                              convolution2dDescriptor,
+                                                              weights,
+                                                              optionalBias,
+                                                              "convolution");
+
+    IConnectableLayer* batchNormLayer = network->AddBatchNormalizationLayer(batchNormDescriptor,
                                                                             mean,
                                                                             variance,
                                                                             beta,
                                                                             gamma,
                                                                             "batchNorm");
-    IConnectableLayer *outputLayer    = network->AddOutputLayer(0);
 
-    inputLayer     ->GetOutputSlot(0).Connect(convLayer     ->GetInputSlot(0));
-    convLayer      ->GetOutputSlot(0).Connect(batchNormLayer->GetInputSlot(0));
-    batchNormLayer ->GetOutputSlot(0).Connect(outputLayer   ->GetInputSlot(0));
+    IConnectableLayer* outputLayer  = network->AddOutputLayer(0);
+    IConnectableLayer* output2Layer = nullptr;
+
+    if (preventFusing)
+    {
+        output2Layer = network->AddOutputLayer(1);
+    }
 
-    //Set the tensors in the network.
-    inputLayer     ->GetOutputSlot(0).SetTensorInfo(inputInfo);
-    convLayer      ->GetOutputSlot(0).SetTensorInfo(outputInfo);
-    batchNormLayer ->GetOutputSlot(0).SetTensorInfo(outputInfo);
+    // Set layer information
+    inputLayer    ->GetOutputSlot(0).SetTensorInfo(inputInfo);
+    convLayer     ->GetOutputSlot(0).SetTensorInfo(outputInfo);
+    batchNormLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);
+
+    // Connect layers
+    inputLayer    ->GetOutputSlot(0).Connect(convLayer->GetInputSlot(0));
+    convLayer     ->GetOutputSlot(0).Connect(batchNormLayer->GetInputSlot(0));
+    batchNormLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
+
+    if (preventFusing)
+    {
+        convLayer ->GetOutputSlot(0).Connect(output2Layer->GetInputSlot(0));
+    }
+
+    return network;
+}
+
+template <typename Conv2dTest,
+          armnn::DataType ArmnnType,
+          typename ConvLayerType = typename Conv2dTest::ConvLayerType,
+          typename T = armnn::ResolveType<ArmnnType>>
+void FuseBatchNormIntoConvTest(bool depthwise, float tolerance, armnn::Compute backendId)
+{
+    // FIRST NETWORK: Fused
+    // Construct ArmNN network
+    INetworkPtr networkFused = CreateNetwork<Conv2dTest, ArmnnType>(depthwise, false);
 
     // Create ArmNN runtime
-    IRuntime::CreationOptions options; // default options
-    IRuntimePtr run = IRuntime::Create(options);
+    IRuntimePtr run = IRuntime::Create(IRuntime::CreationOptions()); // default options
 
     // Optimise ArmNN network
-    IOptimizedNetworkPtr optNet = Optimize(*network, {Compute::CpuRef}, run->GetDeviceSpec());
+    IOptimizedNetworkPtr optNetFused = Optimize(*networkFused, {backendId}, run->GetDeviceSpec());
 
-    // Load graph into runtime
-    BOOST_TEST(run->LoadNetwork(networkIdentifier, std::move(optNet)) == Status::Success);
+    Graph graphFused = PolymorphicDowncast<OptimizedNetwork*>(optNetFused.get())->GetGraph();
+
+    auto checkFusedConv2d = [](const armnn::Layer* const layer) -> bool
+    {
+        return IsLayerOfType<ConvLayerType>(layer) &&
+               (layer->GetNameStr() == "fused-batchNorm-into-convolution");
+    };
+
+    BOOST_CHECK(3 == graphFused.GetNumLayers());
+    BOOST_TEST(CheckSequence(graphFused.cbegin(),
+                             graphFused.cend(),
+                             &IsLayerOfType<InputLayer>,
+                             checkFusedConv2d,
+                             &IsLayerOfType<OutputLayer>));
+
+    // Load network into runtime
+    NetworkId networkIdentifier;
+    BOOST_TEST(run->LoadNetwork(networkIdentifier, std::move(optNetFused)) == Status::Success);
 
     //Creates structures for inputs and outputs.
-    std::vector<float> inputData(inputSize, 128);
-    std::vector<float> outputData(outputSize);
+    std::vector<T> inputDataFused = GetVector<T>(48, 1.0f, 0.1f);
+
+    std::vector<T> outputDataFused(36);
 
-    InputTensors  inputTensors {{0, ConstTensor(run->GetInputTensorInfo (networkIdentifier, 0), inputData.data())}};
-    OutputTensors outputTensors{{0,      Tensor(run->GetOutputTensorInfo(networkIdentifier, 0), outputData.data())}};
+    if (depthwise)
+    {
+        outputDataFused.resize(108);
+    }
+
+    InputTensors  inputTensorsFused {
+        {0, ConstTensor(run->GetInputTensorInfo (networkIdentifier, 0), inputDataFused.data())}};
+    OutputTensors outputTensorsFused{
+        {0,      Tensor(run->GetOutputTensorInfo(networkIdentifier, 0), outputDataFused.data())}};
 
     // Execute network
-    run->EnqueueWorkload(networkIdentifier, inputTensors, outputTensors);
+    run->EnqueueWorkload(networkIdentifier, inputTensorsFused, outputTensorsFused);
 
     // SECOND NETWORK: NotFused
     // Construct ArmNN network
-    NetworkId networkIdentifierNotFused;
-    INetworkPtr networkNotFused = INetwork::Create();
-    IConnectableLayer *inputLayerNotFused     = networkNotFused->AddInputLayer(0);
-    IConnectableLayer *convLayerNotFused      = networkNotFused->AddConvolution2dLayer(convolution2dDescriptor,
-                                                                                       weights,
-                                                                                       optionalBias,
-                                                                                       "convolution");
-    IConnectableLayer *batchNormLayerNotFused = networkNotFused->AddBatchNormalizationLayer(batchNormDescriptor,
-                                                                                            mean,
-                                                                                            variance,
-                                                                                            beta,
-                                                                                            gamma,
-                                                                                            "batchNorm");
-    IConnectableLayer *outputLayerNotFused    = networkNotFused->AddOutputLayer(0);
-    IConnectableLayer *output2LayerNotFused   = networkNotFused->AddOutputLayer(1);
-
-    inputLayerNotFused    ->GetOutputSlot(0).Connect(convLayerNotFused     ->GetInputSlot(0));
-    convLayerNotFused     ->GetOutputSlot(0).Connect(batchNormLayerNotFused->GetInputSlot(0));
-    batchNormLayerNotFused->GetOutputSlot(0).Connect(outputLayerNotFused   ->GetInputSlot(0));
-    convLayerNotFused     ->GetOutputSlot(0).Connect(output2LayerNotFused  ->GetInputSlot(0));
-
-    //Set the tensors in the network.
-    inputLayerNotFused    ->GetOutputSlot(0).SetTensorInfo(inputInfo);
-    convLayerNotFused     ->GetOutputSlot(0).SetTensorInfo(outputInfo);
-    batchNormLayerNotFused->GetOutputSlot(0).SetTensorInfo(outputInfo);
+    INetworkPtr networkNotFused = CreateNetwork<Conv2dTest, ArmnnType>(depthwise, true);
 
     // Create ArmNN runtime
-    IRuntimePtr runNotFused = IRuntime::Create(options);
+    IRuntimePtr runNotFused = IRuntime::Create(IRuntime::CreationOptions()); // default options
 
     // Optimise ArmNN network
-    IOptimizedNetworkPtr optNetNotFused = Optimize(*networkNotFused, {Compute::CpuRef}, runNotFused->GetDeviceSpec());
+    IOptimizedNetworkPtr optNetNotFused = Optimize(*networkNotFused, {backendId}, runNotFused->GetDeviceSpec());
 
-    // Load graph into runtime
+    Graph graphNotFused = PolymorphicDowncast<OptimizedNetwork*>(optNetNotFused.get())->GetGraph();
+
+    BOOST_CHECK(5 == graphNotFused.GetNumLayers());
+    BOOST_TEST(CheckSequence(graphNotFused.cbegin(),
+                             graphNotFused.cend(),
+                             &IsLayerOfType<armnn::InputLayer>,
+                             &IsLayerOfType<ConvLayerType>,
+                             &IsLayerOfType<armnn::BatchNormalizationLayer>,
+                             &IsLayerOfType<armnn::OutputLayer>,
+                             &IsLayerOfType<armnn::OutputLayer>));
+
+    // Load network into runtime
+    NetworkId networkIdentifierNotFused;
     BOOST_TEST(runNotFused->LoadNetwork(networkIdentifierNotFused, std::move(optNetNotFused)) == Status::Success);
 
     //Creates structures for inputs and outputs.
-    std::vector<float> inputDataNotFused(inputSize, 128);
-    std::vector<float> outputDataNotFused(outputSize);
-    std::vector<float> outputData2NotFused(outputSize);
+    std::vector<T> inputDataNotFused = GetVector<T>(48, 1.0f, 0.1f);
+    std::vector<T> outputDataNotFused(36);
+    std::vector<T> outputData2NotFused(36);
+
+    if (depthwise)
+    {
+        outputDataNotFused.resize(108);
+        outputData2NotFused.resize(108);
+    }
 
     InputTensors inputTensorsNotFused{
         {0, ConstTensor(runNotFused->GetInputTensorInfo(networkIdentifierNotFused, 0), inputDataNotFused.data())}};
     OutputTensors outputTensorsNotFused{
@@ -153,11 +269,33 @@ BOOST_AUTO_TEST_CASE(Fuse_batchNorm_into_Conv2D_Float32_Test)
     runNotFused->EnqueueWorkload(networkIdentifierNotFused, inputTensorsNotFused, outputTensorsNotFused);
 
     // Check that the output of the fused convolution matches the output of the batchNorm in the "NotFused" network
-    for (unsigned int n = 0; n < outputData.size(); ++n)
+    for (unsigned int n = 0; n < outputDataFused.size(); ++n)
     {
-        BOOST_CHECK_CLOSE(outputData[n], outputDataNotFused[n], 0.001);
+        BOOST_CHECK_CLOSE(outputDataFused[n], outputDataNotFused[n], T(tolerance));
     }
 }
+
+// This unit test needs the reference backend, it's not available if the reference backend is not built
+#if defined(ARMNNREF_ENABLED)
+BOOST_AUTO_TEST_CASE(FuseBatchNormIntoConv2DFloat32Test)
+{
+    FuseBatchNormIntoConvTest<Conv2dTest, DataType::Float32>(false, 0.0001f, armnn::Compute::CpuRef);
+}
+
+BOOST_AUTO_TEST_CASE(FuseBatchNormIntoConv2DFloat16Test)
+{
+    FuseBatchNormIntoConvTest<Conv2dTest, DataType::Float16>(false, 0.1f, armnn::Compute::CpuRef);
+}
+
+BOOST_AUTO_TEST_CASE(FuseBatchNormIntoDepthwiseConv2DFloat32Test)
+{
+    FuseBatchNormIntoConvTest<DepthwiseConv2dTest, DataType::Float32>(true, 0.0001f, armnn::Compute::CpuRef);
+}
+
+BOOST_AUTO_TEST_CASE(FuseBatchNormIntoDepthwiseConv2DFloat16Test)
+{
+    FuseBatchNormIntoConvTest<DepthwiseConv2dTest, DataType::Float16>(true, 0.1f, armnn::Compute::CpuRef);
+}
 #endif
 
 BOOST_AUTO_TEST_SUITE_END()
-- cgit v1.2.1
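To see the identity the fused/not-fused test pair exercises without pulling in Arm NN, here is a minimal standalone sketch (plain C++; the names are illustrative and not part of the patch) that folds batch-norm parameters into a toy single-channel convolution and compares the two evaluation paths:

    #include <cmath>
    #include <cstdio>
    #include <vector>

    int main()
    {
        // Batch-norm parameters for one output channel.
        const float eps = 1e-5f, gamma = 0.5f, beta = 0.1f, mean = 0.2f, var = 1.3f;
        // A 2x2 kernel applied at one position, i.e. a dot product.
        const std::vector<float> weights = {1.0f, 2.0f, 3.0f, 4.0f};
        const std::vector<float> input   = {0.5f, -1.0f, 2.0f, 0.25f};
        const float bias = 3.3f;

        // Path 1: convolution followed by batch normalization.
        float conv = bias;
        for (size_t i = 0; i < weights.size(); ++i) { conv += weights[i] * input[i]; }
        const float unfused = gamma * (conv - mean) / std::sqrt(var + eps) + beta;

        // Path 2: fold gamma/mean/var/beta into the weights and bias first.
        const float mult  = gamma / std::sqrt(var + eps);  // scale applied to every weight
        float       fused = mult * (bias - mean) + beta;   // fused bias
        for (size_t i = 0; i < weights.size(); ++i) { fused += (mult * weights[i]) * input[i]; }

        std::printf("unfused=%f fused=%f diff=%e\n", unfused, fused, unfused - fused);
        return 0;
    }

Up to floating-point rounding the two paths agree, which is what the tests above assert with BOOST_CHECK_CLOSE, using a looser tolerance for Float16.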