From d97db7e6bb9738590e3980c6e721669006e85af4 Mon Sep 17 00:00:00 2001 From: Francis Murtagh Date: Mon, 16 Jan 2023 13:11:29 +0000 Subject: Github #700: Fix order of optimizations so dequantization works with folding * Folding of pad into conv2d expected a Constant layer not Dequantisation * Fusing Dequantisation with Constant to a Constant ensures that. * Group Constant layer optimizations together where possible. * Add unit test. Signed-off-by: Francis Murtagh Change-Id: Id0393313bf097595f2f13738b7513e427116ea4a --- src/armnn/Network.cpp | 17 +-- ...tConstDequantisationLayersToConstLayersTest.cpp | 134 +++++++++++++++++---- 2 files changed, 120 insertions(+), 31 deletions(-) diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp index e81b87b382..08d3280cfe 100644 --- a/src/armnn/Network.cpp +++ b/src/armnn/Network.cpp @@ -1,5 +1,5 @@ // -// Copyright © 2017,2022,2023 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2017, 2022-2023 Arm Ltd and Contributors. All rights reserved. // SPDX-License-Identifier: MIT // @@ -1056,7 +1056,7 @@ OptimizationResult ApplyBackendOptimizations(OptimizedNetworkImpl* optNetObjPtr, auto backendObjPtr = backends.find(selectedBackend)->second.get(); ARMNN_ASSERT(backendObjPtr); - if(selectedBackend == armnn::Compute::GpuAcc || selectedBackend == armnn::Compute::CpuAcc) + if (selectedBackend == armnn::Compute::GpuAcc || selectedBackend == armnn::Compute::CpuAcc) { Optimizer::Pass(optGraph, MakeOptimizations(optimizations::PermuteDepthwiseConv2dWeights())); Optimizer::Pass(optGraph, MakeOptimizations(optimizations::FusePermuteIntoConstLayer())); @@ -1636,10 +1636,14 @@ IOptimizedNetworkPtr Optimize(const Graph& inGraph, optGraph.InferTensorInfos(); } - // Need to FusePermuteIntoConstantLayer before FoldPadIntoDepthwiseConvolution2d or - // FuseBatchNormIntoDepthwiseConvolution2D optimizations are called. 
- Optimizer::Pass(optGraph, MakeOptimizations(FusePermuteIntoConstLayer())); + // Group Constant Layer optimizations together where possible. + // This is important as: + // FusePermuteIntoConstantLayer must happen before FoldPadIntoDepthwiseConvolution2d and + // FuseBatchNormIntoDepthwiseConvolution2D. + // ConvertConstDequantisationLayersToConstLayers must happen before FoldPadIntoConvolution2d + Optimizer::Pass(optGraph, MakeOptimizations(FusePermuteIntoConstLayer(), + ConvertConstDequantisationLayersToConstLayers())); // Perform optimisation passes Optimizer::Pass(optGraph, MakeOptimizations(SquashEqualPermuteSiblings(), SquashEqualTransposeSiblings(), @@ -1659,8 +1663,7 @@ IOptimizedNetworkPtr Optimize(const Graph& inGraph, FuseBatchNormIntoConvolution2DFloat32(), FuseBatchNormIntoConvolution2DFloat16(), FuseBatchNormIntoDepthwiseConvolution2DFloat32(), - FuseBatchNormIntoDepthwiseConvolution2DFloat16(), - ConvertConstDequantisationLayersToConstLayers())); + FuseBatchNormIntoDepthwiseConvolution2DFloat16())); // If Fp32 to Fp16 optimization is set convert Fp32 network to Fp16 if (options.m_ReduceFp32ToFp16) diff --git a/src/armnn/test/optimizations/ConvertConstDequantisationLayersToConstLayersTest.cpp b/src/armnn/test/optimizations/ConvertConstDequantisationLayersToConstLayersTest.cpp index 926ac2d26d..f54ac9f9cb 100644 --- a/src/armnn/test/optimizations/ConvertConstDequantisationLayersToConstLayersTest.cpp +++ b/src/armnn/test/optimizations/ConvertConstDequantisationLayersToConstLayersTest.cpp @@ -1,5 +1,5 @@ // -// Copyright © 2022 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2022-2023 Arm Ltd and Contributors. All rights reserved. 
 // SPDX-License-Identifier: MIT
 //
@@ -15,6 +15,39 @@ TEST_SUITE("Optimizer")
 using namespace armnn;
 using namespace armnn::optimizations;
 
+// Helpers for testing
+auto checkConstantFloat32 = [](const armnn::Layer *const layer)
+{
+    return IsLayerOfType<ConstantLayer>(layer) && (layer->GetDataType() == DataType::Float32);
+};
+
+auto checkConstantFloat16 = [](const armnn::Layer *const layer)
+{
+    return IsLayerOfType<ConstantLayer>(layer) && (layer->GetDataType() == DataType::Float16);
+};
+
+auto checkConstantQAsymmS8 = [](const armnn::Layer *const layer)
+{
+    return IsLayerOfType<ConstantLayer>(layer) && (layer->GetDataType() == DataType::QAsymmS8);
+};
+
+auto checkPadFoldedIntoConv2d = [](const Layer* const layer)
+{
+    const auto conv2dLayer = static_cast<const Convolution2dLayer*>(layer);
+    const auto conv2dLayerParams = conv2dLayer->GetParameters();
+
+    return IsLayerOfType<Convolution2dLayer>(layer) &&
+           (layer->GetNameStr() == "folded-pad-into-conv2d") &&
+           (conv2dLayerParams.m_PadLeft == 2) &&
+           (conv2dLayerParams.m_PadRight == 2) &&
+           (conv2dLayerParams.m_PadTop == 2) &&
+           (conv2dLayerParams.m_PadBottom == 2) &&
+           (conv2dLayerParams.m_StrideX == 1) &&
+           (conv2dLayerParams.m_StrideY == 1) &&
+           (conv2dLayerParams.m_BiasEnabled == false) &&
+           (conv2dLayerParams.m_DataLayout == DataLayout::NHWC);
+};
+
 TEST_CASE("ConvertConstFloat16DequantizeToConstFloat32")
 {
     Graph graph;
@@ -23,29 +56,21 @@ TEST_CASE("ConvertConstFloat16DequantizeToConstFloat32")
     const TensorInfo constTensorInfo(4, shape, DataType::Float16, 1.0, 0, true);
     const TensorInfo outputDequantizeInfo(4, shape, DataType::Float32, 1.0, 0, true);
 
-    ConstantLayer *constantLayer = graph.AddLayer<ConstantLayer>("constant");
+    auto constantLayer = graph.AddLayer<ConstantLayer>("constant");
     std::vector<float> constantValues(constTensorInfo.GetNumElements(), 4.5f);
     ConstTensor constTensor(constTensorInfo, constantValues.data());
     constantLayer->m_LayerOutput = std::make_shared<ScopedTensorHandle>(constTensor);
     constantLayer->GetOutputSlot().SetTensorInfo(constTensorInfo);
 
-    DequantizeLayer *dequantizeLayer = graph.AddLayer<DequantizeLayer>("dequantize");
+    auto
 dequantizeLayer = graph.AddLayer<DequantizeLayer>("dequantize");
     dequantizeLayer->GetOutputSlot().SetTensorInfo(outputDequantizeInfo);
 
-    OutputLayer *output = graph.AddLayer<OutputLayer>(0, "output");
+    auto output = graph.AddLayer<OutputLayer>(0, "output");
 
     // Connect up constant -> dequantize -> output
     constantLayer->GetOutputSlot().Connect(dequantizeLayer->GetInputSlot(0));
     dequantizeLayer->GetOutputSlot().Connect(output->GetInputSlot(0));
 
-    auto checkConstantFloat16 = [](const armnn::Layer *const layer) -> bool {
-        return IsLayerOfType<ConstantLayer>(layer) &&
-               (layer->GetDataType() == DataType::Float16);
-    };
-    auto checkConstantFloat32 = [](const armnn::Layer *const layer) -> bool {
-        return IsLayerOfType<ConstantLayer>(layer) &&
-               (layer->GetDataType() == DataType::Float32);
-    };
 
     CHECK(CheckSequence(graph.cbegin(), graph.cend(),
                         checkConstantFloat16,
@@ -59,6 +84,76 @@ TEST_CASE("ConvertConstFloat16DequantizeToConstFloat32")
                         &IsLayerOfType<armnn::OutputLayer>));
 }
 
+TEST_CASE("ConvertConstFloat16DequantizeToConstFloat32PlusFusePadWithConv2d")
+{
+    Graph graph;
+    const unsigned int shape[] = {1, 2, 2, 3};
+
+    const TensorInfo constTensorInfo(4, shape, DataType::Float16, 1.0, 0, true);
+    const TensorInfo outputDequantizeInfo(4, shape, DataType::Float32, 1.0, 0, true);
+
+    auto constantLayer = graph.AddLayer<ConstantLayer>("constant");
+    std::vector<float> constantValues(constTensorInfo.GetNumElements(), 4.5f);
+    ConstTensor constTensor(constTensorInfo, constantValues.data());
+    constantLayer->m_LayerOutput = std::make_shared<ScopedTensorHandle>(constTensor);
+    constantLayer->GetOutputSlot().SetTensorInfo(constTensorInfo);
+
+    auto dequantizeLayer = graph.AddLayer<DequantizeLayer>("dequantize");
+    dequantizeLayer->GetOutputSlot().SetTensorInfo(outputDequantizeInfo);
+
+    auto output = graph.AddLayer<OutputLayer>(0, "output");
+
+    Convolution2dDescriptor convolution2dDescriptor;
+    convolution2dDescriptor.m_BiasEnabled = false;
+    convolution2dDescriptor.m_StrideX = 1;
+    convolution2dDescriptor.m_StrideY = 1;
+    convolution2dDescriptor.m_DataLayout = DataLayout::NHWC;
+    auto conv2d = graph.AddLayer<Convolution2dLayer>(convolution2dDescriptor,
"conv2d"); + + + auto inputLayer = graph.AddLayer(0, "input"); + + PadDescriptor padDescriptor({{0, 0}, + {2, 2}, + {2, 2}, + {0, 0}}); + + const unsigned int paddedShape[] = {1, 6, 6, 3}; + + TensorInfo paddedInfo(4, paddedShape, DataType::Float32); + + auto padLayer = graph.AddLayer(padDescriptor, "pad"); + padLayer->GetOutputSlot().SetTensorInfo(paddedInfo); + + // Connect up: + // input -> pad -> conv2d -> output + // constant -> dequantize -> + constantLayer->GetOutputSlot().Connect(dequantizeLayer->GetInputSlot(0)); + dequantizeLayer->GetOutputSlot().Connect(conv2d->GetInputSlot(1)); + inputLayer->GetOutputSlot().Connect(padLayer->GetInputSlot(0)); + padLayer->GetOutputSlot().Connect(conv2d->GetInputSlot(0)); + conv2d->GetOutputSlot().Connect(output->GetInputSlot(0)); + + CHECK(CheckSequence(graph.cbegin(), graph.cend(), + &IsLayerOfType, + checkConstantFloat16, + &IsLayerOfType, + &IsLayerOfType, + &IsLayerOfType, + &IsLayerOfType)); + + armnn::Optimizer::Pass(graph, MakeOptimizations(ConvertConstDequantisationLayersToConstLayers())); + armnn::Optimizer::Pass(graph, MakeOptimizations(FoldPadIntoConvolution2d())); + + // Ensure that the const and dequantize are now constant of type fp32 + // Ensure pad and conv2d are now just convolution + CHECK(CheckSequence(graph.cbegin(), graph.cend(), + &IsLayerOfType, + checkConstantFloat32, + checkPadFoldedIntoConv2d, + &IsLayerOfType)); +} + TEST_CASE("ConvertConstInt8DequantizeToConstFloat32") { Graph graph; @@ -67,30 +162,21 @@ TEST_CASE("ConvertConstInt8DequantizeToConstFloat32") const TensorInfo constTensorInfo(4, shape, DataType::QAsymmS8, 1.0, 0, true); const TensorInfo outputDequantizeInfo(4, shape, DataType::Float32, 1.0, 0, true); - ConstantLayer *constantLayer = graph.AddLayer("constant"); + auto constantLayer = graph.AddLayer("constant"); std::vector constantValues(constTensorInfo.GetNumElements(), 5); ConstTensor constTensor(constTensorInfo, constantValues.data()); constantLayer->m_LayerOutput = 
std::make_shared<ScopedTensorHandle>(constTensor);
     constantLayer->GetOutputSlot().SetTensorInfo(constTensorInfo);
 
-    DequantizeLayer *dequantizeLayer = graph.AddLayer<DequantizeLayer>("dequantize");
+    auto dequantizeLayer = graph.AddLayer<DequantizeLayer>("dequantize");
     dequantizeLayer->GetOutputSlot().SetTensorInfo(outputDequantizeInfo);
 
-    OutputLayer *output = graph.AddLayer<OutputLayer>(0, "output");
+    auto output = graph.AddLayer<OutputLayer>(0, "output");
 
     // Connect up constant -> dequantize -> output
     constantLayer->GetOutputSlot().Connect(dequantizeLayer->GetInputSlot(0));
     dequantizeLayer->GetOutputSlot().Connect(output->GetInputSlot(0));
 
-    auto checkConstantQAsymmS8 = [](const armnn::Layer *const layer) -> bool {
-        return IsLayerOfType<ConstantLayer>(layer) &&
-               (layer->GetDataType() == DataType::QAsymmS8);
-    };
-    auto checkConstantFloat32 = [](const armnn::Layer *const layer) -> bool {
-        return IsLayerOfType<ConstantLayer>(layer) &&
-               (layer->GetDataType() == DataType::Float32);
-    };
-
     CHECK(CheckSequence(graph.cbegin(), graph.cend(),
                         checkConstantQAsymmS8,
                         &IsLayerOfType<armnn::DequantizeLayer>,
-- 
cgit v1.2.1