author    Francis Murtagh <francis.murtagh@arm.com>  2023-01-16 13:11:29 +0000
committer Francis Murtagh <francis.murtagh@arm.com>  2023-01-18 13:45:40 +0000
commit    d97db7e6bb9738590e3980c6e721669006e85af4 (patch)
tree      b07cc306d3f73ab040c50583397544b842188ad9
parent    e27983ccfc9f73e2ec69863dcc5d9812fba7f5ef (diff)
Github #700: Fix order of optimizations so dequantization works with folding
* Folding of pad into conv2d expected a Constant layer, not a Dequantisation layer.
* Fusing the Dequantisation with the Constant into a single Constant ensures that.
* Group Constant layer optimizations together where possible.
* Add unit test.

Signed-off-by: Francis Murtagh <francis.murtagh@arm.com>
Change-Id: Id0393313bf097595f2f13738b7513e427116ea4a
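The gist of the fix, as a minimal sketch (the helper name and header paths below are assumed; the real logic lives in armnn::Optimize() in src/armnn/Network.cpp, which interleaves many more passes, as the diff shows):

    #include "Optimizer.hpp"            // assumed internal ArmNN header paths
    #include "optimizations/All.hpp"

    using namespace armnn;
    using namespace armnn::optimizations;

    // Hypothetical helper: group the Constant layer rewrites ahead of the
    // folding passes that pattern-match on ConstantLayer.
    void RunConstantLayerPassesFirst(Graph& optGraph)
    {
        // Collapse Constant -> Dequantize chains (and fuse Permute into
        // Constant) first, so later passes see a plain ConstantLayer on
        // Conv2d weight inputs.
        Optimizer::Pass(optGraph, MakeOptimizations(FusePermuteIntoConstLayer(),
                                                    ConvertConstDequantisationLayersToConstLayers()));

        // FoldPadIntoConvolution2d now finds the ConstantLayer it expects on
        // the weight slot and can fold the preceding Pad into the Conv2d.
        Optimizer::Pass(optGraph, MakeOptimizations(FoldPadIntoConvolution2d()));
    }

Run in the old order, FoldPadIntoConvolution2d met a DequantizeLayer on the weight slot and left the Pad in place; the new unit test below locks in the corrected behaviour.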
-rw-r--r--  src/armnn/Network.cpp                                                               17
-rw-r--r--  src/armnn/test/optimizations/ConvertConstDequantisationLayersToConstLayersTest.cpp  134
2 files changed, 120 insertions, 31 deletions
diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp
index e81b87b382..08d3280cfe 100644
--- a/src/armnn/Network.cpp
+++ b/src/armnn/Network.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017,2022,2023 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2017, 2022-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -1056,7 +1056,7 @@ OptimizationResult ApplyBackendOptimizations(OptimizedNetworkImpl* optNetObjPtr,
auto backendObjPtr = backends.find(selectedBackend)->second.get();
ARMNN_ASSERT(backendObjPtr);
- if(selectedBackend == armnn::Compute::GpuAcc || selectedBackend == armnn::Compute::CpuAcc)
+ if (selectedBackend == armnn::Compute::GpuAcc || selectedBackend == armnn::Compute::CpuAcc)
{
Optimizer::Pass(optGraph, MakeOptimizations(optimizations::PermuteDepthwiseConv2dWeights()));
Optimizer::Pass(optGraph, MakeOptimizations(optimizations::FusePermuteIntoConstLayer()));
@@ -1636,10 +1636,14 @@ IOptimizedNetworkPtr Optimize(const Graph& inGraph,
optGraph.InferTensorInfos();
}
- // Need to FusePermuteIntoConstantLayer before FoldPadIntoDepthwiseConvolution2d or
- // FuseBatchNormIntoDepthwiseConvolution2D optimizations are called.
- Optimizer::Pass(optGraph, MakeOptimizations(FusePermuteIntoConstLayer()));
+ // Group Constant Layer optimizations together where possible.
+ // This is important as:
+ // FusePermuteIntoConstantLayer must happen before FoldPadIntoDepthwiseConvolution2d and
+ // FuseBatchNormIntoDepthwiseConvolution2D.
+ // ConvertConstDequantisationLayersToConstLayers must happen before FoldPadIntoConvolution2d
+ Optimizer::Pass(optGraph, MakeOptimizations(FusePermuteIntoConstLayer(),
+ ConvertConstDequantisationLayersToConstLayers()));
// Perform optimisation passes
Optimizer::Pass(optGraph, MakeOptimizations(SquashEqualPermuteSiblings(),
SquashEqualTransposeSiblings(),
@@ -1659,8 +1663,7 @@ IOptimizedNetworkPtr Optimize(const Graph& inGraph,
FuseBatchNormIntoConvolution2DFloat32(),
FuseBatchNormIntoConvolution2DFloat16(),
FuseBatchNormIntoDepthwiseConvolution2DFloat32(),
- FuseBatchNormIntoDepthwiseConvolution2DFloat16(),
- ConvertConstDequantisationLayersToConstLayers()));
+ FuseBatchNormIntoDepthwiseConvolution2DFloat16()));
// If Fp32 to Fp16 optimization is set convert Fp32 network to Fp16
if (options.m_ReduceFp32ToFp16)
diff --git a/src/armnn/test/optimizations/ConvertConstDequantisationLayersToConstLayersTest.cpp b/src/armnn/test/optimizations/ConvertConstDequantisationLayersToConstLayersTest.cpp
index 926ac2d26d..f54ac9f9cb 100644
--- a/src/armnn/test/optimizations/ConvertConstDequantisationLayersToConstLayersTest.cpp
+++ b/src/armnn/test/optimizations/ConvertConstDequantisationLayersToConstLayersTest.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2022-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -15,6 +15,39 @@ TEST_SUITE("Optimizer")
using namespace armnn;
using namespace armnn::optimizations;
+// Helpers for testing
+auto checkConstantFloat32 = [](const armnn::Layer *const layer)
+{
+ return IsLayerOfType<ConstantLayer>(layer) && (layer->GetDataType() == DataType::Float32);
+};
+
+auto checkConstantFloat16 = [](const armnn::Layer *const layer)
+{
+ return IsLayerOfType<ConstantLayer>(layer) && (layer->GetDataType() == DataType::Float16);
+};
+
+auto checkConstantQAsymmS8 = [](const armnn::Layer *const layer)
+{
+ return IsLayerOfType<ConstantLayer>(layer) && (layer->GetDataType() == DataType::QAsymmS8);
+};
+
+auto checkPadFoldedIntoConv2d = [](const Layer* const layer)
+{
+ const auto conv2dLayer = static_cast<const Convolution2dLayer*>(layer);
+ const auto conv2dLayerParams = conv2dLayer->GetParameters();
+
+ return IsLayerOfType<Convolution2dLayer>(layer) &&
+ (layer->GetNameStr() == "folded-pad-into-conv2d") &&
+ (conv2dLayerParams.m_PadLeft == 2) &&
+ (conv2dLayerParams.m_PadRight == 2) &&
+ (conv2dLayerParams.m_PadTop == 2) &&
+ (conv2dLayerParams.m_PadBottom == 2) &&
+ (conv2dLayerParams.m_StrideX == 1) &&
+ (conv2dLayerParams.m_StrideY == 1) &&
+ (conv2dLayerParams.m_BiasEnabled == false) &&
+ (conv2dLayerParams.m_DataLayout == DataLayout::NHWC);
+};
+
TEST_CASE("ConvertConstFloat16DequantizeToConstFloat32")
{
Graph graph;
@@ -23,29 +56,21 @@ TEST_CASE("ConvertConstFloat16DequantizeToConstFloat32")
const TensorInfo constTensorInfo(4, shape, DataType::Float16, 1.0, 0, true);
const TensorInfo outputDequantizeInfo(4, shape, DataType::Float32, 1.0, 0, true);
- ConstantLayer *constantLayer = graph.AddLayer<ConstantLayer>("constant");
+ auto constantLayer = graph.AddLayer<ConstantLayer>("constant");
std::vector<float> constantValues(constTensorInfo.GetNumElements(), 4.5f);
ConstTensor constTensor(constTensorInfo, constantValues.data());
constantLayer->m_LayerOutput = std::make_shared<ScopedTensorHandle>(constTensor);
constantLayer->GetOutputSlot().SetTensorInfo(constTensorInfo);
- DequantizeLayer *dequantizeLayer = graph.AddLayer<DequantizeLayer>("dequantize");
+ auto dequantizeLayer = graph.AddLayer<DequantizeLayer>("dequantize");
dequantizeLayer->GetOutputSlot().SetTensorInfo(outputDequantizeInfo);
- OutputLayer *output = graph.AddLayer<OutputLayer>(0, "output");
+ auto output = graph.AddLayer<OutputLayer>(0, "output");
// Connect up constant -> dequantize -> output
constantLayer->GetOutputSlot().Connect(dequantizeLayer->GetInputSlot(0));
dequantizeLayer->GetOutputSlot().Connect(output->GetInputSlot(0));
- auto checkConstantFloat16 = [](const armnn::Layer *const layer) -> bool {
- return IsLayerOfType<ConstantLayer>(layer) &&
- (layer->GetDataType() == DataType::Float16);
- };
- auto checkConstantFloat32 = [](const armnn::Layer *const layer) -> bool {
- return IsLayerOfType<ConstantLayer>(layer) &&
- (layer->GetDataType() == DataType::Float32);
- };
CHECK(CheckSequence(graph.cbegin(), graph.cend(),
checkConstantFloat16,
@@ -59,6 +84,76 @@ TEST_CASE("ConvertConstFloat16DequantizeToConstFloat32")
&IsLayerOfType<OutputLayer>));
}
+TEST_CASE("ConvertConstFloat16DequantizeToConstFloat32PlusFusePadWithConv2d")
+{
+ Graph graph;
+ const unsigned int shape[] = {1, 2, 2, 3};
+
+ const TensorInfo constTensorInfo(4, shape, DataType::Float16, 1.0, 0, true);
+ const TensorInfo outputDequantizeInfo(4, shape, DataType::Float32, 1.0, 0, true);
+
+ auto constantLayer = graph.AddLayer<ConstantLayer>("constant");
+ std::vector<float> constantValues(constTensorInfo.GetNumElements(), 4.5f);
+ ConstTensor constTensor(constTensorInfo, constantValues.data());
+ constantLayer->m_LayerOutput = std::make_shared<ScopedTensorHandle>(constTensor);
+ constantLayer->GetOutputSlot().SetTensorInfo(constTensorInfo);
+
+ auto dequantizeLayer = graph.AddLayer<DequantizeLayer>("dequantize");
+ dequantizeLayer->GetOutputSlot().SetTensorInfo(outputDequantizeInfo);
+
+ auto output = graph.AddLayer<OutputLayer>(0, "output");
+
+ Convolution2dDescriptor convolution2dDescriptor;
+ convolution2dDescriptor.m_BiasEnabled = false;
+ convolution2dDescriptor.m_StrideX = 1;
+ convolution2dDescriptor.m_StrideY = 1;
+ convolution2dDescriptor.m_DataLayout = DataLayout::NHWC;
+ auto conv2d = graph.AddLayer<Convolution2dLayer>(convolution2dDescriptor, "conv2d");
+
+
+ auto inputLayer = graph.AddLayer<InputLayer>(0, "input");
+
+ PadDescriptor padDescriptor({{0, 0},
+ {2, 2},
+ {2, 2},
+ {0, 0}});
+
+ const unsigned int paddedShape[] = {1, 6, 6, 3};
+
+ TensorInfo paddedInfo(4, paddedShape, DataType::Float32);
+
+ auto padLayer = graph.AddLayer<PadLayer>(padDescriptor, "pad");
+ padLayer->GetOutputSlot().SetTensorInfo(paddedInfo);
+
+ // Connect up:
+ // input -> pad -> conv2d -> output
+ // constant -> dequantize ->
+ constantLayer->GetOutputSlot().Connect(dequantizeLayer->GetInputSlot(0));
+ dequantizeLayer->GetOutputSlot().Connect(conv2d->GetInputSlot(1));
+ inputLayer->GetOutputSlot().Connect(padLayer->GetInputSlot(0));
+ padLayer->GetOutputSlot().Connect(conv2d->GetInputSlot(0));
+ conv2d->GetOutputSlot().Connect(output->GetInputSlot(0));
+
+ CHECK(CheckSequence(graph.cbegin(), graph.cend(),
+ &IsLayerOfType<InputLayer>,
+ checkConstantFloat16,
+ &IsLayerOfType<DequantizeLayer>,
+ &IsLayerOfType<Convolution2dLayer>,
+ &IsLayerOfType<PadLayer>,
+ &IsLayerOfType<OutputLayer>));
+
+ armnn::Optimizer::Pass(graph, MakeOptimizations(ConvertConstDequantisationLayersToConstLayers()));
+ armnn::Optimizer::Pass(graph, MakeOptimizations(FoldPadIntoConvolution2d()));
+
+ // Ensure that the const and dequantize are now constant of type fp32
+ // Ensure pad and conv2d are now just convolution
+ CHECK(CheckSequence(graph.cbegin(), graph.cend(),
+ &IsLayerOfType<InputLayer>,
+ checkConstantFloat32,
+ checkPadFoldedIntoConv2d,
+ &IsLayerOfType<OutputLayer>));
+}
+
TEST_CASE("ConvertConstInt8DequantizeToConstFloat32")
{
Graph graph;
@@ -67,30 +162,21 @@ TEST_CASE("ConvertConstInt8DequantizeToConstFloat32")
const TensorInfo constTensorInfo(4, shape, DataType::QAsymmS8, 1.0, 0, true);
const TensorInfo outputDequantizeInfo(4, shape, DataType::Float32, 1.0, 0, true);
- ConstantLayer *constantLayer = graph.AddLayer<ConstantLayer>("constant");
+ auto constantLayer = graph.AddLayer<ConstantLayer>("constant");
std::vector<int8_t> constantValues(constTensorInfo.GetNumElements(), 5);
ConstTensor constTensor(constTensorInfo, constantValues.data());
constantLayer->m_LayerOutput = std::make_shared<ScopedTensorHandle>(constTensor);
constantLayer->GetOutputSlot().SetTensorInfo(constTensorInfo);
- DequantizeLayer *dequantizeLayer = graph.AddLayer<DequantizeLayer>("dequantize");
+ auto dequantizeLayer = graph.AddLayer<DequantizeLayer>("dequantize");
dequantizeLayer->GetOutputSlot().SetTensorInfo(outputDequantizeInfo);
- OutputLayer *output = graph.AddLayer<OutputLayer>(0, "output");
+ auto output = graph.AddLayer<OutputLayer>(0, "output");
// Connect up constant -> dequantize -> output
constantLayer->GetOutputSlot().Connect(dequantizeLayer->GetInputSlot(0));
dequantizeLayer->GetOutputSlot().Connect(output->GetInputSlot(0));
- auto checkConstantQAsymmS8 = [](const armnn::Layer *const layer) -> bool {
- return IsLayerOfType<ConstantLayer>(layer) &&
- (layer->GetDataType() == DataType::QAsymmS8);
- };
- auto checkConstantFloat32 = [](const armnn::Layer *const layer) -> bool {
- return IsLayerOfType<ConstantLayer>(layer) &&
- (layer->GetDataType() == DataType::Float32);
- };
-
CHECK(CheckSequence(graph.cbegin(), graph.cend(),
checkConstantQAsymmS8,
&IsLayerOfType<DequantizeLayer>,