From 630ce65543c08d8e7fca5be80f9a64122744d135 Mon Sep 17 00:00:00 2001
From: Cathal Corbett
Date: Mon, 16 May 2022 15:20:56 +0100
Subject: IVGCVSW-6147 ConstTensorsAsInput: Optimizer - FusePermuteIntoConstLayer

* No trailing permute layer after a constant layer
* Unit test for optimization

Signed-off-by: Cathal Corbett
Change-Id: I0d098f5af41d2c55df7cef1ccfb848093320ddc1
---
 CMakeLists.txt                                      |   1 +
 src/armnn/Network.cpp                               |   8 +-
 src/armnn/optimizations/All.hpp                     |   1 +
 .../ConvertConstPermuteLayersToConstLayers.hpp      | 127 +++++++++++++++++++++
 .../ConvertConstPermuteLayersToConstLayersTest.cpp  |  60 ++++++++++
 src/armnnOnnxParser/OnnxParser.cpp                  |  23 ++--
 6 files changed, 211 insertions(+), 9 deletions(-)
 create mode 100644 src/armnn/optimizations/ConvertConstPermuteLayersToConstLayers.hpp
 create mode 100644 src/armnn/test/optimizations/ConvertConstPermuteLayersToConstLayersTest.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 903f06c86c..52e60e00d4 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -529,6 +529,7 @@ if(BUILD_UNIT_TESTS)
     src/armnn/test/OptimizerTests.cpp
     src/armnn/test/optimizations/AddBroadcastReshapeLayerTests.cpp
     src/armnn/test/optimizations/ConvertConstDequantisationLayersToConstLayersTest.cpp
+    src/armnn/test/optimizations/ConvertConstPermuteLayersToConstLayersTest.cpp
     src/armnn/test/optimizations/ConvertConstantsBFloatTests.cpp
     src/armnn/test/optimizations/ConvertConstantsFloatToHalfTests.cpp
     src/armnn/test/optimizations/ConvertConstantsHalfToFloatTests.cpp
diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp
index 9da28ceeea..fecc766836 100644
--- a/src/armnn/Network.cpp
+++ b/src/armnn/Network.cpp
@@ -1158,6 +1158,7 @@ OptimizationResult ApplyBackendOptimizations(OptimizedNetworkImpl* optNetObjPtr,
         if(selectedBackend == armnn::Compute::GpuAcc || selectedBackend == armnn::Compute::CpuAcc)
         {
             Optimizer::Pass(optGraph, MakeOptimizations(optimizations::PermuteDepthwiseConv2dWeights()));
+            Optimizer::Pass(optGraph, MakeOptimizations(optimizations::FusePermuteIntoConstLayer()));
         }
 
         // Select sub-graphs based on backend
@@ -1719,6 +1720,10 @@ IOptimizedNetworkPtr Optimize(const Graph& inGraph,
         optGraph.InferTensorInfos();
     }
 
+    // Need to run FusePermuteIntoConstLayer before the FoldPadIntoDepthwiseConvolution2d or
+    // FuseBatchNormIntoDepthwiseConvolution2D optimizations are called.
+    Optimizer::Pass(optGraph, MakeOptimizations(FusePermuteIntoConstLayer()));
+
     // Perform optimisation passes
     Optimizer::Pass(optGraph, MakeOptimizations(SquashEqualPermuteSiblings(),
                                                 SquashEqualTransposeSiblings(),
@@ -1739,8 +1744,7 @@ IOptimizedNetworkPtr Optimize(const Graph& inGraph,
                                                 FuseBatchNormIntoConvolution2DFloat16(),
                                                 FuseBatchNormIntoDepthwiseConvolution2DFloat32(),
                                                 FuseBatchNormIntoDepthwiseConvolution2DFloat16(),
-                                                ConvertConstDequantisationLayersToConstLayers(),
-                                                RedirectMembersToConstantInputs()));
+                                                ConvertConstDequantisationLayersToConstLayers()));
 
     // If Fp32 to Fp16 optimization is set convert Fp32 network to Fp16
     if (options.m_ReduceFp32ToFp16)
diff --git a/src/armnn/optimizations/All.hpp b/src/armnn/optimizations/All.hpp
index e4a1f33e08..900e763762 100644
--- a/src/armnn/optimizations/All.hpp
+++ b/src/armnn/optimizations/All.hpp
@@ -8,6 +8,7 @@
 #include "AddDebug.hpp"
 #include "ConvertConstants.hpp"
 #include "ConvertConstDequantisationLayersToConstLayers.hpp"
+#include "ConvertConstPermuteLayersToConstLayers.hpp"
 #include "ConvertFp32NetworkToBf16.hpp"
 #include "ConvertFp32NetworkToFp16.hpp"
 #include "FoldPadIntoLayer2d.hpp"
diff --git a/src/armnn/optimizations/ConvertConstPermuteLayersToConstLayers.hpp b/src/armnn/optimizations/ConvertConstPermuteLayersToConstLayers.hpp
new file mode 100644
index 0000000000..2cc3e8eaef
--- /dev/null
+++ b/src/armnn/optimizations/ConvertConstPermuteLayersToConstLayers.hpp
@@ -0,0 +1,127 @@
+//
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "Optimization.hpp"
+#include <armnnUtils/Permute.hpp>
+#include <ResolveType.hpp>
+
+namespace armnn
+{
+namespace optimizations
+{
+
+class ConvertConstPermuteLayersToConstLayers
+{
+public:
+    void Run(Graph& graph, InputSlot& connection) const
+    {
+        Layer& base = connection.GetConnectedOutputSlot()->GetOwningLayer();
+        Layer& child = connection.GetOwningLayer();
+
+        ARMNN_ASSERT(base.GetType() == LayerType::Constant);
+        ARMNN_ASSERT(child.GetType() == LayerType::Permute);
+
+        if (base.GetDataType() == child.GetDataType())
+        {
+            switch (base.GetDataType())
+            {
+                case DataType::Float16:
+                    ReplaceConstPermuteLayer<DataType::Float16>(graph,
+                                                                PolymorphicDowncast<ConstantLayer*>(&base),
+                                                                PolymorphicDowncast<PermuteLayer*>(&child));
+                    break;
+                case DataType::Float32:
+                    ReplaceConstPermuteLayer<DataType::Float32>(graph,
+                                                                PolymorphicDowncast<ConstantLayer*>(&base),
+                                                                PolymorphicDowncast<PermuteLayer*>(&child));
+                    break;
+                case DataType::QAsymmU8:
+                    ReplaceConstPermuteLayer<DataType::QAsymmU8>(graph,
+                                                                 PolymorphicDowncast<ConstantLayer*>(&base),
+                                                                 PolymorphicDowncast<PermuteLayer*>(&child));
+                    break;
+                case DataType::Signed32:
+                    ReplaceConstPermuteLayer<DataType::Signed32>(graph,
+                                                                 PolymorphicDowncast<ConstantLayer*>(&base),
+                                                                 PolymorphicDowncast<PermuteLayer*>(&child));
+                    break;
+                case DataType::QSymmS16:
+                    ReplaceConstPermuteLayer<DataType::QSymmS16>(graph,
+                                                                 PolymorphicDowncast<ConstantLayer*>(&base),
+                                                                 PolymorphicDowncast<PermuteLayer*>(&child));
+                    break;
+                case DataType::QSymmS8:
+                    ReplaceConstPermuteLayer<DataType::QSymmS8>(graph,
+                                                                PolymorphicDowncast<ConstantLayer*>(&base),
+                                                                PolymorphicDowncast<PermuteLayer*>(&child));
+                    break;
+                case DataType::QAsymmS8:
+                    ReplaceConstPermuteLayer<DataType::QAsymmS8>(graph,
+                                                                 PolymorphicDowncast<ConstantLayer*>(&base),
+                                                                 PolymorphicDowncast<PermuteLayer*>(&child));
+                    break;
+                case DataType::BFloat16:
+                    ReplaceConstPermuteLayer<DataType::BFloat16>(graph,
+                                                                 PolymorphicDowncast<ConstantLayer*>(&base),
+                                                                 PolymorphicDowncast<PermuteLayer*>(&child));
+                    break;
+                case DataType::Signed64:
+                    ReplaceConstPermuteLayer<DataType::Signed64>(graph,
+                                                                 PolymorphicDowncast<ConstantLayer*>(&base),
+                                                                 PolymorphicDowncast<PermuteLayer*>(&child));
+                    break;
+                case DataType::Boolean:
+                    ReplaceConstPermuteLayer<DataType::Boolean>(graph,
+                                                                PolymorphicDowncast<ConstantLayer*>(&base),
+                                                                PolymorphicDowncast<PermuteLayer*>(&child));
+                    break;
+            }
+        }
+    }
+protected:
+    ConvertConstPermuteLayersToConstLayers() = default;
+    ~ConvertConstPermuteLayersToConstLayers() = default;
+private:
+    template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+    static void ReplaceConstPermuteLayer(Graph& graph,
+                                         ConstantLayer* constantLayer,
+                                         PermuteLayer* permuteLayer)
+    {
+        IgnoreUnused(graph);
+        /**
+         * This optimisation is to find situations where a constant set of inputs is being provided to a Permute
+         * layer. In this case we don't want the overhead of Permuting the values on every inference, instead we
+         * want to Permute them once and store them in a Const layer to be used every time as they will not change.
+         */
+        TensorInfo outputPermuteInfo = permuteLayer->GetOutputSlot(0).GetTensorInfo();
+        std::vector<T> newValues(outputPermuteInfo.GetNumElements());
+        armnnUtils::Permute(outputPermuteInfo.GetShape(), permuteLayer->GetPermutation(),
+                            constantLayer->m_LayerOutput->Map(true), newValues.data(),
+                            GetDataTypeSize(outputPermuteInfo.GetDataType()));
+
+        TensorInfo newInfo = outputPermuteInfo;
+        newInfo.SetConstant(true);
+        ConstTensor newInput(newInfo, newValues);
+        constantLayer->m_LayerOutput.reset(new ScopedTensorHandle(newInput));
+
+        // Moves connections in permute output to the constant layer.
+        // Permute layer will be removed if left unconnected.
+        permuteLayer->GetOutputSlot().MoveAllConnections(constantLayer->GetOutputSlot());
+
+        // Updating the output tensor
+        constantLayer->GetOutputSlot(0).SetTensorInfo(newInfo);
+        ARMNN_ASSERT(constantLayer->GetOutputSlot(0).GetTensorInfo().IsConstant() == true);
+    }
+};
+
+using FusePermuteIntoConstLayer = OptimizeForConnection<ConstantLayer, PermuteLayer, ConvertConstPermuteLayersToConstLayers>;
+
+} // namespace optimizations
+} // namespace armnn
\ No newline at end of file
diff --git a/src/armnn/test/optimizations/ConvertConstPermuteLayersToConstLayersTest.cpp b/src/armnn/test/optimizations/ConvertConstPermuteLayersToConstLayersTest.cpp
new file mode 100644
index 0000000000..1fcba0e581
--- /dev/null
+++ b/src/armnn/test/optimizations/ConvertConstPermuteLayersToConstLayersTest.cpp
@@ -0,0 +1,60 @@
+//
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "LayersFwd.hpp"
+#include <Network.hpp>
+#include <Optimizer.hpp>
+#include <TestUtils.hpp>
+#include <doctest/doctest.h>
+
+TEST_SUITE("Optimizer")
+{
+using namespace armnn;
+using namespace armnn::optimizations;
+
+TEST_CASE("ConvertConstPermuteToConst")
+{
+    Graph graph;
+    const unsigned int shape[] = {1, 2, 2, 3};
+
+    const TensorInfo constTensorInfo(4, shape, DataType::Float32, 1.0, 0, true);
+
+    ConstantLayer* constant = graph.AddLayer<ConstantLayer>("constant");
+    std::vector<float> constantValues(constTensorInfo.GetNumElements(), 4.5f);
+    ConstTensor constTensor(constTensorInfo, constantValues.data());
+    constant->m_LayerOutput = std::make_shared<ScopedTensorHandle>(constTensor);
+    constant->GetOutputSlot().SetTensorInfo(constTensorInfo);
+
+    PermuteDescriptor desc({ 0, 2, 3, 1 });
+    PermuteLayer* permuteLayer = graph.AddLayer<PermuteLayer>(desc, "permute");
+    TensorInfo infoPermuted = armnnUtils::Permuted(constTensorInfo, { 0, 2, 3, 1 });
+    permuteLayer->GetOutputSlot().SetTensorInfo(infoPermuted);
+
+    OutputLayer* output = graph.AddLayer<OutputLayer>(0, "output");
+
+    // Connect up constant -> permute -> output
+    constant->GetOutputSlot().Connect(permuteLayer->GetInputSlot(0));
+    permuteLayer->GetOutputSlot().Connect(output->GetInputSlot(0));
+
+    CHECK(CheckSequence(graph.cbegin(), graph.cend(),
+                        &IsLayerOfType<ConstantLayer>,
+                        &IsLayerOfType<PermuteLayer>,
+                        &IsLayerOfType<OutputLayer>));
+
+    armnn::Optimizer::Pass(graph, MakeOptimizations(FusePermuteIntoConstLayer()));
+
+    CHECK(CheckSequence(graph.cbegin(), graph.cend(),
+                        &IsLayerOfType<ConstantLayer>,
+                        &IsLayerOfType<OutputLayer>));
+
+    TensorShape tensorShape = constant->GetOutputSlot(0).GetTensorInfo().GetShape();
+    CHECK(tensorShape[0] == shape[0]);
+    CHECK(tensorShape[1] == shape[3]);
+    CHECK(tensorShape[2] == shape[1]);
+    CHECK(tensorShape[3] == shape[2]);
+}
+
+}
diff --git a/src/armnnOnnxParser/OnnxParser.cpp b/src/armnnOnnxParser/OnnxParser.cpp
index 4eaf63653b..60bd962db7 100644
--- a/src/armnnOnnxParser/OnnxParser.cpp
+++ b/src/armnnOnnxParser/OnnxParser.cpp
@@ -1043,15 +1043,24 @@ void OnnxParserImpl::AddConvLayerWithDepthwiseConv(const onnx::NodeProto& node,
     desc.m_BiasEnabled = convDesc.m_BiasEnabled;
 
     armnn::IConnectableLayer* layer = m_Network->AddDepthwiseConvolution2dLayer(desc, node.name().c_str());
-    std::vector<std::string> tensorIndexes= {node.input(0), node.input(1)};
-
-    // weights come in as [O,1,H,W] from ONNX and need to be converted to ArmNNs dephtwise weights layout [1,H,W,O]
-    armnn::PermutationVector perVec {3,0,1,2};
-    auto weightTensor = CreateConstTensor(node.input(1), perVec);
+    std::string permuteStr = "permute_" + node.input(1);
+    std::vector<std::string> tensorIndexes= {node.input(0), permuteStr};
+    auto weightTensor = CreateConstTensor(node.input(1));
     IConnectableLayer* weightsLayer = m_Network->AddConstantLayer(weightTensor.first);
+
+    // weights come in as [O,1,H,W] from ONNX and need to be converted to ArmNNs depthwise weights layout [1,H,W,O]
+    armnn::PermutationVector perVec {3, 0, 1, 2};
+    TensorInfo weightsPermuted = armnnUtils::Permuted(weightTensor.first.GetInfo(), perVec);
+
+    // Insert the new permute layer here so the layers don't need to be re-sorted.
+    IConnectableLayer* permuteLayer = m_Network->AddPermuteLayer(PermuteDescriptor(perVec),
+                                                                 "permute_layer");
+    permuteLayer->GetOutputSlot(0).SetTensorInfo(weightsPermuted);
+    permuteLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1u));
+
     weightsLayer->GetOutputSlot(0).SetTensorInfo(weightTensor.first.GetInfo());
-    weightsLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1u));
+    weightsLayer->GetOutputSlot(0).Connect(permuteLayer->GetInputSlot(0u));
 
     if (node.input_size() == 3)
     {
@@ -1076,7 +1085,7 @@ void OnnxParserImpl::AddConvLayerWithDepthwiseConv(const onnx::NodeProto& node,
 
     auto outputInfo = ComputeOutputInfo({ node.output(0) }, layer,
                                         { m_TensorsInfo[node.input(0)].m_info->GetShape(),
-                                          weightTensor.first.GetInfo().GetShape() });
+                                          weightsPermuted.GetShape() });
     layer->GetOutputSlot(0).SetTensorInfo(outputInfo[0]);
--
cgit v1.2.1
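
A note on the permutation mapping used by the new optimisation and its unit test: ArmNN's PermutationVector gives, for each source dimension, the destination dimension it moves to, so the mapping {0, 2, 3, 1} turns the test's {1, 2, 2, 3} constant into {1, 3, 2, 2}, which is exactly what the final shape CHECKs assert. The standalone sketch below shows the same one-off data movement that ReplaceConstPermuteLayer performs at optimisation time. It is illustrative only and is not part of the patch; the include paths are assumptions about where the public armnn and armnnUtils headers live.

// Minimal sketch (not part of the patch): permute a constant {1,2,2,3} Float32
// tensor with the mapping {0,2,3,1}, mirroring what ReplaceConstPermuteLayer
// does once when the graph is optimised. Header paths are assumptions.
#include <armnn/Tensor.hpp>
#include <armnn/Types.hpp>
#include <armnn/TypesUtils.hpp>
#include <armnnUtils/Permute.hpp>

#include <iostream>
#include <vector>

int main()
{
    const unsigned int srcShape[] = {1, 2, 2, 3};
    armnn::TensorInfo srcInfo(4, srcShape, armnn::DataType::Float32);

    std::vector<float> srcData(srcInfo.GetNumElements());
    for (unsigned int i = 0; i < srcData.size(); ++i)
    {
        srcData[i] = static_cast<float>(i);
    }

    // mappings[i] is the destination dimension of source dimension i,
    // so {0, 2, 3, 1} maps shape {1, 2, 2, 3} to {1, 3, 2, 2}.
    armnn::PermutationVector mappings({0, 2, 3, 1});
    armnn::TensorInfo dstInfo = armnnUtils::Permuted(srcInfo, mappings);

    std::vector<float> dstData(dstInfo.GetNumElements());
    armnnUtils::Permute(dstInfo.GetShape(), mappings,
                        srcData.data(), dstData.data(),
                        armnn::GetDataTypeSize(dstInfo.GetDataType()));

    const armnn::TensorShape& outShape = dstInfo.GetShape();
    std::cout << outShape[0] << " " << outShape[1] << " "
              << outShape[2] << " " << outShape[3] << std::endl; // prints: 1 3 2 2
    return 0;
}

Because the permuted values are written back into the ConstantLayer's m_LayerOutput and the Permute layer is left unconnected and dropped, no Permute workload runs per inference; only the one-off copy shown above happens while the network is being optimised.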