From 89cc6b39376419565505c59c693a107a5edd47aa Mon Sep 17 00:00:00 2001
From: Francis Murtagh <francis.murtagh@arm.com>
Date: Fri, 22 Jul 2022 10:23:41 +0100
Subject: IVGCVSW-6978: RedirectMembersToConstantInputs does not work with
 Fp32NetworkToBf16Converter

* Fuse ConvertFp32ToBf16 layers into Constant layers so that
  Conv2d/FullyConnected can have their weights redirected.
* If BF16 is unsupported in Conv2d or FullyConnected, revert the fused
  Constant layer to FP32.
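
Sketch of the rewrite (illustrative):

    Before: Constant (FP32) -> ConvertFp32ToBf16 -> Conv2d/FullyConnected
    After:  Constant (BF16) ---------------------> Conv2d/FullyConnected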
Change-Id: If523c708a822659d64597d9ae39cca1c2f84b76f
Signed-off-by: Francis Murtagh <francis.murtagh@arm.com>
---
 CMakeLists.txt                                     |   1 +
 src/armnn/Network.cpp                              |  17 ++-
 src/armnn/NetworkUtils.cpp                         |  50 ++++++-
 src/armnn/NetworkUtils.hpp                         |   4 +-
 src/armnn/optimizations/All.hpp                    |   3 +-
 .../FuseConvertFp32ToBf16IntoConstLayers.hpp       |  89 ++++++++++++
 .../FuseConvertF32BF16IntoConstLayerTests.cpp      | 151 +++++++++++++++++++++
 7 files changed, 307 insertions(+), 8 deletions(-)
 create mode 100644 src/armnn/optimizations/FuseConvertFp32ToBf16IntoConstLayers.hpp
 create mode 100644 src/armnn/test/optimizations/FuseConvertF32BF16IntoConstLayerTests.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 41db8661d3..f0eb81cc6c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -547,6 +547,7 @@ if(BUILD_UNIT_TESTS)
     src/armnn/test/optimizations/Fp32NetworkToFp16ConverterTests.cpp
     src/armnn/test/optimizations/FuseActivationTests.cpp
     src/armnn/test/optimizations/FuseBatchNormTests.cpp
+    src/armnn/test/optimizations/FuseConvertF32BF16IntoConstLayerTests.cpp
    src/armnn/test/optimizations/InsertDebugLayerTests.cpp
     src/armnn/test/optimizations/MovePermuteUpTests.cpp
     src/armnn/test/optimizations/MoveTransposeUpTests.cpp
diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp
index 8fe4445dcf..5d443068ce 100644
--- a/src/armnn/Network.cpp
+++ b/src/armnn/Network.cpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
 
@@ -790,13 +790,18 @@ OptimizationResult AttemptBackendAssignment(BackendSettings& backendSettings,
         }
         else if (dataTypeIn == DataType::BFloat16 || dataTypeOut == DataType::BFloat16)
         {
+            const auto layerType = layer->GetType();
             if (IWorkloadFactory::IsLayerSupported(*layer, DataType::Float32, reasonIfUnsupported)
-                && layer->GetType() != LayerType::ConvertFp32ToBf16
-                && layer->GetType() != LayerType::ConvertBf16ToFp32)
+                && layerType != LayerType::ConvertFp32ToBf16
+                && layerType != LayerType::ConvertBf16ToFp32)
             {
-                // Insert BF16 -> FP32 conversion layer before current layer
+                bool revertConstantWeightsConversion = RevertConstantWeightsToFP32(layer);
+
+                // Insert BF16 -> FP32 conversion layer before the current layer,
+                // unless we have reverted the constant weights' type above.
                 std::vector<ConvertBf16ToFp32Layer*> convertBf16ToFp32Layers;
-                if (dataTypeIn == DataType::BFloat16)
+                if (dataTypeIn == DataType::BFloat16 && dataTypeOut != DataType::BFloat16
+                    && !revertConstantWeightsConversion)
                 {
                     convertBf16ToFp32Layers =
                         InsertConvertBf16ToFp32LayersBefore(graph, *layer);
@@ -1759,10 +1764,12 @@ IOptimizedNetworkPtr Optimize(const Graph& inGraph,
     // If Fp32 to Bf16 optimization is set convert Fp32 network to Bf16
     // Convert input of Convolution2d and FullyConnected from Fp32 to Bf16
     // Only Constant weight of Convolution2d and FullyConnected are converted from Fp32 to Bf16
+    // Constant and Fp32ToBf16 layers will also be fused so conversion is no longer needed at inference time
     if (options.m_ReduceFp32ToBf16)
     {
         ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Optimizer_ReduceFp32ToBf16");
         Optimizer::Pass(optGraph, MakeOptimizations(Fp32NetworkToBf16Converter()));
+        Optimizer::Pass(optGraph, MakeOptimizations(FuseConversionLayersIntoConstLayers()));
     }
 
     // Initialize backend settings
diff --git a/src/armnn/NetworkUtils.cpp b/src/armnn/NetworkUtils.cpp
index 7597798fa4..5ff0e6c4e1 100644
--- a/src/armnn/NetworkUtils.cpp
+++ b/src/armnn/NetworkUtils.cpp
@@ -1,10 +1,12 @@
 //
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
 
 #include "NetworkUtils.hpp"
 
+#include <armnnUtils/FloatingPointConverter.hpp>
+#include <BFloat16.hpp>
 #include "SubgraphViewSelector.hpp"
 
 #include <armnn/Exceptions.hpp>
@@ -272,4 +274,50 @@ std::vector<DebugLayer*> InsertDebugLayerAfter(Graph& graph, Layer& layer)
     return debugLayers;
 }
 
+bool RevertConstantWeightsToFP32(Layer* layer)
+{
+    if (layer->GetType() == LayerType::Convolution2d || layer->GetType() == LayerType::FullyConnected)
+    {
+        // Revert the weights on the Constant layer to FP32 so they can be accessed by Conv2d or FullyConnected.
+        // This prevents a conversion layer being added during backend assignment, which would block
+        // the RedirectMembersToConstantInputs backward-compatibility workaround/optimization.
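+        // Input slot 1 of Convolution2d/FullyConnected holds the constant weights;
+        // slot 0 is the activation input.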
+        auto constantLayerInfo = layer->GetInputSlot(1).GetConnection()->GetTensorInfo();
+
+        if (constantLayerInfo.IsConstant() && constantLayerInfo.GetDataType() == DataType::BFloat16)
+        {
+            std::vector<float> newValues(constantLayerInfo.GetNumElements());
+
+            auto weightLayer = PolymorphicDowncast<ConstantLayer*>(
+                &layer->GetInputSlot(1).GetConnection()->GetOwningIConnectableLayer());
+            armnnUtils::FloatingPointConverter::ConvertBFloat16ToFloat32(
+                weightLayer->m_LayerOutput->GetConstTensor<BFloat16>(),
+                constantLayerInfo.GetNumElements(),
+                newValues.data());
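+
+            // Repackage the converted values as a new FP32 constant tensor.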
+            TensorInfo newInfo(constantLayerInfo.GetShape(), DataType::Float32);
+            newInfo.SetConstant(true);
+            ConstTensor newInput(newInfo, newValues);
+            weightLayer->m_LayerOutput.reset(new ScopedTensorHandle(newInput));
+            weightLayer->GetOutputSlot(0).SetTensorInfo(newInfo);
+
+            // Connect Conv2d/FullyConnected to the input layer directly, leaving out
+            // the conversion layer to be cleaned up later.
+            auto& conversionLayer = layer->GetInputSlot(0).GetConnection()->GetOwningIConnectableLayer();
+            auto actualInputOutputSlot = conversionLayer.GetInputSlot(0).GetConnection();
+
+            auto& conversionLayerOutputSlot =
+                layer->GetInputSlot(0).GetConnection()->GetOwningIConnectableLayer().GetOutputSlot(0);
+            auto& conversionLayerInputSlot =
+                layer->GetInputSlot(0).GetConnection()->GetOwningIConnectableLayer().GetInputSlot(0);
+            actualInputOutputSlot->Disconnect(conversionLayerInputSlot);
+            conversionLayerOutputSlot.Disconnect(layer->GetInputSlot(0));
+
+            actualInputOutputSlot->Connect(layer->GetInputSlot(0));
+
+            return true;
+        }
+    }
+    return false;
+}
+
 } // namespace armnn
diff --git a/src/armnn/NetworkUtils.hpp b/src/armnn/NetworkUtils.hpp
index a922770285..77dd068cb3 100644
--- a/src/armnn/NetworkUtils.hpp
+++ b/src/armnn/NetworkUtils.hpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
 
@@ -29,4 +29,6 @@ std::vector<ConvertFp32ToFp16Layer*> InsertConvertFp32ToFp16LayersAfter(Graph& g
 
 std::vector<DebugLayer*> InsertDebugLayerAfter(Graph& graph, Layer& layer);
 
+bool RevertConstantWeightsToFP32(Layer* layer);
+
 } // namespace armnn
diff --git a/src/armnn/optimizations/All.hpp b/src/armnn/optimizations/All.hpp
index 900e763762..0421f31973 100644
--- a/src/armnn/optimizations/All.hpp
+++ b/src/armnn/optimizations/All.hpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
 #pragma once
@@ -9,6 +9,7 @@
 #include "ConvertConstants.hpp"
 #include "ConvertConstDequantisationLayersToConstLayers.hpp"
 #include "ConvertConstPermuteLayersToConstLayers.hpp"
+#include "FuseConvertFp32ToBf16IntoConstLayers.hpp"
 #include "ConvertFp32NetworkToBf16.hpp"
 #include "ConvertFp32NetworkToFp16.hpp"
 #include "FoldPadIntoLayer2d.hpp"
diff --git a/src/armnn/optimizations/FuseConvertFp32ToBf16IntoConstLayers.hpp b/src/armnn/optimizations/FuseConvertFp32ToBf16IntoConstLayers.hpp
new file mode 100644
index 0000000000..d112010539
--- /dev/null
+++ b/src/armnn/optimizations/FuseConvertFp32ToBf16IntoConstLayers.hpp
@@ -0,0 +1,89 @@
+//
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "Optimization.hpp"
+#include <armnnUtils/FloatingPointConverter.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
+
+namespace armnn
+{
+namespace optimizations
+{
+
+class FuseConvertFp32ToBf16IntoConstLayers
+{
+public:
+    void Run(Graph& graph, InputSlot& connection) const
+    {
+        Layer& base  = connection.GetConnectedOutputSlot()->GetOwningLayer();
+        Layer& child = connection.GetOwningLayer();
+
+        ARMNN_ASSERT(base.GetType() == LayerType::Constant);
+        ARMNN_ASSERT(child.GetType() == LayerType::ConvertFp32ToBf16);
+
+        auto dataType = base.GetDataType();
+        switch (dataType)
+        {
+            case DataType::Float32:
+                ReplaceConvertFp32ToBf16Layer(
+                    graph,
+                    PolymorphicDowncast<ConstantLayer*>(&base),
+                    PolymorphicDowncast<ConvertFp32ToBf16Layer*>(&child));
+                break;
+            default:
+                throw InvalidArgumentException(GetDataTypeName(dataType) +
+                                               std::string(" Constant Layer cannot be fused into ") +
+                                               GetDataTypeName(child.GetDataType()) +
+                                               std::string(" conversion layer."));
+        }
+    }
+protected:
+    FuseConvertFp32ToBf16IntoConstLayers()  = default;
+    ~FuseConvertFp32ToBf16IntoConstLayers() = default;
+private:
+    template<typename ConversionLayer = ConvertFp32ToBf16Layer>
+    static void ReplaceConvertFp32ToBf16Layer(Graph& graph,
+                                              ConstantLayer* constantLayer,
+                                              ConvertFp32ToBf16Layer* convertFp32ToBf16layer)
+    {
+        IgnoreUnused(graph);
+        /**
+         * This optimisation is to find situations where a constant set of inputs is being provided to a
+         * ConvertFp32ToBf16 layer. In this case we don't want the overhead of converting the values on
+         * every inference; instead we want to convert them once and store them in a Const layer to be
+         * used every time, as they will not change.
+         */
+        TensorInfo outputConvertFp32ToBf16Info = convertFp32ToBf16layer->GetOutputSlot(0).GetTensorInfo();
+        std::vector<BFloat16> newValues(outputConvertFp32ToBf16Info.GetNumElements());
+
+        armnnUtils::FloatingPointConverter::ConvertFloat32ToBFloat16(
+            constantLayer->m_LayerOutput->GetConstTensor<float>(),
+            outputConvertFp32ToBf16Info.GetNumElements(),
+            newValues.data());
+        TensorInfo newInfo = outputConvertFp32ToBf16Info;
+        newInfo.SetConstant(true);
+        ConstTensor newInput(newInfo, newValues);
+
+        constantLayer->m_LayerOutput.reset(new ScopedTensorHandle(newInput));
+
+        // Moves connections from the convertFp32ToBf16layer output slot to the constant layer.
+        // The ConvertFp32ToBf16 layer will be removed if left unconnected.
+        convertFp32ToBf16layer->GetOutputSlot().MoveAllConnections(constantLayer->GetOutputSlot());
+
+        // Update the output tensor info.
+        constantLayer->GetOutputSlot(0).SetTensorInfo(newInfo);
+        ARMNN_ASSERT(constantLayer->GetOutputSlot(0).GetTensorInfo().IsConstant() == true);
+    }
+};
+
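+// Matches Constant layers feeding ConvertFp32ToBf16 layers and fuses the conversion into the constant data.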
+using FuseConversionLayersIntoConstLayers = OptimizeForConnection<ConstantLayer,
+                                                                  ConvertFp32ToBf16Layer,
+                                                                  FuseConvertFp32ToBf16IntoConstLayers>;
+
+} // namespace optimizations
+} // namespace armnn
\ No newline at end of file
diff --git a/src/armnn/test/optimizations/FuseConvertF32BF16IntoConstLayerTests.cpp b/src/armnn/test/optimizations/FuseConvertF32BF16IntoConstLayerTests.cpp
new file mode 100644
index 0000000000..93d5948d61
--- /dev/null
+++ b/src/armnn/test/optimizations/FuseConvertF32BF16IntoConstLayerTests.cpp
@@ -0,0 +1,151 @@
+//
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include <LayersFwd.hpp>
+#include <Network.hpp>
+#include <NetworkUtils.hpp>
+#include <Optimizer.hpp>
+#include <TestUtils.hpp>
+
+#include <armnn/backends/TensorHandle.hpp>
+
+#include <doctest/doctest.h>
+
+TEST_SUITE("Optimizer")
+{
+using namespace armnn;
+using namespace armnn::optimizations;
+
+TEST_CASE("FuseConvertFp32Fp16intoConst")
+{
+    Graph graph;
+    const unsigned int shape[] = {1, 2, 2, 3};
+
+    const TensorInfo constTensorInfo(4, shape, DataType::Float32, 1.0, 0, true);
+    const TensorInfo outputConvertInfo(4, shape, DataType::BFloat16, 1.0, 0, true);
+
+    ConstantLayer* constantLayer = graph.AddLayer<ConstantLayer>("constant");
+    std::vector<float> constantValues(constTensorInfo.GetNumElements(), 3.1416f);
+    ConstTensor constTensor(constTensorInfo, constantValues.data());
+    constantLayer->m_LayerOutput = std::make_shared<ScopedTensorHandle>(constTensor);
+    constantLayer->GetOutputSlot().SetTensorInfo(constTensorInfo);
+
+    ConvertFp32ToBf16Layer* convertLayer = graph.AddLayer<ConvertFp32ToBf16Layer>("convert");
+    convertLayer->GetOutputSlot().SetTensorInfo(outputConvertInfo);
+
+    OutputLayer* output = graph.AddLayer<OutputLayer>(0, "output");
+
+    // Connect up constant -> convert -> output
+    constantLayer->GetOutputSlot().Connect(convertLayer->GetInputSlot(0));
+    convertLayer->GetOutputSlot().Connect(output->GetInputSlot(0));
+
+    auto checkConstantFloat32 = [](const armnn::Layer* const layer) -> bool {
+        return IsLayerOfType<ConstantLayer>(layer) &&
+               (layer->GetDataType() == DataType::Float32);
+    };
+    auto checkConstantBFloat16 = [](const armnn::Layer* const layer) -> bool {
+        return IsLayerOfType<ConstantLayer>(layer) &&
+               (layer->GetDataType() == DataType::BFloat16);
+    };
+
+    CHECK(CheckSequence(graph.cbegin(), graph.cend(),
+                        checkConstantFloat32,
+                        &IsLayerOfType<ConvertFp32ToBf16Layer>,
+                        &IsLayerOfType<OutputLayer>));
+
+    armnn::Optimizer::Pass(graph, MakeOptimizations(FuseConversionLayersIntoConstLayers()));
+
+    CHECK(CheckSequence(graph.cbegin(), graph.cend(),
+                        checkConstantBFloat16,
+                        &IsLayerOfType<OutputLayer>));
+}
+
+TEST_CASE("RevertConstantWeightsToFP32")
+{
+    Graph graph;
+    const unsigned int shape[] = {1, 2, 2, 3};
+
+    const TensorInfo constTensorInfo(4, shape, DataType::Float32, 1.0, 0, true);
+    const TensorInfo outputConvertInfo(4, shape, DataType::BFloat16, 1.0, 0, true);
+
+    TensorInfo inputInfo(4, shape, DataType::Float32);
+    auto* input = graph.AddLayer<InputLayer>(0, "input0");
+    input->GetOutputSlot().SetTensorInfo(inputInfo);
+
+    auto* constantLayer = graph.AddLayer<ConstantLayer>("constant");
+    std::vector<float> constantValues(constTensorInfo.GetNumElements(), 3.1416f);
+    ConstTensor constTensor(constTensorInfo, constantValues.data());
+    constantLayer->m_LayerOutput = std::make_shared<ScopedTensorHandle>(constTensor);
+    constantLayer->GetOutputSlot().SetTensorInfo(constTensorInfo);
+
+    ConvertFp32ToBf16Layer* convertLayerInputs = graph.AddLayer<ConvertFp32ToBf16Layer>("convert");
+    convertLayerInputs->GetOutputSlot().SetTensorInfo(outputConvertInfo);
+    ConvertFp32ToBf16Layer* convertLayerWeights = graph.AddLayer<ConvertFp32ToBf16Layer>("convert2");
+    convertLayerWeights->GetOutputSlot().SetTensorInfo(outputConvertInfo);
+    ConvertFp32ToBf16Layer* convertLayerBiases = graph.AddLayer<ConvertFp32ToBf16Layer>("convert3");
+    convertLayerBiases->GetOutputSlot().SetTensorInfo(outputConvertInfo);
+
+    auto* biases = graph.AddLayer<ConstantLayer>("Biases");
+    biases->m_LayerOutput = std::make_unique<ScopedTensorHandle>(constTensor);
+    biases->GetOutputSlot().SetTensorInfo(constTensorInfo);
+
+    armnn::Convolution2dDescriptor descriptor;
+    descriptor.m_BiasEnabled = true;
+    auto* conv = graph.AddLayer<Convolution2dLayer>(descriptor, "conv2d");
+    const armnn::TensorInfo infoFP32({ 2, 3, 8, 1 }, armnn::DataType::Float32);
+    conv->GetOutputSlot().SetTensorInfo(infoFP32);
+
+    auto* output = graph.AddLayer<OutputLayer>(0, "output");
+
+    // Connect up Input    -> Convert ->
+    //            Constant -> Convert -> Conv2d -> Output
+    //            Constant -> Convert ->
+    input->GetOutputSlot().Connect(convertLayerInputs->GetInputSlot(0));
+    constantLayer->GetOutputSlot().Connect(convertLayerWeights->GetInputSlot(0));
+    biases->GetOutputSlot().Connect(convertLayerBiases->GetInputSlot(0));
+
+    convertLayerInputs->GetOutputSlot().Connect(conv->GetInputSlot(0));
+    convertLayerWeights->GetOutputSlot().Connect(conv->GetInputSlot(1));
+    convertLayerBiases->GetOutputSlot().Connect(conv->GetInputSlot(2));
+
+    conv->GetOutputSlot().Connect(output->GetInputSlot(0));
+
+    auto checkConstantFloat32 = [](const armnn::Layer* const layer) -> bool {
+        return IsLayerOfType<ConstantLayer>(layer) &&
+               (layer->GetDataType() == DataType::Float32);
+    };
+    auto checkConstantBFloat16 = [](const armnn::Layer* const layer) -> bool {
+        return IsLayerOfType<ConstantLayer>(layer) &&
+               (layer->GetDataType() == DataType::BFloat16);
+    };
+
+    CHECK(CheckSequence(graph.cbegin(), graph.cend(),
+                        &IsLayerOfType<InputLayer>,
+                        checkConstantFloat32,
+                        checkConstantFloat32,
+                        &IsLayerOfType<ConvertFp32ToBf16Layer>,
+                        &IsLayerOfType<ConvertFp32ToBf16Layer>,
+                        &IsLayerOfType<ConvertFp32ToBf16Layer>,
+                        &IsLayerOfType<Convolution2dLayer>,
+                        &IsLayerOfType<OutputLayer>));
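+
+    // Fuse the weight and bias conversion layers into their constant layers, then
+    // revert the weights to FP32 as backend assignment would when BF16 is unsupported.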
+    armnn::Optimizer::Pass(graph, MakeOptimizations(FuseConversionLayersIntoConstLayers()));
+
+    bool revert = RevertConstantWeightsToFP32(conv);
+
+    // Erase the unconnected layer, as occurs during topological sort.
+    graph.EraseLayer(convertLayerInputs);
+
+    CHECK(revert);
+    CHECK(constantLayer->GetDataType() == DataType::Float32);
+
+    CHECK(CheckSequence(graph.cbegin(), graph.cend(),
+                        &IsLayerOfType<InputLayer>,
+                        checkConstantBFloat16,
+                        checkConstantFloat32,
+                        &IsLayerOfType<Convolution2dLayer>,
+                        &IsLayerOfType<OutputLayer>));
+}
+}
-- 
cgit v1.2.1