diff options
author | Francis Murtagh <francis.murtagh@arm.com> | 2022-07-22 10:23:41 +0100 |
---|---|---|
committer | Nikhil Raj <nikhil.raj@arm.com> | 2022-07-27 15:58:02 +0100 |
commit | 4073e147982df3022ea97cb85179c8c7a695a17a (patch) | |
tree | 657123d837744e982cc87c4ef024a5fbb60a0c8a /src/armnn/NetworkUtils.cpp | |
parent | 1e276f38e67af7505a25010eee579034ee83d12b (diff) | |
download | armnn-4073e147982df3022ea97cb85179c8c7a695a17a.tar.gz |
IVGCVSW-6978: RedirectMembersToConstantInputs does not work with Fp32NetworkToBf16Converter
* Fuse FP32ToBF16Layers with Constant Layer so Conv2d/FullyConnected
can have their weights redirected.
* If BF16 is unsupported in Conv2d or FullyConnected, revert the fused
Constant Layer to FP32
Change-Id: If523c708a822659d64597d9ae39cca1c2f84b76f
Signed-off-by: Francis Murtagh <francis.murtagh@arm.com>
Diffstat (limited to 'src/armnn/NetworkUtils.cpp')
-rw-r--r-- | src/armnn/NetworkUtils.cpp | 50 |
1 file changed, 49 insertions, 1 deletion
diff --git a/src/armnn/NetworkUtils.cpp b/src/armnn/NetworkUtils.cpp index 7597798fa4..5ff0e6c4e1 100644 --- a/src/armnn/NetworkUtils.cpp +++ b/src/armnn/NetworkUtils.cpp @@ -1,10 +1,12 @@ // -// Copyright © 2017 Arm Ltd. All rights reserved. +// Copyright © 2022 Arm Ltd and Contributors. All rights reserved. // SPDX-License-Identifier: MIT // #include "NetworkUtils.hpp" +#include <armnnUtils/FloatingPointConverter.hpp> +#include <BFloat16.hpp> #include "SubgraphViewSelector.hpp" #include <armnn/Exceptions.hpp> @@ -272,4 +274,50 @@ std::vector<DebugLayer*> InsertDebugLayerAfter(Graph& graph, Layer& layer) return debugLayers; } +bool RevertConstantWeightsToFP32(Layer* layer) +{ + if (layer->GetType() == LayerType::Convolution2d || layer->GetType() == LayerType::FullyConnected) + { + // Revert Weights on Constant Layer to FP32 so they can be accessed by Conv2d or FullyConnected + // This prevents a conversion layer being added in during backend assignment which blocks + // the RedirectMembersToConstantInputs backward compatibility workaround/optimization. 
+ auto constantLayerInfo = layer->GetInputSlot(1).GetConnection()->GetTensorInfo(); + + if (constantLayerInfo.IsConstant() && constantLayerInfo.GetDataType() == DataType::BFloat16) + { + std::vector<float> newValues(constantLayerInfo.GetNumElements()); + + auto weightLayer = PolymorphicDowncast<ConstantLayer*>( + &layer->GetInputSlot(1).GetConnection()->GetOwningIConnectableLayer()); + armnnUtils::FloatingPointConverter::ConvertBFloat16ToFloat32( + weightLayer->m_LayerOutput->GetConstTensor<BFloat16>(), + constantLayerInfo.GetNumElements(), + newValues.data()); + + TensorInfo newInfo(constantLayerInfo.GetShape(), DataType::Float32); + newInfo.SetConstant(true); + ConstTensor newInput(newInfo, newValues); + weightLayer->m_LayerOutput.reset(new ScopedTensorHandle(newInput)); + weightLayer->GetOutputSlot(0).SetTensorInfo(newInfo); + + // Connect Conv2d/FullyConnected to InputLayer directly leaving out + // the ConversionLayer to be cleaned up later + auto& conversionLayer = layer->GetInputSlot(0).GetConnection()->GetOwningIConnectableLayer(); + auto actualInputOutputSlot = conversionLayer.GetInputSlot(0).GetConnection(); + + auto& conversionLayerOutputSlot = + layer->GetInputSlot(0).GetConnection()->GetOwningIConnectableLayer().GetOutputSlot(0); + auto& conversionLayerInputSlot = + layer->GetInputSlot(0).GetConnection()->GetOwningIConnectableLayer().GetInputSlot(0); + actualInputOutputSlot->Disconnect(conversionLayerInputSlot); + conversionLayerOutputSlot.Disconnect(layer->GetInputSlot(0)); + + actualInputOutputSlot->Connect(layer->GetInputSlot(0)); + + return true; + } + } + return false; +} + } // namespace armnn |