path: root/src/armnn/NetworkUtils.cpp
author     Francis Murtagh <francis.murtagh@arm.com>   2022-07-22 10:23:41 +0100
committer  Nikhil Raj <nikhil.raj@arm.com>             2022-07-27 15:58:02 +0100
commit     4073e147982df3022ea97cb85179c8c7a695a17a (patch)
tree       657123d837744e982cc87c4ef024a5fbb60a0c8a /src/armnn/NetworkUtils.cpp
parent     1e276f38e67af7505a25010eee579034ee83d12b (diff)
download   armnn-4073e147982df3022ea97cb85179c8c7a695a17a.tar.gz
IVGCVSW-6978: RedirectMembersToConstantInputs does not work with Fp32NetworkToBf16Converter
* Fuse FP32ToBF16Layers with the Constant layer so Conv2d/FullyConnected can have their weights redirected.
* If BF16 is unsupported in Conv2d or FullyConnected, revert the fused Constant layer to FP32.

Change-Id: If523c708a822659d64597d9ae39cca1c2f84b76f
Signed-off-by: Francis Murtagh <francis.murtagh@arm.com>
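The revert is lossless because a bfloat16 value is simply the upper 16 bits of an IEEE-754 float32. A minimal sketch of the per-element widening that armnnUtils::FloatingPointConverter::ConvertBFloat16ToFloat32 effectively performs (the helper name Bf16BitsToFloat32 is illustrative, not ArmNN API):

#include <cstdint>
#include <cstring>

// Widen one bfloat16 bit pattern to float32: shift the 16 bits into the
// high half of a 32-bit word; the low 16 mantissa bits become zero.
static float Bf16BitsToFloat32(uint16_t bf16Bits)
{
    uint32_t f32Bits = static_cast<uint32_t>(bf16Bits) << 16;
    float result;
    std::memcpy(&result, &f32Bits, sizeof(result)); // bit-cast without UB
    return result;
}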
Diffstat (limited to 'src/armnn/NetworkUtils.cpp')
-rw-r--r--  src/armnn/NetworkUtils.cpp  |  50
1 file changed, 49 insertions(+), 1 deletion(-)
diff --git a/src/armnn/NetworkUtils.cpp b/src/armnn/NetworkUtils.cpp
index 7597798fa4..5ff0e6c4e1 100644
--- a/src/armnn/NetworkUtils.cpp
+++ b/src/armnn/NetworkUtils.cpp
@@ -1,10 +1,12 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#include "NetworkUtils.hpp"
+#include <armnnUtils/FloatingPointConverter.hpp>
+#include <BFloat16.hpp>
#include "SubgraphViewSelector.hpp"
#include <armnn/Exceptions.hpp>
@@ -272,4 +274,50 @@ std::vector<DebugLayer*> InsertDebugLayerAfter(Graph& graph, Layer& layer)
return debugLayers;
}
+bool RevertConstantWeightsToFP32(Layer* layer)
+{
+    if (layer->GetType() == LayerType::Convolution2d || layer->GetType() == LayerType::FullyConnected)
+    {
+        // Revert the weights on the Constant layer to FP32 so they can be accessed by Conv2d or FullyConnected.
+        // This prevents a conversion layer from being added during backend assignment, which would block
+        // the RedirectMembersToConstantInputs backward-compatibility workaround/optimization.
+        auto constantLayerInfo = layer->GetInputSlot(1).GetConnection()->GetTensorInfo();
+
+        if (constantLayerInfo.IsConstant() && constantLayerInfo.GetDataType() == DataType::BFloat16)
+        {
+            std::vector<float> newValues(constantLayerInfo.GetNumElements());
+
+            auto weightLayer = PolymorphicDowncast<ConstantLayer*>(
+                &layer->GetInputSlot(1).GetConnection()->GetOwningIConnectableLayer());
+            armnnUtils::FloatingPointConverter::ConvertBFloat16ToFloat32(
+                weightLayer->m_LayerOutput->GetConstTensor<BFloat16>(),
+                constantLayerInfo.GetNumElements(),
+                newValues.data());
+
+            TensorInfo newInfo(constantLayerInfo.GetShape(), DataType::Float32);
+            newInfo.SetConstant(true);
+            ConstTensor newInput(newInfo, newValues);
+            weightLayer->m_LayerOutput.reset(new ScopedTensorHandle(newInput));
+            weightLayer->GetOutputSlot(0).SetTensorInfo(newInfo);
+
+            // Connect Conv2d/FullyConnected directly to the layer feeding the conversion layer,
+            // leaving the conversion layer disconnected so it can be cleaned up later.
+            auto& conversionLayer = layer->GetInputSlot(0).GetConnection()->GetOwningIConnectableLayer();
+            auto actualInputOutputSlot = conversionLayer.GetInputSlot(0).GetConnection();
+
+            auto& conversionLayerOutputSlot =
+                layer->GetInputSlot(0).GetConnection()->GetOwningIConnectableLayer().GetOutputSlot(0);
+            auto& conversionLayerInputSlot =
+                layer->GetInputSlot(0).GetConnection()->GetOwningIConnectableLayer().GetInputSlot(0);
+            actualInputOutputSlot->Disconnect(conversionLayerInputSlot);
+            conversionLayerOutputSlot.Disconnect(layer->GetInputSlot(0));
+
+            actualInputOutputSlot->Connect(layer->GetInputSlot(0));
+
+            return true;
+        }
+    }
+    return false;
+}
+
} // namespace armnn
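For context, a hypothetical sketch (not part of this patch) of how an optimization pass could apply the new helper when the chosen backend cannot run BF16; the pass name, the graph iteration, and the capability callback are assumptions, not ArmNN API:

#include "NetworkUtils.hpp" // declares RevertConstantWeightsToFP32
#include <functional>

namespace armnn
{

// Hypothetical pass: revert fused BF16 constant weights wherever the
// backend rejects BF16. RevertConstantWeightsToFP32 returns false for
// layers it does not handle, so calling it unconditionally is safe.
void RevertBf16WeightsWhereUnsupported(Graph& graph,
                                       const std::function<bool(const Layer&)>& backendSupportsBf16)
{
    for (Layer* layer : graph)
    {
        if (!backendSupportsBf16(*layer))
        {
            RevertConstantWeightsToFP32(layer);
        }
    }
}

} // namespace armnn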