path: root/src/armnn/Network.cpp
author    Francis Murtagh <francis.murtagh@arm.com>    2022-07-22 10:23:41 +0100
committer Francis Murtagh <francis.murtagh@arm.com>    2022-07-22 10:23:41 +0100
commit    89cc6b39376419565505c59c693a107a5edd47aa (patch)
tree      657123d837744e982cc87c4ef024a5fbb60a0c8a /src/armnn/Network.cpp
parent    5b1f53975d7fa482475af781ffa2a2277f40e0c9 (diff)
download  armnn-89cc6b39376419565505c59c693a107a5edd47aa.tar.gz
IVGCVSW-6978: RedirectMembersToConstantInputs does not work with Fp32NetworkToBf16Converter
* Fuse FP32ToBF16Layers with the Constant Layer so Conv2d/FullyConnected can have their weights redirected.
* If BF16 is unsupported in Conv2d or FullyConnected, revert the fused Constant Layer to FP32.

Change-Id: If523c708a822659d64597d9ae39cca1c2f84b76f
Signed-off-by: Francis Murtagh <francis.murtagh@arm.com>
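For context, a minimal sketch (not part of this patch) of how an application opts in to the FP32 -> BF16 reduction that these optimizer passes implement. It assumes the ArmNN 22.05-era public API (OptimizerOptions::m_ReduceFp32ToBf16, armnn::Optimize, the CpuAcc backend); the trivial input-to-output network is only a placeholder for a real Conv2d/FullyConnected model.

// Sketch only: enabling the FP32 -> BF16 reduction at Optimize() time.
#include <armnn/ArmNN.hpp>

int main()
{
    using namespace armnn;

    IRuntimePtr runtime = IRuntime::Create(IRuntime::CreationOptions());

    // Placeholder network: a single input connected straight to an output.
    INetworkPtr network = INetwork::Create();
    IConnectableLayer* input  = network->AddInputLayer(0);
    IConnectableLayer* output = network->AddOutputLayer(0);
    input->GetOutputSlot(0).Connect(output->GetInputSlot(0));
    input->GetOutputSlot(0).SetTensorInfo(TensorInfo({1, 4}, DataType::Float32));

    // m_ReduceFp32ToBf16 triggers Fp32NetworkToBf16Converter and, with this patch,
    // FuseConversionLayersIntoConstLayers during Optimize().
    OptimizerOptions optOptions;
    optOptions.m_ReduceFp32ToBf16 = true;

    IOptimizedNetworkPtr optNet = Optimize(*network,
                                           {Compute::CpuAcc},
                                           runtime->GetDeviceSpec(),
                                           optOptions);
    return optNet ? 0 : 1;
}

With this change, the constant weights are fused into BF16 Constant layers, so no ConvertFp32ToBf16 work remains at inference time; on backends that reject BF16, RevertConstantWeightsToFP32 undoes the fusion.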
Diffstat (limited to 'src/armnn/Network.cpp')
-rw-r--r--    src/armnn/Network.cpp    17
1 file changed, 12 insertions, 5 deletions
diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp
index 8fe4445dcf..5d443068ce 100644
--- a/src/armnn/Network.cpp
+++ b/src/armnn/Network.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -790,13 +790,18 @@ OptimizationResult AttemptBackendAssignment(BackendSettings& backendSettings,
}
else if (dataTypeIn == DataType::BFloat16 || dataTypeOut == DataType::BFloat16)
{
+ const auto layerType = layer->GetType();
if (IWorkloadFactory::IsLayerSupported(*layer, DataType::Float32, reasonIfUnsupported)
- && layer->GetType() != LayerType::ConvertFp32ToBf16
- && layer->GetType() != LayerType::ConvertBf16ToFp32)
+ && layerType != LayerType::ConvertFp32ToBf16
+ && layerType != LayerType::ConvertBf16ToFp32)
{
- // Insert BF16 -> FP32 conversion layer before current layer
+ bool revertConstantWeightsConversion = RevertConstantWeightsToFP32(layer);
+
+ // Insert BF16 -> FP32 conversion layer before current layer.
+ // Unless we have reverted Constant Weights Type above.
std::vector<ConvertBf16ToFp32Layer*> convertBf16ToFp32Layers;
- if (dataTypeIn == DataType::BFloat16)
+ if (dataTypeIn == DataType::BFloat16 && dataTypeOut != DataType::BFloat16
+ && !revertConstantWeightsConversion)
{
convertBf16ToFp32Layers =
InsertConvertBf16ToFp32LayersBefore(graph, *layer);
@@ -1759,10 +1764,12 @@ IOptimizedNetworkPtr Optimize(const Graph& inGraph,
// If Fp32 to Bf16 optimization is set convert Fp32 network to Bf16
// Convert input of Convolution2d and FullyConnected from Fp32 to Bf16
// Only Constant weight of Convolution2d and FullyConnected are converted from Fp32 to Bf16
+ // Constant and Fp32ToBf16 layers will also be fused so conversion is no longer needed at inference time
if (options.m_ReduceFp32ToBf16)
{
ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Optimizer_ReduceFp32ToBf16");
Optimizer::Pass(optGraph, MakeOptimizations(Fp32NetworkToBf16Converter()));
+ Optimizer::Pass(optGraph, MakeOptimizations(FuseConversionLayersIntoConstLayers()));
}
// Initialize backend settings
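To make the new fallback path in AttemptBackendAssignment easier to follow, here is an illustrative, self-contained C++ model of the decision shown in the first hunk. FakeLayer and FallBackToFp32 are hypothetical stand-ins for this sketch, not ArmNN types.

// Illustrative model of the BF16 -> FP32 fallback decision; not ArmNN code.
#include <iostream>

enum class DataType { Float32, BFloat16 };

struct FakeLayer
{
    DataType in;
    DataType out;
    bool hasFusedBf16ConstantWeights; // weights fused into a BF16 Constant layer
};

void FallBackToFp32(FakeLayer& layer)
{
    // Mirrors RevertConstantWeightsToFP32: if the weights were fused into a BF16
    // Constant layer, turn that layer back into FP32 instead of converting at runtime.
    bool reverted = layer.hasFusedBf16ConstantWeights;
    if (reverted)
    {
        layer.hasFusedBf16ConstantWeights = false;
        std::cout << "Reverted fused constant weights to FP32\n";
    }

    // Otherwise insert a BF16 -> FP32 conversion in front of the layer, as the
    // hunk above does when the input is BF16, the output is not, and no revert happened.
    if (layer.in == DataType::BFloat16 && layer.out != DataType::BFloat16 && !reverted)
    {
        std::cout << "Insert ConvertBf16ToFp32 before layer\n";
    }
}

int main()
{
    FakeLayer conv{DataType::BFloat16, DataType::Float32, /*hasFusedBf16ConstantWeights=*/true};
    FallBackToFp32(conv); // prints the revert branch, no conversion layer inserted
}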