author     Francis Murtagh <francis.murtagh@arm.com>  2022-07-22 10:23:41 +0100
committer  Francis Murtagh <francis.murtagh@arm.com>  2022-07-22 10:23:41 +0100
commit     89cc6b39376419565505c59c693a107a5edd47aa (patch)
tree       657123d837744e982cc87c4ef024a5fbb60a0c8a
parent     5b1f53975d7fa482475af781ffa2a2277f40e0c9 (diff)
IVGCVSW-6978: RedirectMembersToConstantInputs does not work with Fp32NetworkToBf16Converter
* Fuse FP32ToBF16Layers with Constant Layer so Conv2d/FullyConnected can
  have their weights redirected.
* If BF16 is unsupported in Conv2d || FullyConnected, revert the fused
  Constant Layer to FP32.

Change-Id: If523c708a822659d64597d9ae39cca1c2f84b76f
Signed-off-by: Francis Murtagh <francis.murtagh@arm.com>
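In graph terms, the two bullets amount to the following rewrites (illustrative shapes only, not verbatim layer names):

    Fuse (whenever m_ReduceFp32ToBf16 is set):
        Constant(FP32) -> ConvertFp32ToBf16 -> Conv2d/FullyConnected
        becomes
        Constant(BF16) -> Conv2d/FullyConnected

    Revert (only if the backend rejects BF16 for the consuming layer):
        Constant(BF16) -> Conv2d/FullyConnected
        becomes
        Constant(FP32) -> Conv2d/FullyConnected   (weights widened back to FP32)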
-rw-r--r--  CMakeLists.txt                                                            1
-rw-r--r--  src/armnn/Network.cpp                                                    17
-rw-r--r--  src/armnn/NetworkUtils.cpp                                               50
-rw-r--r--  src/armnn/NetworkUtils.hpp                                                4
-rw-r--r--  src/armnn/optimizations/All.hpp                                           3
-rw-r--r--  src/armnn/optimizations/FuseConvertFp32ToBf16IntoConstLayers.hpp        89
-rw-r--r--  src/armnn/test/optimizations/FuseConvertF32BF16IntoConstLayerTests.cpp  151
7 files changed, 307 insertions, 8 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 41db8661d3..f0eb81cc6c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -547,6 +547,7 @@ if(BUILD_UNIT_TESTS)
src/armnn/test/optimizations/Fp32NetworkToFp16ConverterTests.cpp
src/armnn/test/optimizations/FuseActivationTests.cpp
src/armnn/test/optimizations/FuseBatchNormTests.cpp
+ src/armnn/test/optimizations/FuseConvertF32BF16IntoConstLayerTests.cpp
src/armnn/test/optimizations/InsertDebugLayerTests.cpp
src/armnn/test/optimizations/MovePermuteUpTests.cpp
src/armnn/test/optimizations/MoveTransposeUpTests.cpp
diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp
index 8fe4445dcf..5d443068ce 100644
--- a/src/armnn/Network.cpp
+++ b/src/armnn/Network.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -790,13 +790,18 @@ OptimizationResult AttemptBackendAssignment(BackendSettings& backendSettings,
}
else if (dataTypeIn == DataType::BFloat16 || dataTypeOut == DataType::BFloat16)
{
+ const auto layerType = layer->GetType();
if (IWorkloadFactory::IsLayerSupported(*layer, DataType::Float32, reasonIfUnsupported)
- && layer->GetType() != LayerType::ConvertFp32ToBf16
- && layer->GetType() != LayerType::ConvertBf16ToFp32)
+ && layerType != LayerType::ConvertFp32ToBf16
+ && layerType != LayerType::ConvertBf16ToFp32)
{
- // Insert BF16 -> FP32 conversion layer before current layer
+ bool revertConstantWeightsConversion = RevertConstantWeightsToFP32(layer);
+
+            // Insert a BF16 -> FP32 conversion layer before the current layer,
+            // unless the constant weights were reverted to FP32 above.
std::vector<ConvertBf16ToFp32Layer*> convertBf16ToFp32Layers;
- if (dataTypeIn == DataType::BFloat16)
+ if (dataTypeIn == DataType::BFloat16 && dataTypeOut != DataType::BFloat16
+ && !revertConstantWeightsConversion)
{
convertBf16ToFp32Layers =
InsertConvertBf16ToFp32LayersBefore(graph, *layer);
@@ -1759,10 +1764,12 @@ IOptimizedNetworkPtr Optimize(const Graph& inGraph,
// If Fp32 to Bf16 optimization is set convert Fp32 network to Bf16
// Convert input of Convolution2d and FullyConnected from Fp32 to Bf16
// Only Constant weight of Convolution2d and FullyConnected are converted from Fp32 to Bf16
+    // Constant and Fp32ToBf16 layers will also be fused, so no conversion is needed at inference time
if (options.m_ReduceFp32ToBf16)
{
ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Optimizer_ReduceFp32ToBf16");
Optimizer::Pass(optGraph, MakeOptimizations(Fp32NetworkToBf16Converter()));
+ Optimizer::Pass(optGraph, MakeOptimizations(FuseConversionLayersIntoConstLayers()));
}
// Initialize backend settings
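For orientation, here is a minimal sketch of the pass ordering that m_ReduceFp32ToBf16 now enables, mirroring the two Optimizer::Pass calls in the hunk above. Graph construction, profiling, and error handling are elided, and the include paths assume Arm NN's internal src/armnn headers:

    #include <Optimizer.hpp>
    #include <optimizations/All.hpp>

    void ReduceFp32ToBf16(armnn::Graph& optGraph)
    {
        using namespace armnn;
        using namespace armnn::optimizations;

        // Pass 1: rewrite eligible FP32 layers to BF16, inserting
        // ConvertFp32ToBf16 layers in front of Conv2d / FullyConnected.
        Optimizer::Pass(optGraph, MakeOptimizations(Fp32NetworkToBf16Converter()));

        // Pass 2 (new in this change): fold Constant -> ConvertFp32ToBf16 pairs
        // into a single BF16 Constant, so nothing converts at inference time.
        Optimizer::Pass(optGraph, MakeOptimizations(FuseConversionLayersIntoConstLayers()));
    }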
diff --git a/src/armnn/NetworkUtils.cpp b/src/armnn/NetworkUtils.cpp
index 7597798fa4..5ff0e6c4e1 100644
--- a/src/armnn/NetworkUtils.cpp
+++ b/src/armnn/NetworkUtils.cpp
@@ -1,10 +1,12 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#include "NetworkUtils.hpp"
+#include <armnnUtils/FloatingPointConverter.hpp>
+#include <BFloat16.hpp>
#include "SubgraphViewSelector.hpp"
#include <armnn/Exceptions.hpp>
@@ -272,4 +274,50 @@ std::vector<DebugLayer*> InsertDebugLayerAfter(Graph& graph, Layer& layer)
return debugLayers;
}
+bool RevertConstantWeightsToFP32(Layer* layer)
+{
+ if (layer->GetType() == LayerType::Convolution2d || layer->GetType() == LayerType::FullyConnected)
+ {
+ // Revert Weights on Constant Layer to FP32 so they can be accessed by Conv2d or FullyConnected
+        // This prevents a conversion layer being added during backend assignment,
+        // which would block the RedirectMembersToConstantInputs backward compatibility
+        // workaround/optimization.
+ auto constantLayerInfo = layer->GetInputSlot(1).GetConnection()->GetTensorInfo();
+
+ if (constantLayerInfo.IsConstant() && constantLayerInfo.GetDataType() == DataType::BFloat16)
+ {
+ std::vector<float> newValues(constantLayerInfo.GetNumElements());
+
+ auto weightLayer = PolymorphicDowncast<ConstantLayer*>(
+ &layer->GetInputSlot(1).GetConnection()->GetOwningIConnectableLayer());
+ armnnUtils::FloatingPointConverter::ConvertBFloat16ToFloat32(
+ weightLayer->m_LayerOutput->GetConstTensor<BFloat16>(),
+ constantLayerInfo.GetNumElements(),
+ newValues.data());
+
+ TensorInfo newInfo(constantLayerInfo.GetShape(), DataType::Float32);
+ newInfo.SetConstant(true);
+ ConstTensor newInput(newInfo, newValues);
+ weightLayer->m_LayerOutput.reset(new ScopedTensorHandle(newInput));
+ weightLayer->GetOutputSlot(0).SetTensorInfo(newInfo);
+
+            // Connect Conv2d/FullyConnected directly to the input layer, leaving the
+            // conversion layer unconnected so it is cleaned up later.
+ auto& conversionLayer = layer->GetInputSlot(0).GetConnection()->GetOwningIConnectableLayer();
+ auto actualInputOutputSlot = conversionLayer.GetInputSlot(0).GetConnection();
+
+            // Reuse the conversion layer bound above rather than re-deriving it.
+            auto& conversionLayerOutputSlot = conversionLayer.GetOutputSlot(0);
+            auto& conversionLayerInputSlot  = conversionLayer.GetInputSlot(0);
+ actualInputOutputSlot->Disconnect(conversionLayerInputSlot);
+ conversionLayerOutputSlot.Disconnect(layer->GetInputSlot(0));
+
+ actualInputOutputSlot->Connect(layer->GetInputSlot(0));
+
+ return true;
+ }
+ }
+ return false;
+}
+
} // namespace armnn
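The rewiring above can be read as a generic "bypass one layer" step. Below is a condensed sketch using only the slot calls that appear in RevertConstantWeightsToFP32; BypassConversionLayer is a hypothetical helper name, not part of the Arm NN API:

    void BypassConversionLayer(armnn::IConnectableLayer& conversionLayer,
                               armnn::IInputSlot& consumerSlot)
    {
        // The output slot feeding the conversion layer, i.e. the real input.
        auto* actualInputOutputSlot = conversionLayer.GetInputSlot(0).GetConnection();

        // Unhook the conversion layer on both sides...
        actualInputOutputSlot->Disconnect(conversionLayer.GetInputSlot(0));
        conversionLayer.GetOutputSlot(0).Disconnect(consumerSlot);

        // ...and wire the real input straight to the consumer. The dangling
        // conversion layer is erased later (see EraseLayer in the unit test).
        actualInputOutputSlot->Connect(consumerSlot);
    }

Note that the revert is not a bit-exact undo: the weights were already rounded to BF16 when they were fused, and ConvertBFloat16ToFloat32 widens exactly, so the restored FP32 tensor holds the BF16-rounded values rather than the original FP32 ones.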
diff --git a/src/armnn/NetworkUtils.hpp b/src/armnn/NetworkUtils.hpp
index a922770285..77dd068cb3 100644
--- a/src/armnn/NetworkUtils.hpp
+++ b/src/armnn/NetworkUtils.hpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -29,4 +29,6 @@ std::vector<ConvertFp32ToFp16Layer*> InsertConvertFp32ToFp16LayersAfter(Graph& g
std::vector<DebugLayer*> InsertDebugLayerAfter(Graph& graph, Layer& layer);
+bool RevertConstantWeightsToFP32(Layer* layer);
+
} // namespace armnn
diff --git a/src/armnn/optimizations/All.hpp b/src/armnn/optimizations/All.hpp
index 900e763762..0421f31973 100644
--- a/src/armnn/optimizations/All.hpp
+++ b/src/armnn/optimizations/All.hpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#pragma once
@@ -9,6 +9,7 @@
#include "ConvertConstants.hpp"
#include "ConvertConstDequantisationLayersToConstLayers.hpp"
#include "ConvertConstPermuteLayersToConstLayers.hpp"
+#include "FuseConvertFp32ToBf16IntoConstLayers.hpp"
#include "ConvertFp32NetworkToBf16.hpp"
#include "ConvertFp32NetworkToFp16.hpp"
#include "FoldPadIntoLayer2d.hpp"
diff --git a/src/armnn/optimizations/FuseConvertFp32ToBf16IntoConstLayers.hpp b/src/armnn/optimizations/FuseConvertFp32ToBf16IntoConstLayers.hpp
new file mode 100644
index 0000000000..d112010539
--- /dev/null
+++ b/src/armnn/optimizations/FuseConvertFp32ToBf16IntoConstLayers.hpp
@@ -0,0 +1,89 @@
+//
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "Optimization.hpp"
+#include <armnnUtils/Permute.hpp>
+#include <ResolveType.hpp>
+
+namespace armnn
+{
+namespace optimizations
+{
+
+class FuseConvertFp32ToBf16IntoConstLayers
+{
+public:
+ void Run(Graph& graph, InputSlot& connection) const
+ {
+ Layer& base = connection.GetConnectedOutputSlot()->GetOwningLayer();
+ Layer& child = connection.GetOwningLayer();
+
+ ARMNN_ASSERT(base.GetType() == LayerType::Constant);
+ ARMNN_ASSERT(child.GetType() == LayerType::ConvertFp32ToBf16);
+
+ auto dataType = base.GetDataType();
+ switch (dataType)
+ {
+ case DataType::Float32:
+ ReplaceConvertFp32ToBf16Layer<DataType::BFloat16>(
+ graph,
+ PolymorphicDowncast<ConstantLayer*>(&base),
+ PolymorphicDowncast<ConvertFp32ToBf16Layer*>(&child));
+ break;
+ default:
+ throw InvalidArgumentException(GetDataTypeName(dataType) +
+ std::string(" Constant Layer cannot be fused into ") +
+ GetDataTypeName(child.GetDataType()) +
+ std::string(" conversion layer."));
+ }
+ }
+protected:
+ FuseConvertFp32ToBf16IntoConstLayers() = default;
+ ~FuseConvertFp32ToBf16IntoConstLayers() = default;
+private:
+ template<armnn::DataType ArmnnType,
+ typename T = armnn::ResolveType<ArmnnType>>
+ static void ReplaceConvertFp32ToBf16Layer(Graph& graph,
+ ConstantLayer* constantLayer,
+ ConvertFp32ToBf16Layer* convertFp32ToBf16layer)
+ {
+ IgnoreUnused(graph);
+        /**
+         * This optimisation finds situations where a constant set of inputs is being
+         * provided to a ConvertFp32ToBf16 layer. In this case we don't want the overhead
+         * of converting the values on every inference; instead we convert them once and
+         * store them in a Const layer, to be reused every time, as they will not change.
+         */
+ TensorInfo outputConvertFp32ToBf16Info = convertFp32ToBf16layer->GetOutputSlot(0).GetTensorInfo();
+ std::vector<T> newValues(outputConvertFp32ToBf16Info.GetNumElements());
+
+ armnnUtils::FloatingPointConverter::ConvertFloat32ToBFloat16(
+ constantLayer->m_LayerOutput->GetConstTensor<float>(),
+ outputConvertFp32ToBf16Info.GetNumElements(),
+ newValues.data());
+ TensorInfo newInfo = outputConvertFp32ToBf16Info;
+ newInfo.SetConstant(true);
+ ConstTensor newInput(newInfo, newValues);
+
+ constantLayer->m_LayerOutput.reset(new ScopedTensorHandle(newInput));
+
+        // Move the connections on the convertFp32ToBf16layer output slot to the constant layer.
+        // The ConvertFp32ToBf16 layer will be removed if left unconnected.
+ convertFp32ToBf16layer->GetOutputSlot().MoveAllConnections(constantLayer->GetOutputSlot());
+
+ // Updating the output tensor
+ constantLayer->GetOutputSlot(0).SetTensorInfo(newInfo);
+ ARMNN_ASSERT(constantLayer->GetOutputSlot(0).GetTensorInfo().IsConstant() == true);
+ }
+};
+
+using FuseConversionLayersIntoConstLayers = OptimizeForConnection<ConstantLayer,
+ ConvertFp32ToBf16Layer,
+ FuseConvertFp32ToBf16IntoConstLayers>;
+
+} // namespace optimizations
+} // namespace armnn
\ No newline at end of file
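The overhead this pass removes is the per-inference FP32 -> BF16 conversion of values that never change; the numeric effect of converting once is easy to see in isolation. A self-contained sketch using plain truncation (armnnUtils::FloatingPointConverter performs the real conversion, and its rounding may differ):

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    // bfloat16 is the high 16 bits of an IEEE-754 float32: same 8-bit
    // exponent, mantissa cut from 23 bits to 7.
    static uint16_t Fp32ToBf16(float value)
    {
        uint32_t bits;
        std::memcpy(&bits, &value, sizeof(bits));
        return static_cast<uint16_t>(bits >> 16);
    }

    static float Bf16ToFp32(uint16_t bf16)
    {
        uint32_t bits = static_cast<uint32_t>(bf16) << 16;  // exact widening
        float value;
        std::memcpy(&value, &bits, sizeof(value));
        return value;
    }

    int main()
    {
        float weight = 3.1416f;  // the constant used in the unit test below
        uint16_t stored = Fp32ToBf16(weight);
        std::printf("%f -> 0x%04x -> %f\n", weight, stored, Bf16ToFp32(stored));
        // Prints: 3.141600 -> 0x4049 -> 3.140625
        return 0;
    }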
diff --git a/src/armnn/test/optimizations/FuseConvertF32BF16IntoConstLayerTests.cpp b/src/armnn/test/optimizations/FuseConvertF32BF16IntoConstLayerTests.cpp
new file mode 100644
index 0000000000..93d5948d61
--- /dev/null
+++ b/src/armnn/test/optimizations/FuseConvertF32BF16IntoConstLayerTests.cpp
@@ -0,0 +1,151 @@
+//
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include <LayersFwd.hpp>
+#include <Network.hpp>
+#include <NetworkUtils.hpp>
+#include <Optimizer.hpp>
+#include <TestUtils.hpp>
+
+#include <armnn/backends/TensorHandle.hpp>
+
+#include <doctest/doctest.h>
+
+TEST_SUITE("Optimizer")
+{
+using namespace armnn;
+using namespace armnn::optimizations;
+
+TEST_CASE("FuseConvertFp32Fp16intoConst")
+{
+ Graph graph;
+ const unsigned int shape[] = {1, 2, 2, 3};
+
+ const TensorInfo constTensorInfo(4, shape, DataType::Float32, 1.0, 0, true);
+ const TensorInfo outputConvertInfo(4, shape, DataType::BFloat16, 1.0, 0, true);
+
+ ConstantLayer* constantLayer = graph.AddLayer<ConstantLayer>("constant");
+ std::vector<float> constantValues(constTensorInfo.GetNumElements(), 3.1416f);
+ ConstTensor constTensor(constTensorInfo, constantValues.data());
+ constantLayer->m_LayerOutput = std::make_shared<ScopedTensorHandle>(constTensor);
+ constantLayer->GetOutputSlot().SetTensorInfo(constTensorInfo);
+
+ ConvertFp32ToBf16Layer* convertLayer = graph.AddLayer<ConvertFp32ToBf16Layer>("convert");
+ convertLayer->GetOutputSlot().SetTensorInfo(outputConvertInfo);
+
+ OutputLayer* output = graph.AddLayer<OutputLayer>(0, "output");
+
+ // Connect up constant -> convert -> output
+ constantLayer->GetOutputSlot().Connect(convertLayer->GetInputSlot(0));
+ convertLayer->GetOutputSlot().Connect(output->GetInputSlot(0));
+
+ auto checkConstantFloat32 = [](const armnn::Layer *const layer) -> bool {
+ return IsLayerOfType<ConstantLayer>(layer) &&
+ (layer->GetDataType() == DataType::Float32);
+ };
+ auto checkConstantBFloat16 = [](const armnn::Layer *const layer) -> bool {
+ return IsLayerOfType<ConstantLayer>(layer) &&
+ (layer->GetDataType() == DataType::BFloat16);
+ };
+
+ CHECK(CheckSequence(graph.cbegin(), graph.cend(),
+ checkConstantFloat32,
+ &IsLayerOfType<ConvertFp32ToBf16Layer>,
+ &IsLayerOfType<OutputLayer>));
+
+ armnn::Optimizer::Pass(graph, MakeOptimizations(FuseConversionLayersIntoConstLayers()));
+
+ CHECK(CheckSequence(graph.cbegin(), graph.cend(),
+ checkConstantBFloat16,
+ &IsLayerOfType<OutputLayer>));
+}
+
+TEST_CASE("RevertConstantWeightsToFP32")
+{
+ Graph graph;
+ const unsigned int shape[] = {1, 2, 2, 3};
+
+ const TensorInfo constTensorInfo(4, shape, DataType::Float32, 1.0, 0, true);
+ const TensorInfo outputConvertInfo(4, shape, DataType::BFloat16, 1.0, 0, true);
+
+ TensorInfo inputInfo(4, shape, DataType::Float32);
+ auto* input = graph.AddLayer<InputLayer>(0, "input0");
+ input->GetOutputSlot().SetTensorInfo(inputInfo);
+
+ auto* constantLayer = graph.AddLayer<ConstantLayer>("constant");
+ std::vector<float> constantValues(constTensorInfo.GetNumElements(), 3.1416f);
+ ConstTensor constTensor(constTensorInfo, constantValues.data());
+ constantLayer->m_LayerOutput = std::make_shared<ScopedTensorHandle>(constTensor);
+ constantLayer->GetOutputSlot().SetTensorInfo(constTensorInfo);
+
+ ConvertFp32ToBf16Layer* convertLayerInputs = graph.AddLayer<ConvertFp32ToBf16Layer>("convert");
+ convertLayerInputs->GetOutputSlot().SetTensorInfo(outputConvertInfo);
+ ConvertFp32ToBf16Layer* convertLayerWeights = graph.AddLayer<ConvertFp32ToBf16Layer>("convert2");
+ convertLayerWeights->GetOutputSlot().SetTensorInfo(outputConvertInfo);
+ ConvertFp32ToBf16Layer* convertLayerBiases = graph.AddLayer<ConvertFp32ToBf16Layer>("convert3");
+ convertLayerBiases->GetOutputSlot().SetTensorInfo(outputConvertInfo);
+
+ auto* biases = graph.AddLayer<armnn::ConstantLayer>("Biases");
+ biases->m_LayerOutput = std::make_unique<armnn::ScopedTensorHandle>(constTensor);
+ biases->GetOutputSlot().SetTensorInfo(constTensorInfo);
+
+ armnn::Convolution2dDescriptor descriptor;
+ descriptor.m_BiasEnabled = true;
+ auto* conv = graph.AddLayer<armnn::Convolution2dLayer>(descriptor, "conv2d");
+ const armnn::TensorInfo infoFP32({ 2, 3, 8, 1 }, armnn::DataType::Float32);
+ conv->GetOutputSlot().SetTensorInfo(infoFP32);
+
+ auto* output = graph.AddLayer<OutputLayer>(0, "output");
+
+ // Connect up Input -> Convert ->
+ // Constant -> Convert -> Conv2d -> Output
+ // Constant -> Convert ->
+ input->GetOutputSlot().Connect(convertLayerInputs->GetInputSlot(0));
+ constantLayer->GetOutputSlot().Connect(convertLayerWeights->GetInputSlot(0));
+ biases->GetOutputSlot().Connect(convertLayerBiases->GetInputSlot(0));
+
+ convertLayerInputs->GetOutputSlot().Connect(conv->GetInputSlot(0));
+ convertLayerWeights->GetOutputSlot().Connect(conv->GetInputSlot(1));
+ convertLayerBiases->GetOutputSlot().Connect(conv->GetInputSlot(2));
+
+ conv->GetOutputSlot().Connect(output->GetInputSlot(0));
+
+ auto checkConstantFloat32 = [](const armnn::Layer *const layer) -> bool {
+ return IsLayerOfType<ConstantLayer>(layer) &&
+ (layer->GetDataType() == DataType::Float32);
+ };
+ auto checkConstantBFloat16 = [](const armnn::Layer *const layer) -> bool {
+ return IsLayerOfType<ConstantLayer>(layer) &&
+ (layer->GetDataType() == DataType::BFloat16);
+ };
+
+ CHECK(CheckSequence(graph.cbegin(), graph.cend(),
+ &IsLayerOfType<InputLayer>,
+ checkConstantFloat32,
+ checkConstantFloat32,
+ &IsLayerOfType<ConvertFp32ToBf16Layer>,
+ &IsLayerOfType<ConvertFp32ToBf16Layer>,
+ &IsLayerOfType<ConvertFp32ToBf16Layer>,
+ &IsLayerOfType<Convolution2dLayer>,
+ &IsLayerOfType<OutputLayer>));
+
+ armnn::Optimizer::Pass(graph, MakeOptimizations(FuseConversionLayersIntoConstLayers()));
+
+ bool revert = RevertConstantWeightsToFP32(conv);
+
+    // Erase the unconnected layer, as happens during topological sorting.
+ graph.EraseLayer(convertLayerInputs);
+
+ CHECK(revert);
+ CHECK(constantLayer->GetDataType() == DataType::Float32);
+
+ CHECK(CheckSequence(graph.cbegin(), graph.cend(),
+ &IsLayerOfType<InputLayer>,
+ checkConstantBFloat16,
+ checkConstantFloat32,
+ &IsLayerOfType<Convolution2dLayer>,
+ &IsLayerOfType<OutputLayer>));
+}
+}