diff options
author | Francis Murtagh <francis.murtagh@arm.com> | 2022-07-22 10:23:41 +0100 |
---|---|---|
committer | Nikhil Raj <nikhil.raj@arm.com> | 2022-07-27 15:58:02 +0100 |
commit | 4073e147982df3022ea97cb85179c8c7a695a17a (patch) | |
tree | 657123d837744e982cc87c4ef024a5fbb60a0c8a /src/armnn/test/optimizations | |
parent | 1e276f38e67af7505a25010eee579034ee83d12b (diff) | |
download | armnn-4073e147982df3022ea97cb85179c8c7a695a17a.tar.gz |
IVGCVSW-6978: RedirectMembersToConstantInputs does not work with Fp32NetworkToBf16Converter
* Fuse FP32ToBF16Layers with Constant Layer so Conv2d/FullyConnected
can have their weights redirected.
* If BF16 is unsupported in Conv2d or FullyConnected, revert the fused
Constant Layer to FP32
Change-Id: If523c708a822659d64597d9ae39cca1c2f84b76f
Signed-off-by: Francis Murtagh <francis.murtagh@arm.com>
Diffstat (limited to 'src/armnn/test/optimizations')
-rw-r--r-- | src/armnn/test/optimizations/FuseConvertF32BF16IntoConstLayerTests.cpp | 151 |
1 file changed, 151 insertions, 0 deletions
diff --git a/src/armnn/test/optimizations/FuseConvertF32BF16IntoConstLayerTests.cpp b/src/armnn/test/optimizations/FuseConvertF32BF16IntoConstLayerTests.cpp new file mode 100644 index 0000000000..93d5948d61 --- /dev/null +++ b/src/armnn/test/optimizations/FuseConvertF32BF16IntoConstLayerTests.cpp @@ -0,0 +1,151 @@ +// +// Copyright © 2022 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include <LayersFwd.hpp> +#include <Network.hpp> +#include <NetworkUtils.hpp> +#include <Optimizer.hpp> +#include <TestUtils.hpp> + +#include <armnn/backends/TensorHandle.hpp> + +#include <doctest/doctest.h> + +TEST_SUITE("Optimizer") +{ +using namespace armnn; +using namespace armnn::optimizations; + +TEST_CASE("FuseConvertFp32Fp16intoConst") +{ + Graph graph; + const unsigned int shape[] = {1, 2, 2, 3}; + + const TensorInfo constTensorInfo(4, shape, DataType::Float32, 1.0, 0, true); + const TensorInfo outputConvertInfo(4, shape, DataType::BFloat16, 1.0, 0, true); + + ConstantLayer* constantLayer = graph.AddLayer<ConstantLayer>("constant"); + std::vector<float> constantValues(constTensorInfo.GetNumElements(), 3.1416f); + ConstTensor constTensor(constTensorInfo, constantValues.data()); + constantLayer->m_LayerOutput = std::make_shared<ScopedTensorHandle>(constTensor); + constantLayer->GetOutputSlot().SetTensorInfo(constTensorInfo); + + ConvertFp32ToBf16Layer* convertLayer = graph.AddLayer<ConvertFp32ToBf16Layer>("convert"); + convertLayer->GetOutputSlot().SetTensorInfo(outputConvertInfo); + + OutputLayer* output = graph.AddLayer<OutputLayer>(0, "output"); + + // Connect up constant -> convert -> output + constantLayer->GetOutputSlot().Connect(convertLayer->GetInputSlot(0)); + convertLayer->GetOutputSlot().Connect(output->GetInputSlot(0)); + + auto checkConstantFloat32 = [](const armnn::Layer *const layer) -> bool { + return IsLayerOfType<ConstantLayer>(layer) && + (layer->GetDataType() == DataType::Float32); + }; + auto checkConstantBFloat16 = 
[](const armnn::Layer *const layer) -> bool { + return IsLayerOfType<ConstantLayer>(layer) && + (layer->GetDataType() == DataType::BFloat16); + }; + + CHECK(CheckSequence(graph.cbegin(), graph.cend(), + checkConstantFloat32, + &IsLayerOfType<ConvertFp32ToBf16Layer>, + &IsLayerOfType<OutputLayer>)); + + armnn::Optimizer::Pass(graph, MakeOptimizations(FuseConversionLayersIntoConstLayers())); + + CHECK(CheckSequence(graph.cbegin(), graph.cend(), + checkConstantBFloat16, + &IsLayerOfType<OutputLayer>)); +} + +TEST_CASE("RevertConstantWeightsToFP32") +{ + Graph graph; + const unsigned int shape[] = {1, 2, 2, 3}; + + const TensorInfo constTensorInfo(4, shape, DataType::Float32, 1.0, 0, true); + const TensorInfo outputConvertInfo(4, shape, DataType::BFloat16, 1.0, 0, true); + + TensorInfo inputInfo(4, shape, DataType::Float32); + auto* input = graph.AddLayer<InputLayer>(0, "input0"); + input->GetOutputSlot().SetTensorInfo(inputInfo); + + auto* constantLayer = graph.AddLayer<ConstantLayer>("constant"); + std::vector<float> constantValues(constTensorInfo.GetNumElements(), 3.1416f); + ConstTensor constTensor(constTensorInfo, constantValues.data()); + constantLayer->m_LayerOutput = std::make_shared<ScopedTensorHandle>(constTensor); + constantLayer->GetOutputSlot().SetTensorInfo(constTensorInfo); + + ConvertFp32ToBf16Layer* convertLayerInputs = graph.AddLayer<ConvertFp32ToBf16Layer>("convert"); + convertLayerInputs->GetOutputSlot().SetTensorInfo(outputConvertInfo); + ConvertFp32ToBf16Layer* convertLayerWeights = graph.AddLayer<ConvertFp32ToBf16Layer>("convert2"); + convertLayerWeights->GetOutputSlot().SetTensorInfo(outputConvertInfo); + ConvertFp32ToBf16Layer* convertLayerBiases = graph.AddLayer<ConvertFp32ToBf16Layer>("convert3"); + convertLayerBiases->GetOutputSlot().SetTensorInfo(outputConvertInfo); + + auto* biases = graph.AddLayer<armnn::ConstantLayer>("Biases"); + biases->m_LayerOutput = std::make_unique<armnn::ScopedTensorHandle>(constTensor); + 
biases->GetOutputSlot().SetTensorInfo(constTensorInfo); + + armnn::Convolution2dDescriptor descriptor; + descriptor.m_BiasEnabled = true; + auto* conv = graph.AddLayer<armnn::Convolution2dLayer>(descriptor, "conv2d"); + const armnn::TensorInfo infoFP32({ 2, 3, 8, 1 }, armnn::DataType::Float32); + conv->GetOutputSlot().SetTensorInfo(infoFP32); + + auto* output = graph.AddLayer<OutputLayer>(0, "output"); + + // Connect up Input -> Convert -> + // Constant -> Convert -> Conv2d -> Output + // Constant -> Convert -> + input->GetOutputSlot().Connect(convertLayerInputs->GetInputSlot(0)); + constantLayer->GetOutputSlot().Connect(convertLayerWeights->GetInputSlot(0)); + biases->GetOutputSlot().Connect(convertLayerBiases->GetInputSlot(0)); + + convertLayerInputs->GetOutputSlot().Connect(conv->GetInputSlot(0)); + convertLayerWeights->GetOutputSlot().Connect(conv->GetInputSlot(1)); + convertLayerBiases->GetOutputSlot().Connect(conv->GetInputSlot(2)); + + conv->GetOutputSlot().Connect(output->GetInputSlot(0)); + + auto checkConstantFloat32 = [](const armnn::Layer *const layer) -> bool { + return IsLayerOfType<ConstantLayer>(layer) && + (layer->GetDataType() == DataType::Float32); + }; + auto checkConstantBFloat16 = [](const armnn::Layer *const layer) -> bool { + return IsLayerOfType<ConstantLayer>(layer) && + (layer->GetDataType() == DataType::BFloat16); + }; + + CHECK(CheckSequence(graph.cbegin(), graph.cend(), + &IsLayerOfType<InputLayer>, + checkConstantFloat32, + checkConstantFloat32, + &IsLayerOfType<ConvertFp32ToBf16Layer>, + &IsLayerOfType<ConvertFp32ToBf16Layer>, + &IsLayerOfType<ConvertFp32ToBf16Layer>, + &IsLayerOfType<Convolution2dLayer>, + &IsLayerOfType<OutputLayer>)); + + armnn::Optimizer::Pass(graph, MakeOptimizations(FuseConversionLayersIntoConstLayers())); + + bool revert = RevertConstantWeightsToFP32(conv); + + // Erase unconnected layer as occurs during Topological Sort. 
+ graph.EraseLayer(convertLayerInputs); + + CHECK(revert); + CHECK(constantLayer->GetDataType() == DataType::Float32); + + CHECK(CheckSequence(graph.cbegin(), graph.cend(), + &IsLayerOfType<InputLayer>, + checkConstantBFloat16, + checkConstantFloat32, + &IsLayerOfType<Convolution2dLayer>, + &IsLayerOfType<OutputLayer>)); +} +} |