diff options
author    | Narumol Prangnawarat <narumol.prangnawarat@arm.com> | 2020-03-20 15:01:01 +0000
committer | Narumol Prangnawarat <narumol.prangnawarat@arm.com> | 2020-03-20 19:09:07 +0000
commit    | bc7ffb5e9e5f4c86280b20c65772eb12d8bb140e (patch)
tree      | 5187f34326414e7dfea80e0f4efaae5cbeb05d1d /src/armnn/optimizations
parent    | cf2ad554502830804e991aca2e5b0741623119b2 (diff)
download  | armnn-bc7ffb5e9e5f4c86280b20c65772eb12d8bb140e.tar.gz
IVGCVSW-4520 Implement BFloat16 Optimizer
* Add ReduceFp32ToBf16 to OptimizerOptions
* Add ConvertFp32NetworkToBf16
* Add utility functions to insert conversion layers
* Add constant conversion BF16 <-> FP32
* Unit tests
Signed-off-by: Narumol Prangnawarat <narumol.prangnawarat@arm.com>
Change-Id: Iaff77e20c721400b052cb37eb9ef6fe16d7abaff
Diffstat (limited to 'src/armnn/optimizations')
-rw-r--r-- | src/armnn/optimizations/All.hpp                      |  1
-rw-r--r-- | src/armnn/optimizations/ConvertConstants.hpp         | 54
-rw-r--r-- | src/armnn/optimizations/ConvertFp32NetworkToBf16.hpp | 81
3 files changed, 136 insertions, 0 deletions
diff --git a/src/armnn/optimizations/All.hpp b/src/armnn/optimizations/All.hpp index 273c337665..9fc284213d 100644 --- a/src/armnn/optimizations/All.hpp +++ b/src/armnn/optimizations/All.hpp @@ -6,6 +6,7 @@ #include "AddDebug.hpp" #include "ConvertConstants.hpp" +#include "ConvertFp32NetworkToBf16.hpp" #include "ConvertFp32NetworkToFp16.hpp" #include "FoldPadIntoConvolution2d.hpp" #include "MovePermuteUp.hpp" diff --git a/src/armnn/optimizations/ConvertConstants.hpp b/src/armnn/optimizations/ConvertConstants.hpp index 5e19c7bd05..f3ebcdf5d9 100644 --- a/src/armnn/optimizations/ConvertConstants.hpp +++ b/src/armnn/optimizations/ConvertConstants.hpp @@ -13,6 +13,7 @@ #include <armnn/utility/IgnoreUnused.hpp> +#include <BFloat16.hpp> #include <Half.hpp> namespace armnn @@ -20,6 +21,27 @@ namespace armnn namespace optimizations { +struct BFloat16ToFloat32 +{ + static void Func(std::unique_ptr<ScopedCpuTensorHandle>& handle) + { + const TensorInfo& info = handle->GetTensorInfo(); + + if (info.GetDataType() == DataType::BFloat16) + { + std::vector<float> newValues(info.GetNumElements()); + + armnnUtils::FloatingPointConverter::ConvertBFloat16ToFloat32(handle->GetTensor<BFloat16>(), + info.GetNumElements(), + newValues.data()); + + TensorInfo newInfo(info.GetShape(), DataType::Float32); + ConstTensor newInput(newInfo, newValues); + handle.reset(new ScopedCpuTensorHandle(newInput)); + } + } +}; + struct Float16ToFloat32 { static void Func(std::unique_ptr<ScopedCpuTensorHandle>& handle) @@ -41,6 +63,27 @@ struct Float16ToFloat32 } }; +struct Float32ToBFloat16 +{ + static void Func(std::unique_ptr<ScopedCpuTensorHandle>& handle) + { + const TensorInfo& info = handle->GetTensorInfo(); + + if (info.GetDataType() == DataType::Float32) + { + std::vector<BFloat16> newValues(info.GetNumElements()); + + armnnUtils::FloatingPointConverter::ConvertFloat32ToBFloat16(handle->GetTensor<float>(), + info.GetNumElements(), + newValues.data()); + + TensorInfo newInfo(info.GetShape(), 
DataType::BFloat16); + ConstTensor newInput(newInfo, newValues); + handle.reset(new ScopedCpuTensorHandle(newInput)); + } + } +}; + struct Float32ToFloat16 { static void Func(std::unique_ptr<ScopedCpuTensorHandle>& handle) @@ -97,6 +140,17 @@ struct IsFloat16Layer } }; +struct IsBFloat16Layer +{ + static bool Test(const Layer& layer) + { + return layer.GetDataType() == DataType::BFloat16; + } +}; + +using ConvertConstantsBFloatToFloat = ConvertConstants<BFloat16ToFloat32, IsFloat32Layer>; +using ConvertConstantsFloatToBFloat = ConvertConstants<Float32ToBFloat16, IsBFloat16Layer>; + using ConvertConstantsHalfToFloat = ConvertConstants<Float16ToFloat32, IsFloat32Layer>; using ConvertConstantsFloatToHalf = ConvertConstants<Float32ToFloat16, IsFloat16Layer>; diff --git a/src/armnn/optimizations/ConvertFp32NetworkToBf16.hpp b/src/armnn/optimizations/ConvertFp32NetworkToBf16.hpp new file mode 100644 index 0000000000..d6350c3af3 --- /dev/null +++ b/src/armnn/optimizations/ConvertFp32NetworkToBf16.hpp @@ -0,0 +1,81 @@ +// +// Copyright © 2020 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// +#pragma once + +#include "Optimization.hpp" +#include "NetworkUtils.hpp" + +namespace armnn +{ +namespace optimizations +{ + +class ConvertFp32NetworkToBf16Impl +{ +public: + void Run(Graph& graph, Layer& layer) const + { + if(layer.GetType() == LayerType::Input) + { + // if the outputs of this layer are DataType::Float32 + // add a ConvertFloat32ToBFloat16 layer after each of the outputs + if (layer.GetDataType() == DataType::Float32) + { + InsertConvertFp32ToBf16LayersAfter(graph, layer); + } + } + else if (layer.GetType() == LayerType::Output) + { + // if the inputs of this layer are DataType::Float32 + // add a ConvertBFloat16ToFloat32 layer before each of the inputs + if (layer.GetDataType() == DataType::Float32) + { + // NOTE: We need to call InsertConvertBf16ToFp32LayersBefore with expectCorrectInputType = false + // here, otherwise it will expect the inputs to be DataType::BFloat16 + InsertConvertBf16ToFp32LayersBefore(graph, layer, false); + } + } + else if (layer.GetType() != LayerType::ConvertFp32ToBf16 && layer.GetType() != LayerType::ConvertBf16ToFp32) + { + // if the inputs/outputs of this layer are DataType::Float32 + // change the data type for all inputs and outputs to DataType::BFloat16 + for (auto&& input = layer.BeginInputSlots(); input != layer.EndInputSlots(); ++input) + { + // if it is connected to OutputSlot of the InputLayer do not change the DataType of connection + // InputSlots of the current layer will be updated when conversion layer is inserted after InputLayer + Layer& base = input->GetConnectedOutputSlot()->GetOwningLayer(); + if (base.GetType() != LayerType::Input) + { + TensorInfo convertInfo = input->GetConnection()->GetTensorInfo(); + if (convertInfo.GetDataType() == DataType::Float32) + { + convertInfo.SetDataType(DataType::BFloat16); + input->GetConnection()->SetTensorInfo(convertInfo); + } + } + } + + // change outputs to DataType::BFloat16 + for (auto&& output = layer.BeginOutputSlots(); 
output != layer.EndOutputSlots(); ++output) + { + TensorInfo convertInfo = output->GetTensorInfo(); + if (convertInfo.GetDataType() == DataType::Float32) + { + convertInfo.SetDataType(DataType::BFloat16); + output->SetTensorInfo(convertInfo); + } + } + } + } + +protected: + ConvertFp32NetworkToBf16Impl() = default; + ~ConvertFp32NetworkToBf16Impl() = default; +}; + +using Fp32NetworkToBf16Converter = OptimizeForType<Layer, ConvertFp32NetworkToBf16Impl>; + +} // namespace optimizations +} // namespace armnn |