diff options
author | Ryan OShea <ryan.oshea3@arm.com> | 2022-11-07 16:20:48 +0000 |
---|---|---|
committer | ryan.oshea3 <ryan.oshea3@arm.com> | 2022-11-16 15:22:50 +0000 |
commit | 31441595009182c985dacbedc70c41ee6664d070 (patch) | |
tree | 248a85295aeff4022c9b395fc97748b0a0aa6b35 /src/armnn/optimizations | |
parent | bd18eab07a8f30492de1e462b1815189014cb8d5 (diff) | |
download | armnn-31441595009182c985dacbedc70c41ee6664d070.tar.gz |
IVGCVSW-7214 Disable BF16-Turbo-Mode and remove conversion layers
- Remove Bf16ToFp32 Conversion Layer
- Remove Fp32ToBf16 Conversion Layer
- Remove Bf16 Conversion tests
* Throw exception if m_ReduceFp32ToBf16 optimizer option is set to true
* Provide comments to enable fast math in order to use bf16
* Update docs to inform users to enable fast math for bf16
Execute Network Changes
* Require bf16_turbo_mode to also have fast_math_enabled set to true
- Remove setting m_ReduceFp32ToBf16 optimizer option
Signed-off-by: Ryan OShea <ryan.oshea3@arm.com>
Change-Id: Ibaa6da9d29c96a1ce32ff5196b0847fde9f04a1c
Diffstat (limited to 'src/armnn/optimizations')
-rw-r--r-- | src/armnn/optimizations/All.hpp | 2 | ||||
-rw-r--r-- | src/armnn/optimizations/ConvertConstants.hpp | 54 | ||||
-rw-r--r-- | src/armnn/optimizations/ConvertFp32NetworkToBf16.hpp | 79 | ||||
-rw-r--r-- | src/armnn/optimizations/FuseConvertFp32ToBf16IntoConstLayers.hpp | 89 |
4 files changed, 0 insertions, 224 deletions
diff --git a/src/armnn/optimizations/All.hpp b/src/armnn/optimizations/All.hpp index 0421f31973..a11dec9446 100644 --- a/src/armnn/optimizations/All.hpp +++ b/src/armnn/optimizations/All.hpp @@ -9,8 +9,6 @@ #include "ConvertConstants.hpp" #include "ConvertConstDequantisationLayersToConstLayers.hpp" #include "ConvertConstPermuteLayersToConstLayers.hpp" -#include "FuseConvertFp32ToBf16IntoConstLayers.hpp" -#include "ConvertFp32NetworkToBf16.hpp" #include "ConvertFp32NetworkToFp16.hpp" #include "FoldPadIntoLayer2d.hpp" #include "FuseBatchNorm.hpp" diff --git a/src/armnn/optimizations/ConvertConstants.hpp b/src/armnn/optimizations/ConvertConstants.hpp index 54c14e5c89..7b2f1fd291 100644 --- a/src/armnn/optimizations/ConvertConstants.hpp +++ b/src/armnn/optimizations/ConvertConstants.hpp @@ -11,7 +11,6 @@ #include <armnn/backends/TensorHandle.hpp> #include <armnn/utility/IgnoreUnused.hpp> -#include <BFloat16.hpp> #include <Half.hpp> namespace armnn @@ -19,27 +18,6 @@ namespace armnn namespace optimizations { -struct BFloat16ToFloat32 -{ - static void Func(std::shared_ptr<ConstTensorHandle>& handle) - { - const TensorInfo& info = handle->GetTensorInfo(); - - if (info.GetDataType() == DataType::BFloat16) - { - std::vector<float> newValues(info.GetNumElements()); - - armnnUtils::FloatingPointConverter::ConvertBFloat16ToFloat32(handle->GetConstTensor<BFloat16>(), - info.GetNumElements(), - newValues.data()); - - TensorInfo newInfo(info.GetShape(), DataType::Float32, 0.0f, 0, true); - ConstTensor newInput(newInfo, newValues); - handle.reset(new ScopedTensorHandle(newInput)); - } - } -}; - struct Float16ToFloat32 { static void Func(std::shared_ptr<ConstTensorHandle>& handle) @@ -61,27 +39,6 @@ struct Float16ToFloat32 } }; -struct Float32ToBFloat16 -{ - static void Func(std::shared_ptr<ConstTensorHandle>& handle) - { - const TensorInfo& info = handle->GetTensorInfo(); - - if (info.GetDataType() == DataType::Float32) - { - std::vector<BFloat16> newValues(info.GetNumElements()); 
- - armnnUtils::FloatingPointConverter::ConvertFloat32ToBFloat16(handle->GetConstTensor<float>(), - info.GetNumElements(), - newValues.data()); - - TensorInfo newInfo(info.GetShape(), DataType::BFloat16, 0.0f, 0, true); - ConstTensor newInput(newInfo, newValues); - handle.reset(new ScopedTensorHandle(newInput)); - } - } -}; - struct Float32ToFloat16 { static void Func(std::shared_ptr<ConstTensorHandle>& handle) @@ -138,17 +95,6 @@ struct IsFloat16Layer } }; -struct IsBFloat16Layer -{ - static bool Test(const Layer& layer) - { - return layer.GetDataType() == DataType::BFloat16; - } -}; - -using ConvertConstantsBFloatToFloat = ConvertConstants<BFloat16ToFloat32, IsFloat32Layer>; -using ConvertConstantsFloatToBFloat = ConvertConstants<Float32ToBFloat16, IsBFloat16Layer>; - using ConvertConstantsHalfToFloat = ConvertConstants<Float16ToFloat32, IsFloat32Layer>; using ConvertConstantsFloatToHalf = ConvertConstants<Float32ToFloat16, IsFloat16Layer>; diff --git a/src/armnn/optimizations/ConvertFp32NetworkToBf16.hpp b/src/armnn/optimizations/ConvertFp32NetworkToBf16.hpp deleted file mode 100644 index 6c80e740be..0000000000 --- a/src/armnn/optimizations/ConvertFp32NetworkToBf16.hpp +++ /dev/null @@ -1,79 +0,0 @@ -// -// Copyright © 2020 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// -#pragma once - -#include "NetworkUtils.hpp" -#include "Optimization.hpp" - -#include <armnn/utility/PolymorphicDowncast.hpp> - -namespace armnn -{ -namespace optimizations -{ - -template <typename LayerT> -inline LayerT* ConvertWeight(Layer* l) -{ - LayerT* layer = PolymorphicDowncast<LayerT*>(l); - if ((layer->GetType() == LayerType::Convolution2d || layer->GetType() == LayerType::FullyConnected) - && layer->m_Weight) - { - const TensorInfo& info = layer->m_Weight->GetTensorInfo(); - - if (info.GetDataType() == DataType::Float32) - { - std::vector<BFloat16> newValues(info.GetNumElements()); - - armnnUtils::FloatingPointConverter::ConvertFloat32ToBFloat16( - layer->m_Weight->template GetConstTensor<float>(), - info.GetNumElements(), - newValues.data()); - - TensorInfo newInfo(info); - newInfo.SetDataType(DataType::BFloat16); - ConstTensor newInput(newInfo, newValues); - layer->m_Weight.reset(new ScopedTensorHandle(newInput)); - } - } - return layer; -} - -class ConvertFp32NetworkToBf16Impl -{ -public: - - void Run(Graph& graph, Layer& layer) const - { - // Only convert Float32 To BFloat16 for the Input of Convolution2d layer and FullyConnected layer. - // And also convert weight data type from Float32 to Bfloat16. - // Do not convert bias data type. 
- if (layer.GetType() == LayerType::Convolution2d) - { - if (layer.GetDataType() == DataType::Float32) - { - InsertConvertFp32ToBf16LayersBefore(graph,layer); - ConvertWeight<Convolution2dLayer>(&layer); - } - } - else if (layer.GetType() == LayerType::FullyConnected) - { - if (layer.GetDataType() == DataType::Float32) - { - InsertConvertFp32ToBf16LayersBefore(graph,layer); - ConvertWeight<FullyConnectedLayer>(&layer); - } - } - } - -protected: - ConvertFp32NetworkToBf16Impl() = default; - ~ConvertFp32NetworkToBf16Impl() = default; -}; - -using Fp32NetworkToBf16Converter = OptimizeForType<Layer, ConvertFp32NetworkToBf16Impl>; - -} // namespace optimizations -} // namespace armnn diff --git a/src/armnn/optimizations/FuseConvertFp32ToBf16IntoConstLayers.hpp b/src/armnn/optimizations/FuseConvertFp32ToBf16IntoConstLayers.hpp deleted file mode 100644 index d112010539..0000000000 --- a/src/armnn/optimizations/FuseConvertFp32ToBf16IntoConstLayers.hpp +++ /dev/null @@ -1,89 +0,0 @@ -// -// Copyright © 2022 Arm Ltd and Contributors. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "Optimization.hpp" -#include <armnnUtils/Permute.hpp> -#include <ResolveType.hpp> - -namespace armnn -{ -namespace optimizations -{ - -class FuseConvertFp32ToBf16IntoConstLayers -{ -public: - void Run(Graph& graph, InputSlot& connection) const - { - Layer& base = connection.GetConnectedOutputSlot()->GetOwningLayer(); - Layer& child = connection.GetOwningLayer(); - - ARMNN_ASSERT(base.GetType() == LayerType::Constant); - ARMNN_ASSERT(child.GetType() == LayerType::ConvertFp32ToBf16); - - auto dataType = base.GetDataType(); - switch (dataType) - { - case DataType::Float32: - ReplaceConvertFp32ToBf16Layer<DataType::BFloat16>( - graph, - PolymorphicDowncast<ConstantLayer*>(&base), - PolymorphicDowncast<ConvertFp32ToBf16Layer*>(&child)); - break; - default: - throw InvalidArgumentException(GetDataTypeName(dataType) + - std::string(" Constant Layer cannot be fused into ") + - GetDataTypeName(child.GetDataType()) + - std::string(" conversion layer.")); - } - } -protected: - FuseConvertFp32ToBf16IntoConstLayers() = default; - ~FuseConvertFp32ToBf16IntoConstLayers() = default; -private: - template<armnn::DataType ArmnnType, - typename T = armnn::ResolveType<ArmnnType>> - static void ReplaceConvertFp32ToBf16Layer(Graph& graph, - ConstantLayer* constantLayer, - ConvertFp32ToBf16Layer* convertFp32ToBf16layer) - { - IgnoreUnused(graph); - /** - * This optimisation is to find situations where a constant set of inputs is being provided to a - * ConvertFp32ToBf16 layer. In this case we don't want the overhead of Converting the values on - * every inference, instead we want to Convert them once and store them in a Const layer to be - * used everytime as they will not change. 
- */ - TensorInfo outputConvertFp32ToBf16Info = convertFp32ToBf16layer->GetOutputSlot(0).GetTensorInfo(); - std::vector<T> newValues(outputConvertFp32ToBf16Info.GetNumElements()); - - armnnUtils::FloatingPointConverter::ConvertFloat32ToBFloat16( - constantLayer->m_LayerOutput->GetConstTensor<float>(), - outputConvertFp32ToBf16Info.GetNumElements(), - newValues.data()); - TensorInfo newInfo = outputConvertFp32ToBf16Info; - newInfo.SetConstant(true); - ConstTensor newInput(newInfo, newValues); - - constantLayer->m_LayerOutput.reset(new ScopedTensorHandle(newInput)); - - // Moves connections in convertFp32ToBf16layer output slot to the constant layer. - // ConvertFp32ToBf16layer layer will be removed if left unconnected. - convertFp32ToBf16layer->GetOutputSlot().MoveAllConnections(constantLayer->GetOutputSlot()); - - // Updating the output tensor - constantLayer->GetOutputSlot(0).SetTensorInfo(newInfo); - ARMNN_ASSERT(constantLayer->GetOutputSlot(0).GetTensorInfo().IsConstant() == true); - } -}; - -using FuseConversionLayersIntoConstLayers = OptimizeForConnection<ConstantLayer, - ConvertFp32ToBf16Layer, - FuseConvertFp32ToBf16IntoConstLayers>; - -} // namespace optimizations -} // namespace armnn
\ No newline at end of file |