| field     | value | date |
|-----------|-------|------|
| author    | Ryan OShea <ryan.oshea3@arm.com> | 2022-11-07 16:20:48 +0000 |
| committer | ryan.oshea3 <ryan.oshea3@arm.com> | 2022-11-16 15:22:50 +0000 |
| commit    | 31441595009182c985dacbedc70c41ee6664d070 (patch) | |
| tree      | 248a85295aeff4022c9b395fc97748b0a0aa6b35 /src/armnn/NetworkUtils.cpp | |
| parent    | bd18eab07a8f30492de1e462b1815189014cb8d5 (diff) | |
| download  | armnn-31441595009182c985dacbedc70c41ee6664d070.tar.gz | |
IVGCVSW-7214 Disable BF16-Turbo-Mode and remove conversion layers
- Remove Bf16ToFp32 Conversion Layer
- Remove Fp32ToBf16 Conversion Layer
- Remove Bf16 Conversion tests
* Throw exception if m_ReduceFp32ToBf16 optimizer option is set to true
* Provide comments explaining that fast math must be enabled in order to use bf16
* Update docs to inform users to enable fast math for bf16 (a usage sketch follows this commit message)
ExecuteNetwork changes:
* Require bf16_turbo_mode to also have fast_math_enabled set to true
- Remove setting m_ReduceFp32ToBf16 optimizer option
Signed-off-by: Ryan OShea <ryan.oshea3@arm.com>
Change-Id: Ibaa6da9d29c96a1ce32ff5196b0847fde9f04a1c
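For reference, here is a minimal usage sketch of the workflow this change points users toward: rather than setting the removed m_ReduceFp32ToBf16 reduction option (which now throws), BF16 execution is reached by enabling the backend's fast math mode. The sketch is not part of the patch; the calls and the "FastMathEnabled" backend option follow the public Arm NN C++ API as commonly documented around this release, so treat exact names and the thrown exception type as assumptions to verify against your version. The same coupling applies to ExecuteNetwork, where bf16_turbo_mode now also requires fast_math_enabled.

```cpp
// Hypothetical sketch (not from this patch): enable backend fast math so a
// bf16-capable backend may use BF16 kernels internally, instead of the removed
// m_ReduceFp32ToBf16 graph reduction.
#include <armnn/ArmNN.hpp>

int main()
{
    using namespace armnn;

    IRuntimePtr runtime = IRuntime::Create(IRuntime::CreationOptions());

    INetworkPtr network = INetwork::Create();
    // ... add input, workload and output layers to 'network' here ...

    OptimizerOptions optimizerOptions;
    // After this commit, m_ReduceFp32ToBf16 must remain false; setting it to
    // true is expected to make Optimize() throw instead of inserting
    // Fp32<->Bf16 conversion layers.
    optimizerOptions.m_ReduceFp32ToBf16 = false;

    // Opt in to BF16 (where the backend and hardware support it) through the
    // backend's fast math option.
    optimizerOptions.m_ModelOptions.push_back(
        BackendOptions("CpuAcc", {{"FastMathEnabled", true}}));

    std::vector<BackendId> backends = { BackendId("CpuAcc") };
    IOptimizedNetworkPtr optNet = Optimize(*network,
                                           backends,
                                           runtime->GetDeviceSpec(),
                                           optimizerOptions);

    // ... load 'optNet' into the runtime and run inference as usual ...
    return 0;
}
```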
Diffstat (limited to 'src/armnn/NetworkUtils.cpp')
| mode | path | lines changed |
|------|------|---------------|
| -rw-r--r-- | src/armnn/NetworkUtils.cpp | 179 |

1 file changed, 0 insertions, 179 deletions
```diff
diff --git a/src/armnn/NetworkUtils.cpp b/src/armnn/NetworkUtils.cpp
index aaee4eba1a..1d46f029dc 100644
--- a/src/armnn/NetworkUtils.cpp
+++ b/src/armnn/NetworkUtils.cpp
@@ -5,8 +5,6 @@
 
 #include "NetworkUtils.hpp"
 
-#include <armnnUtils/FloatingPointConverter.hpp>
-#include <BFloat16.hpp>
 #include "SubgraphViewSelector.hpp"
 
 #include <armnn/Exceptions.hpp>
@@ -26,17 +24,6 @@ void UpdateOutputSlotToFp32(OutputSlot& outputSlot)
     outputSlot.SetTensorInfo(newTensorInfo);
 }
 
-void ChangeOutputBf16ToFp32(Layer& layer)
-{
-    for (auto&& outputSlot = layer.BeginOutputSlots(); outputSlot != layer.EndOutputSlots(); ++outputSlot)
-    {
-        if (outputSlot->GetTensorInfo().GetDataType() == DataType::BFloat16)
-        {
-            UpdateOutputSlotToFp32(*outputSlot);
-        }
-    }
-}
-
 void ChangeOutputFp16ToFp32(Layer& layer)
 {
     for (auto&& outputSlot = layer.BeginOutputSlots(); outputSlot != layer.EndOutputSlots(); ++outputSlot)
@@ -50,93 +37,6 @@ void ChangeOutputFp16ToFp32(Layer& layer)
 
 } // anonymous namespace
 
-std::vector<ConvertBf16ToFp32Layer*> InsertConvertBf16ToFp32LayersBefore(Graph& graph,
-                                                                         Layer& layer,
-                                                                         bool expectCorrectInputType)
-{
-    std::vector<ConvertBf16ToFp32Layer*> convertLayers;
-    convertLayers.reserve(layer.GetNumInputSlots());
-
-    // Insert a ConvertBf16ToFp32Layer before each input slot
-    for (auto&& inputSlot = layer.BeginInputSlots(); inputSlot != layer.EndInputSlots(); ++inputSlot)
-    {
-        bool allowInsert = true;
-        if (expectCorrectInputType)
-        {
-            // Only insert ConvertBf16ToFp32Layer before BF16 input slots
-            OutputSlot* connectedOutputSlot = inputSlot->GetConnectedOutputSlot();
-            allowInsert =
-                connectedOutputSlot && connectedOutputSlot->GetTensorInfo().GetDataType() == DataType::BFloat16;
-        }
-
-        if (allowInsert)
-        {
-            const std::string name =
-                std::string("convert_bf16_to_fp32-" + std::to_string(inputSlot->GetSlotIndex()) + "-") +
-                layer.GetName();
-            ConvertBf16ToFp32Layer* convertLayer =
-                graph.InsertNewLayer<ConvertBf16ToFp32Layer>(*inputSlot, name.c_str());
-
-            TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
-            convertInfo.SetDataType(DataType::Float32);
-
-            convertLayer->GetOutputSlot().SetTensorInfo(convertInfo);
-
-            convertLayers.emplace_back(convertLayer);
-        }
-    }
-
-    return convertLayers;
-}
-
-std::vector<ConvertFp32ToBf16Layer*> InsertConvertFp32ToBf16LayersBefore(Graph& graph,
-                                                                         Layer& layer,
-                                                                         bool expectCorrectInputType)
-{
-    std::vector<ConvertFp32ToBf16Layer*> convertLayers;
-    convertLayers.reserve(layer.GetNumInputSlots());
-
-    // Insert a ConvertFp32ToBf16Layer before each input slot
-    for (auto&& inputSlot = layer.BeginInputSlots(); inputSlot != layer.EndInputSlots(); ++inputSlot)
-    {
-        bool allowInsert = true;
-
-        if ((layer.GetType() == LayerType::Convolution2d ||
-             layer.GetType() == LayerType::FullyConnected ||
-             layer.GetType() == LayerType::DepthwiseConvolution2d)
-            && inputSlot->GetSlotIndex() == 2)
-        {
-            // Refrain from reducing bias to Bf16
-            continue;
-        }
-        if (expectCorrectInputType)
-        {
-            // Only insert ConvertFp32ToBf16Layer before FP32 input slots
-            OutputSlot* connectedOutputSlot = inputSlot->GetConnectedOutputSlot();
-            allowInsert =
-                connectedOutputSlot && connectedOutputSlot->GetTensorInfo().GetDataType() == DataType::Float32;
-        }
-
-        if (allowInsert)
-        {
-            const std::string name =
-                std::string("convert_fp32_to_bf16-" + std::to_string(inputSlot->GetSlotIndex()) + "-") +
-                layer.GetName();
-            ConvertFp32ToBf16Layer* convertLayer =
-                graph.InsertNewLayer<ConvertFp32ToBf16Layer>(*inputSlot, name.c_str());
-
-            TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
-            convertInfo.SetDataType(DataType::BFloat16);
-
-            convertLayer->GetOutputSlot().SetTensorInfo(convertInfo);
-
-            convertLayers.emplace_back(convertLayer);
-        }
-    }
-
-    return convertLayers;
-}
-
 std::vector<ConvertFp16ToFp32Layer*> InsertConvertFp16ToFp32LayersBefore(Graph& graph,
                                                                          Layer& layer,
                                                                          bool expectCorrectInputType)
@@ -176,39 +76,6 @@ std::vector<ConvertFp16ToFp32Layer*> InsertConvertFp16ToFp32LayersBefore(Graph&
     return convertLayers;
 }
 
-std::vector<ConvertFp32ToBf16Layer*> InsertConvertFp32ToBf16LayersAfter(Graph& graph, Layer& layer)
-{
-    const unsigned int numOutputSlots = layer.GetNumOutputSlots();
-
-    std::vector<ConvertFp32ToBf16Layer*> convertLayers;
-    convertLayers.reserve(numOutputSlots);
-
-    // Update Bf16 output slots to FP32 on current layer
-    ChangeOutputBf16ToFp32(layer);
-
-    // Insert a ConvertFp32ToBf16Layer after each FP32 output slot
-    for (unsigned int slotIndex = 0u; slotIndex < numOutputSlots; ++slotIndex)
-    {
-        OutputSlot& outputSlot = layer.GetOutputSlot(slotIndex);
-        if(outputSlot.GetTensorInfo().GetDataType() == DataType::Float32)
-        {
-            const std::string name =
-                std::string("convert_fp32_to_bf16-" + std::to_string(slotIndex) + "-") + layer.GetName();
-            ConvertFp32ToBf16Layer* convertLayer =
-                graph.InsertNewLayer<ConvertFp32ToBf16Layer>(outputSlot, name.c_str());
-
-            TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
-            convertInfo.SetDataType(DataType::BFloat16);
-
-            convertLayer->GetOutputSlot().SetTensorInfo(convertInfo);
-
-            convertLayers.emplace_back(convertLayer);
-        }
-    }
-
-    return convertLayers;
-}
-
 std::vector<ConvertFp32ToFp16Layer*> InsertConvertFp32ToFp16LayersAfter(Graph& graph, Layer& layer)
 {
     const unsigned int numOutputSlots = layer.GetNumOutputSlots();
@@ -274,50 +141,4 @@ std::vector<DebugLayer*> InsertDebugLayerAfter(Graph& graph, Layer& layer, bool
     return debugLayers;
 }
 
-bool RevertConstantWeightsToFP32(Layer* layer)
-{
-    if (layer->GetType() == LayerType::Convolution2d || layer->GetType() == LayerType::FullyConnected)
-    {
-        // Revert Weights on Constant Layer to FP32 so they can be accessed by Conv2d or FullyConnected
-        // This prevents a conversion layer being added in during backend assignment which blocks
-        // the RedirectMembersToConstantInputs backward compatibility workaround/optimization.
-        auto constantLayerInfo = layer->GetInputSlot(1).GetConnection()->GetTensorInfo();
-
-        if (constantLayerInfo.IsConstant() && constantLayerInfo.GetDataType() == DataType::BFloat16)
-        {
-            std::vector<float> newValues(constantLayerInfo.GetNumElements());
-
-            auto weightLayer = PolymorphicDowncast<ConstantLayer*>(
-                &layer->GetInputSlot(1).GetConnection()->GetOwningIConnectableLayer());
-            armnnUtils::FloatingPointConverter::ConvertBFloat16ToFloat32(
-                weightLayer->m_LayerOutput->GetConstTensor<BFloat16>(),
-                constantLayerInfo.GetNumElements(),
-                newValues.data());
-
-            TensorInfo newInfo(constantLayerInfo.GetShape(), DataType::Float32);
-            newInfo.SetConstant(true);
-            ConstTensor newInput(newInfo, newValues);
-            weightLayer->m_LayerOutput.reset(new ScopedTensorHandle(newInput));
-            weightLayer->GetOutputSlot(0).SetTensorInfo(newInfo);
-
-            // Connect Conv2d/FullyConnected to InputLayer directly leaving out
-            // the ConversionLayer to be cleaned up later
-            auto& conversionLayer = layer->GetInputSlot(0).GetConnection()->GetOwningIConnectableLayer();
-            auto actualInputOutputSlot = conversionLayer.GetInputSlot(0).GetConnection();
-
-            auto& conversionLayerOutputSlot =
-                layer->GetInputSlot(0).GetConnection()->GetOwningIConnectableLayer().GetOutputSlot(0);
-            auto& conversionLayerInputSlot =
-                layer->GetInputSlot(0).GetConnection()->GetOwningIConnectableLayer().GetInputSlot(0);
-            actualInputOutputSlot->Disconnect(conversionLayerInputSlot);
-            conversionLayerOutputSlot.Disconnect(layer->GetInputSlot(0));
-
-            actualInputOutputSlot->Connect(layer->GetInputSlot(0));
-
-            return true;
-        }
-    }
-    return false;
-}
-
 } // namespace armnn
```