aboutsummaryrefslogtreecommitdiff
path: root/src/armnn/NetworkUtils.cpp
diff options
context:
space:
mode:
authorRyan OShea <ryan.oshea3@arm.com>2022-11-07 16:20:48 +0000
committerryan.oshea3 <ryan.oshea3@arm.com>2022-11-16 15:22:50 +0000
commit31441595009182c985dacbedc70c41ee6664d070 (patch)
tree248a85295aeff4022c9b395fc97748b0a0aa6b35 /src/armnn/NetworkUtils.cpp
parentbd18eab07a8f30492de1e462b1815189014cb8d5 (diff)
downloadarmnn-31441595009182c985dacbedc70c41ee6664d070.tar.gz
IVGCVSW-7214 Disable BF16-Turbo-Mode and remove conversion layers
- Remove Bf16ToFp32 Conversion Layer - Remove Fp32ToBf16 Conversion Layer - Remove Bf16 Conversion tests * Throw exception if m_ReduceFp32ToBf16 optimizer option is set to true * Provide comments to enable fast math in order to use bf16 * Update docs to inform users to enable fast math for bf16 Execute Network Changes * Require bf16_turbo_mode to also have fast_math_enabled set to true - Remove setting m_ReduceFp32ToBf16 optimizer option Signed-off-by: Ryan OShea <ryan.oshea3@arm.com> Change-Id: Ibaa6da9d29c96a1ce32ff5196b0847fde9f04a1c
Diffstat (limited to 'src/armnn/NetworkUtils.cpp')
-rw-r--r--src/armnn/NetworkUtils.cpp179
1 files changed, 0 insertions, 179 deletions
diff --git a/src/armnn/NetworkUtils.cpp b/src/armnn/NetworkUtils.cpp
index aaee4eba1a..1d46f029dc 100644
--- a/src/armnn/NetworkUtils.cpp
+++ b/src/armnn/NetworkUtils.cpp
@@ -5,8 +5,6 @@
#include "NetworkUtils.hpp"
-#include <armnnUtils/FloatingPointConverter.hpp>
-#include <BFloat16.hpp>
#include "SubgraphViewSelector.hpp"
#include <armnn/Exceptions.hpp>
@@ -26,17 +24,6 @@ void UpdateOutputSlotToFp32(OutputSlot& outputSlot)
outputSlot.SetTensorInfo(newTensorInfo);
}
-void ChangeOutputBf16ToFp32(Layer& layer)
-{
- for (auto&& outputSlot = layer.BeginOutputSlots(); outputSlot != layer.EndOutputSlots(); ++outputSlot)
- {
- if (outputSlot->GetTensorInfo().GetDataType() == DataType::BFloat16)
- {
- UpdateOutputSlotToFp32(*outputSlot);
- }
- }
-}
-
void ChangeOutputFp16ToFp32(Layer& layer)
{
for (auto&& outputSlot = layer.BeginOutputSlots(); outputSlot != layer.EndOutputSlots(); ++outputSlot)
@@ -50,93 +37,6 @@ void ChangeOutputFp16ToFp32(Layer& layer)
} // anonymous namespace
-std::vector<ConvertBf16ToFp32Layer*> InsertConvertBf16ToFp32LayersBefore(Graph& graph,
- Layer& layer,
- bool expectCorrectInputType)
-{
- std::vector<ConvertBf16ToFp32Layer*> convertLayers;
- convertLayers.reserve(layer.GetNumInputSlots());
-
- // Insert a ConvertBf16ToFp32Layer before each input slot
- for (auto&& inputSlot = layer.BeginInputSlots(); inputSlot != layer.EndInputSlots(); ++inputSlot)
- {
- bool allowInsert = true;
- if (expectCorrectInputType)
- {
- // Only insert ConvertBf16ToFp32Layer before BF16 input slots
- OutputSlot* connectedOutputSlot = inputSlot->GetConnectedOutputSlot();
- allowInsert =
- connectedOutputSlot && connectedOutputSlot->GetTensorInfo().GetDataType() == DataType::BFloat16;
- }
-
- if (allowInsert)
- {
- const std::string name =
- std::string("convert_bf16_to_fp32-" + std::to_string(inputSlot->GetSlotIndex()) + "-") +
- layer.GetName();
- ConvertBf16ToFp32Layer* convertLayer =
- graph.InsertNewLayer<ConvertBf16ToFp32Layer>(*inputSlot, name.c_str());
-
- TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
- convertInfo.SetDataType(DataType::Float32);
-
- convertLayer->GetOutputSlot().SetTensorInfo(convertInfo);
-
- convertLayers.emplace_back(convertLayer);
- }
- }
-
- return convertLayers;
-}
-
-std::vector<ConvertFp32ToBf16Layer*> InsertConvertFp32ToBf16LayersBefore(Graph& graph,
- Layer& layer,
- bool expectCorrectInputType)
-{
- std::vector<ConvertFp32ToBf16Layer*> convertLayers;
- convertLayers.reserve(layer.GetNumInputSlots());
-
- // Insert a ConvertFp32ToBf16Layer before each input slot
- for (auto&& inputSlot = layer.BeginInputSlots(); inputSlot != layer.EndInputSlots(); ++inputSlot)
- {
- bool allowInsert = true;
-
- if ((layer.GetType() == LayerType::Convolution2d ||
- layer.GetType() == LayerType::FullyConnected ||
- layer.GetType() == LayerType::DepthwiseConvolution2d)
- && inputSlot->GetSlotIndex() == 2)
- {
- // Refrain from reducing bias to Bf16
- continue;
- }
- if (expectCorrectInputType)
- {
- // Only insert ConvertFp32ToBf16Layer before FP32 input slots
- OutputSlot* connectedOutputSlot = inputSlot->GetConnectedOutputSlot();
- allowInsert =
- connectedOutputSlot && connectedOutputSlot->GetTensorInfo().GetDataType() == DataType::Float32;
- }
-
- if (allowInsert)
- {
- const std::string name =
- std::string("convert_fp32_to_bf16-" + std::to_string(inputSlot->GetSlotIndex()) + "-") +
- layer.GetName();
- ConvertFp32ToBf16Layer* convertLayer =
- graph.InsertNewLayer<ConvertFp32ToBf16Layer>(*inputSlot, name.c_str());
-
- TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
- convertInfo.SetDataType(DataType::BFloat16);
-
- convertLayer->GetOutputSlot().SetTensorInfo(convertInfo);
-
- convertLayers.emplace_back(convertLayer);
- }
- }
-
- return convertLayers;
-}
-
std::vector<ConvertFp16ToFp32Layer*> InsertConvertFp16ToFp32LayersBefore(Graph& graph,
Layer& layer,
bool expectCorrectInputType)
@@ -176,39 +76,6 @@ std::vector<ConvertFp16ToFp32Layer*> InsertConvertFp16ToFp32LayersBefore(Graph&
return convertLayers;
}
-std::vector<ConvertFp32ToBf16Layer*> InsertConvertFp32ToBf16LayersAfter(Graph& graph, Layer& layer)
-{
- const unsigned int numOutputSlots = layer.GetNumOutputSlots();
-
- std::vector<ConvertFp32ToBf16Layer*> convertLayers;
- convertLayers.reserve(numOutputSlots);
-
- // Update Bf16 output slots to FP32 on current layer
- ChangeOutputBf16ToFp32(layer);
-
- // Insert a ConvertFp32ToBf16Layer after each FP32 output slot
- for (unsigned int slotIndex = 0u; slotIndex < numOutputSlots; ++slotIndex)
- {
- OutputSlot& outputSlot = layer.GetOutputSlot(slotIndex);
- if(outputSlot.GetTensorInfo().GetDataType() == DataType::Float32)
- {
- const std::string name =
- std::string("convert_fp32_to_bf16-" + std::to_string(slotIndex) + "-") + layer.GetName();
- ConvertFp32ToBf16Layer* convertLayer =
- graph.InsertNewLayer<ConvertFp32ToBf16Layer>(outputSlot, name.c_str());
-
- TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
- convertInfo.SetDataType(DataType::BFloat16);
-
- convertLayer->GetOutputSlot().SetTensorInfo(convertInfo);
-
- convertLayers.emplace_back(convertLayer);
- }
- }
-
- return convertLayers;
-}
-
std::vector<ConvertFp32ToFp16Layer*> InsertConvertFp32ToFp16LayersAfter(Graph& graph, Layer& layer)
{
const unsigned int numOutputSlots = layer.GetNumOutputSlots();
@@ -274,50 +141,4 @@ std::vector<DebugLayer*> InsertDebugLayerAfter(Graph& graph, Layer& layer, bool
return debugLayers;
}
-bool RevertConstantWeightsToFP32(Layer* layer)
-{
- if (layer->GetType() == LayerType::Convolution2d || layer->GetType() == LayerType::FullyConnected)
- {
- // Revert Weights on Constant Layer to FP32 so they can be accessed by Conv2d or FullyConnected
- // This prevents a conversion layer being added in during backend assignment which blocks
- // the RedirectMembersToConstantInputs backward compatibility workaround/optimization.
- auto constantLayerInfo = layer->GetInputSlot(1).GetConnection()->GetTensorInfo();
-
- if (constantLayerInfo.IsConstant() && constantLayerInfo.GetDataType() == DataType::BFloat16)
- {
- std::vector<float> newValues(constantLayerInfo.GetNumElements());
-
- auto weightLayer = PolymorphicDowncast<ConstantLayer*>(
- &layer->GetInputSlot(1).GetConnection()->GetOwningIConnectableLayer());
- armnnUtils::FloatingPointConverter::ConvertBFloat16ToFloat32(
- weightLayer->m_LayerOutput->GetConstTensor<BFloat16>(),
- constantLayerInfo.GetNumElements(),
- newValues.data());
-
- TensorInfo newInfo(constantLayerInfo.GetShape(), DataType::Float32);
- newInfo.SetConstant(true);
- ConstTensor newInput(newInfo, newValues);
- weightLayer->m_LayerOutput.reset(new ScopedTensorHandle(newInput));
- weightLayer->GetOutputSlot(0).SetTensorInfo(newInfo);
-
- // Connect Conv2d/FullyConnected to InputLayer directly leaving out
- // the ConversionLayer to be cleaned up later
- auto& conversionLayer = layer->GetInputSlot(0).GetConnection()->GetOwningIConnectableLayer();
- auto actualInputOutputSlot = conversionLayer.GetInputSlot(0).GetConnection();
-
- auto& conversionLayerOutputSlot =
- layer->GetInputSlot(0).GetConnection()->GetOwningIConnectableLayer().GetOutputSlot(0);
- auto& conversionLayerInputSlot =
- layer->GetInputSlot(0).GetConnection()->GetOwningIConnectableLayer().GetInputSlot(0);
- actualInputOutputSlot->Disconnect(conversionLayerInputSlot);
- conversionLayerOutputSlot.Disconnect(layer->GetInputSlot(0));
-
- actualInputOutputSlot->Connect(layer->GetInputSlot(0));
-
- return true;
- }
- }
- return false;
-}
-
} // namespace armnn