From bc7ffb5e9e5f4c86280b20c65772eb12d8bb140e Mon Sep 17 00:00:00 2001
From: Narumol Prangnawarat
Date: Fri, 20 Mar 2020 15:01:01 +0000
Subject: IVGCVSW-4520 Implement BFloat16 Optimizer

* Add ReduceFp32ToBf16 to OptimizerOptions
* Add ConvertFp32NetworkToBf16
* Add utility functions to insert conversion layers
* Add constant conversion BF16 <-> FP32
* Unit tests

Signed-off-by: Narumol Prangnawarat
Change-Id: Iaff77e20c721400b052cb37eb9ef6fe16d7abaff
---
 src/armnn/NetworkUtils.cpp | 87 ++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 85 insertions(+), 2 deletions(-)

(limited to 'src/armnn/NetworkUtils.cpp')

diff --git a/src/armnn/NetworkUtils.cpp b/src/armnn/NetworkUtils.cpp
index 1bbeaac005..8653a08510 100644
--- a/src/armnn/NetworkUtils.cpp
+++ b/src/armnn/NetworkUtils.cpp
@@ -16,7 +16,7 @@ namespace armnn
 namespace
 {

-void UpdateOutputSlotFp16ToFp32(OutputSlot& outputSlot)
+void UpdateOutputSlotToFp32(OutputSlot& outputSlot)
 {
     const TensorInfo& origTensorInfo = outputSlot.GetTensorInfo();
     TensorInfo newTensorInfo(origTensorInfo);
@@ -24,19 +24,69 @@ void UpdateOutputSlotFp16ToFp32(OutputSlot& outputSlot)
     outputSlot.SetTensorInfo(newTensorInfo);
 }

+void ChangeOutputBf16ToFp32(Layer& layer)
+{
+    for (auto&& outputSlot = layer.BeginOutputSlots(); outputSlot != layer.EndOutputSlots(); ++outputSlot)
+    {
+        if (outputSlot->GetTensorInfo().GetDataType() == DataType::BFloat16)
+        {
+            UpdateOutputSlotToFp32(*outputSlot);
+        }
+    }
+}
+
 void ChangeOutputFp16ToFp32(Layer& layer)
 {
     for (auto&& outputSlot = layer.BeginOutputSlots(); outputSlot != layer.EndOutputSlots(); ++outputSlot)
     {
         if (outputSlot->GetTensorInfo().GetDataType() == DataType::Float16)
         {
-            UpdateOutputSlotFp16ToFp32(*outputSlot);
+            UpdateOutputSlotToFp32(*outputSlot);
         }
     }
 }

 } // anonymous namespace

+std::vector<ConvertBf16ToFp32Layer*> InsertConvertBf16ToFp32LayersBefore(Graph& graph,
+                                                                         Layer& layer,
+                                                                         bool expectCorrectInputType)
+{
+    std::vector<ConvertBf16ToFp32Layer*> convertLayers;
+    convertLayers.reserve(layer.GetNumInputSlots());
+
+    //
Insert a ConvertBf16ToFp32Layer before each input slot
+    for (auto&& inputSlot = layer.BeginInputSlots(); inputSlot != layer.EndInputSlots(); ++inputSlot)
+    {
+        bool allowInsert = true;
+        if (expectCorrectInputType)
+        {
+            // Only insert ConvertBf16ToFp32Layer before BF16 input slots
+            OutputSlot* connectedOutputSlot = inputSlot->GetConnectedOutputSlot();
+            allowInsert =
+                connectedOutputSlot && connectedOutputSlot->GetTensorInfo().GetDataType() == DataType::BFloat16;
+        }
+
+        if (allowInsert)
+        {
+            const std::string name =
+                std::string("convert_bf16_to_fp32-" + std::to_string(inputSlot->GetSlotIndex()) + "-") +
+                layer.GetName();
+            ConvertBf16ToFp32Layer* convertLayer =
+                graph.InsertNewLayer<ConvertBf16ToFp32Layer>(*inputSlot, name.c_str());
+
+            TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
+            convertInfo.SetDataType(DataType::Float32);
+
+            convertLayer->GetOutputSlot().SetTensorInfo(convertInfo);
+
+            convertLayers.emplace_back(convertLayer);
+        }
+    }
+
+    return convertLayers;
+}
+
 std::vector<ConvertFp16ToFp32Layer*> InsertConvertFp16ToFp32LayersBefore(Graph& graph,
                                                                          Layer& layer,
                                                                          bool expectCorrectInputType)
@@ -76,6 +126,39 @@ std::vector<ConvertFp16ToFp32Layer*> InsertConvertFp16ToFp32LayersBefore(Graph&
     return convertLayers;
 }

+std::vector<ConvertFp32ToBf16Layer*> InsertConvertFp32ToBf16LayersAfter(Graph& graph, Layer& layer)
+{
+    const unsigned int numOutputSlots = layer.GetNumOutputSlots();
+
+    std::vector<ConvertFp32ToBf16Layer*> convertLayers;
+    convertLayers.reserve(numOutputSlots);
+
+    // Update Bf16 output slots to FP32 on current layer
+    ChangeOutputBf16ToFp32(layer);
+
+    // Insert a ConvertFp32ToBf16Layer after each FP32 output slot
+    for (unsigned int slotIndex = 0u; slotIndex < numOutputSlots; ++slotIndex)
+    {
+        OutputSlot& outputSlot = layer.GetOutputSlot(slotIndex);
+        if(outputSlot.GetTensorInfo().GetDataType() == DataType::Float32)
+        {
+            const std::string name =
+                std::string("convert_fp32_to_bf16-" + std::to_string(slotIndex) + "-") + layer.GetName();
+            ConvertFp32ToBf16Layer* convertLayer =
graph.InsertNewLayer<ConvertFp32ToBf16Layer>(outputSlot, name.c_str());
+
+            TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
+            convertInfo.SetDataType(DataType::BFloat16);
+
+            convertLayer->GetOutputSlot().SetTensorInfo(convertInfo);
+
+            convertLayers.emplace_back(convertLayer);
+        }
+    }
+
+    return convertLayers;
+}
+
 std::vector<ConvertFp32ToFp16Layer*> InsertConvertFp32ToFp16LayersAfter(Graph& graph, Layer& layer)
 {
     const unsigned int numOutputSlots = layer.GetNumOutputSlots();
--
cgit v1.2.1