diff options
Diffstat (limited to 'src/armnn/NetworkUtils.cpp')
-rw-r--r-- | src/armnn/NetworkUtils.cpp | 87 |
1 files changed, 85 insertions, 2 deletions
diff --git a/src/armnn/NetworkUtils.cpp b/src/armnn/NetworkUtils.cpp
index 1bbeaac005..8653a08510 100644
--- a/src/armnn/NetworkUtils.cpp
+++ b/src/armnn/NetworkUtils.cpp
@@ -16,7 +16,7 @@ namespace armnn
 namespace
 {
 
-void UpdateOutputSlotFp16ToFp32(OutputSlot& outputSlot)
+void UpdateOutputSlotToFp32(OutputSlot& outputSlot)
 {
     const TensorInfo& origTensorInfo = outputSlot.GetTensorInfo();
     TensorInfo newTensorInfo(origTensorInfo);
@@ -24,19 +24,69 @@ void UpdateOutputSlotFp16ToFp32(OutputSlot& outputSlot)
     outputSlot.SetTensorInfo(newTensorInfo);
 }
 
+void ChangeOutputBf16ToFp32(Layer& layer)
+{
+    for (auto&& outputSlot = layer.BeginOutputSlots(); outputSlot != layer.EndOutputSlots(); ++outputSlot)
+    {
+        if (outputSlot->GetTensorInfo().GetDataType() == DataType::BFloat16)
+        {
+            UpdateOutputSlotToFp32(*outputSlot);
+        }
+    }
+}
+
 void ChangeOutputFp16ToFp32(Layer& layer)
 {
     for (auto&& outputSlot = layer.BeginOutputSlots(); outputSlot != layer.EndOutputSlots(); ++outputSlot)
     {
         if (outputSlot->GetTensorInfo().GetDataType() == DataType::Float16)
         {
-            UpdateOutputSlotFp16ToFp32(*outputSlot);
+            UpdateOutputSlotToFp32(*outputSlot);
         }
     }
 }
 
 } // anonymous namespace
 
+std::vector<ConvertBf16ToFp32Layer*> InsertConvertBf16ToFp32LayersBefore(Graph& graph,
+                                                                         Layer& layer,
+                                                                         bool expectCorrectInputType)
+{
+    std::vector<ConvertBf16ToFp32Layer*> convertLayers;
+    convertLayers.reserve(layer.GetNumInputSlots());
+
+    // Insert a ConvertBf16ToFp32Layer before each input slot
+    for (auto&& inputSlot = layer.BeginInputSlots(); inputSlot != layer.EndInputSlots(); ++inputSlot)
+    {
+        bool allowInsert = true;
+        if (expectCorrectInputType)
+        {
+            // Only insert ConvertBf16ToFp32Layer before BF16 input slots
+            OutputSlot* connectedOutputSlot = inputSlot->GetConnectedOutputSlot();
+            allowInsert =
+                connectedOutputSlot && connectedOutputSlot->GetTensorInfo().GetDataType() == DataType::BFloat16;
+        }
+
+        if (allowInsert)
+        {
+            const std::string name =
+                std::string("convert_bf16_to_fp32-" + std::to_string(inputSlot->GetSlotIndex()) + "-") +
+                layer.GetName();
+            ConvertBf16ToFp32Layer* convertLayer =
+                graph.InsertNewLayer<ConvertBf16ToFp32Layer>(*inputSlot, name.c_str());
+
+            TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
+            convertInfo.SetDataType(DataType::Float32);
+
+            convertLayer->GetOutputSlot().SetTensorInfo(convertInfo);
+
+            convertLayers.emplace_back(convertLayer);
+        }
+    }
+
+    return convertLayers;
+}
+
 std::vector<ConvertFp16ToFp32Layer*> InsertConvertFp16ToFp32LayersBefore(Graph& graph,
                                                                           Layer& layer,
                                                                           bool expectCorrectInputType)
@@ -76,6 +126,39 @@ std::vector<ConvertFp16ToFp32Layer*> InsertConvertFp16ToFp32LayersBefore(Graph&
     return convertLayers;
 }
 
+std::vector<ConvertFp32ToBf16Layer*> InsertConvertFp32ToBf16LayersAfter(Graph& graph, Layer& layer)
+{
+    const unsigned int numOutputSlots = layer.GetNumOutputSlots();
+
+    std::vector<ConvertFp32ToBf16Layer*> convertLayers;
+    convertLayers.reserve(numOutputSlots);
+
+    // Update Bf16 output slots to FP32 on current layer
+    ChangeOutputBf16ToFp32(layer);
+
+    // Insert a ConvertFp32ToBf16Layer after each FP32 output slot
+    for (unsigned int slotIndex = 0u; slotIndex < numOutputSlots; ++slotIndex)
+    {
+        OutputSlot& outputSlot = layer.GetOutputSlot(slotIndex);
+        if(outputSlot.GetTensorInfo().GetDataType() == DataType::Float32)
+        {
+            const std::string name =
+                std::string("convert_fp32_to_bf16-" + std::to_string(slotIndex) + "-") + layer.GetName();
+            ConvertFp32ToBf16Layer* convertLayer =
+                graph.InsertNewLayer<ConvertFp32ToBf16Layer>(outputSlot, name.c_str());
+
+            TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
+            convertInfo.SetDataType(DataType::BFloat16);
+
+            convertLayer->GetOutputSlot().SetTensorInfo(convertInfo);
+
+            convertLayers.emplace_back(convertLayer);
+        }
+    }
+
+    return convertLayers;
+}
+
 std::vector<ConvertFp32ToFp16Layer*> InsertConvertFp32ToFp16LayersAfter(Graph& graph, Layer& layer)
{ const unsigned int numOutputSlots = layer.GetNumOutputSlots(); |