From 87972be8d838f6fde6f6e98dd81c422e85457a5e Mon Sep 17 00:00:00 2001
From: Aron Virginas-Tar
Date: Wed, 13 Nov 2019 15:16:28 +0000
Subject: IVGCVSW-4119 Fix FP16 to FP32 fallback mechanism in optimizer to work with Dequantize

* Check for output data type as well as input data type when determining
  whether we should attempt to fall back to FP32 if FP16 is not supported
* Override output type for Dequantize in IsLayerSupported() instead of
  input type
* Updated original input type from FP16 to FP32 in
  InsertConvertFp32ToFp16LayersAfter()

Signed-off-by: Aron Virginas-Tar
Change-Id: Ic6477fd17cea5a91bd8bf9ae0cf836520897d5b7
---
 src/armnn/Network.cpp                           |  32 ++++--
 src/armnn/NetworkUtils.cpp                      | 114 +++++++++++++--------
 src/armnn/NetworkUtils.hpp                      |   4 +-
 .../optimizations/ConvertFp32NetworkToFp16.hpp  |   5 +-
 src/backends/backendsCommon/WorkloadFactory.cpp |   4 +-
 5 files changed, 102 insertions(+), 57 deletions(-)

diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp
index 573f6a19e8..1797baf78e 100644
--- a/src/armnn/Network.cpp
+++ b/src/armnn/Network.cpp
@@ -71,8 +71,6 @@ Status OptimizedNetwork::SerializeToDot(std::ostream& stream) const
     return m_Graph->SerializeToDot(stream);
 }
 
-
-
 void ReportError(const std::string& errorMessage,
                  Optional<std::vector<std::string>&> errorMessages)
 {
@@ -166,7 +164,12 @@ OptimizationResult AssignBackends(OptimizedNetwork* optNetObjPtr,
     for (auto it = firstLayer; it != lastLayer; ++it)
     {
         auto layer = *it;
-        DataType dataType = layer->GetDataType();
+
+        DataType dataTypeIn  = layer->GetNumInputSlots() == 0 ? DataType::Float32 :
+            layer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo().GetDataType();
+        DataType dataTypeOut = layer->GetNumOutputSlots() == 0 ? DataType::Float32 :
+            layer->GetOutputSlot(0).GetTensorInfo().GetDataType();
+
         std::string reasonIfUnsupported;
         bool found = false;
         if (!CheckScaleSetOnQuantizedType(layer, errMessages))
@@ -181,21 +184,29 @@ OptimizationResult AssignBackends(OptimizedNetwork* optNetObjPtr,
             // need to set the compute device on the layer
             // before we can check if it is supported
             layer->SetBackendId(backend);
-            if (!IWorkloadFactory::IsLayerSupported(*layer, dataType, reasonIfUnsupported))
+            if (!IWorkloadFactory::IsLayerSupported(*layer, EmptyOptional(), reasonIfUnsupported))
             {
-                if (dataType == DataType::Float16)
+                if (dataTypeIn == DataType::Float16 || dataTypeOut == DataType::Float16)
                 {
                     if (IWorkloadFactory::IsLayerSupported(*layer, DataType::Float32, reasonIfUnsupported)
                         && layer->GetType() != LayerType::ConvertFp32ToFp16
                         && layer->GetType() != LayerType::ConvertFp16ToFp32)
                     {
                         // Insert FP16 -> FP32 conversion layer before current layer
-                        std::vector<ConvertFp16ToFp32Layer*> convertFp16ToFp32Layers =
-                            InsertConvertFp16ToFp32LayersBefore(optNetObjPtr->GetGraph(), *layer);
+                        std::vector<ConvertFp16ToFp32Layer*> convertFp16ToFp32Layers;
+                        if (dataTypeIn == DataType::Float16)
+                        {
+                            convertFp16ToFp32Layers =
+                                InsertConvertFp16ToFp32LayersBefore(optNetObjPtr->GetGraph(), *layer);
+                        }
 
                         // Insert FP32 -> FP16 conversion layer after current layer
-                        std::vector<ConvertFp32ToFp16Layer*> convertFp32ToFp16Layers =
-                            InsertConvertFp32ToFp16LayersAfter(optNetObjPtr->GetGraph(), *layer);
+                        std::vector<ConvertFp32ToFp16Layer*> convertFp32ToFp16Layers;
+                        if (dataTypeOut == DataType::Float16)
+                        {
+                            convertFp32ToFp16Layers =
+                                InsertConvertFp32ToFp16LayersAfter(optNetObjPtr->GetGraph(), *layer);
+                        }
 
                         // Assign a supported backend to the newly introduced conversion layers
                         auto AssignFirstSupportedBackend = [&](Layer* layer, BackendId preferredBackend)
@@ -258,7 +269,8 @@ OptimizationResult AssignBackends(OptimizedNetwork* optNetObjPtr,
                 std::stringstream warningMsg;
                 warningMsg << "Layer of type " << GetLayerTypeAsCString(layer->GetType())
                            << " is not supported on requested backend " << layer->GetBackendId().Get()
-                           << " for data type " << GetDataTypeName(dataType)
+                           << " for input data type " << GetDataTypeName(dataTypeIn)
+                           << " and output data type " << GetDataTypeName(dataTypeOut)
                            << " (reason: " << reasonIfUnsupported
                            << "), falling back to the next backend.";
                 ReportWarning(warningMsg.str(), errMessages);
diff --git a/src/armnn/NetworkUtils.cpp b/src/armnn/NetworkUtils.cpp
index cfed6680ca..1bbeaac005 100644
--- a/src/armnn/NetworkUtils.cpp
+++ b/src/armnn/NetworkUtils.cpp
@@ -13,67 +13,97 @@
 namespace armnn
 {
 
-std::vector<ConvertFp16ToFp32Layer*> InsertConvertFp16ToFp32LayersBefore(Graph& graph, Layer& layer)
+namespace
 {
-    std::vector<ConvertFp16ToFp32Layer*> convertLayers;
-    convertLayers.reserve(layer.GetNumInputSlots());
-
-    for (auto&& inputSlot = layer.BeginInputSlots(); inputSlot != layer.EndInputSlots(); ++inputSlot)
-    {
-        // Insert FP16 to FP32 converter layer before the layer
-        const std::string name =
-            std::string("convert_fp16_to_fp32-" + std::to_string(inputSlot->GetSlotIndex()) + "-") + layer.GetName();
-        ConvertFp16ToFp32Layer* convertLayer =
-            graph.InsertNewLayer<ConvertFp16ToFp32Layer>(*inputSlot, name.c_str());
-
-        // Sets output tensor info for the convert layer
-        TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
-        convertInfo.SetDataType(DataType::Float32);
-        convertLayer->GetOutputSlot().SetTensorInfo(convertInfo);
 
+void UpdateOutputSlotFp16ToFp32(OutputSlot& outputSlot)
+{
+    const TensorInfo& origTensorInfo = outputSlot.GetTensorInfo();
+    TensorInfo newTensorInfo(origTensorInfo);
+    newTensorInfo.SetDataType(DataType::Float32);
+    outputSlot.SetTensorInfo(newTensorInfo);
+}
 
-        convertLayers.emplace_back(convertLayer);
+void ChangeOutputFp16ToFp32(Layer& layer)
+{
+    for (auto&& outputSlot = layer.BeginOutputSlots(); outputSlot != layer.EndOutputSlots(); ++outputSlot)
+    {
+        if (outputSlot->GetTensorInfo().GetDataType() == DataType::Float16)
+        {
+            UpdateOutputSlotFp16ToFp32(*outputSlot);
+        }
     }
+}
 
-    // Sets the output tensor info for the unsupported layer
-    auto UpdateTensorInfo = [](auto& outputSlot)
-    {
-        // Copy original tensor info and change data type to FP32
-        TensorInfo newTensorInfo = outputSlot.GetTensorInfo();
-        newTensorInfo.SetDataType(DataType::Float32);
+} // anonymous namespace
 
-        outputSlot.SetTensorInfo(newTensorInfo);
-    };
+std::vector<ConvertFp16ToFp32Layer*> InsertConvertFp16ToFp32LayersBefore(Graph& graph,
+                                                                         Layer& layer,
+                                                                         bool expectCorrectInputType)
+{
+    std::vector<ConvertFp16ToFp32Layer*> convertLayers;
+    convertLayers.reserve(layer.GetNumInputSlots());
 
-    std::for_each(layer.BeginOutputSlots(), layer.EndOutputSlots(), UpdateTensorInfo);
+    // Insert a ConvertFp16ToFp32Layer before each input slot
+    for (auto&& inputSlot = layer.BeginInputSlots(); inputSlot != layer.EndInputSlots(); ++inputSlot)
+    {
+        bool allowInsert = true;
+        if (expectCorrectInputType)
+        {
+            // Only insert ConvertFp16ToFp32Layer before FP16 input slots
+            OutputSlot* connectedOutputSlot = inputSlot->GetConnectedOutputSlot();
+            allowInsert =
+                connectedOutputSlot && connectedOutputSlot->GetTensorInfo().GetDataType() == DataType::Float16;
+        }
+
+        if (allowInsert)
+        {
+            const std::string name =
+                std::string("convert_fp16_to_fp32-" + std::to_string(inputSlot->GetSlotIndex()) + "-") +
+                layer.GetName();
+            ConvertFp16ToFp32Layer* convertLayer =
+                graph.InsertNewLayer<ConvertFp16ToFp32Layer>(*inputSlot, name.c_str());
+
+            TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
+            convertInfo.SetDataType(DataType::Float32);
+
+            convertLayer->GetOutputSlot().SetTensorInfo(convertInfo);
+
+            convertLayers.emplace_back(convertLayer);
+        }
+    }
 
     return convertLayers;
 }
 
 std::vector<ConvertFp32ToFp16Layer*> InsertConvertFp32ToFp16LayersAfter(Graph& graph, Layer& layer)
 {
+    const unsigned int numOutputSlots = layer.GetNumOutputSlots();
+
     std::vector<ConvertFp32ToFp16Layer*> convertLayers;
-    convertLayers.reserve(layer.GetNumOutputSlots());
+    convertLayers.reserve(numOutputSlots);
 
-    int index = 0;
-    // Change outputs to DataType::Float16
-    for (auto&& outputSlot = layer.BeginOutputSlots(); outputSlot != layer.EndOutputSlots(); ++outputSlot)
-    {
-        BOOST_ASSERT(outputSlot->GetTensorInfo().GetDataType() == DataType::Float32);
+    // Update FP16 output slots to FP32 on current layer
+    ChangeOutputFp16ToFp32(layer);
 
-        // Insert FP32 to FP16 converter layer after the layer
-        const std::string name =
-            std::string("convert_fp32_to_fp16-" + std::to_string(index++) + "-") + layer.GetName();
-        ConvertFp32ToFp16Layer* convertLayer =
-            graph.InsertNewLayer<ConvertFp32ToFp16Layer>(*outputSlot, name.c_str());
+    // Insert a ConvertFp32ToFp16Layer after each FP32 output slot
+    for (unsigned int slotIndex = 0u; slotIndex < numOutputSlots; ++slotIndex)
+    {
+        OutputSlot& outputSlot = layer.GetOutputSlot(slotIndex);
+        if(outputSlot.GetTensorInfo().GetDataType() == DataType::Float32)
+        {
+            const std::string name =
+                std::string("convert_fp32_to_fp16-" + std::to_string(slotIndex) + "-") + layer.GetName();
+            ConvertFp32ToFp16Layer* convertLayer =
+                graph.InsertNewLayer<ConvertFp32ToFp16Layer>(outputSlot, name.c_str());
 
-        // Sets output tensor info for the convert layer.
-        TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
-        convertInfo.SetDataType(DataType::Float16);
+            TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
+            convertInfo.SetDataType(DataType::Float16);
 
-        convertLayer->GetOutputSlot().SetTensorInfo(convertInfo);
+            convertLayer->GetOutputSlot().SetTensorInfo(convertInfo);
 
-        convertLayers.emplace_back(convertLayer);
+            convertLayers.emplace_back(convertLayer);
+        }
     }
 
     return convertLayers;
diff --git a/src/armnn/NetworkUtils.hpp b/src/armnn/NetworkUtils.hpp
index 421c52a6a7..38fb22350d 100644
--- a/src/armnn/NetworkUtils.hpp
+++ b/src/armnn/NetworkUtils.hpp
@@ -11,7 +11,9 @@
 namespace armnn
 {
 
-std::vector<ConvertFp16ToFp32Layer*> InsertConvertFp16ToFp32LayersBefore(Graph& graph, Layer& layer);
+std::vector<ConvertFp16ToFp32Layer*> InsertConvertFp16ToFp32LayersBefore(Graph& graph,
+                                                                         Layer& layer,
+                                                                         bool expectCorrectInputType = true);
 
 std::vector<ConvertFp32ToFp16Layer*> InsertConvertFp32ToFp16LayersAfter(Graph& graph, Layer& layer);
 
diff --git a/src/armnn/optimizations/ConvertFp32NetworkToFp16.hpp b/src/armnn/optimizations/ConvertFp32NetworkToFp16.hpp
index 729b76ad6b..9658a35560 100644
--- a/src/armnn/optimizations/ConvertFp32NetworkToFp16.hpp
+++ b/src/armnn/optimizations/ConvertFp32NetworkToFp16.hpp
@@ -15,7 +15,6 @@ namespace optimizations
 class ConvertFp32NetworkToFp16Impl
 {
 public:
-
     void Run(Graph& graph, Layer& layer) const
     {
         if(layer.GetType() == LayerType::Input)
@@ -33,7 +32,9 @@ public:
             // add a ConvertFloat16ToFloat32 layer before each of the inputs
             if (layer.GetDataType() == DataType::Float32)
             {
-                InsertConvertFp16ToFp32LayersBefore(graph, layer);
+                // NOTE: We need to call InsertConvertFp16ToFp32LayersBefore with expectCorrectInputType = false
+                // here, otherwise it will expect the inputs to be DataType::Float16
+                InsertConvertFp16ToFp32LayersBefore(graph, layer, false);
             }
         }
         else if (layer.GetType() != LayerType::ConvertFp32ToFp16 && layer.GetType() != LayerType::ConvertFp16ToFp32)
diff --git a/src/backends/backendsCommon/WorkloadFactory.cpp b/src/backends/backendsCommon/WorkloadFactory.cpp
index 4a7f007c2e..9901dcb7c1 100644
--- a/src/backends/backendsCommon/WorkloadFactory.cpp
+++ b/src/backends/backendsCommon/WorkloadFactory.cpp
@@ -265,8 +265,8 @@ bool IWorkloadFactory::IsLayerSupported(const BackendId& backendId,
             const TensorInfo& input  = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
             const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo();
 
-            result = layerSupportObject->IsDequantizeSupported(OverrideDataType(input, dataType),
-                                                               output,
+            result = layerSupportObject->IsDequantizeSupported(input,
+                                                               OverrideDataType(output, dataType),
                                                                reason);
             break;
         }
--
cgit v1.2.1
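
For context, the sketch below illustrates (outside of the patch itself) the fallback rule that the Network.cpp change introduces: a layer is considered for FP32 fallback if either its input or its output carries FP16 data, and a conversion layer is inserted only on the side that is actually FP16. The DataType enum and SimpleLayer struct are simplified stand-ins invented for this illustration; they are not the ArmNN types used in the patch.

// Standalone illustration only -- SimpleLayer and this DataType enum are
// hypothetical stand-ins, not armnn::Layer / armnn::DataType.
#include <iostream>

enum class DataType { Float16, Float32, Quantized8 };

struct SimpleLayer
{
    DataType inputType;
    DataType outputType;
};

// Mirrors the patched check: attempt FP32 fallback if either side is FP16.
bool ShouldAttemptFp32Fallback(const SimpleLayer& layer)
{
    return layer.inputType == DataType::Float16 || layer.outputType == DataType::Float16;
}

int main()
{
    // A Dequantize-like layer: quantized input, FP16 output.
    SimpleLayer dequantize{DataType::Quantized8, DataType::Float16};

    std::cout << std::boolalpha
              << "attempt FP32 fallback:       " << ShouldAttemptFp32Fallback(dequantize) << '\n' // true
              << "insert Fp16->Fp32 before it: " << (dequantize.inputType == DataType::Float16) << '\n' // false
              << "insert Fp32->Fp16 after it:  " << (dequantize.outputType == DataType::Float16) << '\n'; // true
}

Per the commit message, the pre-patch check consulted only a single layer data type, so a layer like this one, whose FP16 tensor sits on the output rather than the input, never triggered the fallback; checking both sides, and converting only the FP16 side, is what makes the mechanism work for Dequantize.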