author | Aron Virginas-Tar <Aron.Virginas-Tar@arm.com> | 2019-11-13 15:16:28 +0000 |
---|---|---|
committer | Áron Virginás-Tar <aron.virginas-tar@arm.com> | 2019-11-15 16:54:47 +0000 |
commit | 87972be8d838f6fde6f6e98dd81c422e85457a5e (patch) | |
tree | 78e8a9abfefc6db67f9a71f6c1fddb0444daac5f /src/armnn/NetworkUtils.cpp | |
parent | 5716de25c6981d004e32b81dc65b4869eda25f7c (diff) | |
download | armnn-87972be8d838f6fde6f6e98dd81c422e85457a5e.tar.gz | |
IVGCVSW-4119 Fix FP16 to FP32 fallback mechanism in optimizer to work with Dequantize
* Check for output data type as well as input data type when determining
whether we should attempt to fall back to FP32 if FP16 is not supported
* Override output type for Dequantize in IsLayerSupported() instead of
input type
* Update original input type from FP16 to FP32 in InsertConvertFp32ToFp16LayersAfter()
Signed-off-by: Aron Virginas-Tar <Aron.Virginas-Tar@arm.com>
Change-Id: Ic6477fd17cea5a91bd8bf9ae0cf836520897d5b7
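
The first bullet point is the crux of the fix: a layer qualifies for the FP16 to FP32 fallback if either its inputs or its outputs carry FP16 data, which is what catches Dequantize (quantized input, FP16 output). A minimal sketch of that eligibility check, written with simplified stand-in types rather than the real Arm NN Layer and TensorInfo API, might look like this:

```cpp
// Simplified illustration of the eligibility check described in the commit message.
// DataType and LayerDesc are stand-ins for illustration, not Arm NN types.
#include <algorithm>
#include <vector>

enum class DataType { Float16, Float32, QAsymmU8 };

struct LayerDesc
{
    std::vector<DataType> inputTypes;
    std::vector<DataType> outputTypes;
};

bool ShouldAttemptFp16ToFp32Fallback(const LayerDesc& layer)
{
    auto isFp16 = [](DataType type) { return type == DataType::Float16; };

    // Checking only the inputs misses Dequantize (quantized input, FP16 output),
    // so the outputs have to be inspected as well.
    return std::any_of(layer.inputTypes.begin(),  layer.inputTypes.end(),  isFp16) ||
           std::any_of(layer.outputTypes.begin(), layer.outputTypes.end(), isFp16);
}
```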
Diffstat (limited to 'src/armnn/NetworkUtils.cpp')
-rw-r--r-- | src/armnn/NetworkUtils.cpp | 114 |
1 file changed, 72 insertions, 42 deletions
```diff
diff --git a/src/armnn/NetworkUtils.cpp b/src/armnn/NetworkUtils.cpp
index cfed6680ca..1bbeaac005 100644
--- a/src/armnn/NetworkUtils.cpp
+++ b/src/armnn/NetworkUtils.cpp
@@ -13,67 +13,97 @@
 namespace armnn
 {
 
-std::vector<ConvertFp16ToFp32Layer*> InsertConvertFp16ToFp32LayersBefore(Graph& graph, Layer& layer)
+namespace
 {
-    std::vector<ConvertFp16ToFp32Layer*> convertLayers;
-    convertLayers.reserve(layer.GetNumInputSlots());
-
-    for (auto&& inputSlot = layer.BeginInputSlots(); inputSlot != layer.EndInputSlots(); ++inputSlot)
-    {
-        // Insert FP16 to FP32 converter layer before the layer
-        const std::string name =
-            std::string("convert_fp16_to_fp32-" + std::to_string(inputSlot->GetSlotIndex()) + "-") + layer.GetName();
-        ConvertFp16ToFp32Layer* convertLayer =
-            graph.InsertNewLayer<ConvertFp16ToFp32Layer>(*inputSlot, name.c_str());
-
-        // Sets output tensor info for the convert layer
-        TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
-        convertInfo.SetDataType(DataType::Float32);
-        convertLayer->GetOutputSlot().SetTensorInfo(convertInfo);
+void UpdateOutputSlotFp16ToFp32(OutputSlot& outputSlot)
+{
+    const TensorInfo& origTensorInfo = outputSlot.GetTensorInfo();
+    TensorInfo newTensorInfo(origTensorInfo);
+    newTensorInfo.SetDataType(DataType::Float32);
+    outputSlot.SetTensorInfo(newTensorInfo);
+}
 
-        convertLayers.emplace_back(convertLayer);
+void ChangeOutputFp16ToFp32(Layer& layer)
+{
+    for (auto&& outputSlot = layer.BeginOutputSlots(); outputSlot != layer.EndOutputSlots(); ++outputSlot)
+    {
+        if (outputSlot->GetTensorInfo().GetDataType() == DataType::Float16)
+        {
+            UpdateOutputSlotFp16ToFp32(*outputSlot);
+        }
     }
+}
 
-    // Sets the output tensor info for the unsupported layer
-    auto UpdateTensorInfo = [](auto& outputSlot)
-    {
-        // Copy original tensor info and change data type to FP32
-        TensorInfo newTensorInfo = outputSlot.GetTensorInfo();
-        newTensorInfo.SetDataType(DataType::Float32);
+} // anonymous namespace
 
-        outputSlot.SetTensorInfo(newTensorInfo);
-    };
+std::vector<ConvertFp16ToFp32Layer*> InsertConvertFp16ToFp32LayersBefore(Graph& graph,
+                                                                         Layer& layer,
+                                                                         bool expectCorrectInputType)
+{
+    std::vector<ConvertFp16ToFp32Layer*> convertLayers;
+    convertLayers.reserve(layer.GetNumInputSlots());
 
-    std::for_each(layer.BeginOutputSlots(), layer.EndOutputSlots(), UpdateTensorInfo);
+    // Insert a ConvertFp16ToFp32Layer before each input slot
+    for (auto&& inputSlot = layer.BeginInputSlots(); inputSlot != layer.EndInputSlots(); ++inputSlot)
+    {
+        bool allowInsert = true;
+        if (expectCorrectInputType)
+        {
+            // Only insert ConvertFp16ToFp32Layer before FP16 input slots
+            OutputSlot* connectedOutputSlot = inputSlot->GetConnectedOutputSlot();
+            allowInsert =
+                connectedOutputSlot && connectedOutputSlot->GetTensorInfo().GetDataType() == DataType::Float16;
+        }
+
+        if (allowInsert)
+        {
+            const std::string name =
+                std::string("convert_fp16_to_fp32-" + std::to_string(inputSlot->GetSlotIndex()) + "-") +
+                layer.GetName();
+            ConvertFp16ToFp32Layer* convertLayer =
+                graph.InsertNewLayer<ConvertFp16ToFp32Layer>(*inputSlot, name.c_str());
+
+            TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
+            convertInfo.SetDataType(DataType::Float32);
+
+            convertLayer->GetOutputSlot().SetTensorInfo(convertInfo);
+
+            convertLayers.emplace_back(convertLayer);
+        }
+    }
 
     return convertLayers;
 }
 
 std::vector<ConvertFp32ToFp16Layer*> InsertConvertFp32ToFp16LayersAfter(Graph& graph, Layer& layer)
 {
+    const unsigned int numOutputSlots = layer.GetNumOutputSlots();
+
     std::vector<ConvertFp32ToFp16Layer*> convertLayers;
-    convertLayers.reserve(layer.GetNumOutputSlots());
+    convertLayers.reserve(numOutputSlots);
 
-    int index = 0;
-    // Change outputs to DataType::Float16
-    for (auto&& outputSlot = layer.BeginOutputSlots(); outputSlot != layer.EndOutputSlots(); ++outputSlot)
-    {
-        BOOST_ASSERT(outputSlot->GetTensorInfo().GetDataType() == DataType::Float32);
+    // Update FP16 output slots to FP32 on current layer
+    ChangeOutputFp16ToFp32(layer);
 
-        // Insert FP32 to FP16 converter layer after the layer
-        const std::string name =
-            std::string("convert_fp32_to_fp16-" + std::to_string(index++) + "-") + layer.GetName();
-        ConvertFp32ToFp16Layer* convertLayer =
-            graph.InsertNewLayer<ConvertFp32ToFp16Layer>(*outputSlot, name.c_str());
+    // Insert a ConvertFp32ToFp16Layer after each FP32 output slot
+    for (unsigned int slotIndex = 0u; slotIndex < numOutputSlots; ++slotIndex)
+    {
+        OutputSlot& outputSlot = layer.GetOutputSlot(slotIndex);
+        if(outputSlot.GetTensorInfo().GetDataType() == DataType::Float32)
+        {
+            const std::string name =
+                std::string("convert_fp32_to_fp16-" + std::to_string(slotIndex) + "-") + layer.GetName();
+            ConvertFp32ToFp16Layer* convertLayer =
+                graph.InsertNewLayer<ConvertFp32ToFp16Layer>(outputSlot, name.c_str());
 
-        // Sets output tensor info for the convert layer.
-        TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
-        convertInfo.SetDataType(DataType::Float16);
+            TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
+            convertInfo.SetDataType(DataType::Float16);
 
-        convertLayer->GetOutputSlot().SetTensorInfo(convertInfo);
+            convertLayer->GetOutputSlot().SetTensorInfo(convertInfo);
 
-        convertLayers.emplace_back(convertLayer);
+            convertLayers.emplace_back(convertLayer);
+        }
     }
 
     return convertLayers;
```
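
The two helpers touched in this diff are the building blocks of the fallback: converters are inserted only in front of FP16 inputs (controlled by the new expectCorrectInputType parameter) and only behind FP32 outputs (after any FP16 outputs have been rewritten to FP32). A sketch of how an optimizer fallback pass might wire them together follows; the function name FallBackLayerToFp32 and the choice of expectCorrectInputType = true are assumptions for illustration, not code taken from the repository.

```cpp
// Hypothetical usage sketch: only InsertConvertFp16ToFp32LayersBefore and
// InsertConvertFp32ToFp16LayersAfter come from NetworkUtils; everything else is assumed.
#include "Graph.hpp"
#include "NetworkUtils.hpp"

#include <cstddef>

// Wrap an unsupported layer so that it runs in FP32 while its FP16 neighbours
// still see FP16 tensors. Returns the number of converter layers inserted.
std::size_t FallBackLayerToFp32(armnn::Graph& graph, armnn::Layer& layer)
{
    // Insert ConvertFp16ToFp32 layers only in front of FP16 inputs; a non-FP16 input
    // (e.g. the quantized input of Dequantize) is left untouched.
    auto convertersIn = armnn::InsertConvertFp16ToFp32LayersBefore(graph, layer, true);

    // Rewrite the layer's FP16 outputs to FP32, then follow each FP32 output with a
    // ConvertFp32ToFp16 layer so downstream consumers keep receiving FP16 data.
    auto convertersOut = armnn::InsertConvertFp32ToFp16LayersAfter(graph, layer);

    return convertersIn.size() + convertersOut.size();
}
```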