diff options
author | Narumol Prangnawarat <narumol.prangnawarat@arm.com> | 2020-03-26 09:20:43 +0000 |
---|---|---|
committer | Narumol Prangnawarat <narumol.prangnawarat@arm.com> | 2020-03-26 16:16:55 +0000 |
commit | 57ef0088d20dd708ff92222d244ea02f1e1e5216 (patch) | |
tree | ae11f55f6bac939a51d5182eae441d322efb3e0e /src/armnn/NetworkUtils.cpp | |
parent | 9272f8b9050096f39796227c5d89ed7b9905146d (diff) | |
download | armnn-57ef0088d20dd708ff92222d244ea02f1e1e5216.tar.gz |
IVGCVSW-4597 Modify BF16 optimizer to Convert only inputs and weights of
Convolution2d and FullyConnected layers
* Add InsertConvertFp32ToBf16LayersBefore
* Add ConvertWeight to ConvertFp32NetworkToBf16Impl for Conv2d and FullyConnected
* Allow different input and output when input is BF16 and output is FP32
for Conv2d and FullyConnected layers
* Unit tests
Signed-off-by: Narumol Prangnawarat <narumol.prangnawarat@arm.com>
Change-Id: Ic8f92ff28edcae08a72a3114a28f50c4619f919b
Diffstat (limited to 'src/armnn/NetworkUtils.cpp')
-rw-r--r-- | src/armnn/NetworkUtils.cpp | 39 |
1 file changed, 39 insertions, 0 deletions
/// Inserts a ConvertFp32ToBf16Layer in front of each (eligible) input slot of
/// the given layer, so the layer receives BFloat16 data while its producers
/// keep emitting Float32.
///
/// @param graph                    Graph that owns the layers; new convert
///                                 layers are created and wired in via
///                                 Graph::InsertNewLayer.
/// @param layer                    The layer whose input connections get a
///                                 conversion inserted before them.
/// @param expectCorrectInputType   When true, a convert layer is only inserted
///                                 on inputs whose connected output slot
///                                 currently carries Float32 data; other inputs
///                                 are left untouched. When false, every input
///                                 slot gets a convert layer unconditionally.
/// @return Pointers to all ConvertFp32ToBf16Layer instances that were inserted
///         (owned by the graph, not the caller). May be empty if no input
///         qualified.
std::vector<ConvertFp32ToBf16Layer*> InsertConvertFp32ToBf16LayersBefore(Graph& graph,
                                                                         Layer& layer,
                                                                         bool expectCorrectInputType)
{
    std::vector<ConvertFp32ToBf16Layer*> convertLayers;
    // Upper bound: at most one convert layer per input slot.
    convertLayers.reserve(layer.GetNumInputSlots());

    // Insert a ConvertFp32ToBf16Layer before each input slot
    for (auto&& inputSlot = layer.BeginInputSlots(); inputSlot != layer.EndInputSlots(); ++inputSlot)
    {
        bool allowInsert = true;
        if (expectCorrectInputType)
        {
            // Only insert ConvertFp32ToBf16Layer before FP32 input slots
            OutputSlot* connectedOutputSlot = inputSlot->GetConnectedOutputSlot();
            allowInsert =
                connectedOutputSlot && connectedOutputSlot->GetTensorInfo().GetDataType() == DataType::Float32;
        }

        if (allowInsert)
        {
            // Name encodes the slot index so multiple inserts on one layer stay unique,
            // e.g. "convert_fp32_to_bf16-0-<layerName>".
            const std::string name =
                std::string("convert_fp32_to_bf16-" + std::to_string(inputSlot->GetSlotIndex()) + "-") +
                layer.GetName();
            ConvertFp32ToBf16Layer* convertLayer =
                graph.InsertNewLayer<ConvertFp32ToBf16Layer>(*inputSlot, name.c_str());

            // After insertion the convert layer's input is connected to the original
            // producer; copy that tensor info and retype it to BFloat16 for the
            // convert layer's output. Shape/quantisation fields are carried over as-is.
            TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
            convertInfo.SetDataType(DataType::BFloat16);

            convertLayer->GetOutputSlot().SetTensorInfo(convertInfo);

            convertLayers.emplace_back(convertLayer);
        }
    }

    return convertLayers;
}