From 57ef0088d20dd708ff92222d244ea02f1e1e5216 Mon Sep 17 00:00:00 2001 From: Narumol Prangnawarat Date: Thu, 26 Mar 2020 09:20:43 +0000 Subject: IVGCVSW-4597 Modify BF16 optimizer to Convert only inputs and weights of Convolution2d and FullyConnected layers * Add InsertConvertFp32ToBf16LayersBefore * Add ConvertWeight to ConvertFp32NetworkToBf16Impl for Conv2d and FullyConnected * Allow different input and output when input is BF16 and output is FP32 for Conv2d and FullyConnected layers * Unit tests Signed-off-by: Narumol Prangnawarat Change-Id: Ic8f92ff28edcae08a72a3114a28f50c4619f919b --- .../optimizations/ConvertFp32NetworkToBf16.hpp | 78 ++++++++++------------ 1 file changed, 36 insertions(+), 42 deletions(-) (limited to 'src/armnn/optimizations/ConvertFp32NetworkToBf16.hpp') diff --git a/src/armnn/optimizations/ConvertFp32NetworkToBf16.hpp b/src/armnn/optimizations/ConvertFp32NetworkToBf16.hpp index d6350c3af3..222414c8c5 100644 --- a/src/armnn/optimizations/ConvertFp32NetworkToBf16.hpp +++ b/src/armnn/optimizations/ConvertFp32NetworkToBf16.hpp @@ -4,68 +4,62 @@ // #pragma once -#include "Optimization.hpp" #include "NetworkUtils.hpp" +#include "Optimization.hpp" namespace armnn { namespace optimizations { +template +inline LayerT* ConvertWeight(Layer* l) +{ + LayerT* layer = boost::polymorphic_downcast(l); + if ((layer->GetType() == LayerType::Convolution2d || layer->GetType() == LayerType::FullyConnected) + && layer->m_Weight) + { + const TensorInfo& info = layer->m_Weight->GetTensorInfo(); + + if (info.GetDataType() == DataType::Float32) + { + std::vector newValues(info.GetNumElements()); + + armnnUtils::FloatingPointConverter::ConvertFloat32ToBFloat16(layer->m_Weight->template GetTensor(), + info.GetNumElements(), + newValues.data()); + + TensorInfo newInfo(info.GetShape(), DataType::BFloat16); + ConstTensor newInput(newInfo, newValues); + layer->m_Weight.reset(new ScopedCpuTensorHandle(newInput)); + } + } + return layer; +} + class ConvertFp32NetworkToBf16Impl 
{ public: + void Run(Graph& graph, Layer& layer) const { - if(layer.GetType() == LayerType::Input) + // Only convert Float32 To BFloat16 for the Input of Convolution2d layer and FullyConnected layer. + // And also convert weight data type from Float32 to Bfloat16. + // Do not convert bias data type. + if (layer.GetType() == LayerType::Convolution2d) { - // if the outputs of this layer are DataType::Float32 - // add a ConvertFloat32ToBFloat16 layer after each of the outputs if (layer.GetDataType() == DataType::Float32) { - InsertConvertFp32ToBf16LayersAfter(graph, layer); + InsertConvertFp32ToBf16LayersBefore(graph,layer); + ConvertWeight(&layer); } } - else if (layer.GetType() == LayerType::Output) + else if (layer.GetType() == LayerType::FullyConnected) { - // if the inputs of this layer are DataType::Float32 - // add a ConvertBFloat16ToFloat32 layer before each of the inputs if (layer.GetDataType() == DataType::Float32) { - // NOTE: We need to call InsertConvertBf16ToFp32LayersBefore with expectCorrectInputType = false - // here, otherwise it will expect the inputs to be DataType::BFloat16 - InsertConvertBf16ToFp32LayersBefore(graph, layer, false); - } - } - else if (layer.GetType() != LayerType::ConvertFp32ToBf16 && layer.GetType() != LayerType::ConvertBf16ToFp32) - { - // if the inputs/outputs of this layer are DataType::Float32 - // change the data type for all inputs and outputs to DataType::BFloat16 - for (auto&& input = layer.BeginInputSlots(); input != layer.EndInputSlots(); ++input) - { - // if it is connected to OutputSlot of the InputLayer do not change the DataType of connection - // InputSlots of the current layer will be updated when conversion layer is inserted after InputLayer - Layer& base = input->GetConnectedOutputSlot()->GetOwningLayer(); - if (base.GetType() != LayerType::Input) - { - TensorInfo convertInfo = input->GetConnection()->GetTensorInfo(); - if (convertInfo.GetDataType() == DataType::Float32) - { - 
convertInfo.SetDataType(DataType::BFloat16); - input->GetConnection()->SetTensorInfo(convertInfo); - } - } - } - - // change outputs to DataType::BFloat16 - for (auto&& output = layer.BeginOutputSlots(); output != layer.EndOutputSlots(); ++output) - { - TensorInfo convertInfo = output->GetTensorInfo(); - if (convertInfo.GetDataType() == DataType::Float32) - { - convertInfo.SetDataType(DataType::BFloat16); - output->SetTensorInfo(convertInfo); - } + InsertConvertFp32ToBf16LayersBefore(graph,layer); + ConvertWeight(&layer); } } } -- cgit v1.2.1