From 57ef0088d20dd708ff92222d244ea02f1e1e5216 Mon Sep 17 00:00:00 2001 From: Narumol Prangnawarat Date: Thu, 26 Mar 2020 09:20:43 +0000 Subject: IVGCVSW-4597 Modify BF16 optimizer to Convert only inputs and weights of Convolution2d and FullyConnected layers * Add InsertConvertFp32ToBf16LayersBefore * Add ConvertWeight to ConvertFp32NetworkToBf16Impl for Conv2d and FullyConnected * Allow different input and output when input is BF16 and output is FP32 for Conv2d and FullyConnected layers * Unit tests Signed-off-by: Narumol Prangnawarat Change-Id: Ic8f92ff28edcae08a72a3114a28f50c4619f919b --- .../optimizations/ConvertFp32NetworkToBf16.hpp | 78 ++++++++++------------ 1 file changed, 36 insertions(+), 42 deletions(-) (limited to 'src/armnn/optimizations/ConvertFp32NetworkToBf16.hpp') diff --git a/src/armnn/optimizations/ConvertFp32NetworkToBf16.hpp b/src/armnn/optimizations/ConvertFp32NetworkToBf16.hpp index d6350c3af3..222414c8c5 100644 --- a/src/armnn/optimizations/ConvertFp32NetworkToBf16.hpp +++ b/src/armnn/optimizations/ConvertFp32NetworkToBf16.hpp @@ -4,68 +4,62 @@ // #pragma once -#include "Optimization.hpp" #include "NetworkUtils.hpp" +#include "Optimization.hpp" namespace armnn { namespace optimizations { +template +inline LayerT* ConvertWeight(Layer* l) +{ + LayerT* layer = boost::polymorphic_downcast(l); + if ((layer->GetType() == LayerType::Convolution2d || layer->GetType() == LayerType::FullyConnected) + && layer->m_Weight) + { + const TensorInfo& info = layer->m_Weight->GetTensorInfo(); + + if (info.GetDataType() == DataType::Float32) + { + std::vector newValues(info.GetNumElements()); + + armnnUtils::FloatingPointConverter::ConvertFloat32ToBFloat16(layer->m_Weight->template GetTensor(), + info.GetNumElements(), + newValues.data()); + + TensorInfo newInfo(info.GetShape(), DataType::BFloat16); + ConstTensor newInput(newInfo, newValues); + layer->m_Weight.reset(new ScopedCpuTensorHandle(newInput)); + } + } + return layer; +} + class ConvertFp32NetworkToBf16Impl 
{ public: + void Run(Graph& graph, Layer& layer) const { - if(layer.GetType() == LayerType::Input) + // Only convert Float32 To BFloat16 for the Input of Convolution2d layer and FullyConnected layer. + // And also convert weight data type from Float32 to Bfloat16. + // Do not convert bias data type. + if (layer.GetType() == LayerType::Convolution2d) { - // if the outputs of this layer are DataType::Float32 - // add a ConvertFloat32ToBFloat16 layer after each of the outputs if (layer.GetDataType() == DataType::Float32) { - InsertConvertFp32ToBf16LayersAfter(graph, layer); + InsertConvertFp32ToBf16LayersBefore(graph,layer); + ConvertWeight(&layer); } } - else if (layer.GetType() == LayerType::Output) + else if (layer.GetType() == LayerType::FullyConnected) { - // if the inputs of this layer are DataType::Float32 - // add a ConvertBFloat16ToFloat32 layer before each of the inputs if (layer.GetDataType() == DataType::Float32) { - // NOTE: We need to call InsertConvertBf16ToFp32LayersBefore with expectCorrectInputType = false - // here, otherwise it will expect the inputs to be DataType::BFloat16 - InsertConvertBf16ToFp32LayersBefore(graph, layer, false); - } - } - else if (layer.GetType() != LayerType::ConvertFp32ToBf16 && layer.GetType() != LayerType::ConvertBf16ToFp32) - { - // if the inputs/outputs of this layer are DataType::Float32 - // change the data type for all inputs and outputs to DataType::BFloat16 - for (auto&& input = layer.BeginInputSlots(); input != layer.EndInputSlots(); ++input) - { - // if it is connected to OutputSlot of the InputLayer do not change the DataType of connection - // InputSlots of the current layer will be updated when conversion layer is inserted after InputLayer - Layer& base = input->GetConnectedOutputSlot()->GetOwningLayer(); - if (base.GetType() != LayerType::Input) - { - TensorInfo convertInfo = input->GetConnection()->GetTensorInfo(); - if (convertInfo.GetDataType() == DataType::Float32) - { - 
convertInfo.SetDataType(DataType::BFloat16); - input->GetConnection()->SetTensorInfo(convertInfo); - } - } - } - - // change outputs to DataType::BFloat16 - for (auto&& output = layer.BeginOutputSlots(); output != layer.EndOutputSlots(); ++output) - { - TensorInfo convertInfo = output->GetTensorInfo(); - if (convertInfo.GetDataType() == DataType::Float32) - { - convertInfo.SetDataType(DataType::BFloat16); - output->SetTensorInfo(convertInfo); - } + InsertConvertFp32ToBf16LayersBefore(graph,layer); + ConvertWeight(&layer); } } } -- cgit v1.2.1