diff options
Diffstat (limited to 'src/armnn/optimizations/ConvertFp32NetworkToBf16.hpp')
-rw-r--r-- | src/armnn/optimizations/ConvertFp32NetworkToBf16.hpp | 78 |
1 files changed, 36 insertions, 42 deletions
diff --git a/src/armnn/optimizations/ConvertFp32NetworkToBf16.hpp b/src/armnn/optimizations/ConvertFp32NetworkToBf16.hpp index d6350c3af3..222414c8c5 100644 --- a/src/armnn/optimizations/ConvertFp32NetworkToBf16.hpp +++ b/src/armnn/optimizations/ConvertFp32NetworkToBf16.hpp @@ -4,68 +4,62 @@ // #pragma once -#include "Optimization.hpp" #include "NetworkUtils.hpp" +#include "Optimization.hpp" namespace armnn { namespace optimizations { +template <typename LayerT> +inline LayerT* ConvertWeight(Layer* l) +{ + LayerT* layer = boost::polymorphic_downcast<LayerT*>(l); + if ((layer->GetType() == LayerType::Convolution2d || layer->GetType() == LayerType::FullyConnected) + && layer->m_Weight) + { + const TensorInfo& info = layer->m_Weight->GetTensorInfo(); + + if (info.GetDataType() == DataType::Float32) + { + std::vector<BFloat16> newValues(info.GetNumElements()); + + armnnUtils::FloatingPointConverter::ConvertFloat32ToBFloat16(layer->m_Weight->template GetTensor<float>(), + info.GetNumElements(), + newValues.data()); + + TensorInfo newInfo(info.GetShape(), DataType::BFloat16); + ConstTensor newInput(newInfo, newValues); + layer->m_Weight.reset(new ScopedCpuTensorHandle(newInput)); + } + } + return layer; +} + class ConvertFp32NetworkToBf16Impl { public: + void Run(Graph& graph, Layer& layer) const { - if(layer.GetType() == LayerType::Input) + // Only convert Float32 To BFloat16 for the Input of Convolution2d layer and FullyConnected layer. + // And also convert weight data type from Float32 to Bfloat16. + // Do not convert bias data type. + if (layer.GetType() == LayerType::Convolution2d) { - // if the outputs of this layer are DataType::Float32 - // add a ConvertFloat32ToBFloat16 layer after each of the outputs if (layer.GetDataType() == DataType::Float32) { - InsertConvertFp32ToBf16LayersAfter(graph, layer); + InsertConvertFp32ToBf16LayersBefore(graph,layer); + ConvertWeight<Convolution2dLayer>(&layer); } } - else if (layer.GetType() == LayerType::Output) + else if (layer.GetType() == LayerType::FullyConnected) { - // if the inputs of this layer are DataType::Float32 - // add a ConvertBFloat16ToFloat32 layer before each of the inputs if (layer.GetDataType() == DataType::Float32) { - // NOTE: We need to call InsertConvertBf16ToFp32LayersBefore with expectCorrectInputType = false - // here, otherwise it will expect the inputs to be DataType::BFloat16 - InsertConvertBf16ToFp32LayersBefore(graph, layer, false); - } - } - else if (layer.GetType() != LayerType::ConvertFp32ToBf16 && layer.GetType() != LayerType::ConvertBf16ToFp32) - { - // if the inputs/outputs of this layer are DataType::Float32 - // change the data type for all inputs and outputs to DataType::BFloat16 - for (auto&& input = layer.BeginInputSlots(); input != layer.EndInputSlots(); ++input) - { - // if it is connected to OutputSlot of the InputLayer do not change the DataType of connection - // InputSlots of the current layer will be updated when conversion layer is inserted after InputLayer - Layer& base = input->GetConnectedOutputSlot()->GetOwningLayer(); - if (base.GetType() != LayerType::Input) - { - TensorInfo convertInfo = input->GetConnection()->GetTensorInfo(); - if (convertInfo.GetDataType() == DataType::Float32) - { - convertInfo.SetDataType(DataType::BFloat16); - input->GetConnection()->SetTensorInfo(convertInfo); - } - } - } - - // change outputs to DataType::BFloat16 - for (auto&& output = layer.BeginOutputSlots(); output != layer.EndOutputSlots(); ++output) - { - TensorInfo convertInfo = output->GetTensorInfo(); - if (convertInfo.GetDataType() == DataType::Float32) - { - convertInfo.SetDataType(DataType::BFloat16); - output->SetTensorInfo(convertInfo); - } + InsertConvertFp32ToBf16LayersBefore(graph,layer); + ConvertWeight<FullyConnectedLayer>(&layer); } } } |