author     Narumol Prangnawarat <narumol.prangnawarat@arm.com>    2020-03-26 09:20:43 +0000
committer  Narumol Prangnawarat <narumol.prangnawarat@arm.com>    2020-03-26 16:16:55 +0000
commit     57ef0088d20dd708ff92222d244ea02f1e1e5216 (patch)
tree       ae11f55f6bac939a51d5182eae441d322efb3e0e /src/armnn/optimizations
parent     9272f8b9050096f39796227c5d89ed7b9905146d (diff)
download   armnn-57ef0088d20dd708ff92222d244ea02f1e1e5216.tar.gz
IVGCVSW-4597 Modify BF16 optimizer to convert only inputs and weights of
Convolution2d and FullyConnected layers

 * Add InsertConvertFp32ToBf16LayersBefore
 * Add ConvertWeight to ConvertFp32NetworkToBf16Impl for Convolution2d and
   FullyConnected layers
 * Allow different input and output data types (BF16 input, FP32 output) for
   Convolution2d and FullyConnected layers
 * Unit tests

Signed-off-by: Narumol Prangnawarat <narumol.prangnawarat@arm.com>
Change-Id: Ic8f92ff28edcae08a72a3114a28f50c4619f919b
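For context, here is a minimal sketch (not part of this change) of how a caller
would enable this pass from the public API. The OptimizerOptions::m_ReduceFp32ToBf16
flag is assumed from the ArmNN API of this period, and the CpuRef backend choice
is illustrative only:

    #include <armnn/ArmNN.hpp>

    // Optimize an FP32 network with the FP32 -> BF16 reduction enabled.
    // After this commit, only the inputs and weights of Convolution2d and
    // FullyConnected layers are converted to BF16; biases and layer outputs
    // remain FP32.
    armnn::IOptimizedNetworkPtr OptimizeToBf16(const armnn::INetwork& network,
                                               armnn::IRuntime& runtime)
    {
        armnn::OptimizerOptions options;
        options.m_ReduceFp32ToBf16 = true; // assumed flag gating this pass

        return armnn::Optimize(network,
                               { armnn::Compute::CpuRef },
                               runtime.GetDeviceSpec(),
                               options);
    }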
Diffstat (limited to 'src/armnn/optimizations')
-rw-r--r--  src/armnn/optimizations/ConvertFp32NetworkToBf16.hpp  78
1 file changed, 36 insertions(+), 42 deletions(-)
diff --git a/src/armnn/optimizations/ConvertFp32NetworkToBf16.hpp b/src/armnn/optimizations/ConvertFp32NetworkToBf16.hpp
index d6350c3af3..222414c8c5 100644
--- a/src/armnn/optimizations/ConvertFp32NetworkToBf16.hpp
+++ b/src/armnn/optimizations/ConvertFp32NetworkToBf16.hpp
@@ -4,68 +4,62 @@
 //
 #pragma once
-#include "Optimization.hpp"
 #include "NetworkUtils.hpp"
+#include "Optimization.hpp"
 namespace armnn
 {
 namespace optimizations
 {
+template <typename LayerT>
+inline LayerT* ConvertWeight(Layer* l)
+{
+    LayerT* layer = boost::polymorphic_downcast<LayerT*>(l);
+    if ((layer->GetType() == LayerType::Convolution2d || layer->GetType() == LayerType::FullyConnected)
+        && layer->m_Weight)
+    {
+        const TensorInfo& info = layer->m_Weight->GetTensorInfo();
+
+        if (info.GetDataType() == DataType::Float32)
+        {
+            std::vector<BFloat16> newValues(info.GetNumElements());
+
+            armnnUtils::FloatingPointConverter::ConvertFloat32ToBFloat16(layer->m_Weight->template GetTensor<float>(),
+                                                                         info.GetNumElements(),
+                                                                         newValues.data());
+
+            TensorInfo newInfo(info.GetShape(), DataType::BFloat16);
+            ConstTensor newInput(newInfo, newValues);
+            layer->m_Weight.reset(new ScopedCpuTensorHandle(newInput));
+        }
+    }
+    return layer;
+}
+
 class ConvertFp32NetworkToBf16Impl
 {
 public:
+
     void Run(Graph& graph, Layer& layer) const
     {
-        if(layer.GetType() == LayerType::Input)
+        // Only convert Float32 to BFloat16 for the inputs of Convolution2d and FullyConnected layers,
+        // and also convert their weight data type from Float32 to BFloat16.
+        // Bias data types are not converted.
+        if (layer.GetType() == LayerType::Convolution2d)
         {
-            // if the outputs of this layer are DataType::Float32
-            // add a ConvertFloat32ToBFloat16 layer after each of the outputs
             if (layer.GetDataType() == DataType::Float32)
             {
-                InsertConvertFp32ToBf16LayersAfter(graph, layer);
+                InsertConvertFp32ToBf16LayersBefore(graph, layer);
+                ConvertWeight<Convolution2dLayer>(&layer);
             }
         }
-        else if (layer.GetType() == LayerType::Output)
+        else if (layer.GetType() == LayerType::FullyConnected)
         {
-            // if the inputs of this layer are DataType::Float32
-            // add a ConvertBFloat16ToFloat32 layer before each of the inputs
             if (layer.GetDataType() == DataType::Float32)
             {
-                // NOTE: We need to call InsertConvertBf16ToFp32LayersBefore with expectCorrectInputType = false
-                // here, otherwise it will expect the inputs to be DataType::BFloat16
-                InsertConvertBf16ToFp32LayersBefore(graph, layer, false);
-            }
-        }
-        else if (layer.GetType() != LayerType::ConvertFp32ToBf16 && layer.GetType() != LayerType::ConvertBf16ToFp32)
-        {
-            // if the inputs/outputs of this layer are DataType::Float32
-            // change the data type for all inputs and outputs to DataType::BFloat16
-            for (auto&& input = layer.BeginInputSlots(); input != layer.EndInputSlots(); ++input)
-            {
-                // if it is connected to OutputSlot of the InputLayer do not change the DataType of connection
-                // InputSlots of the current layer will be updated when conversion layer is inserted after InputLayer
-                Layer& base = input->GetConnectedOutputSlot()->GetOwningLayer();
-                if (base.GetType() != LayerType::Input)
-                {
-                    TensorInfo convertInfo = input->GetConnection()->GetTensorInfo();
-                    if (convertInfo.GetDataType() == DataType::Float32)
-                    {
-                        convertInfo.SetDataType(DataType::BFloat16);
-                        input->GetConnection()->SetTensorInfo(convertInfo);
-                    }
-                }
-            }
-
-            // change outputs to DataType::BFloat16
-            for (auto&& output = layer.BeginOutputSlots(); output != layer.EndOutputSlots(); ++output)
-            {
-                TensorInfo convertInfo = output->GetTensorInfo();
-                if (convertInfo.GetDataType() == DataType::Float32)
-                {
-                    convertInfo.SetDataType(DataType::BFloat16);
-                    output->SetTensorInfo(convertInfo);
-                }
+                InsertConvertFp32ToBf16LayersBefore(graph, layer);
+                ConvertWeight<FullyConnectedLayer>(&layer);
             }
         }
     }
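The new ConvertWeight helper delegates the element-wise conversion to
armnnUtils::FloatingPointConverter::ConvertFloat32ToBFloat16. For intuition,
here is a minimal, self-contained sketch of one plausible Float32 to BFloat16
conversion; the round-to-nearest-even scheme and the lack of NaN special-casing
are assumptions, not necessarily what armnnUtils implements:

    #include <cstdint>
    #include <cstring>

    // BF16 keeps the sign bit, the full 8-bit exponent, and the top 7 mantissa
    // bits of an IEEE-754 binary32 value, so conversion is a rounded truncation
    // of the low 16 bits.
    uint16_t Fp32ToBf16(float value)
    {
        uint32_t bits = 0;
        std::memcpy(&bits, &value, sizeof(bits)); // bit-exact view of the float

        // Round to nearest even: add 0x7FFF plus the lowest kept bit, then
        // truncate. (NaN inputs are not special-cased in this sketch.)
        uint32_t lsb = (bits >> 16) & 1u;
        bits += 0x7FFFu + lsb;

        return static_cast<uint16_t>(bits >> 16);
    }

Elsewhere in this header (beyond the hunk shown), the Impl is exposed as the
Fp32NetworkToBf16Converter optimization, which the new unit tests exercise via
Optimizer::Pass.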