ArmNN 22.11
NetworkUtils.cpp
//
// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "NetworkUtils.hpp"

#include <armnnUtils/FloatingPointConverter.hpp>
#include <BFloat16.hpp>
#include "SubgraphViewSelector.hpp"

#include <armnn/Exceptions.hpp>
#include <armnn/utility/PolymorphicDowncast.hpp>

namespace armnn
{

namespace
{

void UpdateOutputSlotToFp32(OutputSlot& outputSlot)
{
    const TensorInfo& origTensorInfo = outputSlot.GetTensorInfo();
    TensorInfo newTensorInfo(origTensorInfo);
    newTensorInfo.SetDataType(DataType::Float32);
    outputSlot.SetTensorInfo(newTensorInfo);
}

void ChangeOutputBf16ToFp32(Layer& layer)
{
    for (auto&& outputSlot = layer.BeginOutputSlots(); outputSlot != layer.EndOutputSlots(); ++outputSlot)
    {
        if (outputSlot->GetTensorInfo().GetDataType() == DataType::BFloat16)
        {
            UpdateOutputSlotToFp32(*outputSlot);
        }
    }
}

void ChangeOutputFp16ToFp32(Layer& layer)
{
    for (auto&& outputSlot = layer.BeginOutputSlots(); outputSlot != layer.EndOutputSlots(); ++outputSlot)
    {
        if (outputSlot->GetTensorInfo().GetDataType() == DataType::Float16)
        {
            UpdateOutputSlotToFp32(*outputSlot);
        }
    }
}

} // anonymous namespace
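
// Illustrative sketch (hypothetical helper): UpdateOutputSlotToFp32 copies the
// whole TensorInfo (shape, quantization parameters) and rewrites only the data
// type, so callers can rely on everything else surviving the retype.
inline void ExampleForceSlotToFp32(Layer& layer)
{
    OutputSlot& slot = layer.GetOutputSlot(0);
    UpdateOutputSlotToFp32(slot); // file-local helper defined above
    ARMNN_ASSERT(slot.GetTensorInfo().GetDataType() == DataType::Float32);
}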

std::vector<ConvertBf16ToFp32Layer*> InsertConvertBf16ToFp32LayersBefore(Graph& graph,
                                                                         Layer& layer,
                                                                         bool expectCorrectInputType)
{
    std::vector<ConvertBf16ToFp32Layer*> convertLayers;
    convertLayers.reserve(layer.GetNumInputSlots());

    // Insert a ConvertBf16ToFp32Layer before each input slot
    for (auto&& inputSlot = layer.BeginInputSlots(); inputSlot != layer.EndInputSlots(); ++inputSlot)
    {
        bool allowInsert = true;
        if (expectCorrectInputType)
        {
            // Only insert ConvertBf16ToFp32Layer before BF16 input slots
            OutputSlot* connectedOutputSlot = inputSlot->GetConnectedOutputSlot();
            allowInsert =
                connectedOutputSlot && connectedOutputSlot->GetTensorInfo().GetDataType() == DataType::BFloat16;
        }

        if (allowInsert)
        {
            const std::string name =
                std::string("convert_bf16_to_fp32-" + std::to_string(inputSlot->GetSlotIndex()) + "-") +
                layer.GetName();
            ConvertBf16ToFp32Layer* convertLayer =
                graph.InsertNewLayer<ConvertBf16ToFp32Layer>(*inputSlot, name.c_str());

            TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
            convertInfo.SetDataType(DataType::Float32);

            convertLayer->GetOutputSlot().SetTensorInfo(convertInfo);

            convertLayers.emplace_back(convertLayer);
        }
    }

    return convertLayers;
}
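
// Usage sketch (hypothetical helper): a pass that must run a BF16-typed layer
// with an FP32-only kernel can standardise its inputs like this; the returned
// converters can then be assigned to a backend that supports the conversion.
inline void ExampleWrapBf16Inputs(Graph& graph, Layer& layer)
{
    // expectCorrectInputType == true: only inputs whose producers really are
    // BF16 get a converter.
    for (ConvertBf16ToFp32Layer* convertLayer :
         InsertConvertBf16ToFp32LayersBefore(graph, layer, true))
    {
        convertLayer->SetBackendId(Compute::CpuRef); // assumption: CpuRef runs the conversion
    }
}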

std::vector<ConvertFp32ToBf16Layer*> InsertConvertFp32ToBf16LayersBefore(Graph& graph,
                                                                         Layer& layer,
                                                                         bool expectCorrectInputType)
{
    std::vector<ConvertFp32ToBf16Layer*> convertLayers;
    convertLayers.reserve(layer.GetNumInputSlots());

    // Insert a ConvertFp32ToBf16Layer before each input slot
    for (auto&& inputSlot = layer.BeginInputSlots(); inputSlot != layer.EndInputSlots(); ++inputSlot)
    {
        bool allowInsert = true;

        if ((layer.GetType() == LayerType::Convolution2d ||
             layer.GetType() == LayerType::FullyConnected ||
             layer.GetType() == LayerType::DepthwiseConvolution2d)
            && inputSlot->GetSlotIndex() == 2)
        {
            // Refrain from reducing bias to Bf16
            continue;
        }
        if (expectCorrectInputType)
        {
            // Only insert ConvertFp32ToBf16Layer before FP32 input slots
            OutputSlot* connectedOutputSlot = inputSlot->GetConnectedOutputSlot();
            allowInsert =
                connectedOutputSlot && connectedOutputSlot->GetTensorInfo().GetDataType() == DataType::Float32;
        }

        if (allowInsert)
        {
            const std::string name =
                std::string("convert_fp32_to_bf16-" + std::to_string(inputSlot->GetSlotIndex()) + "-") +
                layer.GetName();
            ConvertFp32ToBf16Layer* convertLayer =
                graph.InsertNewLayer<ConvertFp32ToBf16Layer>(*inputSlot, name.c_str());

            TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
            convertInfo.SetDataType(DataType::BFloat16);

            convertLayer->GetOutputSlot().SetTensorInfo(convertInfo);

            convertLayers.emplace_back(convertLayer);
        }
    }

    return convertLayers;
}
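
// Usage sketch (hypothetical helper): the guard above keeps biases in FP32.
// For Convolution2d, FullyConnected and DepthwiseConvolution2d the bias sits
// on input slot 2, so converting such a layer's inputs to BF16 yields
// converters for slot 0 (data) and slot 1 (weights) at most.
inline void ExampleReduceConvInputsToBf16(Graph& graph, Layer& convLayer)
{
    std::vector<ConvertFp32ToBf16Layer*> converts =
        InsertConvertFp32ToBf16LayersBefore(graph, convLayer, true);
    ARMNN_ASSERT(converts.size() <= 2); // the bias slot is never converted
}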

std::vector<ConvertFp16ToFp32Layer*> InsertConvertFp16ToFp32LayersBefore(Graph& graph,
                                                                         Layer& layer,
                                                                         bool expectCorrectInputType)
{
    std::vector<ConvertFp16ToFp32Layer*> convertLayers;
    convertLayers.reserve(layer.GetNumInputSlots());

    // Insert a ConvertFp16ToFp32Layer before each input slot
    for (auto&& inputSlot = layer.BeginInputSlots(); inputSlot != layer.EndInputSlots(); ++inputSlot)
    {
        bool allowInsert = true;
        if (expectCorrectInputType)
        {
            // Only insert ConvertFp16ToFp32Layer before FP16 input slots
            OutputSlot* connectedOutputSlot = inputSlot->GetConnectedOutputSlot();
            allowInsert =
                connectedOutputSlot && connectedOutputSlot->GetTensorInfo().GetDataType() == DataType::Float16;
        }

        if (allowInsert)
        {
            const std::string name =
                std::string("convert_fp16_to_fp32-" + std::to_string(inputSlot->GetSlotIndex()) + "-") +
                layer.GetName();
            ConvertFp16ToFp32Layer* convertLayer =
                graph.InsertNewLayer<ConvertFp16ToFp32Layer>(*inputSlot, name.c_str());

            TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
            convertInfo.SetDataType(DataType::Float32);

            convertLayer->GetOutputSlot().SetTensorInfo(convertInfo);

            convertLayers.emplace_back(convertLayer);
        }
    }

    return convertLayers;
}
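
// Usage sketch (hypothetical helper): with expectCorrectInputType == false the
// data type check is skipped and a converter lands before every input slot,
// assuming each slot already has a connected producer. The generated names
// follow the pattern "convert_fp16_to_fp32-<slotIndex>-<layerName>".
inline void ExampleConvertAllInputs(Graph& graph, Layer& layer)
{
    std::vector<ConvertFp16ToFp32Layer*> converts =
        InsertConvertFp16ToFp32LayersBefore(graph, layer, false);
    ARMNN_ASSERT(converts.size() == layer.GetNumInputSlots());
}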

std::vector<ConvertFp32ToBf16Layer*> InsertConvertFp32ToBf16LayersAfter(Graph& graph, Layer& layer)
{
    const unsigned int numOutputSlots = layer.GetNumOutputSlots();

    std::vector<ConvertFp32ToBf16Layer*> convertLayers;
    convertLayers.reserve(numOutputSlots);

    // Update Bf16 output slots to FP32 on current layer
    ChangeOutputBf16ToFp32(layer);

    // Insert a ConvertFp32ToBf16Layer after each FP32 output slot
    for (unsigned int slotIndex = 0u; slotIndex < numOutputSlots; ++slotIndex)
    {
        OutputSlot& outputSlot = layer.GetOutputSlot(slotIndex);
        if (outputSlot.GetTensorInfo().GetDataType() == DataType::Float32)
        {
            const std::string name =
                std::string("convert_fp32_to_bf16-" + std::to_string(slotIndex) + "-") + layer.GetName();
            ConvertFp32ToBf16Layer* convertLayer =
                graph.InsertNewLayer<ConvertFp32ToBf16Layer>(outputSlot, name.c_str());

            TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
            convertInfo.SetDataType(DataType::BFloat16);

            convertLayer->GetOutputSlot().SetTensorInfo(convertInfo);

            convertLayers.emplace_back(convertLayer);
        }
    }

    return convertLayers;
}
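
// Usage sketch (hypothetical helper): the call first retypes the layer's own
// BF16 outputs to FP32 (via ChangeOutputBf16ToFp32), then each inserted
// converter hands BF16 back to downstream consumers.
inline void ExampleRestoreBf16Outputs(Graph& graph, Layer& layer)
{
    for (ConvertFp32ToBf16Layer* convertLayer : InsertConvertFp32ToBf16LayersAfter(graph, layer))
    {
        ARMNN_ASSERT(convertLayer->GetOutputSlot().GetTensorInfo().GetDataType() == DataType::BFloat16);
    }
}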

std::vector<ConvertFp32ToFp16Layer*> InsertConvertFp32ToFp16LayersAfter(Graph& graph, Layer& layer)
{
    const unsigned int numOutputSlots = layer.GetNumOutputSlots();

    std::vector<ConvertFp32ToFp16Layer*> convertLayers;
    convertLayers.reserve(numOutputSlots);

    // Update FP16 output slots to FP32 on current layer
    ChangeOutputFp16ToFp32(layer);

    // Insert a ConvertFp32ToFp16Layer after each FP32 output slot
    for (unsigned int slotIndex = 0u; slotIndex < numOutputSlots; ++slotIndex)
    {
        OutputSlot& outputSlot = layer.GetOutputSlot(slotIndex);
        if (outputSlot.GetTensorInfo().GetDataType() == DataType::Float32)
        {
            const std::string name =
                std::string("convert_fp32_to_fp16-" + std::to_string(slotIndex) + "-") + layer.GetName();
            ConvertFp32ToFp16Layer* convertLayer =
                graph.InsertNewLayer<ConvertFp32ToFp16Layer>(outputSlot, name.c_str());

            TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
            convertInfo.SetDataType(DataType::Float16);

            convertLayer->GetOutputSlot().SetTensorInfo(convertInfo);

            convertLayers.emplace_back(convertLayer);
        }
    }

    return convertLayers;
}
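
// Usage sketch (hypothetical helper): pairing the Before/After helpers gives
// the standard FP32 fallback for a layer that has no FP16 implementation:
//     FP16 producer -> ConvertFp16ToFp32 -> layer (FP32) -> ConvertFp32ToFp16
inline void ExampleFp32Fallback(Graph& graph, Layer& fp16OnlyLayer)
{
    InsertConvertFp16ToFp32LayersBefore(graph, fp16OnlyLayer, true);
    InsertConvertFp32ToFp16LayersAfter(graph, fp16OnlyLayer);
}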

std::vector<DebugLayer*> InsertDebugLayerAfter(Graph& graph, Layer& layer, bool toFile)
{
    std::vector<DebugLayer*> debugLayers;
    debugLayers.reserve(layer.GetNumOutputSlots());

    // Connect a DebugLayer to each output slot of the layer
    uint32_t outputSlotIdx = 0;
    for (auto outputSlot = layer.BeginOutputSlots(); outputSlot != layer.EndOutputSlots(); ++outputSlot)
    {
        const std::string debugName = std::string("DebugLayerAfter") + layer.GetNameStr() + "_" +
                                      std::to_string(outputSlotIdx);

        DebugLayer* debugLayer =
            graph.InsertNewLayer<DebugLayer>(*outputSlot, debugName.c_str(), toFile);

        // Sets output tensor info for the debug layer.
        ARMNN_ASSERT(debugLayer->GetInputSlot(0).GetConnectedOutputSlot() == &(*outputSlot));
        TensorInfo debugInfo = debugLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();

        debugLayer->GetOutputSlot().SetTensorInfo(debugInfo);

        // NOTE: It is OK to do this because DebugLayer is only supported on CpuRef
        debugLayer->SetBackendId(Compute::CpuRef);

        debugLayers.emplace_back(debugLayer);

        ++outputSlotIdx;
    }

    return debugLayers;
}
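
// Usage sketch (hypothetical helper): instrumenting a single layer while
// debugging. The inserted DebugLayers record the tensor data flowing out of
// each output slot; with toFile == true the data is dumped to file rather
// than printed.
inline void ExampleInstrumentLayer(Graph& graph, Layer& layerOfInterest)
{
    std::vector<DebugLayer*> debugLayers = InsertDebugLayerAfter(graph, layerOfInterest, false);
    ARMNN_ASSERT(debugLayers.size() == layerOfInterest.GetNumOutputSlots());
}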

bool RevertConstantWeightsToFP32(Layer* layer)
{
    if (layer->GetType() == LayerType::Convolution2d || layer->GetType() == LayerType::FullyConnected)
    {
        // Revert Weights on Constant Layer to FP32 so they can be accessed by Conv2d or FullyConnected
        // This prevents a conversion layer being added in during backend assignment which blocks
        // the RedirectMembersToConstantInputs backward compatibility workaround/optimization.
        auto constantLayerInfo = layer->GetInputSlot(1).GetConnection()->GetTensorInfo();

        if (constantLayerInfo.IsConstant() && constantLayerInfo.GetDataType() == DataType::BFloat16)
        {
            std::vector<float> newValues(constantLayerInfo.GetNumElements());

            auto weightLayer = PolymorphicDowncast<ConstantLayer*>(
                &layer->GetInputSlot(1).GetConnection()->GetOwningIConnectableLayer());

            armnnUtils::FloatingPointConverter::ConvertBFloat16ToFloat32(
                weightLayer->m_LayerOutput->GetConstTensor<BFloat16>(),
                constantLayerInfo.GetNumElements(),
                newValues.data());

            TensorInfo newInfo(constantLayerInfo.GetShape(), DataType::Float32);
            newInfo.SetConstant(true);
            ConstTensor newInput(newInfo, newValues);
            weightLayer->m_LayerOutput.reset(new ScopedTensorHandle(newInput));
            weightLayer->GetOutputSlot(0).SetTensorInfo(newInfo);

            // Connect Conv2d/FullyConnected to InputLayer directly leaving out
            // the ConversionLayer to be cleaned up later
            auto& conversionLayer = layer->GetInputSlot(0).GetConnection()->GetOwningIConnectableLayer();
            auto actualInputOutputSlot = conversionLayer.GetInputSlot(0).GetConnection();

            auto& conversionLayerOutputSlot =
                layer->GetInputSlot(0).GetConnection()->GetOwningIConnectableLayer().GetOutputSlot(0);
            auto& conversionLayerInputSlot =
                layer->GetInputSlot(0).GetConnection()->GetOwningIConnectableLayer().GetInputSlot(0);
            actualInputOutputSlot->Disconnect(conversionLayerInputSlot);
            conversionLayerOutputSlot.Disconnect(layer->GetInputSlot(0));

            actualInputOutputSlot->Connect(layer->GetInputSlot(0));

            return true;
        }
    }
    return false;
}
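
// Usage sketch (hypothetical helper): an optimization pass could walk the
// graph and undo BF16 weight reduction wherever it would block the
// RedirectMembersToConstantInputs workaround described above. The guard inside
// RevertConstantWeightsToFP32 limits the work to Convolution2d and
// FullyConnected layers with constant BF16 weights on input slot 1.
inline void ExampleRevertAllBf16Weights(Graph& graph)
{
    for (Layer* layer : graph)
    {
        if (layer->GetType() == LayerType::Convolution2d ||
            layer->GetType() == LayerType::FullyConnected)
        {
            RevertConstantWeightsToFP32(layer);
        }
    }
}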

} // namespace armnn