ArmNN 22.05
NetworkUtils.cpp
//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "NetworkUtils.hpp"

#include <armnn/Exceptions.hpp>

namespace armnn
{

namespace
{

void UpdateOutputSlotToFp32(OutputSlot& outputSlot)
{
    const TensorInfo& origTensorInfo = outputSlot.GetTensorInfo();
    TensorInfo newTensorInfo(origTensorInfo);
    newTensorInfo.SetDataType(DataType::Float32);
    outputSlot.SetTensorInfo(newTensorInfo);
}

void ChangeOutputBf16ToFp32(Layer& layer)
{
    for (auto&& outputSlot = layer.BeginOutputSlots(); outputSlot != layer.EndOutputSlots(); ++outputSlot)
    {
        if (outputSlot->GetTensorInfo().GetDataType() == DataType::BFloat16)
        {
            UpdateOutputSlotToFp32(*outputSlot);
        }
    }
}

void ChangeOutputFp16ToFp32(Layer& layer)
{
    for (auto&& outputSlot = layer.BeginOutputSlots(); outputSlot != layer.EndOutputSlots(); ++outputSlot)
    {
        if (outputSlot->GetTensorInfo().GetDataType() == DataType::Float16)
        {
            UpdateOutputSlotToFp32(*outputSlot);
        }
    }
}

} // anonymous namespace

std::vector<ConvertBf16ToFp32Layer*> InsertConvertBf16ToFp32LayersBefore(Graph& graph,
                                                                         Layer& layer,
                                                                         bool expectCorrectInputType)
{
    std::vector<ConvertBf16ToFp32Layer*> convertLayers;
    convertLayers.reserve(layer.GetNumInputSlots());

    // Insert a ConvertBf16ToFp32Layer before each input slot
    for (auto&& inputSlot = layer.BeginInputSlots(); inputSlot != layer.EndInputSlots(); ++inputSlot)
    {
        bool allowInsert = true;
        if (expectCorrectInputType)
        {
            // Only insert ConvertBf16ToFp32Layer before BF16 input slots
            OutputSlot* connectedOutputSlot = inputSlot->GetConnectedOutputSlot();
            allowInsert =
                connectedOutputSlot && connectedOutputSlot->GetTensorInfo().GetDataType() == DataType::BFloat16;
        }

        if (allowInsert)
        {
            const std::string name =
                std::string("convert_bf16_to_fp32-" + std::to_string(inputSlot->GetSlotIndex()) + "-") +
                layer.GetName();
            ConvertBf16ToFp32Layer* convertLayer =
                graph.InsertNewLayer<ConvertBf16ToFp32Layer>(*inputSlot, name.c_str());

            TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
            convertInfo.SetDataType(DataType::Float32);

            convertLayer->GetOutputSlot().SetTensorInfo(convertInfo);

            convertLayers.emplace_back(convertLayer);
        }
    }

    return convertLayers;
}
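
// Illustrative usage (added sketch, not part of the original file): an
// optimization pass that must run a BF16-fed layer in FP32 might call the
// helper above like this. `graph` and `layer` are assumed to already exist;
// passing expectCorrectInputType=true limits insertion to input slots that
// are actually fed BF16 data.
//
//     std::vector<ConvertBf16ToFp32Layer*> converters =
//         InsertConvertBf16ToFp32LayersBefore(graph, layer, true);
//     // Each returned layer now sits between the original producer and `layer`,
//     // with its output TensorInfo already rewritten to DataType::Float32.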

std::vector<ConvertFp32ToBf16Layer*> InsertConvertFp32ToBf16LayersBefore(Graph& graph,
                                                                         Layer& layer,
                                                                         bool expectCorrectInputType)
{
    std::vector<ConvertFp32ToBf16Layer*> convertLayers;
    convertLayers.reserve(layer.GetNumInputSlots());

    // Insert a ConvertFp32ToBf16Layer before each input slot
    for (auto&& inputSlot = layer.BeginInputSlots(); inputSlot != layer.EndInputSlots(); ++inputSlot)
    {
        bool allowInsert = true;

        if ((layer.GetType() == LayerType::Convolution2d ||
             layer.GetType() == LayerType::FullyConnected ||
             layer.GetType() == LayerType::DepthwiseConvolution2d)
            && inputSlot->GetSlotIndex() == 2)
        {
            // Refrain from reducing the bias to BF16
            continue;
        }
        if (expectCorrectInputType)
        {
            // Only insert ConvertFp32ToBf16Layer before FP32 input slots
            OutputSlot* connectedOutputSlot = inputSlot->GetConnectedOutputSlot();
            allowInsert =
                connectedOutputSlot && connectedOutputSlot->GetTensorInfo().GetDataType() == DataType::Float32;
        }

        if (allowInsert)
        {
            const std::string name =
                std::string("convert_fp32_to_bf16-" + std::to_string(inputSlot->GetSlotIndex()) + "-") +
                layer.GetName();
            ConvertFp32ToBf16Layer* convertLayer =
                graph.InsertNewLayer<ConvertFp32ToBf16Layer>(*inputSlot, name.c_str());

            TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
            convertInfo.SetDataType(DataType::BFloat16);

            convertLayer->GetOutputSlot().SetTensorInfo(convertInfo);

            convertLayers.emplace_back(convertLayer);
        }
    }

    return convertLayers;
}
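
// Note (added commentary, not in the original source): for Convolution2d,
// DepthwiseConvolution2d and FullyConnected layers, input slot 0 carries the
// input tensor, slot 1 the weights and slot 2 the optional bias. The early
// `continue` above therefore keeps biases in FP32: BF16's 7-bit mantissa is
// too coarse for accumulated bias values, so only inputs and weights are reduced.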

std::vector<ConvertFp16ToFp32Layer*> InsertConvertFp16ToFp32LayersBefore(Graph& graph,
                                                                         Layer& layer,
                                                                         bool expectCorrectInputType)
{
    std::vector<ConvertFp16ToFp32Layer*> convertLayers;
    convertLayers.reserve(layer.GetNumInputSlots());

    // Insert a ConvertFp16ToFp32Layer before each input slot
    for (auto&& inputSlot = layer.BeginInputSlots(); inputSlot != layer.EndInputSlots(); ++inputSlot)
    {
        bool allowInsert = true;
        if (expectCorrectInputType)
        {
            // Only insert ConvertFp16ToFp32Layer before FP16 input slots
            OutputSlot* connectedOutputSlot = inputSlot->GetConnectedOutputSlot();
            allowInsert =
                connectedOutputSlot && connectedOutputSlot->GetTensorInfo().GetDataType() == DataType::Float16;
        }

        if (allowInsert)
        {
            const std::string name =
                std::string("convert_fp16_to_fp32-" + std::to_string(inputSlot->GetSlotIndex()) + "-") +
                layer.GetName();
            ConvertFp16ToFp32Layer* convertLayer =
                graph.InsertNewLayer<ConvertFp16ToFp32Layer>(*inputSlot, name.c_str());

            TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
            convertInfo.SetDataType(DataType::Float32);

            convertLayer->GetOutputSlot().SetTensorInfo(convertInfo);

            convertLayers.emplace_back(convertLayer);
        }
    }

    return convertLayers;
}

std::vector<ConvertFp32ToBf16Layer*> InsertConvertFp32ToBf16LayersAfter(Graph& graph, Layer& layer)
{
    const unsigned int numOutputSlots = layer.GetNumOutputSlots();

    std::vector<ConvertFp32ToBf16Layer*> convertLayers;
    convertLayers.reserve(numOutputSlots);

    // Update BF16 output slots to FP32 on the current layer
    ChangeOutputBf16ToFp32(layer);

    // Insert a ConvertFp32ToBf16Layer after each FP32 output slot
    for (unsigned int slotIndex = 0u; slotIndex < numOutputSlots; ++slotIndex)
    {
        OutputSlot& outputSlot = layer.GetOutputSlot(slotIndex);
        if (outputSlot.GetTensorInfo().GetDataType() == DataType::Float32)
        {
            const std::string name =
                std::string("convert_fp32_to_bf16-" + std::to_string(slotIndex) + "-") + layer.GetName();
            ConvertFp32ToBf16Layer* convertLayer =
                graph.InsertNewLayer<ConvertFp32ToBf16Layer>(outputSlot, name.c_str());

            TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
            convertInfo.SetDataType(DataType::BFloat16);

            convertLayer->GetOutputSlot().SetTensorInfo(convertInfo);

            convertLayers.emplace_back(convertLayer);
        }
    }

    return convertLayers;
}

std::vector<ConvertFp32ToFp16Layer*> InsertConvertFp32ToFp16LayersAfter(Graph& graph, Layer& layer)
{
    const unsigned int numOutputSlots = layer.GetNumOutputSlots();

    std::vector<ConvertFp32ToFp16Layer*> convertLayers;
    convertLayers.reserve(numOutputSlots);

    // Update FP16 output slots to FP32 on the current layer
    ChangeOutputFp16ToFp32(layer);

    // Insert a ConvertFp32ToFp16Layer after each FP32 output slot
    for (unsigned int slotIndex = 0u; slotIndex < numOutputSlots; ++slotIndex)
    {
        OutputSlot& outputSlot = layer.GetOutputSlot(slotIndex);
        if (outputSlot.GetTensorInfo().GetDataType() == DataType::Float32)
        {
            const std::string name =
                std::string("convert_fp32_to_fp16-" + std::to_string(slotIndex) + "-") + layer.GetName();
            ConvertFp32ToFp16Layer* convertLayer =
                graph.InsertNewLayer<ConvertFp32ToFp16Layer>(outputSlot, name.c_str());

            TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
            convertInfo.SetDataType(DataType::Float16);

            convertLayer->GetOutputSlot().SetTensorInfo(convertInfo);

            convertLayers.emplace_back(convertLayer);
        }
    }

    return convertLayers;
}
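
// Illustrative usage (added sketch, not part of the original file): the
// "Before"/"After" helpers are designed to be paired. To run a layer whose
// surrounding tensors are FP16 on an FP32-only code path, a pass would wrap
// it on both sides; `graph` and `layer` are assumed to already exist.
//
//     // FP16 -> FP32 conversions feeding the layer's inputs:
//     InsertConvertFp16ToFp32LayersBefore(graph, layer, true);
//     // Retype the layer's FP16 outputs to FP32, then convert back to FP16:
//     InsertConvertFp32ToFp16LayersAfter(graph, layer);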

std::vector<DebugLayer*> InsertDebugLayerAfter(Graph& graph, Layer& layer)
{
    std::vector<DebugLayer*> debugLayers;
    debugLayers.reserve(layer.GetNumOutputSlots());

    // Connect a DebugLayer to each output slot of the layer
    uint32_t outputSlotIdx = 0;
    for (auto outputSlot = layer.BeginOutputSlots(); outputSlot != layer.EndOutputSlots(); ++outputSlot)
    {
        const std::string debugName = std::string("DebugLayerAfter") + layer.GetNameStr() + "_" +
                                      std::to_string(outputSlotIdx);

        DebugLayer* debugLayer =
            graph.InsertNewLayer<DebugLayer>(*outputSlot, debugName.c_str());

        // Sets output tensor info for the debug layer.
        ARMNN_ASSERT(debugLayer->GetInputSlot(0).GetConnectedOutputSlot() == &(*outputSlot));
        TensorInfo debugInfo = debugLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();

        debugLayer->GetOutputSlot().SetTensorInfo(debugInfo);

        // NOTE: It is OK to do this because DebugLayer is only supported on CpuRef
        debugLayer->SetBackendId(Compute::CpuRef);

        debugLayers.emplace_back(debugLayer);

        ++outputSlotIdx;
    }

    return debugLayers;
}
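
// Illustrative usage (added sketch, not part of the original file): debug
// layers are normally inserted for a whole network by enabling the optimizer's
// debug option rather than by calling the helper directly. Assuming a built
// `network` and `runtime`, that looks roughly like:
//
//     armnn::OptimizerOptions options;
//     options.m_Debug = true; // requests a DebugLayer after each layer's outputs
//     armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(
//         *network, {armnn::Compute::CpuRef}, runtime->GetDeviceSpec(), options);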

} // namespace armnn