ArmNN 21.02: DynamicQuantizationStrategy.cpp (source listing)
//
// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "DynamicQuantizationStrategy.hpp"
#include "NetworkUtils.hpp"

#include <armnn/Descriptors.hpp>
#include <armnn/utility/IgnoreUnused.hpp>
#include <armnn/utility/PolymorphicDowncast.hpp>
#include <armnn/Types.hpp>

#include <limits>

namespace armnn
{
DynamicQuantizationStrategy::DynamicQuantizationStrategy(RangeTracker& rangeTracker, Graph& graph)
    : m_RangeTracker(rangeTracker),
      m_Graph(graph)
{}

void DynamicQuantizationStrategy::SetRange(const IConnectableLayer* layer, unsigned int outputIdx, float min, float max)
{
    m_RangeTracker.SetRange(layer, outputIdx, min, max);
}

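// Copies the quantization range already recorded for each parent output slot
// onto the corresponding input of the given layer (used for layers that pass
// data through without changing its dynamic range).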
void DynamicQuantizationStrategy::ForwardParentParameters(const IConnectableLayer* layer)
{
    for (unsigned int i = 0; i < layer->GetNumInputSlots(); ++i)
    {
        const IOutputSlot* outputSlot = layer->GetInputSlot(i).GetConnection();
        LayerGuid previousLayerId = outputSlot->GetOwningLayerGuid();
        unsigned int ownerIndex = outputSlot->CalculateIndexOnOwner();
        const auto parentRange = m_RangeTracker.GetRange(previousLayerId, ownerIndex);
        SetRange(layer, i, parentRange.first, parentRange.second);
    }
}

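// Layers whose real output range must be measured at runtime are collected in
// m_LayersToCalibrate; layers that simply inherit their parents' ranges go
// into m_LayersNotToCalibrate.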
void DynamicQuantizationStrategy::AddToCalibratedLayers(const IConnectableLayer* layer)
{
    m_LayersToCalibrate.push_back(layer);
}

void DynamicQuantizationStrategy::AddToNonCalibratedLayers(const IConnectableLayer* layer)
{
    m_LayersNotToCalibrate.push_back(layer);
}

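// Wraps every layer marked for calibration with a DebugLayer so that actual
// min/max values can be observed during an inference pass.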
void DynamicQuantizationStrategy::FinishStrategy()
{
    for (const IConnectableLayer* layer : m_LayersToCalibrate)
    {
        std::vector<DebugLayer*> newDebugLayers = InsertDebugLayerAfter(
            m_Graph, *PolymorphicDowncast<Layer*>(const_cast<IConnectableLayer*>(layer)));
        // record them so we can take them out again efficiently afterward
        m_DebugLayers.insert(std::end(m_DebugLayers), std::begin(newDebugLayers), std::end(newDebugLayers));
    }
}

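// Detaches each DebugLayer and reconnects its producer directly to every
// consumer, restoring the original topology once calibration is done.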
void DynamicQuantizationStrategy::RemoveDebugLayers()
{
    for (DebugLayer* debugLayer : m_DebugLayers)
    {
        OutputSlot& precedingOutputSlot = *debugLayer->GetInputSlot(0).GetConnectedOutputSlot();
        precedingOutputSlot.Disconnect(debugLayer->GetInputSlot(0));

        for (InputSlot* succeedingInputSlot : debugLayer->GetOutputSlot(0).GetConnections())
        {
            debugLayer->GetOutputSlot(0).Disconnect(*succeedingInputSlot);
            precedingOutputSlot.Connect(*succeedingInputSlot);
        }
        m_Graph.EraseLayer(debugLayer);
    }
    m_DebugLayers.clear();
}

void DynamicQuantizationStrategy::VisitNonCalibratedLayers()
{
    RemoveDebugLayers();
    for (const IConnectableLayer* layer : m_LayersNotToCalibrate)
    {
        ForwardParentParameters(layer);
    }
}

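// Called once per layer during graph traversal: depending on the layer type,
// a heuristic static range is assigned, the layer is queued for runtime
// calibration, or the parents' ranges are reused unchanged.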
void DynamicQuantizationStrategy::ExecuteStrategy(const armnn::IConnectableLayer* layer,
                                                  const BaseDescriptor& descriptor,
                                                  const std::vector<armnn::ConstTensor>& constants,
                                                  const char* name,
                                                  const armnn::LayerBindingId id)
{
    IgnoreUnused(name);
    IgnoreUnused(id);
    IgnoreUnused(descriptor);

    switch (layer->GetType())
    {
        case armnn::LayerType::Activation:
        {
            const ActivationDescriptor& activationDescriptor = static_cast<const ActivationDescriptor&>(descriptor);
            switch (activationDescriptor.m_Function)
            {
                // Range is 0, 15 for Abs, Linear, ReLu and Soft ReLu
                case ActivationFunction::Abs:
                case ActivationFunction::Linear:
                case ActivationFunction::ReLu:
                case ActivationFunction::SoftReLu:
                    SetRange(layer, 0, 0.f, 15.f);
                    break;
                case ActivationFunction::BoundedReLu:
                    SetRange(layer, 0, 0.f, activationDescriptor.m_A);
                    break;
                case ActivationFunction::TanH:
                    SetRange(layer, 0, -1.f, 1.f);
                    break;
                case ActivationFunction::LeakyReLu:
                    SetRange(layer, 0, -5.f, 15.f);
                    break;
                default:
                    SetRange(layer, 0, -15.f, 15.f);
                    break;
            }
            break;
        }
        case armnn::LayerType::Addition:
        {
            SetRange(layer, 0, -20.f, 20.f);
            AddToCalibratedLayers(layer);
            break;
        }
        case armnn::LayerType::ArgMinMax:
        {
            AddToNonCalibratedLayers(layer);
            break;
        }
        case armnn::LayerType::BatchNormalization:
        {
            SetRange(layer, 0, -15.0f, 15.0f);
            AddToCalibratedLayers(layer);
            break;
        }
        case armnn::LayerType::Normalization:
        {
            SetRange(layer, 0, -15.0f, 15.0f);
            AddToCalibratedLayers(layer);
            break;
        }
        case armnn::LayerType::Convolution2d:
        {
            SetRange(layer, 0, -15.0f, 15.0f);
            AddToCalibratedLayers(layer);
            break;
        }
        case armnn::LayerType::DepthwiseConvolution2d:
        {
            SetRange(layer, 0, -15.0f, 15.0f);
            AddToCalibratedLayers(layer);
            break;
        }
        case armnn::LayerType::FullyConnected:
        {
            SetRange(layer, 0, -15.0f, 15.0f);
            AddToCalibratedLayers(layer);
            break;
        }
        case armnn::LayerType::Permute:
        {
            AddToNonCalibratedLayers(layer);
            break;
        }
        case armnn::LayerType::SpaceToBatchNd:
        {
            AddToNonCalibratedLayers(layer);
            break;
        }
        case armnn::LayerType::Pooling2d:
        {
            AddToNonCalibratedLayers(layer);
            break;
        }
        case armnn::LayerType::Softmax:
        {
            SetRange(layer, 0, 0.f, 1.f);
            AddToCalibratedLayers(layer);
            break;
        }
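        // Constant layers carry their data in the network itself, so an exact
        // range can be computed up front rather than calibrated at runtime.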
        case armnn::LayerType::Constant:
        {
            if (constants[0].GetDataType() != DataType::Float32)
            {
                throw InvalidArgumentException("Quantization is supported only for FP32 tensors");
            }

            // Work out the range based on the input constants
            unsigned int inputNumElements = constants[0].GetNumElements();
            const float* inputData = reinterpret_cast<const float*>(constants[0].GetMemoryArea());

            float min = std::numeric_limits<float>::max();
            float max = std::numeric_limits<float>::lowest();

            for (unsigned int i = 0; i < inputNumElements; i++)
            {
                const float inputValue = inputData[i];

                min = std::min(min, inputValue);
                max = std::max(max, inputValue);
            }
            SetRange(layer, 0, min, max);
            break;
        }
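        // A concatenated output must cover every input, so take the union of
        // the ranges already recorded for the parent output slots.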
        case armnn::LayerType::Concat:
        {
            float min = std::numeric_limits<float>::max();
            float max = std::numeric_limits<float>::lowest();
            for (unsigned int i = 0; i < layer->GetNumInputSlots(); ++i)
            {
                const IOutputSlot* outputSlot = layer->GetInputSlot(i).GetConnection();
                LayerGuid layerId = outputSlot->GetOwningLayerGuid();
                unsigned int slotIndex = outputSlot->CalculateIndexOnOwner();
                RangeTracker::MinMaxRange range = m_RangeTracker.GetRange(layerId, slotIndex);
                min = std::min(min, range.first);
                max = std::max(max, range.second);
            }
            SetRange(layer, 0, min, max);
            AddToCalibratedLayers(layer);
            break;
        }
        case armnn::LayerType::Reshape:
        {
            AddToNonCalibratedLayers(layer);
            break;
        }
        case armnn::LayerType::Splitter:
        {
            AddToNonCalibratedLayers(layer);
            break;
        }
        case armnn::LayerType::Resize:
        {
            AddToNonCalibratedLayers(layer);
            break;
        }
        case armnn::LayerType::StridedSlice:
        {
            AddToNonCalibratedLayers(layer);
            break;
        }
        case armnn::LayerType::BatchToSpaceNd:
        {
            AddToNonCalibratedLayers(layer);
            break;
        }
        case armnn::LayerType::Input:
        {
            SetRange(layer, 0, -0.0f, 0.0f);
            AddToCalibratedLayers(layer);
            break;
        }
        case armnn::LayerType::Output:
        {
            AddToNonCalibratedLayers(layer);
            m_OutputLayers.push_back(id);
            break;
        }
        default:
        {}
    }
}

const std::vector<LayerBindingId>& DynamicQuantizationStrategy::GetOutputLayers()
{
    return m_OutputLayers;
}

} // namespace armnn
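How the strategy is driven, as a minimal sketch (not part of the file above): the
sequence mirrors how the quantizer code in this release applies a strategy, assuming
the ApplyStrategyToLayers helper from NetworkQuantizerUtils.hpp. The CalibrateRanges
wrapper, its arguments, and the elided inference step are illustrative assumptions,
not the definitive driver code.

#include "DynamicQuantizationStrategy.hpp"
#include "NetworkQuantizerUtils.hpp"

// Hypothetical wrapper: callers own the Graph and RangeTracker.
void CalibrateRanges(armnn::Graph& graph, armnn::RangeTracker& ranges)
{
    armnn::DynamicQuantizationStrategy strategy(ranges, graph);

    // 1. Visit every layer once: static ranges are recorded immediately and
    //    layers needing measurement are queued for calibration.
    armnn::ApplyStrategyToLayers(graph.TopologicalSort(), strategy);

    // 2. Instrument the graph: a DebugLayer is inserted after each layer
    //    queued for calibration.
    strategy.FinishStrategy();

    // 3. (Run one or more inferences here so real min/max values are seen.)

    // 4. Remove the DebugLayers and forward parent ranges onto the
    //    pass-through layers.
    strategy.VisitNonCalibratedLayers();
}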