// ArmNN 20.05 -- source listing of DynamicQuantizationVisitor.cpp
// (recovered from the generated Doxygen documentation page).
//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "DynamicQuantizationVisitor.hpp"
#include "NetworkUtils.hpp"

#include <armnn/Descriptors.hpp>
#include <armnn/utility/IgnoreUnused.hpp>
#include <armnn/utility/PolymorphicDowncast.hpp>
#include <armnn/Types.hpp>

#include <limits>

16 namespace armnn
17 {
18 
20  : m_RangeTracker(rangeTracker),
21  m_Graph(graph)
22 {}
23 
24 void DynamicQuantizationVisitor::SetRange(const IConnectableLayer* layer, unsigned int outputIdx, float min, float max)
25 {
26  m_RangeTracker.SetRange(layer, outputIdx, min, max);
27 }
28 
29 void DynamicQuantizationVisitor::ForwardParentParameters(const IConnectableLayer* layer)
30 {
31  for (unsigned int i = 0; i < layer->GetNumInputSlots(); ++i)
32  {
33  const IOutputSlot *outputSlot = layer->GetInputSlot(i).GetConnection();
34  LayerGuid previousLayerId = outputSlot->GetOwningLayerGuid();
35  unsigned int ownerIndex = outputSlot->CalculateIndexOnOwner();
36  const auto parentRange = m_RangeTracker.GetRange(previousLayerId, ownerIndex);
37  SetRange(layer, i, parentRange.first, parentRange.second);
38  }
39 }
40 
41 void DynamicQuantizationVisitor::AddToCalibratedLayers(const IConnectableLayer* layer)
42 {
43  m_LayersToCalibrate.push_back(layer);
44 }
45 
46 void DynamicQuantizationVisitor::AddToNonCalibratedLayers(const IConnectableLayer* layer)
47 {
48  m_LayersNotToCalibrate.push_back(layer);
49 }
50 
52 {
53  for (const IConnectableLayer* layer : m_LayersToCalibrate)
54  {
55  std::vector<DebugLayer*> newDebugLayers = InsertDebugLayerAfter(
56  m_Graph, *PolymorphicDowncast<Layer*>(const_cast<IConnectableLayer*>(layer)));
57  // record them so we can take them out again efficiently afterward
58  m_DebugLayers.insert(std::end(m_DebugLayers), std::begin(newDebugLayers), std::end(newDebugLayers));
59  }
60 }
61 
62 void DynamicQuantizationVisitor::RemoveDebugLayers()
63 {
64  for (DebugLayer* debugLayer : m_DebugLayers)
65  {
66  OutputSlot& proceedingOutputSlot = *debugLayer->GetInputSlot(0).GetConnectedOutputSlot();
67  proceedingOutputSlot.Disconnect(debugLayer->GetInputSlot(0));
68 
69  for (InputSlot* succeedingInputSlot : debugLayer->GetOutputSlot(0).GetConnections())
70  {
71  debugLayer->GetOutputSlot(0).Disconnect(*succeedingInputSlot);
72  proceedingOutputSlot.Connect(*succeedingInputSlot);
73  }
74  m_Graph.EraseLayer(debugLayer);
75  }
76  m_DebugLayers.clear();
77 }
78 
80  RemoveDebugLayers();
81  for (const IConnectableLayer* layer : m_LayersNotToCalibrate)
82  {
83  ForwardParentParameters(layer);
84  }
85 }
86 
88  const char* name)
89 {
90  IgnoreUnused(name);
91  SetRange(layer, 0, -20.f, 20.f);
92  AddToCalibratedLayers(layer);
93 }
94 
96  const char* name)
97 {
98  IgnoreUnused(name);
99  SetRange(layer, 0, -20.f, 20.f);
100  AddToCalibratedLayers(layer);
101 }
102 
104  const ArgMinMaxDescriptor& desc,
105  const char* name)
106 {
107  IgnoreUnused(name);
108  IgnoreUnused(desc);
109  SetRange(layer, 0, -20.f, 20.f);
110  AddToCalibratedLayers(layer);
111 }
112 
114  const BatchNormalizationDescriptor& desc,
115  const ConstTensor& mean,
116  const ConstTensor& variance,
117  const ConstTensor& beta,
118  const ConstTensor& gamma,
119  const char* name)
120 {
121  IgnoreUnused(desc);
122  IgnoreUnused(mean);
123  IgnoreUnused(variance);
124  IgnoreUnused(beta);
125  IgnoreUnused(gamma);
126  IgnoreUnused(name);
127  SetRange(layer, 0, -15.0f, 15.0f);
128  AddToCalibratedLayers(layer);
129 }
130 
132  const NormalizationDescriptor& desc,
133  const char* name)
134 {
135  IgnoreUnused(desc);
136  IgnoreUnused(name);
137  SetRange(layer, 0, -15.0f, 15.0f);
138  AddToCalibratedLayers(layer);
139 }
140 
142  const Convolution2dDescriptor& convolution2dDescriptor,
143  const ConstTensor& weights,
144  const Optional<ConstTensor>& biases,
145  const char* name)
146 {
147  IgnoreUnused(convolution2dDescriptor);
148  IgnoreUnused(weights);
149  IgnoreUnused(biases);
150  IgnoreUnused(name);
151  SetRange(layer, 0, -15.0f, 15.0f);
152  AddToCalibratedLayers(layer);
153 }
154 
157  const ConstTensor& weights,
158  const Optional<ConstTensor>& biases,
159  const char* name)
160 {
161  IgnoreUnused(desc);
162  IgnoreUnused(weights);
163  IgnoreUnused(biases);
164  IgnoreUnused(name);
165  SetRange(layer, 0, -15.0f, 15.0f);
166  AddToCalibratedLayers(layer);
167 }
168 
170  const ActivationDescriptor& activationDescriptor,
171  const char* name)
172 {
173  IgnoreUnused(name, activationDescriptor);
174  switch (activationDescriptor.m_Function)
175  {
176  // Range is 0, 15 for Abs, Linear, ReLu and Soft ReLu
181  SetRange(layer, 0, 0.f, 15.f);
182  break;
184  SetRange(layer, 0, 0.f, activationDescriptor.m_A);
185  break;
187  SetRange(layer, 0, -1.f, 1.f);
188  break;
190  SetRange(layer, 0, -5.f, 15.f);
191  break;
192  default:
193  SetRange(layer, 0, -15.f, 15.f);
194  break;
195  }
196  AddToCalibratedLayers(layer);
197 }
198 
200  const FullyConnectedDescriptor& desc,
201  const ConstTensor& weights,
202  const Optional<ConstTensor>& biases,
203  const char *name)
204 {
205  IgnoreUnused(desc);
206  IgnoreUnused(weights);
207  IgnoreUnused(biases);
208  IgnoreUnused(name);
209  SetRange(layer, 0, -15.0f, 15.0f);
210  AddToCalibratedLayers(layer);
211 }
212 
214  const PermuteDescriptor& permuteDescriptor,
215  const char* name)
216 {
217  IgnoreUnused(permuteDescriptor);
218  IgnoreUnused(name);
219  AddToNonCalibratedLayers(layer);
220 }
221 
223  const SpaceToBatchNdDescriptor& spaceToBatchNdDescriptor,
224  const char* name)
225 {
226  IgnoreUnused(spaceToBatchNdDescriptor);
227  IgnoreUnused(name);
228  AddToNonCalibratedLayers(layer);
229 }
230 
232  const Pooling2dDescriptor& pooling2dDescriptor,
233  const char* name)
234 {
235  IgnoreUnused(pooling2dDescriptor);
236  IgnoreUnused(name);
237  AddToNonCalibratedLayers(layer);
238 }
239 
241  const SoftmaxDescriptor& softmaxDescriptor,
242  const char* name)
243 {
244  IgnoreUnused(softmaxDescriptor);
245  IgnoreUnused(name);
246  SetRange(layer, 0, 0.f, 1.f);
247  AddToCalibratedLayers(layer);
248 }
249 
251  const ConstTensor& input,
252  const char* name)
253 {
254  IgnoreUnused(name);
255 
256  if (input.GetDataType() != DataType::Float32)
257  {
258  throw InvalidArgumentException("Quantization is supported only for FP32 tensors");
259  }
260 
261  // Work out the range based on the input constants
262  unsigned int inputNumElements = input.GetNumElements();
263  const float* inputData = reinterpret_cast<const float*>(input.GetMemoryArea());
264 
265  float min = std::numeric_limits<float>::max();
266  float max = std::numeric_limits<float>::lowest();
267 
268  for (unsigned int i = 0; i < inputNumElements; i++)
269  {
270  const float inputValue = inputData[i];
271 
272  min = std::min(min, inputValue);
273  max = std::max(max, inputValue);
274  }
275  SetRange(layer, 0, min, max);
276 }
277 
279  const ConcatDescriptor& originsDescriptor,
280  const char* name)
281 {
282  IgnoreUnused(name);
283  IgnoreUnused(originsDescriptor);
284  float min = std::numeric_limits<float>::max();
285  float max = std::numeric_limits<float>::lowest();
286  for (unsigned int i = 0; i < layer->GetNumInputSlots(); ++i)
287  {
288  const IOutputSlot* outputSlot = layer->GetInputSlot(i).GetConnection();
289  LayerGuid layerId = outputSlot->GetOwningLayerGuid();
290  unsigned int slotIndex = outputSlot->CalculateIndexOnOwner();
291  RangeTracker::MinMaxRange range = m_RangeTracker.GetRange(layerId, slotIndex);
292  min = std::min(min, range.first);
293  max = std::max(max, range.second);
294  }
295  SetRange(layer, 0, min, max);
296  AddToCalibratedLayers(layer);
297 }
298 
300  const ReshapeDescriptor& reshapeDescriptor,
301  const char* name)
302 {
303  IgnoreUnused(reshapeDescriptor);
304  IgnoreUnused(name);
305  AddToNonCalibratedLayers(layer);
306 }
307 
309  const SplitterDescriptor& splitterDescriptor,
310  const char* name)
311 {
312  IgnoreUnused(splitterDescriptor);
313  IgnoreUnused(name);
314  AddToNonCalibratedLayers(layer);
315 }
316 
318  const ResizeBilinearDescriptor& resizeDesc,
319  const char* name)
320 {
321  IgnoreUnused(resizeDesc);
322  IgnoreUnused(name);
323  AddToNonCalibratedLayers(layer);
324 }
325 
327  const StridedSliceDescriptor& stridedSliceDescriptor,
328  const char* name)
329 {
330  IgnoreUnused(stridedSliceDescriptor);
331  IgnoreUnused(name);
332  AddToNonCalibratedLayers(layer);
333 }
334 
336  const BatchToSpaceNdDescriptor& batchToSpaceNdDescriptor,
337  const char* name)
338 {
339  IgnoreUnused(batchToSpaceNdDescriptor);
340  IgnoreUnused(name);
341  AddToNonCalibratedLayers(layer);
342 }
343 
345 {
346  IgnoreUnused(id);
347  IgnoreUnused(name);
348  SetRange(layer, 0, -0.0f, 0.0f);
349  AddToCalibratedLayers(layer);
350 }
351 
353 {
354  IgnoreUnused(id);
355  IgnoreUnused(name);
356  AddToNonCalibratedLayers(layer);
357  m_OutputLayers.push_back(id);
358 }
359 
360 const std::vector<LayerBindingId>& DynamicQuantizationVisitor::GetOutputLayers()
361 {
362  return m_OutputLayers;
363 }
364 
365 } //namespace armnn
/*
 * NOTE(review): everything below is Doxygen cross-reference residue that the
 * documentation scraper appended after the end of the translation unit. It is
 * wrapped in a comment so the file remains valid C++; the text is preserved
 * verbatim.
 *
 * void VisitReshapeLayer(const IConnectableLayer *layer, const ReshapeDescriptor &reshapeDescriptor, const char *name=nullptr) override
 * Function a reshape layer should call back to when its Accept(ILayerVisitor&) function is invoked...
 * void VisitBatchNormalizationLayer(const IConnectableLayer *layer, const BatchNormalizationDescriptor &desc, const ConstTensor &mean, const ConstTensor &variance, const ConstTensor &beta, const ConstTensor &gamma, const char *name=nullptr) override
 * Function that a batch normalization layer should call back to when its Accept(ILayerVisitor&) functio...
 * void VisitNormalizationLayer(const IConnectableLayer *layer, const NormalizationDescriptor &desc, const char *name=nullptr) override
 * Function that a normalization layer should call back to when its Accept(ILayerVisitor&) function is i...
 * void VisitPermuteLayer(const IConnectableLayer *layer, const PermuteDescriptor &permuteDescriptor, const char *name) override
 * Function that a permute layer should call back to when its Accept(ILayerVisitor&) function is invoked...
 * A ViewsDescriptor for the SplitterLayer.
 * Interface for a layer that is connectable to other layers via InputSlots and OutputSlots.
 * Definition: INetwork.hpp:61
 * virtual unsigned int GetNumInputSlots() const =0
 * Returns the number of connectable input slots.
 * void VisitConstantLayer(const IConnectableLayer *layer, const ConstTensor &input, const char *name=nullptr) override
 * Function a layer with no inputs and a single output, which always corresponds to the passed in consta...
 * A ReshapeDescriptor for the ReshapeLayer.
 * void VisitConcatLayer(const IConnectableLayer *layer, const ConcatDescriptor &originsDescriptor, const char *name=nullptr) override
 * Function that a concat layer should call back to when its Accept(ILayerVisitor&) function is invoked...
 * void VisitSpaceToBatchNdLayer(const IConnectableLayer *layer, const SpaceToBatchNdDescriptor &spaceToBatchNdDescriptor, const char *name=nullptr) override
 * Function a space to batch layer should call back to when its Accept(ILayerVisitor&) function is invok...
 * A Convolution2dDescriptor for the Convolution2dLayer.
 * int Connect(InputSlot &destination)
 * Definition: Layer.cpp:79
 * void EraseLayer(Iterator pos)
 * Deletes the layer at the specified position.
 * Definition: Graph.hpp:443
 * void VisitBatchToSpaceNdLayer(const IConnectableLayer *layer, const BatchToSpaceNdDescriptor &batchToSpaceNdDescriptor, const char *name=nullptr) override
 * Function that a batch to space ND layer should call back to when its Accept(ILayerVisitor&) function ...
 * unsigned int GetNumElements() const
 * Definition: Tensor.hpp:175
 * void VisitResizeBilinearLayer(const IConnectableLayer *layer, const ResizeBilinearDescriptor &resizeDesc, const char *name=nullptr) override
 * Function that a resize bilinear layer should call back to when its Accept(ILayerVisitor&) function is...
 * MemoryType GetMemoryArea() const
 * Definition: Tensor.hpp:177
 * Copyright (c) 2020 ARM Limited.
 * void IgnoreUnused(Ts &&...)
 * void VisitConvolution2dLayer(const IConnectableLayer *layer, const Convolution2dDescriptor &convolution2dDescriptor, const ConstTensor &weights, const Optional< ConstTensor > &biases, const char *name=nullptr) override
 * Function that a 2D convolution layer should call back to when its Accept(ILayerVisitor&) function is ...
 * void VisitSoftmaxLayer(const IConnectableLayer *layer, const SoftmaxDescriptor &softmaxDescriptor, const char *name=nullptr) override
 * Function that a softmax layer should call back to when its Accept(ILayerVisitor&) function is invoked...
 * A BatchToSpaceNdDescriptor for the BatchToSpaceNdLayer.
 * void VisitFullyConnectedLayer(const IConnectableLayer *layer, const FullyConnectedDescriptor &desc, const ConstTensor &weights, const Optional< ConstTensor > &biases, const char *name) override
 * Function that a fully connected layer should call back to when its Accept(ILayerVisitor&) function is...
 * std::pair< float, float > MinMaxRange
 * void Disconnect(InputSlot &slot)
 * Definition: Layer.cpp:87
 * int LayerBindingId
 * Type of identifiers for bindable layers (inputs, outputs).
 * Definition: Types.hpp:171
 * MinMaxRange GetRange(LayerGuid guid, unsigned int idx) const
 * Retrieve the Range for a particular output slot on a particular layer.
 * void VisitDepthwiseConvolution2dLayer(const IConnectableLayer *layer, const DepthwiseConvolution2dDescriptor &desc, const ConstTensor &weights, const Optional< ConstTensor > &biases, const char *name=nullptr) override
 * Function that a 2D depthwise convolution layer with biases should call back to when its Accept(ILayer...
 * DynamicQuantizationVisitor(RangeTracker &rangeTracker, Graph &graph)
 * void SetRange(const IConnectableLayer *layer, unsigned int outputIdx, float min, float max)
 * Set the range for an output slot on a layer.
 * void VisitActivationLayer(const IConnectableLayer *layer, const ActivationDescriptor &activationDescriptor, const char *name=nullptr) override
 * Function that an activation layer should call back to when its Accept(ILayerVisitor&) function is inv...
 * void VisitAdditionLayer(const IConnectableLayer *layer, const char *name=nullptr) override
 * Function that an addition layer should call back to when its Accept(ILayerVisitor&) function is invok...
 * An output connection slot for a layer.
 * Definition: INetwork.hpp:37
 * An ArgMinMaxDescriptor for ArgMinMaxLayer.
 * Definition: Descriptors.hpp:51
 * An OriginsDescriptor for the ConcatLayer.
 * A FullyConnectedDescriptor for the FullyConnectedLayer.
 * A tensor defined by a TensorInfo (shape and data type) and an immutable backing store.
 * Definition: Tensor.hpp:199
 * This layer visualizes the data flowing through the network.
 * Definition: DebugLayer.hpp:13
 * virtual unsigned int CalculateIndexOnOwner() const =0
 * void VisitArgMinMaxLayer(const IConnectableLayer *layer, const ArgMinMaxDescriptor &desc, const char *name=nullptr) override
 * Function that an arg min max layer should call back to when its Accept(ILayerVisitor&) function is in...
 * An ActivationDescriptor for the ActivationLayer.
 * Definition: Descriptors.hpp:20
 * min(a, max(b, input)) ReLu1 & ReLu6.
 * A SpaceToBatchNdDescriptor for the SpaceToBatchNdLayer.
 * float m_A
 * Alpha upper bound value used by the activation functions. (BoundedReLu, Linear, TanH, Elu).
 * Definition: Descriptors.hpp:45
 * void VisitAbsLayer(const IConnectableLayer *layer, const char *name=nullptr) override
 * Functions to set the Range on a per-layer-type basis.
 * virtual const IInputSlot & GetInputSlot(unsigned int index) const =0
 * Get a const input slot handle by slot index.
 * virtual const IOutputSlot * GetConnection() const =0
 * void VisitInputLayer(const IConnectableLayer *layer, LayerBindingId id, const char *name=nullptr) override
 * Function that an InputLayer should call back to when its Accept(ILayerVisitor&) function is invoked...
 * A StridedSliceDescriptor for the StridedSliceLayer.
 * DataType GetDataType() const
 * Definition: Tensor.hpp:172
 * virtual LayerGuid GetOwningLayerGuid() const =0
 * A Pooling2dDescriptor for the Pooling2dLayer.
 * void VisitPooling2dLayer(const IConnectableLayer *layer, const Pooling2dDescriptor &pooling2dDescriptor, const char *name) override
 * Function that a pooling layer should call back to when its Accept(ILayerVisitor&) function is invoked...
 * A NormalizationDescriptor for the NormalizationLayer.
 * void VisitOutputLayer(const IConnectableLayer *layer, LayerBindingId id, const char *name=nullptr) override
 * Function an output layer should call back to when its Accept(ILayerVisitor&) function is invoked...
 * std::vector< DebugLayer * > InsertDebugLayerAfter(Graph &graph, Layer &layer)
 * A ResizeBilinearDescriptor for the ResizeBilinearLayer.
 * void VisitSplitterLayer(const IConnectableLayer *layer, const SplitterDescriptor &splitterDescriptor, const char *name=nullptr) override
 * Function that a splitter layer should call back to when its Accept(ILayerVisitor&) function is invoke...
 * A SoftmaxDescriptor for the SoftmaxLayer.
 * void VisitStridedSliceLayer(const IConnectableLayer *layer, const StridedSliceDescriptor &stridedSliceDescriptor, const char *name=nullptr) override
 * Function a strided slice layer should call back to when its Accept(ILayerVisitor&) function is invoke...
 * const std::vector< armnn::LayerBindingId > & GetOutputLayers()
 * ActivationFunction m_Function
 * The activation function to use (Sigmoid, TanH, Linear, ReLu, BoundedReLu, SoftReLu, LeakyReLu, Abs, Sqrt, Square, Elu).
 * Definition: Descriptors.hpp:43
 * A DepthwiseConvolution2dDescriptor for the DepthwiseConvolution2dLayer.
 * A BatchNormalizationDescriptor for the BatchNormalizationLayer.
 * A PermuteDescriptor for the PermuteLayer.
 */