ArmNN 20.02
DynamicQuantizationVisitor.cpp
//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "DynamicQuantizationVisitor.hpp"
#include "NetworkUtils.hpp"

#include <armnn/utility/IgnoreUnused.hpp>
#include <armnn/Descriptors.hpp>
#include <armnn/Types.hpp>

#include <limits>

namespace armnn
{

DynamicQuantizationVisitor::DynamicQuantizationVisitor(RangeTracker& rangeTracker, Graph& graph)
    : m_RangeTracker(rangeTracker),
      m_Graph(graph)
{}

void DynamicQuantizationVisitor::SetRange(const IConnectableLayer* layer, unsigned int outputIdx, float min, float max)
{
    m_RangeTracker.SetRange(layer, outputIdx, min, max);
}

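// For each input slot, look up the range already recorded for the connected
// parent output and re-register it against this layer, so that layers which
// merely pass data through inherit their parents' min/max values.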
void DynamicQuantizationVisitor::ForwardParentParameters(const IConnectableLayer* layer)
{
    for (unsigned int i = 0; i < layer->GetNumInputSlots(); ++i)
    {
        const IOutputSlot *outputSlot = layer->GetInputSlot(i).GetConnection();
        LayerGuid previousLayerId = outputSlot->GetOwningLayerGuid();
        unsigned int ownerIndex = outputSlot->CalculateIndexOnOwner();
        const auto parentRange = m_RangeTracker.GetRange(previousLayerId, ownerIndex);
        SetRange(layer, i, parentRange.first, parentRange.second);
    }
}

void DynamicQuantizationVisitor::AddToCalibratedLayers(const IConnectableLayer* layer)
{
    m_LayersToCalibrate.push_back(layer);
}

void DynamicQuantizationVisitor::AddToNonCalibratedLayers(const IConnectableLayer* layer)
{
    m_LayersNotToCalibrate.push_back(layer);
}

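// Called once the whole graph has been visited: insert a DebugLayer behind
// every layer marked for calibration, so that a subsequent inference run can
// observe the real min/max values flowing out of those layers.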
void DynamicQuantizationVisitor::FinishVisit()
{
    for (const IConnectableLayer* layer : m_LayersToCalibrate)
    {
        std::vector<DebugLayer*> newDebugLayers = InsertDebugLayerAfter(
            m_Graph, *boost::polymorphic_downcast<Layer*>(const_cast<IConnectableLayer*>(layer)));
        // record them so we can take them out again efficiently afterward
        m_DebugLayers.insert(std::end(m_DebugLayers), std::begin(newDebugLayers), std::end(newDebugLayers));
    }
}

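// Splice every previously inserted DebugLayer back out of the graph,
// reconnecting each consumer directly to the original producing output slot.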
void DynamicQuantizationVisitor::RemoveDebugLayers()
{
    for (DebugLayer* debugLayer : m_DebugLayers)
    {
        OutputSlot& proceedingOutputSlot = *debugLayer->GetInputSlot(0).GetConnectedOutputSlot();
        proceedingOutputSlot.Disconnect(debugLayer->GetInputSlot(0));

        for (InputSlot* succeedingInputSlot : debugLayer->GetOutputSlot(0).GetConnections())
        {
            debugLayer->GetOutputSlot(0).Disconnect(*succeedingInputSlot);
            proceedingOutputSlot.Connect(*succeedingInputSlot);
        }
        m_Graph.EraseLayer(debugLayer);
    }
    m_DebugLayers.clear();
}

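// Once calibration data has been gathered, drop the DebugLayers and give each
// non-calibrated layer the range of the output(s) feeding it.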
void DynamicQuantizationVisitor::VisitNonCalibratedLayers() {
    RemoveDebugLayers();
    for (const IConnectableLayer* layer : m_LayersNotToCalibrate)
    {
        ForwardParentParameters(layer);
    }
}

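// The Visit*Layer overloads below seed each calibrated layer with a fixed,
// heuristic default range; calibration later replaces these defaults with
// observed values.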
void DynamicQuantizationVisitor::VisitAdditionLayer(const IConnectableLayer* layer, const char* name)
{
    IgnoreUnused(name);
    SetRange(layer, 0, -20.f, 20.f);
    AddToCalibratedLayers(layer);
}

void DynamicQuantizationVisitor::VisitBatchNormalizationLayer(const IConnectableLayer* layer,
                                                              const BatchNormalizationDescriptor& desc,
                                                              const ConstTensor& mean,
                                                              const ConstTensor& variance,
                                                              const ConstTensor& beta,
                                                              const ConstTensor& gamma,
                                                              const char* name)
{
    IgnoreUnused(desc);
    IgnoreUnused(mean);
    IgnoreUnused(variance);
    IgnoreUnused(beta);
    IgnoreUnused(gamma);
    IgnoreUnused(name);
    SetRange(layer, 0, -15.0f, 15.0f);
    AddToCalibratedLayers(layer);
}

void DynamicQuantizationVisitor::VisitConvolution2dLayer(const IConnectableLayer* layer,
                                                         const Convolution2dDescriptor& convolution2dDescriptor,
                                                         const ConstTensor& weights,
                                                         const Optional<ConstTensor>& biases,
                                                         const char* name)
{
    IgnoreUnused(convolution2dDescriptor);
    IgnoreUnused(weights);
    IgnoreUnused(biases);
    IgnoreUnused(name);
    SetRange(layer, 0, -15.0f, 15.0f);
    AddToCalibratedLayers(layer);
}

void DynamicQuantizationVisitor::VisitDepthwiseConvolution2dLayer(const IConnectableLayer* layer,
                                                                  const DepthwiseConvolution2dDescriptor& desc,
                                                                  const ConstTensor& weights,
                                                                  const Optional<ConstTensor>& biases,
                                                                  const char* name)
{
    IgnoreUnused(desc);
    IgnoreUnused(weights);
    IgnoreUnused(biases);
    IgnoreUnused(name);
    SetRange(layer, 0, -15.0f, 15.0f);
    AddToCalibratedLayers(layer);
}

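// For activations the output range follows from the function itself, e.g.
// TanH is bounded to [-1, 1] and BoundedReLu to [0, m_A].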
void DynamicQuantizationVisitor::VisitActivationLayer(const IConnectableLayer* layer,
                                                      const ActivationDescriptor& activationDescriptor,
                                                      const char* name)
{
    IgnoreUnused(name, activationDescriptor);
    switch (activationDescriptor.m_Function)
    {
        // Range is 0, 15 for Abs, Linear, ReLu and Soft ReLu
        case ActivationFunction::Abs:
        case ActivationFunction::Linear:
        case ActivationFunction::ReLu:
        case ActivationFunction::SoftReLu:
            SetRange(layer, 0, 0.f, 15.f);
            break;
        case ActivationFunction::BoundedReLu:
            SetRange(layer, 0, 0.f, activationDescriptor.m_A);
            break;
        case ActivationFunction::TanH:
            SetRange(layer, 0, -1.f, 1.f);
            break;
        case ActivationFunction::LeakyReLu:
            SetRange(layer, 0, -5.f, 15.f);
            break;
        default:
            SetRange(layer, 0, -15.f, 15.f);
            break;
    }
    AddToCalibratedLayers(layer);
}

void DynamicQuantizationVisitor::VisitFullyConnectedLayer(const IConnectableLayer* layer,
                                                          const FullyConnectedDescriptor& desc,
                                                          const ConstTensor& weights,
                                                          const Optional<ConstTensor>& biases,
                                                          const char *name)
{
    IgnoreUnused(desc);
    IgnoreUnused(weights);
    IgnoreUnused(biases);
    IgnoreUnused(name);
    SetRange(layer, 0, -15.0f, 15.0f);
    AddToCalibratedLayers(layer);
}

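// Permute and the other layers below that go onto the non-calibrated list
// (SpaceToBatchNd, Pooling2d, Reshape, Splitter, ResizeBilinear, StridedSlice,
// BatchToSpaceNd) are treated as pass-through for range purposes: they are not
// calibrated and inherit their parents' ranges via VisitNonCalibratedLayers().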
void DynamicQuantizationVisitor::VisitPermuteLayer(const IConnectableLayer* layer,
                                                   const PermuteDescriptor& permuteDescriptor,
                                                   const char* name)
{
    IgnoreUnused(permuteDescriptor);
    IgnoreUnused(name);
    AddToNonCalibratedLayers(layer);
}

void DynamicQuantizationVisitor::VisitSpaceToBatchNdLayer(const IConnectableLayer* layer,
                                                          const SpaceToBatchNdDescriptor& spaceToBatchNdDescriptor,
                                                          const char* name)
{
    IgnoreUnused(spaceToBatchNdDescriptor);
    IgnoreUnused(name);
    AddToNonCalibratedLayers(layer);
}

void DynamicQuantizationVisitor::VisitPooling2dLayer(const IConnectableLayer* layer,
                                                     const Pooling2dDescriptor& pooling2dDescriptor,
                                                     const char* name)
{
    IgnoreUnused(pooling2dDescriptor);
    IgnoreUnused(name);
    AddToNonCalibratedLayers(layer);
}

void DynamicQuantizationVisitor::VisitSoftmaxLayer(const IConnectableLayer* layer,
                                                   const SoftmaxDescriptor& softmaxDescriptor,
                                                   const char* name)
{
    IgnoreUnused(softmaxDescriptor);
    IgnoreUnused(name);
    SetRange(layer, 0, 0.f, 1.f);
    AddToCalibratedLayers(layer);
}

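// A constant's data is fully known up front, so its exact range can be read
// straight out of the tensor instead of being calibrated at runtime.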
void DynamicQuantizationVisitor::VisitConstantLayer(const IConnectableLayer* layer,
                                                    const ConstTensor& input,
                                                    const char* name)
{
    IgnoreUnused(name);

    if (input.GetDataType() != DataType::Float32)
    {
        throw InvalidArgumentException("Quantization is supported only for FP32 tensors");
    }

    // Work out the range based on the input constants
    unsigned int inputNumElements = input.GetNumElements();
    const float* inputData = reinterpret_cast<const float*>(input.GetMemoryArea());

    float min = std::numeric_limits<float>::max();
    float max = std::numeric_limits<float>::lowest();

    for (unsigned int i = 0; i < inputNumElements; i++)
    {
        const float inputValue = inputData[i];

        min = std::min(min, inputValue);
        max = std::max(max, inputValue);
    }
    SetRange(layer, 0, min, max);
}

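// A concatenation's output spans all of its inputs, so its range is the
// min/max taken over the ranges of every incoming connection.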
void DynamicQuantizationVisitor::VisitConcatLayer(const IConnectableLayer* layer,
                                                  const ConcatDescriptor& originsDescriptor,
                                                  const char* name)
{
    IgnoreUnused(name);
    IgnoreUnused(originsDescriptor);
    float min = std::numeric_limits<float>::max();
    float max = std::numeric_limits<float>::lowest();
    for (unsigned int i = 0; i < layer->GetNumInputSlots(); ++i)
    {
        const IOutputSlot* outputSlot = layer->GetInputSlot(i).GetConnection();
        LayerGuid layerId = outputSlot->GetOwningLayerGuid();
        unsigned int slotIndex = outputSlot->CalculateIndexOnOwner();
        RangeTracker::MinMaxRange range = m_RangeTracker.GetRange(layerId, slotIndex);
        min = std::min(min, range.first);
        max = std::max(max, range.second);
    }
    SetRange(layer, 0, min, max);
    AddToCalibratedLayers(layer);
}

void DynamicQuantizationVisitor::VisitReshapeLayer(const IConnectableLayer* layer,
                                                   const ReshapeDescriptor& reshapeDescriptor,
                                                   const char* name)
{
    IgnoreUnused(reshapeDescriptor);
    IgnoreUnused(name);
    AddToNonCalibratedLayers(layer);
}

void DynamicQuantizationVisitor::VisitSplitterLayer(const IConnectableLayer* layer,
                                                    const SplitterDescriptor& splitterDescriptor,
                                                    const char* name)
{
    IgnoreUnused(splitterDescriptor);
    IgnoreUnused(name);
    AddToNonCalibratedLayers(layer);
}

void DynamicQuantizationVisitor::VisitResizeBilinearLayer(const IConnectableLayer* layer,
                                                          const ResizeBilinearDescriptor& resizeDesc,
                                                          const char* name)
{
    IgnoreUnused(resizeDesc);
    IgnoreUnused(name);
    AddToNonCalibratedLayers(layer);
}

void DynamicQuantizationVisitor::VisitStridedSliceLayer(const IConnectableLayer* layer,
                                                        const StridedSliceDescriptor& stridedSliceDescriptor,
                                                        const char* name)
{
    IgnoreUnused(stridedSliceDescriptor);
    IgnoreUnused(name);
    AddToNonCalibratedLayers(layer);
}

void DynamicQuantizationVisitor::VisitBatchToSpaceNdLayer(const IConnectableLayer* layer,
                                                          const BatchToSpaceNdDescriptor& batchToSpaceNdDescriptor,
                                                          const char* name)
{
    IgnoreUnused(batchToSpaceNdDescriptor);
    IgnoreUnused(name);
    AddToNonCalibratedLayers(layer);
}

void DynamicQuantizationVisitor::VisitInputLayer(const IConnectableLayer* layer, LayerBindingId id, const char* name)
{
    IgnoreUnused(id);
    IgnoreUnused(name);
    SetRange(layer, 0, -0.0f, 0.0f); // dummy range, overwritten once real input data has been observed
    AddToCalibratedLayers(layer);
}

void DynamicQuantizationVisitor::VisitOutputLayer(const IConnectableLayer* layer, LayerBindingId id, const char* name)
{
    IgnoreUnused(id);
    IgnoreUnused(name);
    AddToNonCalibratedLayers(layer);
    m_OutputLayers.push_back(id);
}

const std::vector<LayerBindingId>& DynamicQuantizationVisitor::GetOutputLayers()
{
    return m_OutputLayers;
}

} //namespace armnn
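
Usage sketch (editorial addition, not part of the source file above): the snippet
below shows one plausible way to drive this visitor, assuming an existing
armnn::Graph named `graph` and an armnn::RangeTracker named `ranges` — both names
are illustrative. In the 20.02 tree the visitor is actually driven by the
quantization tooling, so treat this as a sketch of the call sequence, not the
canonical API.

    // Minimal driver sketch; `graph` and `ranges` are assumed to already exist.
    armnn::DynamicQuantizationVisitor visitor(ranges, graph);

    for (auto* layer : graph.TopologicalSort())     // visit producers before consumers
    {
        layer->Accept(visitor);                     // dispatches to the Visit*Layer overloads above
    }
    visitor.FinishVisit();                          // inserts a DebugLayer after every calibrated layer

    // ... run representative inputs through the network so that real
    //     min/max values are recorded into the RangeTracker ...

    visitor.VisitNonCalibratedLayers();             // removes the DebugLayers and forwards
                                                    // parent ranges to pass-through layers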