ArmNN  NotReleased
DynamicQuantizationVisitor.cpp
Go to the documentation of this file.
1 //
2 // Copyright © 2017 Arm Ltd. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 
7 #include "NetworkUtils.hpp"
8 
9 #include <boost/core/ignore_unused.hpp>
10 #include <armnn/Descriptors.hpp>
11 #include <armnn/Types.hpp>
12 
13 #include <limits>
14 
15 namespace armnn
16 {
17 
19  : m_RangeTracker(rangeTracker),
20  m_Graph(graph)
21 {}
22 
23 void DynamicQuantizationVisitor::SetRange(const IConnectableLayer* layer, unsigned int outputIdx, float min, float max)
24 {
25  m_RangeTracker.SetRange(layer, outputIdx, min, max);
26 }
27 
28 void DynamicQuantizationVisitor::ForwardParentParameters(const IConnectableLayer* layer)
29 {
30  for (unsigned int i = 0; i < layer->GetNumInputSlots(); ++i)
31  {
32  const IOutputSlot *outputSlot = layer->GetInputSlot(i).GetConnection();
33  LayerGuid previousLayerId = outputSlot->GetOwningLayerGuid();
34  unsigned int ownerIndex = outputSlot->CalculateIndexOnOwner();
35  const auto parentRange = m_RangeTracker.GetRange(previousLayerId, ownerIndex);
36  SetRange(layer, i, parentRange.first, parentRange.second);
37  }
38 }
39 
40 void DynamicQuantizationVisitor::AddToCalibratedLayers(const IConnectableLayer* layer)
41 {
42  m_LayersToCalibrate.push_back(layer);
43 }
44 
45 void DynamicQuantizationVisitor::AddToNonCalibratedLayers(const IConnectableLayer* layer)
46 {
47  m_LayersNotToCalibrate.push_back(layer);
48 }
49 
51 {
52  for (const IConnectableLayer* layer : m_LayersToCalibrate)
53  {
54  std::vector<DebugLayer*> newDebugLayers = InsertDebugLayerAfter(
55  m_Graph, *boost::polymorphic_downcast<Layer*>(const_cast<IConnectableLayer*>(layer)));
56  // record them so we can take them out again efficiently afterward
57  m_DebugLayers.insert(std::end(m_DebugLayers), std::begin(newDebugLayers), std::end(newDebugLayers));
58  }
59 }
60 
61 void DynamicQuantizationVisitor::RemoveDebugLayers()
62 {
63  for (DebugLayer* debugLayer : m_DebugLayers)
64  {
65  OutputSlot& proceedingOutputSlot = *debugLayer->GetInputSlot(0).GetConnectedOutputSlot();
66  proceedingOutputSlot.Disconnect(debugLayer->GetInputSlot(0));
67 
68  for (InputSlot* succeedingInputSlot : debugLayer->GetOutputSlot(0).GetConnections())
69  {
70  debugLayer->GetOutputSlot(0).Disconnect(*succeedingInputSlot);
71  proceedingOutputSlot.Connect(*succeedingInputSlot);
72  }
73  m_Graph.EraseLayer(debugLayer);
74  }
75  m_DebugLayers.clear();
76 }
77 
79  RemoveDebugLayers();
80  for (const IConnectableLayer* layer : m_LayersNotToCalibrate)
81  {
82  ForwardParentParameters(layer);
83  }
84 }
85 
87 {
88  boost::ignore_unused(name);
89  SetRange(layer, 0, -20.f, 20.f);
90  AddToCalibratedLayers(layer);
91 }
92 
94  const BatchNormalizationDescriptor& desc,
95  const ConstTensor& mean,
96  const ConstTensor& variance,
97  const ConstTensor& beta,
98  const ConstTensor& gamma,
99  const char* name)
100 {
101  boost::ignore_unused(desc);
102  boost::ignore_unused(mean);
103  boost::ignore_unused(variance);
104  boost::ignore_unused(beta);
105  boost::ignore_unused(gamma);
106  boost::ignore_unused(name);
107  SetRange(layer, 0, -15.0f, 15.0f);
108  AddToCalibratedLayers(layer);
109 }
110 
112  const Convolution2dDescriptor& convolution2dDescriptor,
113  const ConstTensor& weights,
114  const Optional<ConstTensor>& biases,
115  const char* name)
116 {
117  boost::ignore_unused(convolution2dDescriptor);
118  boost::ignore_unused(weights);
119  boost::ignore_unused(biases);
120  boost::ignore_unused(name);
121  SetRange(layer, 0, -15.0f, 15.0f);
122  AddToCalibratedLayers(layer);
123 }
124 
127  const ConstTensor& weights,
128  const Optional<ConstTensor>& biases,
129  const char* name)
130 {
131  boost::ignore_unused(desc);
132  boost::ignore_unused(weights);
133  boost::ignore_unused(biases);
134  boost::ignore_unused(name);
135  SetRange(layer, 0, -15.0f, 15.0f);
136  AddToCalibratedLayers(layer);
137 }
138 
140  const ActivationDescriptor& activationDescriptor,
141  const char* name)
142 {
143  boost::ignore_unused(name, activationDescriptor);
144  switch (activationDescriptor.m_Function)
145  {
146  // Range is 0, 15 for Abs, Linear, ReLu and Soft ReLu
151  SetRange(layer, 0, 0.f, 15.f);
152  break;
154  SetRange(layer, 0, 0.f, activationDescriptor.m_A);
155  break;
157  SetRange(layer, 0, -1.f, 1.f);
158  break;
160  SetRange(layer, 0, -5.f, 15.f);
161  break;
162  default:
163  SetRange(layer, 0, -15.f, 15.f);
164  break;
165  }
166  AddToCalibratedLayers(layer);
167 }
168 
170  const FullyConnectedDescriptor& desc,
171  const ConstTensor& weights,
172  const Optional<ConstTensor>& biases,
173  const char *name)
174 {
175  boost::ignore_unused(desc);
176  boost::ignore_unused(weights);
177  boost::ignore_unused(biases);
178  boost::ignore_unused(name);
179  SetRange(layer, 0, -15.0f, 15.0f);
180  AddToCalibratedLayers(layer);
181 }
182 
184  const PermuteDescriptor& permuteDescriptor,
185  const char* name)
186 {
187  boost::ignore_unused(permuteDescriptor);
188  boost::ignore_unused(name);
189  AddToNonCalibratedLayers(layer);
190 }
191 
193  const SpaceToBatchNdDescriptor& spaceToBatchNdDescriptor,
194  const char* name)
195 {
196  boost::ignore_unused(spaceToBatchNdDescriptor);
197  boost::ignore_unused(name);
198  AddToNonCalibratedLayers(layer);
199 }
200 
202  const Pooling2dDescriptor& pooling2dDescriptor,
203  const char* name)
204 {
205  boost::ignore_unused(pooling2dDescriptor);
206  boost::ignore_unused(name);
207  AddToNonCalibratedLayers(layer);
208 }
209 
211  const SoftmaxDescriptor& softmaxDescriptor,
212  const char* name)
213 {
214  boost::ignore_unused(softmaxDescriptor);
215  boost::ignore_unused(name);
216  SetRange(layer, 0, 0.f, 1.f);
217  AddToCalibratedLayers(layer);
218 }
219 
221  const ConstTensor& input,
222  const char* name)
223 {
224  boost::ignore_unused(name);
225 
226  if (input.GetDataType() != DataType::Float32)
227  {
228  throw InvalidArgumentException("Quantization is supported only for FP32 tensors");
229  }
230 
231  // Work out the range based on the input constants
232  unsigned int inputNumElements = input.GetNumElements();
233  const float* inputData = reinterpret_cast<const float*>(input.GetMemoryArea());
234 
235  float min = std::numeric_limits<float>::max();
236  float max = std::numeric_limits<float>::lowest();
237 
238  for (unsigned int i = 0; i < inputNumElements; i++)
239  {
240  const float inputValue = inputData[i];
241 
242  min = std::min(min, inputValue);
243  max = std::max(max, inputValue);
244  }
245  SetRange(layer, 0, min, max);
246 }
247 
249  const ConcatDescriptor& originsDescriptor,
250  const char* name)
251 {
252  boost::ignore_unused(name);
253  boost::ignore_unused(originsDescriptor);
254  float min = std::numeric_limits<float>::max();
255  float max = std::numeric_limits<float>::lowest();
256  for (unsigned int i = 0; i < layer->GetNumInputSlots(); ++i)
257  {
258  const IOutputSlot* outputSlot = layer->GetInputSlot(i).GetConnection();
259  LayerGuid layerId = outputSlot->GetOwningLayerGuid();
260  unsigned int slotIndex = outputSlot->CalculateIndexOnOwner();
261  RangeTracker::MinMaxRange range = m_RangeTracker.GetRange(layerId, slotIndex);
262  min = std::min(min, range.first);
263  max = std::max(max, range.second);
264  }
265  SetRange(layer, 0, min, max);
266  AddToCalibratedLayers(layer);
267 }
268 
270  const ReshapeDescriptor& reshapeDescriptor,
271  const char* name)
272 {
273  boost::ignore_unused(reshapeDescriptor);
274  boost::ignore_unused(name);
275  AddToNonCalibratedLayers(layer);
276 }
277 
279  const SplitterDescriptor& splitterDescriptor,
280  const char* name)
281 {
282  boost::ignore_unused(splitterDescriptor);
283  boost::ignore_unused(name);
284  AddToNonCalibratedLayers(layer);
285 }
286 
288  const ResizeBilinearDescriptor& resizeDesc,
289  const char* name)
290 {
291  boost::ignore_unused(resizeDesc);
292  boost::ignore_unused(name);
293  AddToNonCalibratedLayers(layer);
294 }
295 
297  const StridedSliceDescriptor& stridedSliceDescriptor,
298  const char* name)
299 {
300  boost::ignore_unused(stridedSliceDescriptor);
301  boost::ignore_unused(name);
302  AddToNonCalibratedLayers(layer);
303 }
304 
306  const BatchToSpaceNdDescriptor& batchToSpaceNdDescriptor,
307  const char* name)
308 {
309  boost::ignore_unused(batchToSpaceNdDescriptor);
310  boost::ignore_unused(name);
311  AddToNonCalibratedLayers(layer);
312 }
313 
315 {
316  boost::ignore_unused(id);
317  boost::ignore_unused(name);
318  SetRange(layer, 0, -0.0f, 0.0f);
319  AddToCalibratedLayers(layer);
320 }
321 
323 {
324  boost::ignore_unused(id);
325  boost::ignore_unused(name);
326  AddToNonCalibratedLayers(layer);
327  m_OutputLayers.push_back(id);
328 }
329 
330 const std::vector<LayerBindingId>& DynamicQuantizationVisitor::GetOutputLayers()
331 {
332  return m_OutputLayers;
333 }
334 
335 } //namespace armnn
float m_A
Alpha upper bound value used by the activation functions. (BoundedReLu, Linear, TanH).
Definition: Descriptors.hpp:37
virtual const IOutputSlot * GetConnection() const =0
void VisitSoftmaxLayer(const IConnectableLayer *layer, const SoftmaxDescriptor &softmaxDescriptor, const char *name=nullptr) override
virtual const IInputSlot & GetInputSlot(unsigned int index) const =0
A BatchToSpaceNdDescriptor for the BatchToSpaceNdLayer.
DataType GetDataType() const
Definition: Tensor.hpp:172
A tensor defined by a TensorInfo (shape and data type) and an immutable backing store.
Definition: Tensor.hpp:199
unsigned int GetNumElements() const
Definition: Tensor.hpp:175
void VisitConvolution2dLayer(const IConnectableLayer *layer, const Convolution2dDescriptor &convolution2dDescriptor, const ConstTensor &weights, const Optional< ConstTensor > &biases, const char *name=nullptr) override
An ActivationDescriptor for the ActivationLayer.
Definition: Descriptors.hpp:20
std::pair< float, float > MinMaxRange
A BatchNormalizationDescriptor for the BatchNormalizationLayer.
int Connect(InputSlot &destination)
Definition: Layer.cpp:79
void VisitSpaceToBatchNdLayer(const IConnectableLayer *layer, const SpaceToBatchNdDescriptor &spaceToBatchNdDescriptor, const char *name=nullptr) override
A ViewsDescriptor for the SplitterLayer. Descriptor to configure the splitting process. Number of Views must be equal to the number of outputs, and their order must match - e.g. first view corresponds to the first output, second view to the second output, etc.
void VisitPooling2dLayer(const IConnectableLayer *layer, const Pooling2dDescriptor &pooling2dDescriptor, const char *name) override
void VisitBatchToSpaceNdLayer(const IConnectableLayer *layer, const BatchToSpaceNdDescriptor &batchToSpaceNdDescriptor, const char *name=nullptr) override
virtual LayerGuid GetOwningLayerGuid() const =0
A ReshapeDescriptor for the ReshapeLayer.
void VisitConstantLayer(const IConnectableLayer *layer, const ConstTensor &input, const char *name=nullptr) override
void VisitPermuteLayer(const IConnectableLayer *layer, const PermuteDescriptor &permuteDescriptor, const char *name) override
DynamicQuantizationVisitor(RangeTracker &rangeTracker, Graph &graph)
void VisitStridedSliceLayer(const IConnectableLayer *layer, const StridedSliceDescriptor &stridedSliceDescriptor, const char *name=nullptr) override
A FullyConnectedDescriptor for the FullyConnectedLayer.
const std::vector< armnn::LayerBindingId > & GetOutputLayers()
void Disconnect(InputSlot &slot)
Definition: Layer.cpp:87
void VisitReshapeLayer(const IConnectableLayer *layer, const ReshapeDescriptor &reshapeDescriptor, const char *name=nullptr) override
A ResizeBilinearDescriptor for the ResizeBilinearLayer.
ActivationFunction m_Function
The activation function to use (Sigmoid, TanH, Linear, ReLu, BoundedReLu, SoftReLu, LeakyReLu, Abs, Sqrt, Square).
Definition: Descriptors.hpp:35
A SoftmaxDescriptor for the SoftmaxLayer.
An output connection slot for a layer. The output slot may be connected to 1 or more input slots of s...
Definition: INetwork.hpp:37
void VisitOutputLayer(const IConnectableLayer *layer, LayerBindingId id, const char *name=nullptr) override
void VisitAdditionLayer(const IConnectableLayer *layer, const char *name=nullptr) override
Functions to set the Range on a per-layer-type basis.
This layer visualizes the data flowing through the network.
Definition: DebugLayer.hpp:13
void SetRange(const IConnectableLayer *layer, unsigned int outputIdx, float min, float max)
Set the range for an output slot on a layer.
MinMaxRange GetRange(LayerGuid guid, unsigned int idx) const
Retrieve the Range for a particular output slot on a particular layer.
Interface for a layer that is connectable to other layers via InputSlots and OutputSlots.
Definition: INetwork.hpp:61
MemoryType GetMemoryArea() const
Definition: Tensor.hpp:177
A Pooling2dDescriptor for the Pooling2dLayer.
virtual unsigned int GetNumInputSlots() const =0
void VisitFullyConnectedLayer(const IConnectableLayer *layer, const FullyConnectedDescriptor &desc, const ConstTensor &weights, const Optional< ConstTensor > &biases, const char *name) override
void EraseLayer(Iterator pos)
Deletes the layer at the specified position.
Definition: Graph.hpp:442
virtual unsigned int CalculateIndexOnOwner() const =0
A DepthwiseConvolution2dDescriptor for the DepthwiseConvolution2dLayer.
void VisitBatchNormalizationLayer(const IConnectableLayer *layer, const BatchNormalizationDescriptor &desc, const ConstTensor &mean, const ConstTensor &variance, const ConstTensor &beta, const ConstTensor &gamma, const char *name=nullptr) override
std::vector< DebugLayer * > InsertDebugLayerAfter(Graph &graph, Layer &layer)
A PermuteDescriptor for the PermuteLayer.
A Convolution2dDescriptor for the Convolution2dLayer.
void VisitDepthwiseConvolution2dLayer(const IConnectableLayer *layer, const DepthwiseConvolution2dDescriptor &desc, const ConstTensor &weights, const Optional< ConstTensor > &biases, const char *name=nullptr) override
A SpaceToBatchNdDescriptor for the SpaceToBatchNdLayer.
void VisitConcatLayer(const IConnectableLayer *layer, const ConcatDescriptor &originsDescriptor, const char *name=nullptr) override
void VisitInputLayer(const IConnectableLayer *layer, LayerBindingId id, const char *name=nullptr) override
void VisitSplitterLayer(const IConnectableLayer *layer, const SplitterDescriptor &splitterDescriptor, const char *name=nullptr) override
int LayerBindingId
Type of identifiers for bindable layers (inputs, outputs).
Definition: Types.hpp:168
void VisitActivationLayer(const IConnectableLayer *layer, const ActivationDescriptor &activationDescriptor, const char *name=nullptr) override
An OriginsDescriptor for the ConcatLayer. Descriptor to configure the concatenation process...
void VisitResizeBilinearLayer(const IConnectableLayer *layer, const ResizeBilinearDescriptor &resizeDesc, const char *name=nullptr) override
A StridedSliceDescriptor for the StridedSliceLayer.