ArmNN  NotReleased
NetworkQuantizer.cpp
Go to the documentation of this file.
1 //
2 // Copyright © 2017 Arm Ltd. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 
6 #include "NetworkQuantizer.hpp"
8 #include "Graph.hpp"
9 #include "Layer.hpp"
10 #include "Network.hpp"
12 #include "StaticRangeVisitor.hpp"
13 #include "QuantizerVisitor.hpp"
15 
16 #include <TensorIOUtils.hpp>
17 
18 #include <armnn/ILayerVisitor.hpp>
19 #include <armnn/INetwork.hpp>
20 #include <armnn/Tensor.hpp>
21 #include <armnn/Types.hpp>
22 
24 
25 #include <boost/variant.hpp>
26 
27 #include <vector>
28 #include <cmath>
29 
30 namespace armnn
31 {
32 
// Variant buffer type able to hold output tensor data of the element types this
// quantizer produces (float32, int32, uint8); used by Refine() when building the
// OutputTensors passed to MakeOutputTensors/EnqueueWorkload.
using TContainer = boost::variant<std::vector<float>, std::vector<int>, std::vector<unsigned char>>;
34 
36 {
37  return new NetworkQuantizer(inputNetwork, options);
38 }
39 
41 {
42  return INetworkQuantizerPtr(CreateRaw(inputNetwork, options), &INetworkQuantizer::Destroy);
43 }
44 
46 {
47  delete boost::polymorphic_downcast<NetworkQuantizer*>(quantizer);
48 }
49 
50 void NetworkQuantizer::OverrideInputRange(LayerBindingId layerId, float min, float max)
51 {
52  const Graph& graph = boost::polymorphic_downcast<const Network*>(m_InputNetwork)->GetGraph();
53  auto inputLayers = graph.GetInputLayers();
54 
55  // Walk the input layers of the graph and override the quantization parameters of the one with the given id
56  OverrideInputRangeVisitor overrideInputRangeVisitor(m_Ranges, layerId, RangeTracker::MinMaxRange{min, max});
57  VisitLayers(inputLayers, overrideInputRangeVisitor);
58 }
59 
60 void NetworkQuantizer::Refine(const InputTensors& inputTensors)
61 {
62  // The first time Refine is called the m_Runtime and the DynamicQuantizationVisitor
63  // will not have been created. Need to get the environment set up, Runtime loaded,
64  // DynamicQuantizationVisitor created and run over the network to initialise itself
65  // and the RangeTracker the Debug callback registered and an initial inference
66  // done to set up the first min/max values
67  if (!m_Runtime)
68  {
69  m_RefineCount = 0;
70  m_Ranges.SetDynamicMode(true);
71  const Graph& cGraph = boost::polymorphic_downcast<const Network*>(m_InputNetwork)->GetGraph().TopologicalSort();
72 
73  // need to insert Debug layers in the DynamicQuantizationVisitor
74  Graph& graph = const_cast<Graph&>(cGraph);
75 
76  // Initialize RangeTracker to the default values for each layer.
77  // The default values are overwritten by the min/max that is
78  // recorded during the first dataset min/max calibration. This
79  // initialisation is only required for the first call of Refine().
80  m_DynamicQuantizationVisitor = DynamicQuantizationVisitor(m_Ranges, graph);
81  VisitLayers(cGraph, m_DynamicQuantizationVisitor.value());
82 
84  m_Runtime = IRuntime::Create(options);
85 
86  // Optimize network - debug already enabled for layers that require quantization
87  OptimizerOptions optimizerOptions(false, false);
88  std::vector<BackendId> backends = {"CpuRef"};
89  IOptimizedNetworkPtr optimizedNet = Optimize(*m_InputNetwork,
90  backends,
91  m_Runtime->GetDeviceSpec(),
92  optimizerOptions);
93 
94  m_Runtime->LoadNetwork(m_NetworkId, std::move(optimizedNet));
95 
96  // Debug callback function to refine min/max in RangeTracker
97  auto rangeTrackerCallback = [&](LayerGuid guid, unsigned int slotIndex, ITensorHandle *tensorHandle) {
98  // Get min/max pair from tensor data
99  std::pair<float, float> minMax = armnnUtils::FindMinMax(tensorHandle);
100 
101  // For first calibration dataset, set min/max range in RangeTracker to
102  // min/max ranges gathered during inference
103  if (m_RefineCount == 0)
104  {
105  m_Ranges.ResetMinMax(guid, slotIndex, minMax.first, minMax.second);
106  }
107  else
108  {
109  // For every other calibration dataset, only set min/max range if the
110  // values gathered are less than / greater than originally recorded.
111  m_Ranges.RefineMin(guid, slotIndex, minMax.first);
112  m_Ranges.RefineMax(guid, slotIndex, minMax.second);
113  }
114  };
115 
116  m_Runtime->RegisterDebugCallback(m_NetworkId, rangeTrackerCallback);
117  }
118 
119  // Create output tensor for EnqueueWorkload
120  std::vector<armnn::BindingPointInfo> outputBindings;
121  auto outputLayers = m_DynamicQuantizationVisitor.value().GetOutputLayers();
122  std::vector<TContainer> outputVectors;
123  for (auto outputLayerBindingId : outputLayers)
124  {
125  auto outputTensorInfo = m_Runtime->GetOutputTensorInfo(m_NetworkId, outputLayerBindingId);
126  outputBindings.push_back(std::make_pair(outputLayerBindingId, outputTensorInfo));
127  outputVectors.push_back(std::vector<float>(outputTensorInfo.GetNumElements(), 0));
128  }
129  OutputTensors outputTensors = armnnUtils::MakeOutputTensors<TContainer>(outputBindings, outputVectors);
130 
131  // Execute EnqueueWorkload with calibration image
132  m_Runtime->EnqueueWorkload(m_NetworkId, inputTensors, outputTensors);
133  ++m_RefineCount;
134 }
135 
137 {
138  const Graph& graph = boost::polymorphic_downcast<const Network*>(m_InputNetwork)->GetGraph().TopologicalSort();
139 
140  // Step 1) Walk the graph and populate default min/max values for
141  // intermediate tensors, only if Runtime does not exist (created
142  // if Refine has been called)
143  if (!m_Runtime)
144  {
145  m_Ranges.SetDynamicMode(false);
146  StaticRangeVisitor rangeVisitor(m_Ranges);
147  VisitLayers(graph, rangeVisitor);
148  }
149  else
150  {
151  // Set min/max range of non-calibrated layers to parent layer's range
152  m_DynamicQuantizationVisitor.value().VisitNonCalibratedLayers();
153  // now tear down the runtime and the dynamic visitor.
154  m_Runtime.reset(nullptr);
155  m_DynamicQuantizationVisitor = EmptyOptional();
156  m_RefineCount = 0;
157  }
158 
159  // Step 2) Convert input InputNetwork to Quantized InputNetwork
160  std::unique_ptr<IQuantizationScheme> quantizationScheme;
161  switch (m_Options.m_ActivationFormat)
162  {
163  case DataType::QAsymmU8:
164  quantizationScheme = std::make_unique<QAsymmU8QuantizationScheme>();
165  break;
166  case DataType::QAsymmS8:
167  quantizationScheme = std::make_unique<QAsymmS8QuantizationScheme>();
168  break;
169  case DataType::QSymmS8:
170  quantizationScheme = std::make_unique<QSymmS8QuantizationScheme>();
171  break;
172  case DataType::QSymmS16:
173  quantizationScheme = std::make_unique<QSymm16QuantizationScheme>();
174  break;
175  default:
176  throw InvalidArgumentException("Unsupported quantization target");
177  }
178 
179  QuantizerVisitor quantizerVisitor(m_Ranges, quantizationScheme.get(), m_Options.m_PreserveType);
180  VisitLayers(graph, quantizerVisitor);
181 
182  // clear the ranges
183  m_Ranges.Reset();
184 
185  return quantizerVisitor.RetrieveFinalNetwork();
186 }
187 
} // namespace armnn
Visitor object for overriding the input range of the quantized input layers in a network.
Graph & TopologicalSort()
Sorts layers in topological order and return this.
Definition: Graph.hpp:173
std::pair< float, float > FindMinMax(armnn::ITensorHandle *tensorHandle)
Definition: TensorUtils.cpp:58
std::vector< std::pair< LayerBindingId, class Tensor > > OutputTensors
Definition: Tensor.hpp:226
void VisitLayers(const LayerContainer &layerContainer, ILayerVisitor &visitor)
IOptimizedNetworkPtr Optimize(const INetwork &network, const std::vector< BackendId > &backendPreferences, const IDeviceSpec &deviceSpec, const OptimizerOptions &options=OptimizerOptions(), Optional< std::vector< std::string > &> messages=EmptyOptional())
Definition: Network.cpp:807
static void Destroy(INetworkQuantizer *quantizer)
Destroy Quantizer object.
void Refine(const InputTensors &inputTensors) override
Refine input network with a set of refinement data for specified LayerBindingId.
std::pair< float, float > MinMaxRange
std::unique_ptr< class INetworkQuantizer, void(*)(INetworkQuantizer *quantizer)> INetworkQuantizerPtr
std::vector< std::pair< LayerBindingId, class ConstTensor > > InputTensors
Definition: Tensor.hpp:225
Quantizer class Quantizes a float32 InputNetwork.
INetworkPtr ExportNetwork() override
Extract final quantized network.
Visitor class to establish min/max ranges based on the type of the layer.
InputLayersAccessor GetInputLayers() const
Definition: Graph.hpp:181
std::unique_ptr< INetwork, void(*)(INetwork *network)> INetworkPtr
Definition: INetwork.hpp:85
static IRuntimePtr Create(const CreationOptions &options)
Definition: Runtime.cpp:32
Visitor object for quantizing layers in a network.
boost::variant< std::vector< float >, std::vector< int >, std::vector< unsigned char > > TContainer
static INetworkQuantizerPtr Create(INetwork *inputNetwork, const QuantizerOptions &options=QuantizerOptions())
Create Quantizer object wrapped in unique_ptr.
std::unique_ptr< IOptimizedNetwork, void(*)(IOptimizedNetwork *network)> IOptimizedNetworkPtr
Definition: INetwork.hpp:544
void OverrideInputRange(LayerBindingId layerId, float min, float max) override
Overrides the default quantization values for the input layer with the given id.
armnn::Runtime::CreationOptions::ExternalProfilingOptions options
Visitor class to establish min/max ranges based on the type of the layer.
int LayerBindingId
Type of identifiers for bindable layers (inputs, outputs).
Definition: Types.hpp:168
static INetworkQuantizer * CreateRaw(INetwork *inputNetwork, const QuantizerOptions &options=QuantizerOptions())
Create Quantizer object and return raw pointer.