ArmNN
 20.02
NetworkQuantizer.cpp
Go to the documentation of this file.
1 //
2 // Copyright © 2017 Arm Ltd. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 
6 #include "NetworkQuantizer.hpp"
8 #include "Graph.hpp"
9 #include "Layer.hpp"
10 #include "Network.hpp"
12 #include "StaticRangeVisitor.hpp"
13 #include "QuantizerVisitor.hpp"
15 
16 #include <TensorIOUtils.hpp>
17 
18 #include <armnn/ILayerVisitor.hpp>
19 #include <armnn/INetwork.hpp>
20 #include <armnn/Tensor.hpp>
21 #include <armnn/Types.hpp>
22 
24 
25 #include <boost/variant.hpp>
26 
27 #include <vector>
28 #include <cmath>
29 
30 namespace armnn
31 {
32 
33 using TContainer = boost::variant<std::vector<float>, std::vector<int>, std::vector<unsigned char>>;
34 
36 {
37  return new NetworkQuantizer(inputNetwork, options);
38 }
39 
41 {
42  return INetworkQuantizerPtr(CreateRaw(inputNetwork, options), &INetworkQuantizer::Destroy);
43 }
44 
46 {
47  delete boost::polymorphic_downcast<NetworkQuantizer*>(quantizer);
48 }
49 
50 void NetworkQuantizer::OverrideInputRange(LayerBindingId layerId, float min, float max)
51 {
52  const Graph& graph = boost::polymorphic_downcast<const Network*>(m_InputNetwork)->GetGraph();
53  auto inputLayers = graph.GetInputLayers();
54 
55  // Walk the input layers of the graph and override the quantization parameters of the one with the given id
56  OverrideInputRangeVisitor overrideInputRangeVisitor(m_Ranges, layerId, RangeTracker::MinMaxRange{min, max});
57  VisitLayers(inputLayers, overrideInputRangeVisitor);
58 }
59 
60 void NetworkQuantizer::Refine(const InputTensors& inputTensors)
61 {
62  // The first time Refine is called the m_Runtime and the DynamicQuantizationVisitor
63  // will not have been created. Need to get the environment set up, Runtime loaded,
64  // DynamicQuantizationVisitor created and run over the network to initialise itself
65  // and the RangeTracker the Debug callback registered and an initial inference
66  // done to set up the first min/max values
67  if (!m_Runtime)
68  {
69  m_RefineCount = 0;
70  m_Ranges.SetDynamicMode(true);
71  const Graph& cGraph = boost::polymorphic_downcast<const Network*>(m_InputNetwork)->GetGraph().TopologicalSort();
72 
73  // need to insert Debug layers in the DynamicQuantizationVisitor
74  Graph& graph = const_cast<Graph&>(cGraph);
75 
76  // Initialize RangeTracker to the default values for each layer.
77  // The default values are overwritten by the min/max that is
78  // recorded during the first dataset min/max calibration. This
79  // initialisation is only required for the first call of Refine().
80  m_DynamicQuantizationVisitor = DynamicQuantizationVisitor(m_Ranges, graph);
81  VisitLayers(cGraph, m_DynamicQuantizationVisitor.value());
82 
84  m_Runtime = IRuntime::Create(options);
85 
86  // Optimize network - debug already enabled for layers that require quantization
87  OptimizerOptions optimizerOptions(false, false);
88  std::vector<BackendId> backends = {"CpuRef"};
89  IOptimizedNetworkPtr optimizedNet = Optimize(*m_InputNetwork,
90  backends,
91  m_Runtime->GetDeviceSpec(),
92  optimizerOptions);
93 
94  m_Runtime->LoadNetwork(m_NetworkId, std::move(optimizedNet));
95 
96  // Debug callback function to refine min/max in RangeTracker
97  auto rangeTrackerCallback = [&](LayerGuid guid, unsigned int slotIndex, ITensorHandle *tensorHandle) {
98  // Get min/max pair from tensor data
99  std::pair<float, float> minMax = armnnUtils::FindMinMax(tensorHandle);
100 
101  // For first calibration dataset, set min/max range in RangeTracker to
102  // min/max ranges gathered during inference
103  if (m_RefineCount == 0)
104  {
105  m_Ranges.ResetMinMax(guid, slotIndex, minMax.first, minMax.second);
106  }
107  else
108  {
109  // For every other calibration dataset, only set min/max range if the
110  // values gathered are less than / greater than originally recorded.
111  m_Ranges.RefineMin(guid, slotIndex, minMax.first);
112  m_Ranges.RefineMax(guid, slotIndex, minMax.second);
113  }
114  };
115 
116  m_Runtime->RegisterDebugCallback(m_NetworkId, rangeTrackerCallback);
117  }
118 
119  // Create output tensor for EnqueueWorkload
120  std::vector<armnn::BindingPointInfo> outputBindings;
121  auto outputLayers = m_DynamicQuantizationVisitor.value().GetOutputLayers();
122  std::vector<TContainer> outputVectors;
123  for (auto outputLayerBindingId : outputLayers)
124  {
125  auto outputTensorInfo = m_Runtime->GetOutputTensorInfo(m_NetworkId, outputLayerBindingId);
126  outputBindings.push_back(std::make_pair(outputLayerBindingId, outputTensorInfo));
127  outputVectors.push_back(std::vector<float>(outputTensorInfo.GetNumElements(), 0));
128  }
129  OutputTensors outputTensors = armnnUtils::MakeOutputTensors<TContainer>(outputBindings, outputVectors);
130 
131  // Execute EnqueueWorkload with calibration image
132  m_Runtime->EnqueueWorkload(m_NetworkId, inputTensors, outputTensors);
133  ++m_RefineCount;
134 }
135 
137 {
138  const Graph& graph = boost::polymorphic_downcast<const Network*>(m_InputNetwork)->GetGraph().TopologicalSort();
139 
140  // Step 1) Walk the graph and populate default min/max values for
141  // intermediate tensors, only if Runtime does not exist (created
142  // if Refine has been called)
143  if (!m_Runtime)
144  {
145  m_Ranges.SetDynamicMode(false);
146  StaticRangeVisitor rangeVisitor(m_Ranges);
147  VisitLayers(graph, rangeVisitor);
148  }
149  else
150  {
151  // Set min/max range of non-calibrated layers to parent layer's range
152  m_DynamicQuantizationVisitor.value().VisitNonCalibratedLayers();
153  // now tear down the runtime and the dynamic visitor.
154  m_Runtime.reset(nullptr);
155  m_DynamicQuantizationVisitor = EmptyOptional();
156  m_RefineCount = 0;
157  }
158 
159  // Step 2) Convert input InputNetwork to Quantized InputNetwork
160  std::unique_ptr<IQuantizationScheme> quantizationScheme;
161  switch (m_Options.m_ActivationFormat)
162  {
163  case DataType::QAsymmU8:
164  quantizationScheme = std::make_unique<QAsymmU8QuantizationScheme>();
165  break;
166  case DataType::QAsymmS8:
167  quantizationScheme = std::make_unique<QAsymmS8QuantizationScheme>();
168  break;
169  case DataType::QSymmS8:
170  quantizationScheme = std::make_unique<QSymmS8QuantizationScheme>();
171  break;
172  case DataType::QSymmS16:
173  quantizationScheme = std::make_unique<QSymm16QuantizationScheme>();
174  break;
175  default:
176  throw InvalidArgumentException("Unsupported quantization target");
177  }
178 
179  QuantizerVisitor quantizerVisitor(m_Ranges, quantizationScheme.get(), m_Options.m_PreserveType);
180  VisitLayers(graph, quantizerVisitor);
181 
182  // clear the ranges
183  m_Ranges.Reset();
184 
185  return quantizerVisitor.RetrieveFinalNetwork();
186 }
187 
188 } //namespace armnn
static IRuntimePtr Create(const CreationOptions &options)
Definition: Runtime.cpp:32
Visitor object for overriding the input range of the quantized input layers in a network.
std::unique_ptr< class INetworkQuantizer, void(*)(INetworkQuantizer *quantizer)> INetworkQuantizerPtr
Main network class which provides the interface for building up a neural network. ...
Definition: INetwork.hpp:105
std::vector< std::pair< LayerBindingId, class ConstTensor > > InputTensors
Definition: Tensor.hpp:225
Copyright (c) 2020 ARM Limited.
void VisitLayers(const LayerContainer &layerContainer, ILayerVisitor &visitor)
std::pair< float, float > MinMaxRange
int LayerBindingId
Type of identifiers for bindable layers (inputs, outputs).
Definition: Types.hpp:171
static void Destroy(INetworkQuantizer *quantizer)
Destroy Quantizer object.
Visitor class to establish min/max ranges based on the type of the layer.
void Refine(const InputTensors &inputTensors) override
Refine input network with a set of refinement data for specified LayerBindingId.
void OverrideInputRange(LayerBindingId layerId, float min, float max) override
Overrides the default quantization values for the input layer with the given id.
IOptimizedNetworkPtr Optimize(const INetwork &network, const std::vector< BackendId > &backendPreferences, const IDeviceSpec &deviceSpec, const OptimizerOptions &options=OptimizerOptions(), Optional< std::vector< std::string > &> messages=EmptyOptional())
Create an optimized version of the network.
Definition: Network.cpp:890
static INetworkQuantizer * CreateRaw(INetwork *inputNetwork, const QuantizerOptions &options=QuantizerOptions())
Create Quantizer object and return raw pointer.
std::vector< std::pair< LayerBindingId, class Tensor > > OutputTensors
Definition: Tensor.hpp:226
std::unique_ptr< IOptimizedNetwork, void(*)(IOptimizedNetwork *network)> IOptimizedNetworkPtr
Definition: INetwork.hpp:566
std::pair< float, float > FindMinMax(armnn::ITensorHandle *tensorHandle)
Definition: TensorUtils.cpp:58
Visitor object for quantizing layers in a network.
boost::variant< std::vector< float >, std::vector< int >, std::vector< unsigned char > > TContainer
EmptyOptional is used to initialize the Optional class in case we want to have default value for an Optional in a function declaration.
Definition: Optional.hpp:32
Visitor class to establish min/max ranges based on the type of the layer.
Graph & TopologicalSort()
Sorts layers in topological order and return this.
Definition: Graph.hpp:173
InputLayersAccessor GetInputLayers() const
Returns a wrapper object with begin(), end() methods to iterate over the input layers in a range-based for loop.
Definition: Graph.hpp:181
std::unique_ptr< INetwork, void(*)(INetwork *network)> INetworkPtr
Definition: INetwork.hpp:101
Quantizer class Quantizes a float32 InputNetwork.
armnn::Runtime::CreationOptions::ExternalProfilingOptions options
INetworkPtr ExportNetwork() override
Extract final quantized network.
static INetworkQuantizerPtr Create(INetwork *inputNetwork, const QuantizerOptions &options=QuantizerOptions())
Create Quantizer object wrapped in unique_ptr.