Arm NN 20.05 — NetworkQuantizer.cpp (generated source listing).
1 //
2 // Copyright © 2017 Arm Ltd. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 
6 #include "NetworkQuantizer.hpp"
8 #include "Graph.hpp"
9 #include "Layer.hpp"
10 #include "Network.hpp"
12 #include "StaticRangeVisitor.hpp"
13 #include "QuantizerVisitor.hpp"
15 
16 #include <TensorIOUtils.hpp>
17 
18 #include <armnn/ILayerVisitor.hpp>
19 #include <armnn/INetwork.hpp>
20 #include <armnn/Tensor.hpp>
21 #include <armnn/Types.hpp>
22 
25 
26 #include <boost/variant.hpp>
27 
28 #include <vector>
29 #include <cmath>
30 
31 namespace armnn
32 {
33 
34 using TContainer = boost::variant<std::vector<float>, std::vector<int>, std::vector<unsigned char>>;
35 
37 {
38  return new NetworkQuantizer(inputNetwork, options);
39 }
40 
42 {
43  return INetworkQuantizerPtr(CreateRaw(inputNetwork, options), &INetworkQuantizer::Destroy);
44 }
45 
47 {
48  delete PolymorphicDowncast<NetworkQuantizer*>(quantizer);
49 }
50 
51 void NetworkQuantizer::OverrideInputRange(LayerBindingId layerId, float min, float max)
52 {
53  const Graph& graph = PolymorphicDowncast<const Network*>(m_InputNetwork)->GetGraph();
54  auto inputLayers = graph.GetInputLayers();
55 
56  // Walk the input layers of the graph and override the quantization parameters of the one with the given id
57  OverrideInputRangeVisitor overrideInputRangeVisitor(m_Ranges, layerId, RangeTracker::MinMaxRange{min, max});
58  VisitLayers(inputLayers, overrideInputRangeVisitor);
59 }
60 
61 void NetworkQuantizer::Refine(const InputTensors& inputTensors)
62 {
63  // The first time Refine is called the m_Runtime and the DynamicQuantizationVisitor
64  // will not have been created. Need to get the environment set up, Runtime loaded,
65  // DynamicQuantizationVisitor created and run over the network to initialise itself
66  // and the RangeTracker the Debug callback registered and an initial inference
67  // done to set up the first min/max values
68  if (!m_Runtime)
69  {
70  m_RefineCount = 0;
71  m_Ranges.SetDynamicMode(true);
72  const Graph& cGraph = PolymorphicDowncast<const Network*>(m_InputNetwork)->GetGraph().TopologicalSort();
73 
74  // need to insert Debug layers in the DynamicQuantizationVisitor
75  Graph& graph = const_cast<Graph&>(cGraph);
76 
77  // Initialize RangeTracker to the default values for each layer.
78  // The default values are overwritten by the min/max that is
79  // recorded during the first dataset min/max calibration. This
80  // initialisation is only required for the first call of Refine().
81  m_DynamicQuantizationVisitor = DynamicQuantizationVisitor(m_Ranges, graph);
82  VisitLayers(cGraph, m_DynamicQuantizationVisitor.value());
83 
85  m_Runtime = IRuntime::Create(options);
86 
87  // Optimize network - debug already enabled for layers that require quantization
88  OptimizerOptions optimizerOptions(false, false);
89  std::vector<BackendId> backends = {"CpuRef"};
90  IOptimizedNetworkPtr optimizedNet = Optimize(*m_InputNetwork,
91  backends,
92  m_Runtime->GetDeviceSpec(),
93  optimizerOptions);
94 
95  m_Runtime->LoadNetwork(m_NetworkId, std::move(optimizedNet));
96 
97  // Debug callback function to refine min/max in RangeTracker
98  auto rangeTrackerCallback = [&](LayerGuid guid, unsigned int slotIndex, ITensorHandle *tensorHandle) {
99  // Get min/max pair from tensor data
100  std::pair<float, float> minMax = armnnUtils::FindMinMax(tensorHandle);
101 
102  // For first calibration dataset, set min/max range in RangeTracker to
103  // min/max ranges gathered during inference
104  if (m_RefineCount == 0)
105  {
106  m_Ranges.ResetMinMax(guid, slotIndex, minMax.first, minMax.second);
107  }
108  else
109  {
110  // For every other calibration dataset, only set min/max range if the
111  // values gathered are less than / greater than originally recorded.
112  m_Ranges.RefineMin(guid, slotIndex, minMax.first);
113  m_Ranges.RefineMax(guid, slotIndex, minMax.second);
114  }
115  };
116 
117  m_Runtime->RegisterDebugCallback(m_NetworkId, rangeTrackerCallback);
118  }
119 
120  // Create output tensor for EnqueueWorkload
121  std::vector<armnn::BindingPointInfo> outputBindings;
122  auto outputLayers = m_DynamicQuantizationVisitor.value().GetOutputLayers();
123  std::vector<TContainer> outputVectors;
124  for (auto outputLayerBindingId : outputLayers)
125  {
126  auto outputTensorInfo = m_Runtime->GetOutputTensorInfo(m_NetworkId, outputLayerBindingId);
127  outputBindings.push_back(std::make_pair(outputLayerBindingId, outputTensorInfo));
128  outputVectors.push_back(std::vector<float>(outputTensorInfo.GetNumElements(), 0));
129  }
130  OutputTensors outputTensors = armnnUtils::MakeOutputTensors<TContainer>(outputBindings, outputVectors);
131 
132  // Execute EnqueueWorkload with calibration image
133  m_Runtime->EnqueueWorkload(m_NetworkId, inputTensors, outputTensors);
134  ++m_RefineCount;
135 }
136 
138 {
139  const Graph& graph = PolymorphicDowncast<const Network*>(m_InputNetwork)->GetGraph().TopologicalSort();
140 
141  // Step 1) Walk the graph and populate default min/max values for
142  // intermediate tensors, only if Runtime does not exist (created
143  // if Refine has been called)
144  if (!m_Runtime)
145  {
146  m_Ranges.SetDynamicMode(false);
147  StaticRangeVisitor rangeVisitor(m_Ranges);
148  VisitLayers(graph, rangeVisitor);
149  }
150  else
151  {
152  // Set min/max range of non-calibrated layers to parent layer's range
153  m_DynamicQuantizationVisitor.value().VisitNonCalibratedLayers();
154  // now tear down the runtime and the dynamic visitor.
155  m_Runtime.reset(nullptr);
156  m_DynamicQuantizationVisitor = EmptyOptional();
157  m_RefineCount = 0;
158  }
159 
160  // Step 2) Convert input InputNetwork to Quantized InputNetwork
161  std::unique_ptr<IQuantizationScheme> quantizationScheme;
162  switch (m_Options.m_ActivationFormat)
163  {
164  case DataType::QAsymmU8:
165  quantizationScheme = std::make_unique<QAsymmU8QuantizationScheme>();
166  break;
167  case DataType::QAsymmS8:
168  quantizationScheme = std::make_unique<QAsymmS8QuantizationScheme>();
169  break;
170  case DataType::QSymmS8:
171  quantizationScheme = std::make_unique<QSymmS8QuantizationScheme>();
172  break;
173  case DataType::QSymmS16:
174  quantizationScheme = std::make_unique<QSymm16QuantizationScheme>();
175  break;
176  default:
177  throw InvalidArgumentException("Unsupported quantization target");
178  }
179 
180  QuantizerVisitor quantizerVisitor(m_Ranges, quantizationScheme.get(), m_Options.m_PreserveType);
181  VisitLayers(graph, quantizerVisitor);
182 
183  // clear the ranges
184  m_Ranges.Reset();
185 
186  return quantizerVisitor.RetrieveFinalNetwork();
187 }
188 
} // namespace armnn
static IRuntimePtr Create(const CreationOptions &options)
Definition: Runtime.cpp:31
Visitor object for overriding the input range of the quantized input layers in a network.
std::unique_ptr< class INetworkQuantizer, void(*)(INetworkQuantizer *quantizer)> INetworkQuantizerPtr
Main network class which provides the interface for building up a neural network. ...
Definition: INetwork.hpp:105
std::vector< std::pair< LayerBindingId, class ConstTensor > > InputTensors
Definition: Tensor.hpp:225
Copyright (c) 2020 ARM Limited.
void VisitLayers(const LayerContainer &layerContainer, ILayerVisitor &visitor)
std::pair< float, float > MinMaxRange
int LayerBindingId
Type of identifiers for bindable layers (inputs, outputs).
Definition: Types.hpp:171
static void Destroy(INetworkQuantizer *quantizer)
Destroy Quantizer object.
Visitor class to establish min/max ranges based on the type of the layer.
void Refine(const InputTensors &inputTensors) override
Refine input network with a set of refinement data for specified LayerBindingId.
void OverrideInputRange(LayerBindingId layerId, float min, float max) override
Overrides the default quantization values for the input layer with the given id.
IOptimizedNetworkPtr Optimize(const INetwork &network, const std::vector< BackendId > &backendPreferences, const IDeviceSpec &deviceSpec, const OptimizerOptions &options=OptimizerOptions(), Optional< std::vector< std::string > &> messages=EmptyOptional())
Create an optimized version of the network.
Definition: Network.cpp:1003
static INetworkQuantizer * CreateRaw(INetwork *inputNetwork, const QuantizerOptions &options=QuantizerOptions())
Create Quantizer object and return raw pointer.
std::vector< std::pair< LayerBindingId, class Tensor > > OutputTensors
Definition: Tensor.hpp:226
std::unique_ptr< IOptimizedNetwork, void(*)(IOptimizedNetwork *network)> IOptimizedNetworkPtr
Definition: INetwork.hpp:573
std::pair< float, float > FindMinMax(armnn::ITensorHandle *tensorHandle)
Definition: TensorUtils.cpp:58
Visitor object for quantizing layers in a network.
boost::variant< std::vector< float >, std::vector< int >, std::vector< unsigned char > > TContainer
EmptyOptional is used to initialize the Optional class in case we want to have default value for an O...
Definition: Optional.hpp:32
Visitor class to establish min/max ranges based on the type of the layer.
Graph & TopologicalSort()
Sorts layers in topological order and return this.
Definition: Graph.hpp:174
InputLayersAccessor GetInputLayers() const
Returns a wrapper object with begin(), end() methods to iterate over the input layers in a range-base...
Definition: Graph.hpp:182
std::unique_ptr< INetwork, void(*)(INetwork *network)> INetworkPtr
Definition: INetwork.hpp:101
Quantizer class Quantizes a float32 InputNetwork.
armnn::Runtime::CreationOptions::ExternalProfilingOptions options
INetworkPtr ExportNetwork() override
Extract final quantized network.
static INetworkQuantizerPtr Create(INetwork *inputNetwork, const QuantizerOptions &options=QuantizerOptions())
Create Quantizer object wrapped in unique_ptr.