ArmNN 21.02
NetworkQuantizer.cpp
//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "NetworkQuantizer.hpp"
#include "NetworkQuantizerUtils.hpp"
#include "Graph.hpp"
#include "Layer.hpp"
#include "Network.hpp"
#include "DynamicQuantizationStrategy.hpp"
#include "StaticRangeStrategy.hpp"
#include "QuantizerStrategy.hpp"
#include "OverrideInputRangeVisitor.hpp"

#include <TensorIOUtils.hpp>

#include <armnn/ILayerVisitor.hpp>
#include <armnn/INetwork.hpp>
#include <armnn/Tensor.hpp>
#include <armnn/Types.hpp>
#include <armnn/utility/PolymorphicDowncast.hpp>
#include <armnnUtils/TensorUtils.hpp>

#include <mapbox/variant.hpp>

#include <vector>
#include <cmath>

namespace armnn
{

using TContainer = mapbox::util::variant<std::vector<float>, std::vector<int>, std::vector<unsigned char>>;

INetworkQuantizer* INetworkQuantizer::CreateRaw(INetwork* inputNetwork, const QuantizerOptions& options)
{
    return new NetworkQuantizer(inputNetwork, options);
}

INetworkQuantizerPtr INetworkQuantizer::Create(INetwork* inputNetwork, const QuantizerOptions& options)
{
    return INetworkQuantizerPtr(CreateRaw(inputNetwork, options), &INetworkQuantizer::Destroy);
}

void INetworkQuantizer::Destroy(INetworkQuantizer* quantizer)
{
    delete PolymorphicDowncast<NetworkQuantizer*>(quantizer);
}

void NetworkQuantizer::OverrideInputRange(LayerBindingId layerId, float min, float max)
{
    const Graph& graph = m_InputNetwork->pNetworkImpl->GetGraph();
    auto inputLayers = graph.GetInputLayers();

    // Walk the input layers of the graph and override the quantization parameters of the one with the given id
    OverrideInputRangeVisitor overrideInputRangeVisitor(m_Ranges, layerId, RangeTracker::MinMaxRange{min, max});
    VisitLayers(inputLayers, overrideInputRangeVisitor);
}

void NetworkQuantizer::Refine(const InputTensors& inputTensors)
{
    // The first time Refine is called, m_Runtime and the DynamicQuantizationStrategy
    // will not have been created. We need to get the environment set up: the Runtime
    // loaded, the DynamicQuantizationStrategy created and run over the network to
    // initialise itself and the RangeTracker, the Debug callback registered, and an
    // initial inference done to set up the first min/max values.
    if (!m_Runtime)
    {
        m_RefineCount = 0;
        m_Ranges.SetDynamicMode(true);
        const Graph& cGraph = m_InputNetwork->pNetworkImpl->GetGraph().TopologicalSort();

        // need to insert Debug layers in the DynamicQuantizationStrategy
        Graph& graph = const_cast<Graph&>(cGraph);

        // Initialize RangeTracker to the default values for each layer.
        // The default values are overwritten by the min/max that is
        // recorded during the first dataset min/max calibration. This
        // initialisation is only required for the first call of Refine().
        m_DynamicQuantizationStrategy = DynamicQuantizationStrategy(m_Ranges, graph);
        ApplyStrategyToLayers(cGraph, m_DynamicQuantizationStrategy.value());

        IRuntime::CreationOptions options;
        m_Runtime = IRuntime::Create(options);

        // Optimize network - debug already enabled for layers that require quantization
        OptimizerOptions optimizerOptions(false, false);
        std::vector<BackendId> backends = {"CpuRef"};
        IOptimizedNetworkPtr optimizedNet = Optimize(*m_InputNetwork,
                                                     backends,
                                                     m_Runtime->GetDeviceSpec(),
                                                     optimizerOptions);

        m_Runtime->LoadNetwork(m_NetworkId, std::move(optimizedNet));

        // Debug callback function to refine min/max in RangeTracker
        auto rangeTrackerCallback = [&](LayerGuid guid, unsigned int slotIndex, ITensorHandle* tensorHandle) {
            // Get min/max pair from tensor data
            std::pair<float, float> minMax = armnnUtils::FindMinMax(tensorHandle);

            // For the first calibration dataset, set the min/max range in the RangeTracker
            // to the min/max ranges gathered during inference
            if (m_RefineCount == 0)
            {
                m_Ranges.ResetMinMax(guid, slotIndex, minMax.first, minMax.second);
            }
            else
            {
                // For every other calibration dataset, only update the min/max range if the
                // values gathered are less than / greater than those originally recorded.
                m_Ranges.RefineMin(guid, slotIndex, minMax.first);
                m_Ranges.RefineMax(guid, slotIndex, minMax.second);
            }
        };

        m_Runtime->RegisterDebugCallback(m_NetworkId, rangeTrackerCallback);
    }

    // Create output tensors for EnqueueWorkload
    std::vector<armnn::BindingPointInfo> outputBindings;
    auto outputLayers = m_DynamicQuantizationStrategy.value().GetOutputLayers();
    std::vector<TContainer> outputVectors;
    for (auto outputLayerBindingId : outputLayers)
    {
        auto outputTensorInfo = m_Runtime->GetOutputTensorInfo(m_NetworkId, outputLayerBindingId);
        outputBindings.push_back(std::make_pair(outputLayerBindingId, outputTensorInfo));
        outputVectors.push_back(std::vector<float>(outputTensorInfo.GetNumElements(), 0));
    }
    OutputTensors outputTensors = armnnUtils::MakeOutputTensors<TContainer>(outputBindings, outputVectors);

    // Execute EnqueueWorkload with the calibration image
    m_Runtime->EnqueueWorkload(m_NetworkId, inputTensors, outputTensors);
    ++m_RefineCount;
}

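// Illustrative note: the min/max ranges recorded by Refine() are what the
// IQuantizationScheme selected in ExportNetwork() below turns into per-tensor
// quantization parameters. For an asymmetric 8-bit scheme such as QAsymmU8 the
// derivation is typically of this shape (the exact arithmetic and rounding live
// in the scheme implementations and may differ):
//
//     float scale  = (max - min) / 255.0f;                        // step of the 8-bit grid
//     int   offset = static_cast<int>(std::round(-min / scale));  // zero point
//
// e.g. a calibrated range of [-1.0f, 1.0f] gives a scale of roughly 0.00784f and offset 128.
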
INetworkPtr NetworkQuantizer::ExportNetwork()
{
    const Graph& graph = m_InputNetwork->pNetworkImpl->GetGraph().TopologicalSort();

    // Step 1) Walk the graph and populate default min/max values for
    // intermediate tensors, but only if the Runtime does not exist
    // (it will have been created if Refine has been called)
    if (!m_Runtime)
    {
        m_Ranges.SetDynamicMode(false);
        StaticRangeStrategy rangeStrategy(m_Ranges);
        ApplyStrategyToLayers(graph, rangeStrategy);
    }
    else
    {
        // Set min/max range of non-calibrated layers to the parent layer's range
        m_DynamicQuantizationStrategy.value().VisitNonCalibratedLayers();
        // now tear down the runtime and the dynamic visitor
        m_Runtime.reset(nullptr);
        m_DynamicQuantizationStrategy = EmptyOptional();
        m_RefineCount = 0;
    }

    // Step 2) Convert the input network to a quantized network
    std::unique_ptr<IQuantizationScheme> quantizationScheme;
    switch (m_Options.m_ActivationFormat)
    {
        case DataType::QAsymmU8:
            quantizationScheme = std::make_unique<QAsymmU8QuantizationScheme>();
            break;
        case DataType::QAsymmS8:
            quantizationScheme = std::make_unique<QAsymmS8QuantizationScheme>();
            break;
        case DataType::QSymmS8:
            quantizationScheme = std::make_unique<QSymmS8QuantizationScheme>();
            break;
        case DataType::QSymmS16:
            quantizationScheme = std::make_unique<QSymm16QuantizationScheme>();
            break;
        default:
            throw InvalidArgumentException("Unsupported quantization target");
    }

    QuantizerStrategy quantizerVisitor(m_Ranges, quantizationScheme.get(), m_Options.m_PreserveType);
    ApplyStrategyToLayers(graph, quantizerVisitor);

    // clear the ranges
    m_Ranges.Reset();

    return quantizerVisitor.RetrieveFinalNetwork();
}

} // namespace armnn
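
For context, a minimal sketch of how this quantizer is driven from user code, assuming an already-built float32 INetwork and a single input with a known LayerBindingId and TensorInfo. The helper name and calibration data are hypothetical, the public header path may vary between releases, and QuantizerOptions is assumed to expose m_ActivationFormat directly (as this file's use of m_Options suggests):

#include <armnn/ArmNN.hpp>
#include <armnn/INetworkQuantizer.hpp>   // assumed public header for INetworkQuantizer

// Hypothetical helper: calibrate with a few sample inputs, then export a
// quantized copy of the network.
armnn::INetworkPtr QuantizeToQAsymmU8(armnn::INetwork* floatNetwork,
                                      armnn::LayerBindingId inputId,
                                      const armnn::TensorInfo& inputInfo,
                                      const std::vector<std::vector<float>>& calibrationSamples)
{
    armnn::QuantizerOptions options;
    options.m_ActivationFormat = armnn::DataType::QAsymmU8;   // one of the cases in ExportNetwork()

    armnn::INetworkQuantizerPtr quantizer = armnn::INetworkQuantizer::Create(floatNetwork, options);

    // Optionally pin the input range instead of relying purely on calibration.
    quantizer->OverrideInputRange(inputId, -1.0f, 1.0f);

    // One Refine() call per calibration sample; the first call builds the CpuRef
    // runtime and registers the range-tracking debug callback (see Refine() above).
    for (const auto& sample : calibrationSamples)
    {
        armnn::InputTensors inputs{ { inputId, armnn::ConstTensor(inputInfo, sample.data()) } };
        quantizer->Refine(inputs);
    }

    // Tears down the calibration runtime and returns the quantized network.
    return quantizer->ExportNetwork();
}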