ArmNN
 20.11
NetworkQuantizer Class Reference

#include <NetworkQuantizer.hpp>

Inheritance diagram for NetworkQuantizer:
INetworkQuantizer

Public Member Functions

 NetworkQuantizer (INetwork *inputNetwork, const QuantizerOptions &options)
 
void OverrideInputRange (LayerBindingId layerId, float min, float max) override
 Overrides the default quantization values for the input layer with the given id. More...
 
void Refine (const InputTensors &inputTensors) override
 Refine input network with a set of refinement data for specified LayerBindingId. More...
 
std::pair< float, float > GetMinMaxRange (LayerGuid guid, unsigned int idx)
 
INetworkPtr ExportNetwork () override
 Extract final quantized network. More...
 

Additional Inherited Members

- Static Public Member Functions inherited from INetworkQuantizer
static INetworkQuantizer * CreateRaw (INetwork *inputNetwork, const QuantizerOptions &options=QuantizerOptions())
 Create Quantizer object and return raw pointer. More...
 
static INetworkQuantizerPtr Create (INetwork *inputNetwork, const QuantizerOptions &options=QuantizerOptions())
 Create Quantizer object wrapped in unique_ptr. More...
 
static void Destroy (INetworkQuantizer *quantizer)
 Destroy Quantizer object. More...
 
- Protected Member Functions inherited from INetworkQuantizer
virtual ~INetworkQuantizer ()
 

Detailed Description

Definition at line 20 of file NetworkQuantizer.hpp.

Constructor & Destructor Documentation

◆ NetworkQuantizer()

NetworkQuantizer ( INetwork *  inputNetwork,
const QuantizerOptions &  options 
)
inline

Definition at line 23 of file NetworkQuantizer.hpp.

References NetworkQuantizer::OverrideInputRange(), and NetworkQuantizer::Refine().

24  : m_InputNetwork(inputNetwork),
25  m_NetworkId(0),
26  m_Runtime(nullptr, &IRuntime::Destroy),
27  m_RefineCount(0),
28  m_Options(options) {}
static void Destroy(IRuntime *runtime)
Definition: Runtime.cpp:37

Member Function Documentation

◆ ExportNetwork()

INetworkPtr ExportNetwork ( )
overridevirtual

Extract final quantized network.

Implements INetworkQuantizer.

Definition at line 137 of file NetworkQuantizer.cpp.

References armnn::QAsymmS8, armnn::QAsymmU8, armnn::QSymmS16, armnn::QSymmS8, Graph::TopologicalSort(), and armnn::VisitLayers().

Referenced by NetworkQuantizer::GetMinMaxRange().

138 {
139  const Graph& graph = PolymorphicDowncast<const Network*>(m_InputNetwork)->GetGraph().TopologicalSort();
140 
141  // Step 1) Walk the graph and populate default min/max values for
142  // intermediate tensors, only if Runtime does not exist (created
143  // if Refine has been called)
144  if (!m_Runtime)
145  {
146  m_Ranges.SetDynamicMode(false);
147  StaticRangeVisitor rangeVisitor(m_Ranges);
148  VisitLayers(graph, rangeVisitor);
149  }
150  else
151  {
152  // Set min/max range of non-calibrated layers to parent layer's range
153  m_DynamicQuantizationVisitor.value().VisitNonCalibratedLayers();
154  // now tear down the runtime and the dynamic visitor.
155  m_Runtime.reset(nullptr);
156  m_DynamicQuantizationVisitor = EmptyOptional();
157  m_RefineCount = 0;
158  }
159 
160  // Step 2) Convert input InputNetwork to Quantized InputNetwork
161  std::unique_ptr<IQuantizationScheme> quantizationScheme;
162  switch (m_Options.m_ActivationFormat)
163  {
164  case DataType::QAsymmU8:
165  quantizationScheme = std::make_unique<QAsymmU8QuantizationScheme>();
166  break;
167  case DataType::QAsymmS8:
168  quantizationScheme = std::make_unique<QAsymmS8QuantizationScheme>();
169  break;
170  case DataType::QSymmS8:
171  quantizationScheme = std::make_unique<QSymmS8QuantizationScheme>();
172  break;
173  case DataType::QSymmS16:
174  quantizationScheme = std::make_unique<QSymm16QuantizationScheme>();
175  break;
176  default:
177  throw InvalidArgumentException("Unsupported quantization target");
178  }
179 
180  QuantizerVisitor quantizerVisitor(m_Ranges, quantizationScheme.get(), m_Options.m_PreserveType);
181  VisitLayers(graph, quantizerVisitor);
182 
183  // clear the ranges
184  m_Ranges.Reset();
185 
186  return quantizerVisitor.RetrieveFinalNetwork();
187 }
void VisitLayers(const LayerContainer &layerContainer, ILayerVisitor &visitor)
void SetDynamicMode(bool flag)

◆ GetMinMaxRange()

std::pair<float, float> GetMinMaxRange ( LayerGuid  guid,
unsigned int  idx 
)
inline

Definition at line 34 of file NetworkQuantizer.hpp.

References NetworkQuantizer::ExportNetwork(), and RangeTracker::GetRange().

34 { return m_Ranges.GetRange(guid, idx); }
MinMaxRange GetRange(LayerGuid guid, unsigned int idx) const
Retrieve the Range for a particular output slot on a particular layer.

◆ OverrideInputRange()

void OverrideInputRange ( LayerBindingId  layerId,
float  min,
float  max 
)
overridevirtual

Overrides the default quantization values for the input layer with the given id.

Implements INetworkQuantizer.

Definition at line 51 of file NetworkQuantizer.cpp.

References Graph::GetInputLayers(), and armnn::VisitLayers().

Referenced by NetworkQuantizer::NetworkQuantizer().

52 {
53  const Graph& graph = PolymorphicDowncast<const Network*>(m_InputNetwork)->GetGraph();
54  auto inputLayers = graph.GetInputLayers();
55 
56  // Walk the input layers of the graph and override the quantization parameters of the one with the given id
57  OverrideInputRangeVisitor overrideInputRangeVisitor(m_Ranges, layerId, RangeTracker::MinMaxRange{min, max});
58  VisitLayers(inputLayers, overrideInputRangeVisitor);
59 }
void VisitLayers(const LayerContainer &layerContainer, ILayerVisitor &visitor)
std::pair< float, float > MinMaxRange

◆ Refine()

void Refine ( const InputTensors &  inputTensors)
overridevirtual

Refine input network with a set of refinement data for specified LayerBindingId.

Implements INetworkQuantizer.

Definition at line 61 of file NetworkQuantizer.cpp.

References IRuntime::Create(), armnnUtils::FindMinMax(), armnn::Optimize(), Graph::TopologicalSort(), and armnn::VisitLayers().

Referenced by NetworkQuantizer::NetworkQuantizer().

62 {
63  // The first time Refine is called the m_Runtime and the DynamicQuantizationVisitor
64  // will not have been created. Need to get the environment set up, Runtime loaded,
65  // DynamicQuantizationVisitor created and run over the network to initialise itself
66  // and the RangeTracker the Debug callback registered and an initial inference
67  // done to set up the first min/max values
68  if (!m_Runtime)
69  {
70  m_RefineCount = 0;
71  m_Ranges.SetDynamicMode(true);
72  const Graph& cGraph = PolymorphicDowncast<const Network*>(m_InputNetwork)->GetGraph().TopologicalSort();
73 
74  // need to insert Debug layers in the DynamicQuantizationVisitor
75  Graph& graph = const_cast<Graph&>(cGraph);
76 
77  // Initialize RangeTracker to the default values for each layer.
78  // The default values are overwritten by the min/max that is
79  // recorded during the first dataset min/max calibration. This
80  // initialisation is only required for the first call of Refine().
81  m_DynamicQuantizationVisitor = DynamicQuantizationVisitor(m_Ranges, graph);
82  VisitLayers(cGraph, m_DynamicQuantizationVisitor.value());
83 
84  IRuntime::CreationOptions options;
85  m_Runtime = IRuntime::Create(options);
86 
87  // Optimize network - debug already enabled for layers that require quantization
88  OptimizerOptions optimizerOptions(false, false);
89  std::vector<BackendId> backends = {"CpuRef"};
90  IOptimizedNetworkPtr optimizedNet = Optimize(*m_InputNetwork,
91  backends,
92  m_Runtime->GetDeviceSpec(),
93  optimizerOptions);
94 
95  m_Runtime->LoadNetwork(m_NetworkId, std::move(optimizedNet));
96 
97  // Debug callback function to refine min/max in RangeTracker
98  auto rangeTrackerCallback = [&](LayerGuid guid, unsigned int slotIndex, ITensorHandle *tensorHandle) {
99  // Get min/max pair from tensor data
100  std::pair<float, float> minMax = armnnUtils::FindMinMax(tensorHandle);
101 
102  // For first calibration dataset, set min/max range in RangeTracker to
103  // min/max ranges gathered during inference
104  if (m_RefineCount == 0)
105  {
106  m_Ranges.ResetMinMax(guid, slotIndex, minMax.first, minMax.second);
107  }
108  else
109  {
110  // For every other calibration dataset, only set min/max range if the
111  // values gathered are less than / greater than originally recorded.
112  m_Ranges.RefineMin(guid, slotIndex, minMax.first);
113  m_Ranges.RefineMax(guid, slotIndex, minMax.second);
114  }
115  };
116 
117  m_Runtime->RegisterDebugCallback(m_NetworkId, rangeTrackerCallback);
118  }
119 
120  // Create output tensor for EnqueueWorkload
121  std::vector<armnn::BindingPointInfo> outputBindings;
122  auto outputLayers = m_DynamicQuantizationVisitor.value().GetOutputLayers();
123  std::vector<TContainer> outputVectors;
124  for (auto outputLayerBindingId : outputLayers)
125  {
126  auto outputTensorInfo = m_Runtime->GetOutputTensorInfo(m_NetworkId, outputLayerBindingId);
127  outputBindings.push_back(std::make_pair(outputLayerBindingId, outputTensorInfo));
128  outputVectors.push_back(std::vector<float>(outputTensorInfo.GetNumElements(), 0));
129  }
130  OutputTensors outputTensors = armnnUtils::MakeOutputTensors<TContainer>(outputBindings, outputVectors);
131 
132  // Execute EnqueueWorkload with calibration image
133  m_Runtime->EnqueueWorkload(m_NetworkId, inputTensors, outputTensors);
134  ++m_RefineCount;
135 }
static IRuntimePtr Create(const CreationOptions &options)
Definition: Runtime.cpp:32
void ResetMinMax(LayerGuid guid, unsigned int idx, float newMin, float newMax)
Overwrite min and max in RangeTracker with newMin and newMax.
void RefineMax(LayerGuid guid, unsigned int slotIndex, float newMax)
Update max in RangeTracker with new_max if it is greater than current value.
void VisitLayers(const LayerContainer &layerContainer, ILayerVisitor &visitor)
IOptimizedNetworkPtr Optimize(const INetwork &network, const std::vector< BackendId > &backendPreferences, const IDeviceSpec &deviceSpec, const OptimizerOptions &options=OptimizerOptions(), Optional< std::vector< std::string > &> messages=EmptyOptional())
Create an optimized version of the network.
Definition: Network.cpp:1011
std::vector< std::pair< LayerBindingId, class Tensor > > OutputTensors
Definition: Tensor.hpp:341
std::unique_ptr< IOptimizedNetwork, void(*)(IOptimizedNetwork *network)> IOptimizedNetworkPtr
Definition: INetwork.hpp:600
std::pair< float, float > FindMinMax(armnn::ITensorHandle *tensorHandle)
Definition: TensorUtils.cpp:58
void SetDynamicMode(bool flag)
profiling::ProfilingGuid LayerGuid
Define LayerGuid type.
Definition: Types.hpp:267
void RefineMin(LayerGuid guid, unsigned int slotIndex, float newMin)
Update min in RangeTracker with new_min if it is lower than current value.

The documentation for this class was generated from the following files: