ArmNN
 20.02
NetworkQuantizer Class Reference

#include <NetworkQuantizer.hpp>

Inheritance diagram for NetworkQuantizer:
INetworkQuantizer

Public Member Functions

 NetworkQuantizer (INetwork *inputNetwork, const QuantizerOptions &options)
 
void OverrideInputRange (LayerBindingId layerId, float min, float max) override
 Overrides the default quantization values for the input layer with the given id. More...
 
void Refine (const InputTensors &inputTensors) override
 Refine input network with a set of refinement data for specified LayerBindingId. More...
 
std::pair< float, float > GetMinMaxRange (LayerGuid guid, unsigned int idx)
 
INetworkPtr ExportNetwork () override
 Extract final quantized network. More...
 

Additional Inherited Members

- Static Public Member Functions inherited from INetworkQuantizer
static INetworkQuantizer * CreateRaw (INetwork *inputNetwork, const QuantizerOptions &options=QuantizerOptions())
 Create Quantizer object and return raw pointer. More...
 
static INetworkQuantizerPtr Create (INetwork *inputNetwork, const QuantizerOptions &options=QuantizerOptions())
 Create Quantizer object wrapped in unique_ptr. More...
 
static void Destroy (INetworkQuantizer *quantizer)
 Destroy Quantizer object. More...
 
- Protected Member Functions inherited from INetworkQuantizer
virtual ~INetworkQuantizer ()
 

Detailed Description

Definition at line 20 of file NetworkQuantizer.hpp.

Constructor & Destructor Documentation

◆ NetworkQuantizer()

NetworkQuantizer ( INetwork * inputNetwork,
const QuantizerOptions & options 
)
inline

Definition at line 23 of file NetworkQuantizer.hpp.

References NetworkQuantizer::OverrideInputRange(), and NetworkQuantizer::Refine().

24  : m_InputNetwork(inputNetwork),
25  m_NetworkId(0),
26  m_Runtime(nullptr, &IRuntime::Destroy),
27  m_RefineCount(0),
28  m_Options(options) {}
static void Destroy(IRuntime *runtime)
Definition: Runtime.cpp:37
armnn::Runtime::CreationOptions::ExternalProfilingOptions options

Member Function Documentation

◆ ExportNetwork()

INetworkPtr ExportNetwork ( )
overridevirtual

Extract final quantized network.

Implements INetworkQuantizer.

Definition at line 136 of file NetworkQuantizer.cpp.

References armnn::QAsymmS8, armnn::QAsymmU8, armnn::QSymmS16, armnn::QSymmS8, Graph::TopologicalSort(), and armnn::VisitLayers().

Referenced by NetworkQuantizer::GetMinMaxRange().

137 {
138  const Graph& graph = boost::polymorphic_downcast<const Network*>(m_InputNetwork)->GetGraph().TopologicalSort();
139 
140  // Step 1) Walk the graph and populate default min/max values for
141  // intermediate tensors, only if Runtime does not exist (created
142  // if Refine has been called)
143  if (!m_Runtime)
144  {
145  m_Ranges.SetDynamicMode(false);
146  StaticRangeVisitor rangeVisitor(m_Ranges);
147  VisitLayers(graph, rangeVisitor);
148  }
149  else
150  {
151  // Set min/max range of non-calibrated layers to parent layer's range
152  m_DynamicQuantizationVisitor.value().VisitNonCalibratedLayers();
153  // now tear down the runtime and the dynamic visitor.
154  m_Runtime.reset(nullptr);
155  m_DynamicQuantizationVisitor = EmptyOptional();
156  m_RefineCount = 0;
157  }
158 
159  // Step 2) Convert input InputNetwork to Quantized InputNetwork
160  std::unique_ptr<IQuantizationScheme> quantizationScheme;
161  switch (m_Options.m_ActivationFormat)
162  {
163  case DataType::QAsymmU8:
164  quantizationScheme = std::make_unique<QAsymmU8QuantizationScheme>();
165  break;
166  case DataType::QAsymmS8:
167  quantizationScheme = std::make_unique<QAsymmS8QuantizationScheme>();
168  break;
169  case DataType::QSymmS8:
170  quantizationScheme = std::make_unique<QSymmS8QuantizationScheme>();
171  break;
172  case DataType::QSymmS16:
173  quantizationScheme = std::make_unique<QSymm16QuantizationScheme>();
174  break;
175  default:
176  throw InvalidArgumentException("Unsupported quantization target");
177  }
178 
179  QuantizerVisitor quantizerVisitor(m_Ranges, quantizationScheme.get(), m_Options.m_PreserveType);
180  VisitLayers(graph, quantizerVisitor);
181 
182  // clear the ranges
183  m_Ranges.Reset();
184 
185  return quantizerVisitor.RetrieveFinalNetwork();
186 }
void VisitLayers(const LayerContainer &layerContainer, ILayerVisitor &visitor)
void SetDynamicMode(bool flag)

◆ GetMinMaxRange()

std::pair<float, float> GetMinMaxRange ( LayerGuid  guid,
unsigned int  idx 
)
inline

Definition at line 34 of file NetworkQuantizer.hpp.

References NetworkQuantizer::ExportNetwork(), and RangeTracker::GetRange().

34 { return m_Ranges.GetRange(guid, idx); }
MinMaxRange GetRange(LayerGuid guid, unsigned int idx) const
Retrieve the Range for a particular output slot on a particular layer.

◆ OverrideInputRange()

void OverrideInputRange ( LayerBindingId  layerId,
float  min,
float  max 
)
overridevirtual

Overrides the default quantization values for the input layer with the given id.

Implements INetworkQuantizer.

Definition at line 50 of file NetworkQuantizer.cpp.

References Graph::GetInputLayers(), and armnn::VisitLayers().

Referenced by NetworkQuantizer::NetworkQuantizer().

51 {
52  const Graph& graph = boost::polymorphic_downcast<const Network*>(m_InputNetwork)->GetGraph();
53  auto inputLayers = graph.GetInputLayers();
54 
55  // Walk the input layers of the graph and override the quantization parameters of the one with the given id
56  OverrideInputRangeVisitor overrideInputRangeVisitor(m_Ranges, layerId, RangeTracker::MinMaxRange{min, max});
57  VisitLayers(inputLayers, overrideInputRangeVisitor);
58 }
void VisitLayers(const LayerContainer &layerContainer, ILayerVisitor &visitor)
std::pair< float, float > MinMaxRange

◆ Refine()

void Refine ( const InputTensors & inputTensors)
overridevirtual

Refine input network with a set of refinement data for specified LayerBindingId.

Implements INetworkQuantizer.

Definition at line 60 of file NetworkQuantizer.cpp.

References IRuntime::Create(), armnnUtils::FindMinMax(), armnn::Optimize(), options, Graph::TopologicalSort(), and armnn::VisitLayers().

Referenced by NetworkQuantizer::NetworkQuantizer().

61 {
62  // The first time Refine is called the m_Runtime and the DynamicQuantizationVisitor
63  // will not have been created. Need to get the environment set up, Runtime loaded,
64  // DynamicQuantizationVisitor created and run over the network to initialise itself
65  // and the RangeTracker the Debug callback registered and an initial inference
66  // done to set up the first min/max values
67  if (!m_Runtime)
68  {
69  m_RefineCount = 0;
70  m_Ranges.SetDynamicMode(true);
71  const Graph& cGraph = boost::polymorphic_downcast<const Network*>(m_InputNetwork)->GetGraph().TopologicalSort();
72 
73  // need to insert Debug layers in the DynamicQuantizationVisitor
74  Graph& graph = const_cast<Graph&>(cGraph);
75 
76  // Initialize RangeTracker to the default values for each layer.
77  // The default values are overwritten by the min/max that is
78  // recorded during the first dataset min/max calibration. This
79  // initialisation is only required for the first call of Refine().
80  m_DynamicQuantizationVisitor = DynamicQuantizationVisitor(m_Ranges, graph);
81  VisitLayers(cGraph, m_DynamicQuantizationVisitor.value());
82 
83  IRuntime::CreationOptions options;
84  m_Runtime = IRuntime::Create(options);
85 
86  // Optimize network - debug already enabled for layers that require quantization
87  OptimizerOptions optimizerOptions(false, false);
88  std::vector<BackendId> backends = {"CpuRef"};
89  IOptimizedNetworkPtr optimizedNet = Optimize(*m_InputNetwork,
90  backends,
91  m_Runtime->GetDeviceSpec(),
92  optimizerOptions);
93 
94  m_Runtime->LoadNetwork(m_NetworkId, std::move(optimizedNet));
95 
96  // Debug callback function to refine min/max in RangeTracker
97  auto rangeTrackerCallback = [&](LayerGuid guid, unsigned int slotIndex, ITensorHandle *tensorHandle) {
98  // Get min/max pair from tensor data
99  std::pair<float, float> minMax = armnnUtils::FindMinMax(tensorHandle);
100 
101  // For first calibration dataset, set min/max range in RangeTracker to
102  // min/max ranges gathered during inference
103  if (m_RefineCount == 0)
104  {
105  m_Ranges.ResetMinMax(guid, slotIndex, minMax.first, minMax.second);
106  }
107  else
108  {
109  // For every other calibration dataset, only set min/max range if the
110  // values gathered are less than / greater than originally recorded.
111  m_Ranges.RefineMin(guid, slotIndex, minMax.first);
112  m_Ranges.RefineMax(guid, slotIndex, minMax.second);
113  }
114  };
115 
116  m_Runtime->RegisterDebugCallback(m_NetworkId, rangeTrackerCallback);
117  }
118 
119  // Create output tensor for EnqueueWorkload
120  std::vector<armnn::BindingPointInfo> outputBindings;
121  auto outputLayers = m_DynamicQuantizationVisitor.value().GetOutputLayers();
122  std::vector<TContainer> outputVectors;
123  for (auto outputLayerBindingId : outputLayers)
124  {
125  auto outputTensorInfo = m_Runtime->GetOutputTensorInfo(m_NetworkId, outputLayerBindingId);
126  outputBindings.push_back(std::make_pair(outputLayerBindingId, outputTensorInfo));
127  outputVectors.push_back(std::vector<float>(outputTensorInfo.GetNumElements(), 0));
128  }
129  OutputTensors outputTensors = armnnUtils::MakeOutputTensors<TContainer>(outputBindings, outputVectors);
130 
131  // Execute EnqueueWorkload with calibration image
132  m_Runtime->EnqueueWorkload(m_NetworkId, inputTensors, outputTensors);
133  ++m_RefineCount;
134 }
static IRuntimePtr Create(const CreationOptions &options)
Definition: Runtime.cpp:32
void ResetMinMax(LayerGuid guid, unsigned int idx, float newMin, float newMax)
Overwrite min and max in RangeTracker with newMin and newMax.
void RefineMax(LayerGuid guid, unsigned int slotIndex, float newMax)
Update max in RangeTracker with new_max if it is greater than current value.
void VisitLayers(const LayerContainer &layerContainer, ILayerVisitor &visitor)
IOptimizedNetworkPtr Optimize(const INetwork &network, const std::vector< BackendId > &backendPreferences, const IDeviceSpec &deviceSpec, const OptimizerOptions &options=OptimizerOptions(), Optional< std::vector< std::string > &> messages=EmptyOptional())
Create an optimized version of the network.
Definition: Network.cpp:890
std::vector< std::pair< LayerBindingId, class Tensor > > OutputTensors
Definition: Tensor.hpp:226
std::unique_ptr< IOptimizedNetwork, void(*)(IOptimizedNetwork *network)> IOptimizedNetworkPtr
Definition: INetwork.hpp:566
std::pair< float, float > FindMinMax(armnn::ITensorHandle *tensorHandle)
Definition: TensorUtils.cpp:58
void SetDynamicMode(bool flag)
profiling::ProfilingGuid LayerGuid
Define LayerGuid type.
Definition: Types.hpp:236
void RefineMin(LayerGuid guid, unsigned int slotIndex, float newMin)
Update min in RangeTracker with new_min if it is lower than current value.
armnn::Runtime::CreationOptions::ExternalProfilingOptions options

The documentation for this class was generated from the following files: