ArmNN  NotReleased
NetworkQuantizer Class Reference

#include <NetworkQuantizer.hpp>

Inheritance diagram for NetworkQuantizer:
INetworkQuantizer

Public Member Functions

 NetworkQuantizer (INetwork *inputNetwork, const QuantizerOptions &options)
 
void OverrideInputRange (LayerBindingId layerId, float min, float max) override
 Overrides the default quantization values for the input layer with the given id. More...
 
void Refine (const InputTensors &inputTensors) override
 Refine input network with a set of refinement data for specified LayerBindingId. More...
 
std::pair< float, float > GetMinMaxRange (LayerGuid guid, unsigned int idx)
 
INetworkPtr ExportNetwork () override
 Extract final quantized network. More...
 

Additional Inherited Members

- Static Public Member Functions inherited from INetworkQuantizer
static INetworkQuantizer * CreateRaw (INetwork *inputNetwork, const QuantizerOptions &options=QuantizerOptions())
 Create Quantizer object and return raw pointer. More...
 
static INetworkQuantizerPtr Create (INetwork *inputNetwork, const QuantizerOptions &options=QuantizerOptions())
 Create Quantizer object wrapped in unique_ptr. More...
 
static void Destroy (INetworkQuantizer *quantizer)
 Destroy Quantizer object. More...
 
- Protected Member Functions inherited from INetworkQuantizer
virtual ~INetworkQuantizer ()
 

Detailed Description

Definition at line 20 of file NetworkQuantizer.hpp.

Constructor & Destructor Documentation

◆ NetworkQuantizer()

NetworkQuantizer ( INetwork * inputNetwork,
const QuantizerOptions & options 
)
inline

Definition at line 23 of file NetworkQuantizer.hpp.

References NetworkQuantizer::OverrideInputRange(), and NetworkQuantizer::Refine().

24  : m_InputNetwork(inputNetwork),
25  m_NetworkId(0),
26  m_Runtime(nullptr, &IRuntime::Destroy),
27  m_RefineCount(0),
28  m_Options(options) {}
static void Destroy(IRuntime *runtime)
Definition: Runtime.cpp:37
armnn::Runtime::CreationOptions::ExternalProfilingOptions options

Member Function Documentation

◆ ExportNetwork()

INetworkPtr ExportNetwork ( )
overridevirtual

Extract final quantized network.

Implements INetworkQuantizer.

Definition at line 136 of file NetworkQuantizer.cpp.

References armnn::QAsymmS8, armnn::QAsymmU8, armnn::QSymmS16, armnn::QSymmS8, Graph::TopologicalSort(), and armnn::VisitLayers().

Referenced by NetworkQuantizer::GetMinMaxRange().

137 {
138  const Graph& graph = boost::polymorphic_downcast<const Network*>(m_InputNetwork)->GetGraph().TopologicalSort();
139 
140  // Step 1) Walk the graph and populate default min/max values for
141  // intermediate tensors, only if Runtime does not exist (created
142  // if Refine has been called)
143  if (!m_Runtime)
144  {
145  m_Ranges.SetDynamicMode(false);
146  StaticRangeVisitor rangeVisitor(m_Ranges);
147  VisitLayers(graph, rangeVisitor);
148  }
149  else
150  {
151  // Set min/max range of non-calibrated layers to parent layer's range
152  m_DynamicQuantizationVisitor.value().VisitNonCalibratedLayers();
153  // now tear down the runtime and the dynamic visitor.
154  m_Runtime.reset(nullptr);
155  m_DynamicQuantizationVisitor = EmptyOptional();
156  m_RefineCount = 0;
157  }
158 
159  // Step 2) Convert input InputNetwork to Quantized InputNetwork
160  std::unique_ptr<IQuantizationScheme> quantizationScheme;
161  switch (m_Options.m_ActivationFormat)
162  {
163  case DataType::QAsymmU8:
164  quantizationScheme = std::make_unique<QAsymmU8QuantizationScheme>();
165  break;
166  case DataType::QAsymmS8:
167  quantizationScheme = std::make_unique<QAsymmS8QuantizationScheme>();
168  break;
169  case DataType::QSymmS8:
170  quantizationScheme = std::make_unique<QSymmS8QuantizationScheme>();
171  break;
172  case DataType::QSymmS16:
173  quantizationScheme = std::make_unique<QSymm16QuantizationScheme>();
174  break;
175  default:
176  throw InvalidArgumentException("Unsupported quantization target");
177  }
178 
179  QuantizerVisitor quantizerVisitor(m_Ranges, quantizationScheme.get(), m_Options.m_PreserveType);
180  VisitLayers(graph, quantizerVisitor);
181 
182  // clear the ranges
183  m_Ranges.Reset();
184 
185  return quantizerVisitor.RetrieveFinalNetwork();
186 }
void VisitLayers(const LayerContainer &layerContainer, ILayerVisitor &visitor)
void SetDynamicMode(bool flag)

◆ GetMinMaxRange()

std::pair<float, float> GetMinMaxRange ( LayerGuid  guid,
unsigned int  idx 
)
inline

Definition at line 34 of file NetworkQuantizer.hpp.

References NetworkQuantizer::ExportNetwork(), and RangeTracker::GetRange().

34 { return m_Ranges.GetRange(guid, idx); }
MinMaxRange GetRange(LayerGuid guid, unsigned int idx) const
Retrieve the Range for a particular output slot on a particular layer.

◆ OverrideInputRange()

void OverrideInputRange ( LayerBindingId  layerId,
float  min,
float  max 
)
overridevirtual

Overrides the default quantization values for the input layer with the given id.

Implements INetworkQuantizer.

Definition at line 50 of file NetworkQuantizer.cpp.

References Graph::GetInputLayers(), and armnn::VisitLayers().

Referenced by NetworkQuantizer::NetworkQuantizer().

51 {
52  const Graph& graph = boost::polymorphic_downcast<const Network*>(m_InputNetwork)->GetGraph();
53  auto inputLayers = graph.GetInputLayers();
54 
55  // Walk the input layers of the graph and override the quantization parameters of the one with the given id
56  OverrideInputRangeVisitor overrideInputRangeVisitor(m_Ranges, layerId, RangeTracker::MinMaxRange{min, max});
57  VisitLayers(inputLayers, overrideInputRangeVisitor);
58 }
void VisitLayers(const LayerContainer &layerContainer, ILayerVisitor &visitor)
std::pair< float, float > MinMaxRange

◆ Refine()

void Refine ( const InputTensors & inputTensors)
overridevirtual

Refine input network with a set of refinement data for specified LayerBindingId.

Implements INetworkQuantizer.

Definition at line 60 of file NetworkQuantizer.cpp.

References IRuntime::Create(), armnnUtils::FindMinMax(), armnn::Optimize(), options, Graph::TopologicalSort(), and armnn::VisitLayers().

Referenced by NetworkQuantizer::NetworkQuantizer().

61 {
62  // The first time Refine is called the m_Runtime and the DynamicQuantizationVisitor
63  // will not have been created. Need to get the environment set up, Runtime loaded,
64  // DynamicQuantizationVisitor created and run over the network to initialise itself
65  // and the RangeTracker the Debug callback registered and an initial inference
66  // done to set up the first min/max values
67  if (!m_Runtime)
68  {
69  m_RefineCount = 0;
70  m_Ranges.SetDynamicMode(true);
71  const Graph& cGraph = boost::polymorphic_downcast<const Network*>(m_InputNetwork)->GetGraph().TopologicalSort();
72 
73  // need to insert Debug layers in the DynamicQuantizationVisitor
74  Graph& graph = const_cast<Graph&>(cGraph);
75 
76  // Initialize RangeTracker to the default values for each layer.
77  // The default values are overwritten by the min/max that is
78  // recorded during the first dataset min/max calibration. This
79  // initialisation is only required for the first call of Refine().
80  m_DynamicQuantizationVisitor = DynamicQuantizationVisitor(m_Ranges, graph);
81  VisitLayers(cGraph, m_DynamicQuantizationVisitor.value());
82 
83  IRuntime::CreationOptions options;
84  m_Runtime = IRuntime::Create(options);
85 
86  // Optimize network - debug already enabled for layers that require quantization
87  OptimizerOptions optimizerOptions(false, false);
88  std::vector<BackendId> backends = {"CpuRef"};
89  IOptimizedNetworkPtr optimizedNet = Optimize(*m_InputNetwork,
90  backends,
91  m_Runtime->GetDeviceSpec(),
92  optimizerOptions);
93 
94  m_Runtime->LoadNetwork(m_NetworkId, std::move(optimizedNet));
95 
96  // Debug callback function to refine min/max in RangeTracker
97  auto rangeTrackerCallback = [&](LayerGuid guid, unsigned int slotIndex, ITensorHandle *tensorHandle) {
98  // Get min/max pair from tensor data
99  std::pair<float, float> minMax = armnnUtils::FindMinMax(tensorHandle);
100 
101  // For first calibration dataset, set min/max range in RangeTracker to
102  // min/max ranges gathered during inference
103  if (m_RefineCount == 0)
104  {
105  m_Ranges.ResetMinMax(guid, slotIndex, minMax.first, minMax.second);
106  }
107  else
108  {
109  // For every other calibration dataset, only set min/max range if the
110  // values gathered are less than / greater than originally recorded.
111  m_Ranges.RefineMin(guid, slotIndex, minMax.first);
112  m_Ranges.RefineMax(guid, slotIndex, minMax.second);
113  }
114  };
115 
116  m_Runtime->RegisterDebugCallback(m_NetworkId, rangeTrackerCallback);
117  }
118 
119  // Create output tensor for EnqueueWorkload
120  std::vector<armnn::BindingPointInfo> outputBindings;
121  auto outputLayers = m_DynamicQuantizationVisitor.value().GetOutputLayers();
122  std::vector<TContainer> outputVectors;
123  for (auto outputLayerBindingId : outputLayers)
124  {
125  auto outputTensorInfo = m_Runtime->GetOutputTensorInfo(m_NetworkId, outputLayerBindingId);
126  outputBindings.push_back(std::make_pair(outputLayerBindingId, outputTensorInfo));
127  outputVectors.push_back(std::vector<float>(outputTensorInfo.GetNumElements(), 0));
128  }
129  OutputTensors outputTensors = armnnUtils::MakeOutputTensors<TContainer>(outputBindings, outputVectors);
130 
131  // Execute EnqueueWorkload with calibration image
132  m_Runtime->EnqueueWorkload(m_NetworkId, inputTensors, outputTensors);
133  ++m_RefineCount;
134 }
std::pair< float, float > FindMinMax(armnn::ITensorHandle *tensorHandle)
Definition: TensorUtils.cpp:58
std::vector< std::pair< LayerBindingId, class Tensor > > OutputTensors
Definition: Tensor.hpp:226
void VisitLayers(const LayerContainer &layerContainer, ILayerVisitor &visitor)
IOptimizedNetworkPtr Optimize(const INetwork &network, const std::vector< BackendId > &backendPreferences, const IDeviceSpec &deviceSpec, const OptimizerOptions &options=OptimizerOptions(), Optional< std::vector< std::string > &> messages=EmptyOptional())
Definition: Network.cpp:807
void RefineMax(LayerGuid guid, unsigned int slotIndex, float newMax)
Update max in RangeTracker with new_max if it is greater than current value.
void RefineMin(LayerGuid guid, unsigned int slotIndex, float newMin)
Update min in RangeTracker with new_min if it is lower than current value.
static IRuntimePtr Create(const CreationOptions &options)
Definition: Runtime.cpp:32
std::unique_ptr< IOptimizedNetwork, void(*)(IOptimizedNetwork *network)> IOptimizedNetworkPtr
Definition: INetwork.hpp:544
void ResetMinMax(LayerGuid guid, unsigned int idx, float newMin, float newMax)
Overwrite min and max in RangeTracker with newMin and newMax.
armnn::Runtime::CreationOptions::ExternalProfilingOptions options
profiling::ProfilingGuid LayerGuid
Define LayerGuid type.
Definition: Types.hpp:233
void SetDynamicMode(bool flag)

The documentation for this class was generated from the following files: