25 #include <boost/variant.hpp> 33 using TContainer = boost::variant<std::vector<float>, std::vector<int>, std::vector<unsigned char>>;
47 delete boost::polymorphic_downcast<NetworkQuantizer*>(quantizer);
52 const Graph& graph = boost::polymorphic_downcast<const Network*>(m_InputNetwork)->GetGraph();
57 VisitLayers(inputLayers, overrideInputRangeVisitor);
70 m_Ranges.SetDynamicMode(
true);
71 const Graph& cGraph = boost::polymorphic_downcast<const Network*>(m_InputNetwork)->GetGraph().
TopologicalSort();
81 VisitLayers(cGraph, m_DynamicQuantizationVisitor.value());
88 std::vector<BackendId> backends = {
"CpuRef"};
91 m_Runtime->GetDeviceSpec(),
94 m_Runtime->LoadNetwork(m_NetworkId, std::move(optimizedNet));
103 if (m_RefineCount == 0)
105 m_Ranges.ResetMinMax(guid, slotIndex, minMax.first, minMax.second);
111 m_Ranges.RefineMin(guid, slotIndex, minMax.first);
112 m_Ranges.RefineMax(guid, slotIndex, minMax.second);
116 m_Runtime->RegisterDebugCallback(m_NetworkId, rangeTrackerCallback);
120 std::vector<armnn::BindingPointInfo> outputBindings;
121 auto outputLayers = m_DynamicQuantizationVisitor.value().GetOutputLayers();
122 std::vector<TContainer> outputVectors;
123 for (
auto outputLayerBindingId : outputLayers)
125 auto outputTensorInfo = m_Runtime->GetOutputTensorInfo(m_NetworkId, outputLayerBindingId);
126 outputBindings.push_back(std::make_pair(outputLayerBindingId, outputTensorInfo));
127 outputVectors.push_back(std::vector<float>(outputTensorInfo.GetNumElements(), 0));
129 OutputTensors outputTensors = armnnUtils::MakeOutputTensors<TContainer>(outputBindings, outputVectors);
132 m_Runtime->EnqueueWorkload(m_NetworkId, inputTensors, outputTensors);
138 const Graph& graph = boost::polymorphic_downcast<const Network*>(m_InputNetwork)->GetGraph().
TopologicalSort();
145 m_Ranges.SetDynamicMode(
false);
152 m_DynamicQuantizationVisitor.value().VisitNonCalibratedLayers();
154 m_Runtime.reset(
nullptr);
160 std::unique_ptr<IQuantizationScheme> quantizationScheme;
161 switch (m_Options.m_ActivationFormat)
164 quantizationScheme = std::make_unique<QAsymmU8QuantizationScheme>();
167 quantizationScheme = std::make_unique<QAsymmS8QuantizationScheme>();
170 quantizationScheme = std::make_unique<QSymmS8QuantizationScheme>();
173 quantizationScheme = std::make_unique<QSymm16QuantizationScheme>();
179 QuantizerVisitor quantizerVisitor(m_Ranges, quantizationScheme.get(), m_Options.m_PreserveType);
185 return quantizerVisitor.RetrieveFinalNetwork();
Graph & TopologicalSort()
Sorts layers in topological order and returns this.
std::pair< float, float > FindMinMax(armnn::ITensorHandle *tensorHandle)
std::vector< std::pair< LayerBindingId, class Tensor > > OutputTensors
void VisitLayers(const LayerContainer &layerContainer, ILayerVisitor &visitor)
IOptimizedNetworkPtr Optimize(const INetwork &network, const std::vector< BackendId > &backendPreferences, const IDeviceSpec &deviceSpec, const OptimizerOptions &options=OptimizerOptions(), Optional< std::vector< std::string > &> messages=EmptyOptional())
static void Destroy(INetworkQuantizer *quantizer)
Destroy Quantizer object.
void Refine(const InputTensors &inputTensors) override
Refine input network with a set of refinement data for specified LayerBindingId.
std::pair< float, float > MinMaxRange
std::unique_ptr< class INetworkQuantizer, void(*)(INetworkQuantizer *quantizer)> INetworkQuantizerPtr
std::vector< std::pair< LayerBindingId, class ConstTensor > > InputTensors
Quantizer class that quantizes a float32 InputNetwork.
INetworkPtr ExportNetwork() override
Extract final quantized network.
Visitor class to establish min/max ranges based on the type of the layer.
InputLayersAccessor GetInputLayers() const
std::unique_ptr< INetwork, void(*)(INetwork *network)> INetworkPtr
static IRuntimePtr Create(const CreationOptions &options)
Visitor object for quantizing layers in a network.
boost::variant< std::vector< float >, std::vector< int >, std::vector< unsigned char > > TContainer
static INetworkQuantizerPtr Create(INetwork *inputNetwork, const QuantizerOptions &options=QuantizerOptions())
Create Quantizer object wrapped in unique_ptr.
std::unique_ptr< IOptimizedNetwork, void(*)(IOptimizedNetwork *network)> IOptimizedNetworkPtr
void OverrideInputRange(LayerBindingId layerId, float min, float max) override
Overrides the default quantization values for the input layer with the given id.
armnn::Runtime::CreationOptions::ExternalProfilingOptions options
Visitor class to establish min/max ranges based on the type of the layer.
int LayerBindingId
Type of identifiers for bindable layers (inputs, outputs).
static INetworkQuantizer * CreateRaw(INetwork *inputNetwork, const QuantizerOptions &options=QuantizerOptions())
Create Quantizer object and return raw pointer.