25 #include <boost/variant.hpp> 33 using TContainer = boost::variant<std::vector<float>, std::vector<int>, std::vector<unsigned char>>;
47 delete boost::polymorphic_downcast<NetworkQuantizer*>(quantizer);
52 const Graph& graph = boost::polymorphic_downcast<const Network*>(m_InputNetwork)->GetGraph();
57 VisitLayers(inputLayers, overrideInputRangeVisitor);
70 m_Ranges.SetDynamicMode(
true);
71 const Graph& cGraph = boost::polymorphic_downcast<const Network*>(m_InputNetwork)->GetGraph().
TopologicalSort();
81 VisitLayers(cGraph, m_DynamicQuantizationVisitor.value());
88 std::vector<BackendId> backends = {
"CpuRef"};
91 m_Runtime->GetDeviceSpec(),
94 m_Runtime->LoadNetwork(m_NetworkId, std::move(optimizedNet));
103 if (m_RefineCount == 0)
105 m_Ranges.ResetMinMax(guid, slotIndex, minMax.first, minMax.second);
111 m_Ranges.RefineMin(guid, slotIndex, minMax.first);
112 m_Ranges.RefineMax(guid, slotIndex, minMax.second);
116 m_Runtime->RegisterDebugCallback(m_NetworkId, rangeTrackerCallback);
120 std::vector<armnn::BindingPointInfo> outputBindings;
121 auto outputLayers = m_DynamicQuantizationVisitor.value().GetOutputLayers();
122 std::vector<TContainer> outputVectors;
123 for (
auto outputLayerBindingId : outputLayers)
125 auto outputTensorInfo = m_Runtime->GetOutputTensorInfo(m_NetworkId, outputLayerBindingId);
126 outputBindings.push_back(std::make_pair(outputLayerBindingId, outputTensorInfo));
127 outputVectors.push_back(std::vector<float>(outputTensorInfo.GetNumElements(), 0));
129 OutputTensors outputTensors = armnnUtils::MakeOutputTensors<TContainer>(outputBindings, outputVectors);
132 m_Runtime->EnqueueWorkload(m_NetworkId, inputTensors, outputTensors);
138 const Graph& graph = boost::polymorphic_downcast<const Network*>(m_InputNetwork)->GetGraph().
TopologicalSort();
145 m_Ranges.SetDynamicMode(
false);
152 m_DynamicQuantizationVisitor.value().VisitNonCalibratedLayers();
154 m_Runtime.reset(
nullptr);
160 std::unique_ptr<IQuantizationScheme> quantizationScheme;
161 switch (m_Options.m_ActivationFormat)
164 quantizationScheme = std::make_unique<QAsymmU8QuantizationScheme>();
167 quantizationScheme = std::make_unique<QAsymmS8QuantizationScheme>();
170 quantizationScheme = std::make_unique<QSymmS8QuantizationScheme>();
173 quantizationScheme = std::make_unique<QSymm16QuantizationScheme>();
179 QuantizerVisitor quantizerVisitor(m_Ranges, quantizationScheme.get(), m_Options.m_PreserveType);
185 return quantizerVisitor.RetrieveFinalNetwork();
static IRuntimePtr Create(const CreationOptions &options)
std::unique_ptr< class INetworkQuantizer, void(*)(INetworkQuantizer *quantizer)> INetworkQuantizerPtr
Main network class which provides the interface for building up a neural network.
std::vector< std::pair< LayerBindingId, class ConstTensor > > InputTensors
Copyright (c) 2020 ARM Limited.
void VisitLayers(const LayerContainer &layerContainer, ILayerVisitor &visitor)
std::pair< float, float > MinMaxRange
int LayerBindingId
Type of identifiers for bindable layers (inputs, outputs).
static void Destroy(INetworkQuantizer *quantizer)
Destroy Quantizer object.
Visitor class to establish min/max ranges based on the type of the layer.
void Refine(const InputTensors &inputTensors) override
Refine input network with a set of refinement data for specified LayerBindingId.
void OverrideInputRange(LayerBindingId layerId, float min, float max) override
Overrides the default quantization values for the input layer with the given id.
IOptimizedNetworkPtr Optimize(const INetwork &network, const std::vector< BackendId > &backendPreferences, const IDeviceSpec &deviceSpec, const OptimizerOptions &options=OptimizerOptions(), Optional< std::vector< std::string > &> messages=EmptyOptional())
Create an optimized version of the network.
static INetworkQuantizer * CreateRaw(INetwork *inputNetwork, const QuantizerOptions &options=QuantizerOptions())
Create Quantizer object and return raw pointer.
std::vector< std::pair< LayerBindingId, class Tensor > > OutputTensors
std::unique_ptr< IOptimizedNetwork, void(*)(IOptimizedNetwork *network)> IOptimizedNetworkPtr
std::pair< float, float > FindMinMax(armnn::ITensorHandle *tensorHandle)
Visitor object for quantizing layers in a network.
boost::variant< std::vector< float >, std::vector< int >, std::vector< unsigned char > > TContainer
EmptyOptional is used to initialize the Optional class in case we want to have a default value for an Optional object.
Visitor class to establish min/max ranges based on the type of the layer.
Graph & TopologicalSort()
Sorts layers in topological order and return this.
InputLayersAccessor GetInputLayers() const
Returns a wrapper object with begin(), end() methods to iterate over the input layers in a range-based for loop.
std::unique_ptr< INetwork, void(*)(INetwork *network)> INetworkPtr
Quantizer class Quantizes a float32 InputNetwork.
armnn::Runtime::CreationOptions::ExternalProfilingOptions options
INetworkPtr ExportNetwork() override
Extract final quantized network.
static INetworkQuantizerPtr Create(INetwork *inputNetwork, const QuantizerOptions &options=QuantizerOptions())
Create Quantizer object wrapped in unique_ptr.