diff options
Diffstat (limited to 'src/armnn/NetworkQuantizer.cpp')
-rw-r--r-- | src/armnn/NetworkQuantizer.cpp | 106 |
1 files changed, 102 insertions, 4 deletions
diff --git a/src/armnn/NetworkQuantizer.cpp b/src/armnn/NetworkQuantizer.cpp index 4692a6803f..12e459d276 100644 --- a/src/armnn/NetworkQuantizer.cpp +++ b/src/armnn/NetworkQuantizer.cpp @@ -7,6 +7,8 @@ #include <armnn/INetwork.hpp> #include <armnn/Tensor.hpp> #include <armnn/Types.hpp> +#include <TensorUtils.hpp> +#include <TensorIOUtils.hpp> #include "Graph.hpp" #include "Layer.hpp" @@ -14,6 +16,7 @@ #include "NetworkQuantizer.hpp" #include "NetworkQuantizerUtils.hpp" +#include "DynamicQuantizationVisitor.hpp" #include "StaticRangeVisitor.hpp" #include "QuantizerVisitor.hpp" #include "OverrideInputRangeVisitor.hpp" @@ -21,9 +24,15 @@ #include <vector> #include <cmath> +#include <boost/variant.hpp> + + namespace armnn { +using TContainer = boost::variant<std::vector<float>, std::vector<int>, std::vector<unsigned char>>; + + INetworkQuantizer* INetworkQuantizer::CreateRaw(INetwork* inputNetwork, const QuantizerOptions& options) { return new NetworkQuantizer(inputNetwork, options); @@ -51,16 +60,102 @@ void NetworkQuantizer::OverrideInputRange(LayerBindingId layerId, float min, flo void NetworkQuantizer::Refine(const InputTensors& inputTensors) { - //Implementation in a following commit + // The first time Refine is called the m_Runtime and the DynamicQuantizationVisitor + // will not have been created. Need to get the environment set up, Runtime loaded, + // DynamicQuantizationVisitor created and run over the network to initialise itself + // and the RangeTracker the Debug callback registered and an initial inference + // done to set up the first min/max values + if (!m_Runtime) + { + m_RefineCount = 0; + m_Ranges.SetDynamicMode(true); + const Graph& cGraph = boost::polymorphic_downcast<const Network*>(m_InputNetwork)->GetGraph().TopologicalSort(); + + // need to insert Debug layers in the DynamicQuantizationVisitor + Graph& graph = const_cast<Graph&>(cGraph); + + // Initialize RangeTracker to the default values for each layer. + // The default values are overwritten by the min/max that is + // recorded during the first dataset min/max calibration. This + // initialisation is only required for the first call of Refine(). + m_DynamicQuantizationVisitor = DynamicQuantizationVisitor(m_Ranges, graph); + VisitLayers(cGraph, m_DynamicQuantizationVisitor.value()); + + IRuntime::CreationOptions options; + m_Runtime = IRuntime::Create(options); + + // Optimize network - debug already enabled for layers that require quantization + OptimizerOptions optimizerOptions(false, false); + std::vector<BackendId> backends = {"CpuRef"}; + IOptimizedNetworkPtr optimizedNet = Optimize(*m_InputNetwork, + backends, + m_Runtime->GetDeviceSpec(), + optimizerOptions); + + m_Runtime->LoadNetwork(m_NetworkId, std::move(optimizedNet)); + + // Debug callback function to refine min/max in RangeTracker + auto rangeTrackerCallback = [&](LayerGuid guid, unsigned int slotIndex, ITensorHandle *tensorHandle) { + // Get min/max pair from tensor data + std::pair<float, float> minMax = armnnUtils::FindMinMax(tensorHandle); + + // For first calibration dataset, set min/max range in RangeTracker to + // min/max ranges gathered during inference + if (m_RefineCount == 0) + { + m_Ranges.ResetMinMax(guid, slotIndex, minMax.first, minMax.second); + } + else + { + // For every other calibration dataset, only set min/max range if the + // values gathered are less than / greater than originally recorded. + m_Ranges.RefineMin(guid, slotIndex, minMax.first); + m_Ranges.RefineMax(guid, slotIndex, minMax.second); + } + }; + + m_Runtime->RegisterDebugCallback(m_NetworkId, rangeTrackerCallback); + } + + // Create output tensor for EnqueueWorkload + std::vector<armnn::BindingPointInfo> outputBindings; + auto outputLayers = m_DynamicQuantizationVisitor.value().GetOutputLayers(); + std::vector<TContainer> outputVectors; + for (auto outputLayerBindingId : outputLayers) + { + auto outputTensorInfo = m_Runtime->GetOutputTensorInfo(m_NetworkId, outputLayerBindingId); + outputBindings.push_back(std::make_pair(outputLayerBindingId, outputTensorInfo)); + outputVectors.push_back(std::vector<float>(outputTensorInfo.GetNumElements(), 0)); + } + OutputTensors outputTensors = armnnUtils::MakeOutputTensors<TContainer>(outputBindings, outputVectors); + + // Execute EnqueueWorkload with calibration image + m_Runtime->EnqueueWorkload(m_NetworkId, inputTensors, outputTensors); + ++m_RefineCount; } INetworkPtr NetworkQuantizer::ExportNetwork() { const Graph& graph = boost::polymorphic_downcast<const Network*>(m_InputNetwork)->GetGraph().TopologicalSort(); - // Step 1) Walk the graph and register min/max values for intermediate tensors - StaticRangeVisitor rangeVisitor(m_Ranges); - VisitLayers(graph, rangeVisitor); + // Step 1) Walk the graph and populate default min/max values for + // intermediate tensors, only if Runtime does not exist (created + // if Refine has been called) + if (!m_Runtime) + { + m_Ranges.SetDynamicMode(false); + StaticRangeVisitor rangeVisitor(m_Ranges); + VisitLayers(graph, rangeVisitor); + } + else + { + // Set min/max range of non-calibrated layers to parent layer's range + m_DynamicQuantizationVisitor.value().VisitNonCalibratedLayers(); + // now tear down the runtime and the dynamic visitor. + m_Runtime.reset(nullptr); + m_DynamicQuantizationVisitor = EmptyOptional(); + m_RefineCount = 0; + } // Step 2) Convert input InputNetwork to Quantized InputNetwork std::unique_ptr<IQuantizationScheme> quantizationScheme; @@ -79,6 +174,9 @@ INetworkPtr NetworkQuantizer::ExportNetwork() QuantizerVisitor quantizerVisitor(m_Ranges, quantizationScheme.get()); VisitLayers(graph, quantizerVisitor); + // clear the ranges + m_Ranges.Reset(); + return quantizerVisitor.RetrieveFinalNetwork(); } |