about summary refs log tree commit diff
path: root/src/armnn/NetworkQuantizer.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/armnn/NetworkQuantizer.cpp')
-rw-r--r-- src/armnn/NetworkQuantizer.cpp 106
1 files changed, 102 insertions, 4 deletions
diff --git a/src/armnn/NetworkQuantizer.cpp b/src/armnn/NetworkQuantizer.cpp
index 4692a6803f..12e459d276 100644
--- a/src/armnn/NetworkQuantizer.cpp
+++ b/src/armnn/NetworkQuantizer.cpp
@@ -7,6 +7,8 @@
#include <armnn/INetwork.hpp>
#include <armnn/Tensor.hpp>
#include <armnn/Types.hpp>
+#include <TensorUtils.hpp>
+#include <TensorIOUtils.hpp>
#include "Graph.hpp"
#include "Layer.hpp"
@@ -14,6 +16,7 @@
#include "NetworkQuantizer.hpp"
#include "NetworkQuantizerUtils.hpp"
+#include "DynamicQuantizationVisitor.hpp"
#include "StaticRangeVisitor.hpp"
#include "QuantizerVisitor.hpp"
#include "OverrideInputRangeVisitor.hpp"
@@ -21,9 +24,15 @@
#include <vector>
#include <cmath>
+#include <boost/variant.hpp>
+
+
namespace armnn
{
+using TContainer = boost::variant<std::vector<float>, std::vector<int>, std::vector<unsigned char>>; // Buffer type passed to armnnUtils::MakeOutputTensors in Refine()
+
+
INetworkQuantizer* INetworkQuantizer::CreateRaw(INetwork* inputNetwork, const QuantizerOptions& options)
{
return new NetworkQuantizer(inputNetwork, options);
@@ -51,16 +60,102 @@ void NetworkQuantizer::OverrideInputRange(LayerBindingId layerId, float min, flo
void NetworkQuantizer::Refine(const InputTensors& inputTensors)
{
- //Implementation in a following commit
+ // On the first call to Refine neither m_Runtime nor the DynamicQuantizationVisitor
+ // exists yet, so one-time setup is done here: create the visitor and run it over the
+ // graph (initialising itself and the RangeTracker), create the Runtime, optimize and
+ // load the network, and register the Debug callback. The inference performed after
+ // this block then records the first min/max values.
+ if (!m_Runtime)
+ {
+ m_RefineCount = 0;
+ m_Ranges.SetDynamicMode(true);
+ const Graph& cGraph = boost::polymorphic_downcast<const Network*>(m_InputNetwork)->GetGraph().TopologicalSort();
+
+ // The DynamicQuantizationVisitor needs to insert Debug layers, so it is given a mutable Graph (hence the const_cast)
+ Graph& graph = const_cast<Graph&>(cGraph);
+
+ // Initialize RangeTracker to the default values for each layer.
+ // The default values are overwritten by the min/max that is
+ // recorded during the first dataset min/max calibration. This
+ // initialisation is only required for the first call of Refine().
+ m_DynamicQuantizationVisitor = DynamicQuantizationVisitor(m_Ranges, graph);
+ VisitLayers(cGraph, m_DynamicQuantizationVisitor.value());
+
+ IRuntime::CreationOptions options;
+ m_Runtime = IRuntime::Create(options);
+
+ // Optimize network for the CpuRef backend - debug already enabled for layers that require quantization
+ OptimizerOptions optimizerOptions(false, false);
+ std::vector<BackendId> backends = {"CpuRef"};
+ IOptimizedNetworkPtr optimizedNet = Optimize(*m_InputNetwork,
+ backends,
+ m_Runtime->GetDeviceSpec(),
+ optimizerOptions);
+
+ m_Runtime->LoadNetwork(m_NetworkId, std::move(optimizedNet)); // NOTE(review): any status returned by LoadNetwork is not checked here
+
+ // Debug callback: updates the RangeTracker entry for (guid, slotIndex) from the min/max found in the tensor
+ auto rangeTrackerCallback = [&](LayerGuid guid, unsigned int slotIndex, ITensorHandle *tensorHandle) {
+ // Get min/max pair from tensor data
+ std::pair<float, float> minMax = armnnUtils::FindMinMax(tensorHandle);
+
+ // For first calibration dataset, set min/max range in RangeTracker to
+ // min/max ranges gathered during inference
+ if (m_RefineCount == 0)
+ {
+ m_Ranges.ResetMinMax(guid, slotIndex, minMax.first, minMax.second);
+ }
+ else
+ {
+ // For every other calibration dataset, only set min/max range if the
+ // values gathered are less than / greater than originally recorded.
+ m_Ranges.RefineMin(guid, slotIndex, minMax.first);
+ m_Ranges.RefineMax(guid, slotIndex, minMax.second);
+ }
+ };
+
+ m_Runtime->RegisterDebugCallback(m_NetworkId, rangeTrackerCallback);
+ }
+
+ // Build one zero-filled float buffer per output layer reported by the visitor, and wrap them as OutputTensors
+ std::vector<armnn::BindingPointInfo> outputBindings;
+ auto outputLayers = m_DynamicQuantizationVisitor.value().GetOutputLayers();
+ std::vector<TContainer> outputVectors;
+ for (auto outputLayerBindingId : outputLayers)
+ {
+ auto outputTensorInfo = m_Runtime->GetOutputTensorInfo(m_NetworkId, outputLayerBindingId);
+ outputBindings.push_back(std::make_pair(outputLayerBindingId, outputTensorInfo));
+ outputVectors.push_back(std::vector<float>(outputTensorInfo.GetNumElements(), 0)); // NOTE(review): buffer is always float - assumes float outputs; confirm
+ }
+ OutputTensors outputTensors = armnnUtils::MakeOutputTensors<TContainer>(outputBindings, outputVectors);
+
+ // Run inference on the caller-supplied calibration inputs (inputTensors)
+ m_Runtime->EnqueueWorkload(m_NetworkId, inputTensors, outputTensors); // NOTE(review): any status returned by EnqueueWorkload is not checked here
+ ++m_RefineCount; // a non-zero count makes the callback take its RefineMin/RefineMax branch on later calls
}
INetworkPtr NetworkQuantizer::ExportNetwork()
{
const Graph& graph = boost::polymorphic_downcast<const Network*>(m_InputNetwork)->GetGraph().TopologicalSort();
- // Step 1) Walk the graph and register min/max values for intermediate tensors
- StaticRangeVisitor rangeVisitor(m_Ranges);
- VisitLayers(graph, rangeVisitor);
+ // Step 1) If no Runtime exists (one is only created by Refine()), walk
+ // the graph with StaticRangeVisitor to populate default min/max values
+ // for intermediate tensors.
+ if (!m_Runtime)
+ {
+ m_Ranges.SetDynamicMode(false);
+ StaticRangeVisitor rangeVisitor(m_Ranges);
+ VisitLayers(graph, rangeVisitor);
+ }
+ else
+ {
+ // Set min/max range of non-calibrated layers to parent layer's range
+ m_DynamicQuantizationVisitor.value().VisitNonCalibratedLayers();
+ // Tear down the runtime and the dynamic visitor and zero the counter, so a later Refine() redoes its first-call setup.
+ m_Runtime.reset(nullptr);
+ m_DynamicQuantizationVisitor = EmptyOptional();
+ m_RefineCount = 0;
+ }
// Step 2) Convert input InputNetwork to Quantized InputNetwork
std::unique_ptr<IQuantizationScheme> quantizationScheme;
@@ -79,6 +174,9 @@ INetworkPtr NetworkQuantizer::ExportNetwork()
QuantizerVisitor quantizerVisitor(m_Ranges, quantizationScheme.get());
VisitLayers(graph, quantizerVisitor);
+ // Clear the recorded ranges once the quantizer visitor has walked the graph
+ m_Ranges.Reset();
+
return quantizerVisitor.RetrieveFinalNetwork();
}