aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJim Flynn <jim.flynn@arm.com>2019-05-02 11:33:25 +0100
committerRuomei Yan <ruomei.yan@arm.com>2019-05-08 16:10:59 +0000
commitf92dfced4498f12b9315c0fa377ba7be8998b607 (patch)
tree4015208a5493ea414babba76c18bc72b6dbef875
parenta4247d5a50502811a6956dffd990c0254622b7e1 (diff)
downloadarmnn-f92dfced4498f12b9315c0fa377ba7be8998b607.tar.gz
IVGCVSW-2833 Add Dynamic Quantization
Change-Id: Iba91e3f3625639f01d66f81a9f3e419e0e285d66
Signed-off-by: Jim Flynn <jim.flynn@arm.com>
-rw-r--r--CMakeLists.txt4
-rw-r--r--include/armnn/ILayerVisitor.hpp2
-rw-r--r--include/armnn/TypesUtils.hpp4
-rw-r--r--include/armnnQuantizer/INetworkQuantizer.hpp (renamed from include/armnn/INetworkQuantizer.hpp)0
-rw-r--r--src/armnn/DynamicQuantizationVisitor.cpp330
-rw-r--r--src/armnn/DynamicQuantizationVisitor.hpp137
-rw-r--r--src/armnn/NetworkQuantizer.cpp106
-rw-r--r--src/armnn/NetworkQuantizer.hpp27
-rw-r--r--src/armnn/NetworkQuantizerUtils.hpp2
-rw-r--r--src/armnn/QuantizerVisitor.hpp2
-rw-r--r--src/armnn/RangeTracker.cpp55
-rw-r--r--src/armnn/RangeTracker.hpp17
-rw-r--r--src/armnn/StaticRangeVisitor.hpp2
-rw-r--r--src/armnn/test/QuantizerTest.cpp92
-rw-r--r--src/armnnQuantizer/ArmNNQuantizerMain.cpp4
-rw-r--r--src/armnnQuantizer/QuantizationInput.hpp2
-rw-r--r--src/armnnUtils/TensorUtils.cpp28
-rw-r--r--src/armnnUtils/TensorUtils.hpp2
18 files changed, 797 insertions, 19 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index c54c3955f1..b6c977612c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -61,6 +61,7 @@ list(APPEND armnnUtils_sources
)
add_library_ex(armnnUtils STATIC ${armnnUtils_sources})
+target_include_directories(armnnUtils PRIVATE src/backends)
if(BUILD_CAFFE_PARSER)
# ArmNN Parser source files required for all build options
@@ -203,7 +204,6 @@ list(APPEND armnn_sources
include/armnn/ILayerSupport.hpp
include/armnn/ILayerVisitor.hpp
include/armnn/INetwork.hpp
- include/armnn/INetworkQuantizer.hpp
include/armnn/IProfiler.hpp
include/armnn/IRuntime.hpp
include/armnn/LayerSupport.hpp
@@ -315,6 +315,8 @@ list(APPEND armnn_sources
src/armnn/CompatibleTypes.hpp
src/armnn/Descriptors.cpp
src/armnn/DeviceSpec.hpp
+ src/armnn/DynamicQuantizationVisitor.cpp
+ src/armnn/DynamicQuantizationVisitor.hpp
src/armnn/Exceptions.cpp
src/armnn/ExecutionFrame.cpp
src/armnn/ExecutionFrame.hpp
diff --git a/include/armnn/ILayerVisitor.hpp b/include/armnn/ILayerVisitor.hpp
index eabad58366..ab793bc587 100644
--- a/include/armnn/ILayerVisitor.hpp
+++ b/include/armnn/ILayerVisitor.hpp
@@ -347,6 +347,8 @@ public:
virtual void VisitSwitchLayer(const IConnectableLayer* layer,
const char* name = nullptr) = 0;
+ virtual void StartVisit() {}
+ virtual void FinishVisit() {}
};
} // namespace armnn
diff --git a/include/armnn/TypesUtils.hpp b/include/armnn/TypesUtils.hpp
index 837490d258..cb52471cd5 100644
--- a/include/armnn/TypesUtils.hpp
+++ b/include/armnn/TypesUtils.hpp
@@ -4,8 +4,8 @@
//
#pragma once
-#include "Tensor.hpp"
-#include "Types.hpp"
+#include <armnn/Tensor.hpp>
+#include <armnn/Types.hpp>
#include <cmath>
#include <ostream>
diff --git a/include/armnn/INetworkQuantizer.hpp b/include/armnnQuantizer/INetworkQuantizer.hpp
index 89548d1057..89548d1057 100644
--- a/include/armnn/INetworkQuantizer.hpp
+++ b/include/armnnQuantizer/INetworkQuantizer.hpp
diff --git a/src/armnn/DynamicQuantizationVisitor.cpp b/src/armnn/DynamicQuantizationVisitor.cpp
new file mode 100644
index 0000000000..9b33fb7642
--- /dev/null
+++ b/src/armnn/DynamicQuantizationVisitor.cpp
@@ -0,0 +1,330 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "DynamicQuantizationVisitor.hpp"
+#include "NetworkUtils.hpp"
+
+#include <boost/core/ignore_unused.hpp>
+#include <armnn/Descriptors.hpp>
+#include <armnn/Types.hpp>
+
+#include <limits>
+
+namespace armnn
+{
+
+DynamicQuantizationVisitor::DynamicQuantizationVisitor(RangeTracker& rangeTracker, Graph& graph)
+ : m_RangeTracker(rangeTracker),
+ m_Graph(graph)
+{}
+
+void DynamicQuantizationVisitor::SetRange(const IConnectableLayer* layer, unsigned int outputIdx, float min, float max)
+{
+ m_RangeTracker.SetRange(layer, outputIdx, min, max);
+}
+
+void DynamicQuantizationVisitor::ForwardParentParameters(const IConnectableLayer* layer)
+{
+ for (unsigned int i = 0; i < layer->GetNumInputSlots(); ++i)
+ {
+ const IOutputSlot *outputSlot = layer->GetInputSlot(i).GetConnection();
+ LayerGuid previousLayerId = outputSlot->GetOwningLayerGuid();
+ unsigned int ownerIndex = outputSlot->CalculateIndexOnOwner();
+ const auto parentRange = m_RangeTracker.GetRange(previousLayerId, ownerIndex);
+ SetRange(layer, i, parentRange.first, parentRange.second);
+ }
+}
+
+void DynamicQuantizationVisitor::AddToCalibratedLayers(const IConnectableLayer* layer)
+{
+ m_LayersToCalibrate.push_back(layer);
+}
+
+void DynamicQuantizationVisitor::AddToNonCalibratedLayers(const IConnectableLayer* layer)
+{
+ m_LayersNotToCalibrate.push_back(layer);
+}
+
+void DynamicQuantizationVisitor::FinishVisit()
+{
+ for (const IConnectableLayer* layer : m_LayersToCalibrate)
+ {
+ std::vector<DebugLayer*> newDebugLayers = InsertDebugLayerAfter(
+ m_Graph, *boost::polymorphic_downcast<Layer*>(const_cast<IConnectableLayer*>(layer)));
+ // record them so we can take them out again efficiently afterward
+ m_DebugLayers.insert(std::end(m_DebugLayers), std::begin(newDebugLayers), std::end(newDebugLayers));
+ }
+}
+
+void DynamicQuantizationVisitor::RemoveDebugLayers()
+{
+ for (DebugLayer* debugLayer : m_DebugLayers)
+ {
+ OutputSlot& proceedingOutputSlot = *debugLayer->GetInputSlot(0).GetConnectedOutputSlot();
+ InputSlot& succeedingInputSlot = *debugLayer->GetOutputSlot(0).GetConnection(0);
+ proceedingOutputSlot.Disconnect(debugLayer->GetInputSlot(0));
+ debugLayer->GetOutputSlot(0).Disconnect(succeedingInputSlot);
+
+ m_Graph.EraseLayer(debugLayer);
+
+ proceedingOutputSlot.Connect(succeedingInputSlot);
+ }
+ m_DebugLayers.clear();
+}
+
+void DynamicQuantizationVisitor::VisitNonCalibratedLayers() {
+ RemoveDebugLayers();
+ for (const IConnectableLayer* layer : m_LayersNotToCalibrate)
+ {
+ ForwardParentParameters(layer);
+ }
+}
+
+void DynamicQuantizationVisitor::VisitAdditionLayer(const IConnectableLayer* layer, const char* name)
+{
+ SetRange(layer, 0, -20.f, 20.f);
+ AddToCalibratedLayers(layer);
+}
+
+void DynamicQuantizationVisitor::VisitBatchNormalizationLayer(const IConnectableLayer* layer,
+ const BatchNormalizationDescriptor& desc,
+ const ConstTensor& mean,
+ const ConstTensor& variance,
+ const ConstTensor& beta,
+ const ConstTensor& gamma,
+ const char* name)
+{
+ boost::ignore_unused(desc);
+ boost::ignore_unused(mean);
+ boost::ignore_unused(variance);
+ boost::ignore_unused(beta);
+ boost::ignore_unused(gamma);
+ boost::ignore_unused(name);
+ SetRange(layer, 0, -15.0f, 15.0f);
+ AddToCalibratedLayers(layer);
+}
+
+void DynamicQuantizationVisitor::VisitConvolution2dLayer(const IConnectableLayer* layer,
+ const Convolution2dDescriptor& convolution2dDescriptor,
+ const ConstTensor& weights,
+ const Optional<ConstTensor>& biases,
+ const char* name)
+{
+ boost::ignore_unused(convolution2dDescriptor);
+ boost::ignore_unused(weights);
+ boost::ignore_unused(biases);
+ boost::ignore_unused(name);
+ SetRange(layer, 0, -15.0f, 15.0f);
+ AddToCalibratedLayers(layer);
+}
+
+void DynamicQuantizationVisitor::VisitDepthwiseConvolution2dLayer(const IConnectableLayer* layer,
+ const DepthwiseConvolution2dDescriptor& desc,
+ const ConstTensor& weights,
+ const Optional<ConstTensor>& biases,
+ const char* name)
+{
+ boost::ignore_unused(desc);
+ boost::ignore_unused(weights);
+ boost::ignore_unused(biases);
+ boost::ignore_unused(name);
+ SetRange(layer, 0, -15.0f, 15.0f);
+ AddToCalibratedLayers(layer);
+}
+
+void DynamicQuantizationVisitor::VisitActivationLayer(const IConnectableLayer* layer,
+ const ActivationDescriptor& activationDescriptor,
+ const char* name)
+{
+ switch (activationDescriptor.m_Function)
+ {
+ // Range is 0, 15 for Abs, Linear, ReLu and Soft ReLu
+ case ActivationFunction::Abs:
+ case ActivationFunction::Linear:
+ case ActivationFunction::ReLu:
+ case ActivationFunction::SoftReLu:
+ SetRange(layer, 0, 0.f, 15.f);
+ break;
+ case ActivationFunction::BoundedReLu:
+ SetRange(layer, 0, 0.f, activationDescriptor.m_A);
+ break;
+ case ActivationFunction::TanH:
+ SetRange(layer, 0, -1.f, 1.f);
+ break;
+ case ActivationFunction::LeakyReLu:
+ SetRange(layer, 0, -5.f, 15.f);
+ break;
+ default:
+ SetRange(layer, 0, -15.f, 15.f);
+ break;
+ }
+ AddToCalibratedLayers(layer);
+}
+
+void DynamicQuantizationVisitor::VisitFullyConnectedLayer(const IConnectableLayer *layer,
+ const FullyConnectedDescriptor& desc,
+ const ConstTensor& weights,
+ const Optional<ConstTensor>& biases,
+ const char *name)
+{
+ boost::ignore_unused(desc);
+ boost::ignore_unused(weights);
+ boost::ignore_unused(biases);
+ boost::ignore_unused(name);
+ SetRange(layer, 0, -15.0f, 15.0f);
+ AddToCalibratedLayers(layer);
+}
+
+void DynamicQuantizationVisitor::VisitPermuteLayer(const IConnectableLayer* layer,
+ const PermuteDescriptor& permuteDescriptor,
+ const char* name)
+{
+ boost::ignore_unused(permuteDescriptor);
+ boost::ignore_unused(name);
+ AddToNonCalibratedLayers(layer);
+}
+
+void DynamicQuantizationVisitor::VisitSpaceToBatchNdLayer(const IConnectableLayer* layer,
+ const SpaceToBatchNdDescriptor& spaceToBatchNdDescriptor,
+ const char* name)
+{
+ boost::ignore_unused(spaceToBatchNdDescriptor);
+ boost::ignore_unused(name);
+ AddToNonCalibratedLayers(layer);
+}
+
+void DynamicQuantizationVisitor::VisitPooling2dLayer(const IConnectableLayer* layer,
+ const Pooling2dDescriptor& pooling2dDescriptor,
+ const char* name)
+{
+ boost::ignore_unused(pooling2dDescriptor);
+ boost::ignore_unused(name);
+ AddToNonCalibratedLayers(layer);
+}
+
+void DynamicQuantizationVisitor::VisitSoftmaxLayer(const IConnectableLayer* layer,
+ const SoftmaxDescriptor& softmaxDescriptor,
+ const char* name)
+{
+ boost::ignore_unused(softmaxDescriptor);
+ boost::ignore_unused(name);
+ SetRange(layer, 0, 0.f, 1.f);
+ AddToCalibratedLayers(layer);
+}
+
+void DynamicQuantizationVisitor::VisitConstantLayer(const IConnectableLayer* layer,
+ const ConstTensor& input,
+ const char* name)
+{
+ boost::ignore_unused(name);
+
+ if (input.GetDataType() != DataType::Float32)
+ {
+ throw InvalidArgumentException("Quantization is supported only for FP32 tensors");
+ }
+
+ // Work out the range based on the input constants
+ unsigned int inputNumElements = input.GetNumElements();
+ const float* inputData = reinterpret_cast<const float*>(input.GetMemoryArea());
+
+ float min = std::numeric_limits<float>::max();
+ float max = std::numeric_limits<float>::lowest();
+
+ for (unsigned int i = 0; i < inputNumElements; i++)
+ {
+ const float inputValue = inputData[i];
+
+ min = std::min(min, inputValue);
+ max = std::max(max, inputValue);
+ }
+ SetRange(layer, 0, min, max);
+}
+
+void DynamicQuantizationVisitor::VisitMergerLayer(const IConnectableLayer* layer,
+ const OriginsDescriptor& mergerDescriptor,
+ const char* name)
+{
+ float min = std::numeric_limits<float>::max();
+ float max = std::numeric_limits<float>::lowest();
+ for (unsigned int i = 0; i < layer->GetNumInputSlots(); ++i)
+ {
+ const IOutputSlot* outputSlot = layer->GetInputSlot(i).GetConnection();
+ LayerGuid layerId = outputSlot->GetOwningLayerGuid();
+ unsigned int slotIndex = outputSlot->CalculateIndexOnOwner();
+ RangeTracker::MinMaxRange range = m_RangeTracker.GetRange(layerId, slotIndex);
+ min = std::min(min, range.first);
+ max = std::max(max, range.second);
+ }
+ SetRange(layer, 0, min, max);
+ AddToCalibratedLayers(layer);
+}
+
+void DynamicQuantizationVisitor::VisitReshapeLayer(const IConnectableLayer* layer,
+ const ReshapeDescriptor& reshapeDescriptor,
+ const char* name)
+{
+ boost::ignore_unused(reshapeDescriptor);
+ boost::ignore_unused(name);
+ AddToNonCalibratedLayers(layer);
+}
+
+void DynamicQuantizationVisitor::VisitSplitterLayer(const IConnectableLayer* layer,
+ const SplitterDescriptor& splitterDescriptor,
+ const char* name)
+{
+ boost::ignore_unused(splitterDescriptor);
+ boost::ignore_unused(name);
+ AddToNonCalibratedLayers(layer);
+}
+
+void DynamicQuantizationVisitor::VisitResizeBilinearLayer(const IConnectableLayer* layer,
+ const ResizeBilinearDescriptor& resizeDesc,
+ const char* name)
+{
+ boost::ignore_unused(resizeDesc);
+ boost::ignore_unused(name);
+ AddToNonCalibratedLayers(layer);
+}
+
+void DynamicQuantizationVisitor::VisitStridedSliceLayer(const IConnectableLayer* layer,
+ const StridedSliceDescriptor& stridedSliceDescriptor,
+ const char* name)
+{
+ boost::ignore_unused(stridedSliceDescriptor);
+ boost::ignore_unused(name);
+ AddToNonCalibratedLayers(layer);
+}
+
+void DynamicQuantizationVisitor::VisitBatchToSpaceNdLayer(const IConnectableLayer* layer,
+ const BatchToSpaceNdDescriptor& batchToSpaceNdDescriptor,
+ const char* name)
+{
+ boost::ignore_unused(batchToSpaceNdDescriptor);
+ boost::ignore_unused(name);
+ AddToNonCalibratedLayers(layer);
+}
+
+void DynamicQuantizationVisitor::VisitInputLayer(const IConnectableLayer* layer, LayerBindingId id, const char* name)
+{
+ boost::ignore_unused(id);
+ boost::ignore_unused(name);
+ SetRange(layer, 0, -0.0f, 0.0f);
+ AddToCalibratedLayers(layer);
+}
+
+void DynamicQuantizationVisitor::VisitOutputLayer(const IConnectableLayer* layer, LayerBindingId id, const char* name)
+{
+ boost::ignore_unused(id);
+ boost::ignore_unused(name);
+ AddToNonCalibratedLayers(layer);
+ m_OutputLayers.push_back(id);
+}
+
+const std::vector<LayerBindingId>& DynamicQuantizationVisitor::GetOutputLayers()
+{
+ return m_OutputLayers;
+}
+
+} //namespace armnn
diff --git a/src/armnn/DynamicQuantizationVisitor.hpp b/src/armnn/DynamicQuantizationVisitor.hpp
new file mode 100644
index 0000000000..6d430f1142
--- /dev/null
+++ b/src/armnn/DynamicQuantizationVisitor.hpp
@@ -0,0 +1,137 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "armnn/LayerVisitorBase.hpp"
+#include "RangeTracker.hpp"
+#include "layers/DebugLayer.hpp"
+
+#include <armnn/INetwork.hpp>
+#include <armnnQuantizer/INetworkQuantizer.hpp>
+
+namespace armnn
+{
+
+/// Visitor class to establish min/max ranges based on the type of the layer
+class DynamicQuantizationVisitor : public LayerVisitorBase<VisitorNoThrowPolicy>
+{
+public:
+ DynamicQuantizationVisitor(RangeTracker& rangeTracker, Graph& graph);
+ ~DynamicQuantizationVisitor() = default;
+
+ /// Functions to set the Range on a per-layer-type basis
+ void VisitAdditionLayer(const IConnectableLayer* layer, const char* name = nullptr) override;
+
+ void VisitBatchNormalizationLayer(const IConnectableLayer* layer,
+ const BatchNormalizationDescriptor& desc,
+ const ConstTensor& mean,
+ const ConstTensor& variance,
+ const ConstTensor& beta,
+ const ConstTensor& gamma,
+ const char* name = nullptr) override;
+
+ void VisitConvolution2dLayer(const IConnectableLayer* layer,
+ const Convolution2dDescriptor& convolution2dDescriptor,
+ const ConstTensor& weights,
+ const Optional<ConstTensor>& biases,
+ const char* name = nullptr) override;
+
+ void VisitDepthwiseConvolution2dLayer(const IConnectableLayer* layer,
+ const DepthwiseConvolution2dDescriptor& desc,
+ const ConstTensor& weights,
+ const Optional<ConstTensor>& biases,
+ const char* name = nullptr) override;
+
+ void VisitActivationLayer(const IConnectableLayer* layer,
+ const ActivationDescriptor& activationDescriptor,
+ const char* name = nullptr) override;
+
+ void VisitFullyConnectedLayer(const IConnectableLayer *layer,
+ const FullyConnectedDescriptor& desc,
+ const ConstTensor& weights,
+ const Optional<ConstTensor>& biases,
+ const char *name) override;
+
+ void VisitPermuteLayer(const IConnectableLayer* layer,
+ const PermuteDescriptor& permuteDescriptor,
+ const char* name) override;
+
+ void VisitSpaceToBatchNdLayer(const IConnectableLayer* layer,
+ const SpaceToBatchNdDescriptor& spaceToBatchNdDescriptor,
+ const char* name = nullptr) override;
+
+ void VisitPooling2dLayer(const IConnectableLayer* layer,
+ const Pooling2dDescriptor& pooling2dDescriptor,
+ const char* name) override;
+
+ void VisitSoftmaxLayer(const IConnectableLayer* layer,
+ const SoftmaxDescriptor& softmaxDescriptor,
+ const char* name = nullptr) override;
+
+ void VisitConstantLayer(const IConnectableLayer* layer,
+ const ConstTensor& input,
+ const char* name = nullptr) override;
+
+ void VisitMergerLayer(const IConnectableLayer* layer,
+ const OriginsDescriptor& mergerDescriptor,
+ const char* name = nullptr) override;
+
+ void VisitReshapeLayer(const IConnectableLayer* layer,
+ const ReshapeDescriptor& reshapeDescriptor,
+ const char* name = nullptr) override;
+
+ void VisitSplitterLayer(const IConnectableLayer* layer,
+ const SplitterDescriptor& splitterDescriptor,
+ const char* name = nullptr) override;
+
+ void VisitResizeBilinearLayer(const IConnectableLayer* layer,
+ const ResizeBilinearDescriptor& resizeDesc,
+ const char* name = nullptr) override;
+
+ void VisitStridedSliceLayer(const IConnectableLayer* layer,
+ const StridedSliceDescriptor& stridedSliceDescriptor,
+ const char* name = nullptr) override;
+
+ void VisitBatchToSpaceNdLayer(const IConnectableLayer* layer,
+ const BatchToSpaceNdDescriptor& batchToSpaceNdDescriptor,
+ const char* name = nullptr) override;
+
+ void VisitInputLayer(const IConnectableLayer* layer,
+ LayerBindingId id,
+ const char* name = nullptr) override;
+
+ void VisitOutputLayer(const IConnectableLayer* layer,
+ LayerBindingId id,
+ const char* name = nullptr) override;
+
+ void FinishVisit() override;
+ void VisitNonCalibratedLayers();
+
+ const std::vector<armnn::LayerBindingId>& GetOutputLayers();
+
+private:
+ /// Set the range for an output slot on a layer
+ void SetRange(const IConnectableLayer* layer, unsigned int outputIdx, float min, float max);
+
+ void ForwardParentParameters(const IConnectableLayer* layer);
+
+ /// Mapping from a layer Guid to an array of ranges for outputs
+ RangeTracker& m_RangeTracker;
+
+ Graph& m_Graph;
+
+ std::vector<const IConnectableLayer*> m_LayersToCalibrate;
+ std::vector<const IConnectableLayer*> m_LayersNotToCalibrate;
+ std::vector<DebugLayer*> m_DebugLayers;
+
+ std::vector<armnn::LayerBindingId> m_OutputLayers;
+
+ void AddToCalibratedLayers(const IConnectableLayer* layer);
+ void AddToNonCalibratedLayers(const IConnectableLayer* layer);
+ void RemoveDebugLayers();
+};
+
+} //namespace armnn
\ No newline at end of file
diff --git a/src/armnn/NetworkQuantizer.cpp b/src/armnn/NetworkQuantizer.cpp
index 4692a6803f..12e459d276 100644
--- a/src/armnn/NetworkQuantizer.cpp
+++ b/src/armnn/NetworkQuantizer.cpp
@@ -7,6 +7,8 @@
#include <armnn/INetwork.hpp>
#include <armnn/Tensor.hpp>
#include <armnn/Types.hpp>
+#include <TensorUtils.hpp>
+#include <TensorIOUtils.hpp>
#include "Graph.hpp"
#include "Layer.hpp"
@@ -14,6 +16,7 @@
#include "NetworkQuantizer.hpp"
#include "NetworkQuantizerUtils.hpp"
+#include "DynamicQuantizationVisitor.hpp"
#include "StaticRangeVisitor.hpp"
#include "QuantizerVisitor.hpp"
#include "OverrideInputRangeVisitor.hpp"
@@ -21,9 +24,15 @@
#include <vector>
#include <cmath>
+#include <boost/variant.hpp>
+
+
namespace armnn
{
+using TContainer = boost::variant<std::vector<float>, std::vector<int>, std::vector<unsigned char>>;
+
+
INetworkQuantizer* INetworkQuantizer::CreateRaw(INetwork* inputNetwork, const QuantizerOptions& options)
{
return new NetworkQuantizer(inputNetwork, options);
@@ -51,16 +60,102 @@ void NetworkQuantizer::OverrideInputRange(LayerBindingId layerId, float min, flo
void NetworkQuantizer::Refine(const InputTensors& inputTensors)
{
- //Implementation in a following commit
+ // The first time Refine is called the m_Runtime and the DynamicQuantizationVisitor
+ // will not have been created. Need to get the environment set up, Runtime loaded,
+ // DynamicQuantizationVisitor created and run over the network to initialise itself
+ // and the RangeTracker the Debug callback registered and an initial inference
+ // done to set up the first min/max values
+ if (!m_Runtime)
+ {
+ m_RefineCount = 0;
+ m_Ranges.SetDynamicMode(true);
+ const Graph& cGraph = boost::polymorphic_downcast<const Network*>(m_InputNetwork)->GetGraph().TopologicalSort();
+
+ // need to insert Debug layers in the DynamicQuantizationVisitor
+ Graph& graph = const_cast<Graph&>(cGraph);
+
+ // Initialize RangeTracker to the default values for each layer.
+ // The default values are overwritten by the min/max that is
+ // recorded during the first dataset min/max calibration. This
+ // initialisation is only required for the first call of Refine().
+ m_DynamicQuantizationVisitor = DynamicQuantizationVisitor(m_Ranges, graph);
+ VisitLayers(cGraph, m_DynamicQuantizationVisitor.value());
+
+ IRuntime::CreationOptions options;
+ m_Runtime = IRuntime::Create(options);
+
+ // Optimize network - debug already enabled for layers that require quantization
+ OptimizerOptions optimizerOptions(false, false);
+ std::vector<BackendId> backends = {"CpuRef"};
+ IOptimizedNetworkPtr optimizedNet = Optimize(*m_InputNetwork,
+ backends,
+ m_Runtime->GetDeviceSpec(),
+ optimizerOptions);
+
+ m_Runtime->LoadNetwork(m_NetworkId, std::move(optimizedNet));
+
+ // Debug callback function to refine min/max in RangeTracker
+ auto rangeTrackerCallback = [&](LayerGuid guid, unsigned int slotIndex, ITensorHandle *tensorHandle) {
+ // Get min/max pair from tensor data
+ std::pair<float, float> minMax = armnnUtils::FindMinMax(tensorHandle);
+
+ // For first calibration dataset, set min/max range in RangeTracker to
+ // min/max ranges gathered during inference
+ if (m_RefineCount == 0)
+ {
+ m_Ranges.ResetMinMax(guid, slotIndex, minMax.first, minMax.second);
+ }
+ else
+ {
+ // For every other calibration dataset, only set min/max range if the
+ // values gathered are less than / greater than originally recorded.
+ m_Ranges.RefineMin(guid, slotIndex, minMax.first);
+ m_Ranges.RefineMax(guid, slotIndex, minMax.second);
+ }
+ };
+
+ m_Runtime->RegisterDebugCallback(m_NetworkId, rangeTrackerCallback);
+ }
+
+ // Create output tensor for EnqueueWorkload
+ std::vector<armnn::BindingPointInfo> outputBindings;
+ auto outputLayers = m_DynamicQuantizationVisitor.value().GetOutputLayers();
+ std::vector<TContainer> outputVectors;
+ for (auto outputLayerBindingId : outputLayers)
+ {
+ auto outputTensorInfo = m_Runtime->GetOutputTensorInfo(m_NetworkId, outputLayerBindingId);
+ outputBindings.push_back(std::make_pair(outputLayerBindingId, outputTensorInfo));
+ outputVectors.push_back(std::vector<float>(outputTensorInfo.GetNumElements(), 0));
+ }
+ OutputTensors outputTensors = armnnUtils::MakeOutputTensors<TContainer>(outputBindings, outputVectors);
+
+ // Execute EnqueueWorkload with calibration image
+ m_Runtime->EnqueueWorkload(m_NetworkId, inputTensors, outputTensors);
+ ++m_RefineCount;
}
INetworkPtr NetworkQuantizer::ExportNetwork()
{
const Graph& graph = boost::polymorphic_downcast<const Network*>(m_InputNetwork)->GetGraph().TopologicalSort();
- // Step 1) Walk the graph and register min/max values for intermediate tensors
- StaticRangeVisitor rangeVisitor(m_Ranges);
- VisitLayers(graph, rangeVisitor);
+ // Step 1) Walk the graph and populate default min/max values for
+ // intermediate tensors, only if Runtime does not exist (created
+ // if Refine has been called)
+ if (!m_Runtime)
+ {
+ m_Ranges.SetDynamicMode(false);
+ StaticRangeVisitor rangeVisitor(m_Ranges);
+ VisitLayers(graph, rangeVisitor);
+ }
+ else
+ {
+ // Set min/max range of non-calibrated layers to parent layer's range
+ m_DynamicQuantizationVisitor.value().VisitNonCalibratedLayers();
+ // now tear down the runtime and the dynamic visitor.
+ m_Runtime.reset(nullptr);
+ m_DynamicQuantizationVisitor = EmptyOptional();
+ m_RefineCount = 0;
+ }
// Step 2) Convert input InputNetwork to Quantized InputNetwork
std::unique_ptr<IQuantizationScheme> quantizationScheme;
@@ -79,6 +174,9 @@ INetworkPtr NetworkQuantizer::ExportNetwork()
QuantizerVisitor quantizerVisitor(m_Ranges, quantizationScheme.get());
VisitLayers(graph, quantizerVisitor);
+ // clear the ranges
+ m_Ranges.Reset();
+
return quantizerVisitor.RetrieveFinalNetwork();
}
diff --git a/src/armnn/NetworkQuantizer.hpp b/src/armnn/NetworkQuantizer.hpp
index 4f6359f36d..d384bdc545 100644
--- a/src/armnn/NetworkQuantizer.hpp
+++ b/src/armnn/NetworkQuantizer.hpp
@@ -6,9 +6,12 @@
#pragma once
#include <armnn/INetwork.hpp>
-#include <armnn/INetworkQuantizer.hpp>
+#include <armnnQuantizer/INetworkQuantizer.hpp>
+#include <armnn/IRuntime.hpp>
#include <armnn/Types.hpp>
+#include <armnn/Optional.hpp>
+#include "DynamicQuantizationVisitor.hpp"
#include "RangeTracker.hpp"
namespace armnn
@@ -18,21 +21,41 @@ class NetworkQuantizer : public INetworkQuantizer
{
public:
NetworkQuantizer(INetwork* inputNetwork, const QuantizerOptions& options)
- : m_InputNetwork(inputNetwork), m_Options(options) {}
+ : m_InputNetwork(inputNetwork),
+ m_NetworkId(0),
+ m_Runtime(nullptr, &IRuntime::Destroy),
+ m_RefineCount(0),
+ m_Options(options) {}
void OverrideInputRange(LayerBindingId layerId, float min, float max) override;
void Refine(const InputTensors& inputTensors) override;
+
+ // Required for testing? Need some way to get min/max in RangeTracker (m_Ranges)
+ std::pair<float, float> GetMinMaxRange(LayerGuid guid, unsigned int idx) { return m_Ranges.GetRange(guid, idx); }
INetworkPtr ExportNetwork() override;
private:
/// Original input network to quantize
INetwork* m_InputNetwork;
+ NetworkId m_NetworkId;
+
+ // if we are run in dynamic mode this unique pointer will hold
+ // the runtime between invocations of the Refine method.
+ IRuntimePtr m_Runtime;
+
+ Optional<DynamicQuantizationVisitor> m_DynamicQuantizationVisitor;
+
+ // counts the number of times refine is called
+ unsigned int m_RefineCount;
+
/// Mapping from Guid to an array of ranges for outputs
RangeTracker m_Ranges;
/// Options for the NetworkQuantizer
QuantizerOptions m_Options;
+
+ std::pair<float, float> FindMinMax(ITensorHandle* tensorHandle);
};
} //namespace armnn
diff --git a/src/armnn/NetworkQuantizerUtils.hpp b/src/armnn/NetworkQuantizerUtils.hpp
index 26f67f95b2..6176a9c794 100644
--- a/src/armnn/NetworkQuantizerUtils.hpp
+++ b/src/armnn/NetworkQuantizerUtils.hpp
@@ -49,10 +49,12 @@ ConstTensor CreateQuantizedConst(const ConstTensor& tensor, std::vector<uint8_t>
template <typename LayerContainer>
void VisitLayers(const LayerContainer& layerContainer, ILayerVisitor& visitor)
{
+ visitor.StartVisit();
for (auto layer : layerContainer)
{
layer->Accept(visitor);
}
+ visitor.FinishVisit();
}
} // namespace armnn
diff --git a/src/armnn/QuantizerVisitor.hpp b/src/armnn/QuantizerVisitor.hpp
index 1751229435..eb9ebac3d9 100644
--- a/src/armnn/QuantizerVisitor.hpp
+++ b/src/armnn/QuantizerVisitor.hpp
@@ -11,7 +11,7 @@
#include <armnn/INetwork.hpp>
#include <armnn/Types.hpp>
-#include <armnn/INetworkQuantizer.hpp>
+#include <armnnQuantizer/INetworkQuantizer.hpp>
#include <unordered_map>
diff --git a/src/armnn/RangeTracker.cpp b/src/armnn/RangeTracker.cpp
index 202510346e..ae756fbb9c 100644
--- a/src/armnn/RangeTracker.cpp
+++ b/src/armnn/RangeTracker.cpp
@@ -4,17 +4,24 @@
//
#include "RangeTracker.hpp"
+#include "InternalTypes.hpp"
namespace armnn
{
-void RangeTracker::SetRange(const armnn::IConnectableLayer *layer, unsigned int outputIdx, float min, float max)
+void RangeTracker::SetRange(const armnn::IConnectableLayer* layer, unsigned int outputIdx, float min, float max)
{
auto& ranges = m_GuidToRangesMap[layer->GetGuid()];
- if (ranges.size() < layer->GetNumOutputSlots())
+ unsigned int numOfOutputSlots = layer->GetNumOutputSlots();
+ // output layers are a special case
+ if (numOfOutputSlots == 0)
{
- ranges.resize(layer->GetNumOutputSlots());
+ ++numOfOutputSlots;
+ }
+ if (ranges.size() < numOfOutputSlots)
+ {
+ ranges.resize(numOfOutputSlots);
}
ranges[outputIdx] = std::make_pair(min, max);
}
@@ -24,9 +31,49 @@ RangeTracker::MinMaxRange RangeTracker::GetRange(armnn::LayerGuid guid, unsigned
auto search = m_GuidToRangesMap.find(guid);
if (search == m_GuidToRangesMap.end())
{
- return DefaultRange();
+ if (IsInDynamicMode())
+ {
+ throw armnn::Exception("Have no entry for layer GUID [" + std::to_string(guid) + "]");
+ }
+ else
+ {
+ return DefaultRange();
+ }
}
return search->second.at(idx);
}
+void RangeTracker::RefineMin(LayerGuid guid, unsigned int idx, float newMin)
+{
+ auto& currentMin = m_GuidToRangesMap.find(guid)->second.at(idx).first;
+ if (newMin < currentMin)
+ {
+ currentMin = newMin;
+ }
+}
+
+void RangeTracker::RefineMax(LayerGuid guid, unsigned int idx, float newMax)
+{
+ auto& currentMax = m_GuidToRangesMap.find(guid)->second.at(idx).second;
+ if (newMax > currentMax)
+ {
+ currentMax = newMax;
+ }
+}
+
+void RangeTracker::ResetMinMax(LayerGuid guid, unsigned int idx, float newMin, float newMax)
+{
+ auto minMaxPair = m_GuidToRangesMap.find(guid);
+ auto& currentMin = minMaxPair->second.at(idx).first;
+ auto& currentMax = minMaxPair->second.at(idx).second;
+
+ currentMin = newMin;
+ currentMax = newMax;
+}
+
+void RangeTracker::Reset()
+{
+ m_GuidToRangesMap.clear();
+}
+
} //namespace armnn
\ No newline at end of file
diff --git a/src/armnn/RangeTracker.hpp b/src/armnn/RangeTracker.hpp
index 2e8b33ad0f..6662775e38 100644
--- a/src/armnn/RangeTracker.hpp
+++ b/src/armnn/RangeTracker.hpp
@@ -31,6 +31,21 @@ public:
/// Query that there is an entry for a layer
bool HasRanges(LayerGuid guid) const { return m_GuidToRangesMap.find(guid) != m_GuidToRangesMap.end(); }
+ /// Update min in RangeTracker with new_min if it is lower than current value
+ void RefineMin(LayerGuid guid, unsigned int slotIndex, float newMin);
+
+ /// Update max in RangeTracker with new_max if it is greater than current value
+ void RefineMax(LayerGuid guid, unsigned int slotIndex, float newMax);
+
+ /// Overwrite min and max in RangeTracker with newMin and newMax
+ void ResetMinMax(LayerGuid guid, unsigned int idx, float newMin, float newMax);
+
+ void Reset();
+
+ void SetDynamicMode(bool flag) { m_DynamicMode = flag; }
+
+ bool IsInDynamicMode() const { return m_DynamicMode; }
+
private:
using MinMaxRanges = std::vector<MinMaxRange>;
@@ -39,6 +54,8 @@ private:
/// Mapping from a layer Guid to an array of ranges for outputs
std::unordered_map<LayerGuid, MinMaxRanges> m_GuidToRangesMap;
+
+ bool m_DynamicMode = false;
};
} //namespace armnn
\ No newline at end of file
diff --git a/src/armnn/StaticRangeVisitor.hpp b/src/armnn/StaticRangeVisitor.hpp
index 5a16e184d6..8f2e698a7e 100644
--- a/src/armnn/StaticRangeVisitor.hpp
+++ b/src/armnn/StaticRangeVisitor.hpp
@@ -9,7 +9,7 @@
#include "RangeTracker.hpp"
#include <armnn/INetwork.hpp>
-#include <armnn/INetworkQuantizer.hpp>
+#include <armnnQuantizer/INetworkQuantizer.hpp>
namespace armnn
diff --git a/src/armnn/test/QuantizerTest.cpp b/src/armnn/test/QuantizerTest.cpp
index 372d0ca002..259e90fcca 100644
--- a/src/armnn/test/QuantizerTest.cpp
+++ b/src/armnn/test/QuantizerTest.cpp
@@ -5,7 +5,7 @@
#include <armnn/INetwork.hpp>
#include <armnn/Tensor.hpp>
-#include <armnn/INetworkQuantizer.hpp>
+#include <armnnQuantizer/INetworkQuantizer.hpp>
#include <armnn/Types.hpp>
#include "armnn/LayerVisitorBase.hpp"
@@ -15,6 +15,7 @@
#include "../OverrideInputRangeVisitor.hpp"
#include "../RangeTracker.hpp"
#include "../backends/backendsCommon/test/QuantizeHelper.hpp"
+#include "../../armnnQuantizer/CommandLineProcessor.hpp"
#include <boost/test/unit_test.hpp>
@@ -207,6 +208,95 @@ INetworkPtr CreateNetworkWithActivationLayer(const ActivationDescriptor& descrip
return network;
}
+INetworkPtr CreateNetworkWithInputOutputLayers()
+{
+ INetworkPtr network = INetwork::Create();
+
+ // Add input/output layers
+ IConnectableLayer* inputLayer = network->AddInputLayer(0);
+ IConnectableLayer* output = network->AddOutputLayer(1);
+
+ // Establish connections
+ inputLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+ // Set TensorInfo
+ TensorShape shape{8U};
+ TensorInfo info(shape, DataType::Float32);
+ inputLayer->GetOutputSlot(0).SetTensorInfo(info);
+
+ return network;
+}
+
+TensorInfo GetInputTensorInfo(const Network* network)
+{
+ for (auto&& inputLayer : network->GetGraph().GetInputLayers())
+ {
+ BOOST_ASSERT_MSG(inputLayer->GetNumOutputSlots() == 1, "Input layer should have exactly 1 output slot");
+ return inputLayer->GetOutputSlot(0).GetTensorInfo();
+ }
+ throw InvalidArgumentException("Network has no input layers");
+}
+
+BOOST_AUTO_TEST_CASE(InputOutputLayerDynamicQuant)
+{
+ INetworkPtr network = CreateNetworkWithInputOutputLayers();
+
+ armnn::TensorInfo tensorInfo = GetInputTensorInfo(boost::polymorphic_downcast<const Network*>(network.get()));
+
+ // Outliers -56 and 98
+ std::vector<float> inputData({0, 0, 0, -56, 98, 0, 0, 0});
+ armnn::ConstTensor inputTensor(tensorInfo, inputData.data());
+
+ InputTensors inputTensors;
+ inputTensors.push_back(std::make_pair(0, inputTensor));
+
+ armnn::INetworkQuantizerPtr quantizer = armnn::INetworkQuantizer::Create(network.get());
+
+ quantizer->Refine(inputTensors);
+
+ // Outliers -77 and 65
+ std::vector<float> inputData2({0, -77, 0, -56, 65, 0, 0, 0});
+ armnn::ConstTensor inputTensor2(tensorInfo, inputData2.data());
+ InputTensors inputTensors2;
+ inputTensors2.push_back(std::make_pair(0, inputTensor2));
+
+ quantizer->Refine(inputTensors2);
+
+ INetworkPtr quantizedNetwork = quantizer->ExportNetwork();
+ // The output layer should be quantized for a min/max range of -77 and 98
+ // according to the QAsymm8 quantization scheme
+ std::unique_ptr<IQuantizationScheme> quantizationScheme = std::make_unique<QAsymm8QuantizationScheme>();
+ OffsetScalePair qParams = quantizationScheme->ComputeScheme(-77.0, 98.0);
+
+ class TestOutputLayerVisitor : public LayerVisitorBase<VisitorNoThrowPolicy>
+ {
+ public:
+ TestOutputLayerVisitor(const OffsetScalePair& offsetScalePair, const DataType& dataType) :
+ m_OffsetScalePair(offsetScalePair), m_DataType(dataType) {}
+
+ void VisitOutputLayer(const IConnectableLayer* layer,
+ LayerBindingId id,
+ const char* name = nullptr) override
+ {
+ const TensorInfo& info = layer->GetInputSlot(0).GetConnection()->GetTensorInfo();
+ BOOST_CHECK_MESSAGE(info.GetDataType() == m_DataType,
+ std::string(armnn::GetDataTypeName(info.GetDataType()))
+ .append(" == ").append(armnn::GetDataTypeName(m_DataType)));
+ // int32_t
+ BOOST_CHECK(info.GetQuantizationOffset() == m_OffsetScalePair.second);
+ // float
+ BOOST_TEST(info.GetQuantizationScale() == m_OffsetScalePair.first, boost::test_tools::tolerance(0.001));
+ }
+
+ private:
+ const OffsetScalePair m_OffsetScalePair;
+ const DataType m_DataType;
+ };
+
+ TestOutputLayerVisitor visitor(qParams, quantizationScheme->GetDataType());
+ quantizedNetwork->Accept(visitor);
+}
+
BOOST_AUTO_TEST_CASE(QuantizeAbsActivation)
{
ActivationDescriptor descriptor;
diff --git a/src/armnnQuantizer/ArmNNQuantizerMain.cpp b/src/armnnQuantizer/ArmNNQuantizerMain.cpp
index 103597a72d..96d6515ba0 100644
--- a/src/armnnQuantizer/ArmNNQuantizerMain.cpp
+++ b/src/armnnQuantizer/ArmNNQuantizerMain.cpp
@@ -5,7 +5,7 @@
#include "CommandLineProcessor.hpp"
#include <armnnDeserializer/IDeserializer.hpp>
-#include <armnn/INetworkQuantizer.hpp>
+#include <armnnQuantizer/INetworkQuantizer.hpp>
#include <armnnSerializer/ISerializer.hpp>
#include "QuantizationDataSet.hpp"
#include "QuantizationInput.hpp"
@@ -52,7 +52,7 @@ int main(int argc, char* argv[])
armnnQuantizer::InputLayerVisitor inputLayerVisitor;
network->Accept(inputLayerVisitor);
- for(armnnQuantizer::QuantizationInput quantizationInput : dataSet)
+ for (armnnQuantizer::QuantizationInput quantizationInput : dataSet)
{
armnn::InputTensors inputTensors;
std::vector<std::vector<float>> inputData(quantizationInput.GetNumberOfInputs());
diff --git a/src/armnnQuantizer/QuantizationInput.hpp b/src/armnnQuantizer/QuantizationInput.hpp
index ebabdd704f..1bfe84bc19 100644
--- a/src/armnnQuantizer/QuantizationInput.hpp
+++ b/src/armnnQuantizer/QuantizationInput.hpp
@@ -7,7 +7,7 @@
#include <map>
#include <armnn/Types.hpp>
-#include <armnn/INetworkQuantizer.hpp>
+#include <armnnQuantizer/INetworkQuantizer.hpp>
namespace armnnQuantizer
{
diff --git a/src/armnnUtils/TensorUtils.cpp b/src/armnnUtils/TensorUtils.cpp
index 57f823fe13..c2fbbe0bcc 100644
--- a/src/armnnUtils/TensorUtils.cpp
+++ b/src/armnnUtils/TensorUtils.cpp
@@ -4,6 +4,7 @@
//
#include "TensorUtils.hpp"
+#include <backendsCommon/ITensorHandle.hpp>
namespace armnnUtils
{
@@ -47,4 +48,31 @@ armnn::TensorInfo GetTensorInfo(unsigned int numberOfBatches,
}
}
+std::pair<float, float> FindMinMax(armnn::ITensorHandle* tensorHandle)
+{
+ auto tensor_data = static_cast<const float *>(tensorHandle->Map(true));
+ auto tensor_size = tensorHandle->GetShape().GetNumElements();
+
+ // Set min/max initially to first value in tensor
+ float min = tensor_data[0];
+ float max = tensor_data[0];
+
+ // Loop over rest of tensor and update min/max if necessary
+ for (unsigned int val = 1; val < tensor_size; val++)
+ {
+ if (tensor_data[val] < min)
+ {
+ min = tensor_data[val];
+ }
+ else if (tensor_data[val] > max)
+ {
+ max = tensor_data[val];
+ }
+ }
+
+ tensorHandle->Unmap();
+
+ return std::make_pair(min, max);
+}
+
}
diff --git a/src/armnnUtils/TensorUtils.hpp b/src/armnnUtils/TensorUtils.hpp
index fb5e6eb10d..c273b497b3 100644
--- a/src/armnnUtils/TensorUtils.hpp
+++ b/src/armnnUtils/TensorUtils.hpp
@@ -22,4 +22,6 @@ armnn::TensorInfo GetTensorInfo(unsigned int numberOfBatches,
const armnn::DataLayout dataLayout,
const armnn::DataType dataType);
+std::pair<float, float> FindMinMax(armnn::ITensorHandle* tensorHandle);
+
} // namespace armnnUtils