From f92dfced4498f12b9315c0fa377ba7be8998b607 Mon Sep 17 00:00:00 2001 From: Jim Flynn Date: Thu, 2 May 2019 11:33:25 +0100 Subject: IVGCVSW-2833 Add Dynamic Quantization Change-Id: Iba91e3f3625639f01d66f81a9f3e419e0e285d66 Signed-off-by: Jim Flynn --- CMakeLists.txt | 4 +- include/armnn/ILayerVisitor.hpp | 2 + include/armnn/INetworkQuantizer.hpp | 51 ----- include/armnn/TypesUtils.hpp | 4 +- include/armnnQuantizer/INetworkQuantizer.hpp | 51 +++++ src/armnn/DynamicQuantizationVisitor.cpp | 330 +++++++++++++++++++++++++++ src/armnn/DynamicQuantizationVisitor.hpp | 137 +++++++++++ src/armnn/NetworkQuantizer.cpp | 106 ++++++++- src/armnn/NetworkQuantizer.hpp | 27 ++- src/armnn/NetworkQuantizerUtils.hpp | 2 + src/armnn/QuantizerVisitor.hpp | 2 +- src/armnn/RangeTracker.cpp | 55 ++++- src/armnn/RangeTracker.hpp | 17 ++ src/armnn/StaticRangeVisitor.hpp | 2 +- src/armnn/test/QuantizerTest.cpp | 92 +++++++- src/armnnQuantizer/ArmNNQuantizerMain.cpp | 4 +- src/armnnQuantizer/QuantizationInput.hpp | 2 +- src/armnnUtils/TensorUtils.cpp | 28 +++ src/armnnUtils/TensorUtils.hpp | 2 + 19 files changed, 848 insertions(+), 70 deletions(-) delete mode 100644 include/armnn/INetworkQuantizer.hpp create mode 100644 include/armnnQuantizer/INetworkQuantizer.hpp create mode 100644 src/armnn/DynamicQuantizationVisitor.cpp create mode 100644 src/armnn/DynamicQuantizationVisitor.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index c54c3955f1..b6c977612c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -61,6 +61,7 @@ list(APPEND armnnUtils_sources ) add_library_ex(armnnUtils STATIC ${armnnUtils_sources}) +target_include_directories(armnnUtils PRIVATE src/backends) if(BUILD_CAFFE_PARSER) # ArmNN Parser source files required for all build options @@ -203,7 +204,6 @@ list(APPEND armnn_sources include/armnn/ILayerSupport.hpp include/armnn/ILayerVisitor.hpp include/armnn/INetwork.hpp - include/armnn/INetworkQuantizer.hpp include/armnn/IProfiler.hpp include/armnn/IRuntime.hpp 
include/armnn/LayerSupport.hpp @@ -315,6 +315,8 @@ list(APPEND armnn_sources src/armnn/CompatibleTypes.hpp src/armnn/Descriptors.cpp src/armnn/DeviceSpec.hpp + src/armnn/DynamicQuantizationVisitor.cpp + src/armnn/DynamicQuantizationVisitor.hpp src/armnn/Exceptions.cpp src/armnn/ExecutionFrame.cpp src/armnn/ExecutionFrame.hpp diff --git a/include/armnn/ILayerVisitor.hpp b/include/armnn/ILayerVisitor.hpp index eabad58366..ab793bc587 100644 --- a/include/armnn/ILayerVisitor.hpp +++ b/include/armnn/ILayerVisitor.hpp @@ -347,6 +347,8 @@ public: virtual void VisitSwitchLayer(const IConnectableLayer* layer, const char* name = nullptr) = 0; + virtual void StartVisit() {} + virtual void FinishVisit() {} }; } // namespace armnn diff --git a/include/armnn/INetworkQuantizer.hpp b/include/armnn/INetworkQuantizer.hpp deleted file mode 100644 index 89548d1057..0000000000 --- a/include/armnn/INetworkQuantizer.hpp +++ /dev/null @@ -1,51 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include -#include -#include - -namespace armnn -{ - -struct QuantizerOptions -{ - QuantizerOptions() : m_ActivationFormat(DataType::QuantisedAsymm8) {} - QuantizerOptions(DataType activationFormat) : m_ActivationFormat(activationFormat) {} - - DataType m_ActivationFormat; -}; - -using INetworkQuantizerPtr = std::unique_ptr; - -/// Quantizer class Quantizes a float32 InputNetwork -class INetworkQuantizer -{ -public: - /// Create Quantizer object and return raw pointer - static INetworkQuantizer* CreateRaw(INetwork* inputNetwork, const QuantizerOptions& options = QuantizerOptions()); - - /// Create Quantizer object wrapped in unique_ptr - static INetworkQuantizerPtr Create(INetwork* inputNetwork, const QuantizerOptions& options = QuantizerOptions()); - - /// Destroy Quantizer object - static void Destroy(INetworkQuantizer* quantizer); - - /// Overrides the default quantization values for the input layer with the given id - virtual 
void OverrideInputRange(LayerBindingId layerId, float min, float max) = 0; - - /// Refine input network with a set of refinement data for specified LayerBindingId - virtual void Refine(const InputTensors& inputTensors) = 0; - - /// Extract final quantized network - virtual INetworkPtr ExportNetwork() = 0; - -protected: - virtual ~INetworkQuantizer() {} -}; - -} //namespace armnn diff --git a/include/armnn/TypesUtils.hpp b/include/armnn/TypesUtils.hpp index 837490d258..cb52471cd5 100644 --- a/include/armnn/TypesUtils.hpp +++ b/include/armnn/TypesUtils.hpp @@ -4,8 +4,8 @@ // #pragma once -#include "Tensor.hpp" -#include "Types.hpp" +#include +#include #include #include diff --git a/include/armnnQuantizer/INetworkQuantizer.hpp b/include/armnnQuantizer/INetworkQuantizer.hpp new file mode 100644 index 0000000000..89548d1057 --- /dev/null +++ b/include/armnnQuantizer/INetworkQuantizer.hpp @@ -0,0 +1,51 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include +#include +#include + +namespace armnn +{ + +struct QuantizerOptions +{ + QuantizerOptions() : m_ActivationFormat(DataType::QuantisedAsymm8) {} + QuantizerOptions(DataType activationFormat) : m_ActivationFormat(activationFormat) {} + + DataType m_ActivationFormat; +}; + +using INetworkQuantizerPtr = std::unique_ptr; + +/// Quantizer class Quantizes a float32 InputNetwork +class INetworkQuantizer +{ +public: + /// Create Quantizer object and return raw pointer + static INetworkQuantizer* CreateRaw(INetwork* inputNetwork, const QuantizerOptions& options = QuantizerOptions()); + + /// Create Quantizer object wrapped in unique_ptr + static INetworkQuantizerPtr Create(INetwork* inputNetwork, const QuantizerOptions& options = QuantizerOptions()); + + /// Destroy Quantizer object + static void Destroy(INetworkQuantizer* quantizer); + + /// Overrides the default quantization values for the input layer with the given id + virtual void 
OverrideInputRange(LayerBindingId layerId, float min, float max) = 0; + + /// Refine input network with a set of refinement data for specified LayerBindingId + virtual void Refine(const InputTensors& inputTensors) = 0; + + /// Extract final quantized network + virtual INetworkPtr ExportNetwork() = 0; + +protected: + virtual ~INetworkQuantizer() {} +}; + +} //namespace armnn diff --git a/src/armnn/DynamicQuantizationVisitor.cpp b/src/armnn/DynamicQuantizationVisitor.cpp new file mode 100644 index 0000000000..9b33fb7642 --- /dev/null +++ b/src/armnn/DynamicQuantizationVisitor.cpp @@ -0,0 +1,330 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "DynamicQuantizationVisitor.hpp" +#include "NetworkUtils.hpp" + +#include +#include +#include + +#include + +namespace armnn +{ + +DynamicQuantizationVisitor::DynamicQuantizationVisitor(RangeTracker& rangeTracker, Graph& graph) + : m_RangeTracker(rangeTracker), + m_Graph(graph) +{} + +void DynamicQuantizationVisitor::SetRange(const IConnectableLayer* layer, unsigned int outputIdx, float min, float max) +{ + m_RangeTracker.SetRange(layer, outputIdx, min, max); +} + +void DynamicQuantizationVisitor::ForwardParentParameters(const IConnectableLayer* layer) +{ + for (unsigned int i = 0; i < layer->GetNumInputSlots(); ++i) + { + const IOutputSlot *outputSlot = layer->GetInputSlot(i).GetConnection(); + LayerGuid previousLayerId = outputSlot->GetOwningLayerGuid(); + unsigned int ownerIndex = outputSlot->CalculateIndexOnOwner(); + const auto parentRange = m_RangeTracker.GetRange(previousLayerId, ownerIndex); + SetRange(layer, i, parentRange.first, parentRange.second); + } +} + +void DynamicQuantizationVisitor::AddToCalibratedLayers(const IConnectableLayer* layer) +{ + m_LayersToCalibrate.push_back(layer); +} + +void DynamicQuantizationVisitor::AddToNonCalibratedLayers(const IConnectableLayer* layer) +{ + m_LayersNotToCalibrate.push_back(layer); +} + +void 
DynamicQuantizationVisitor::FinishVisit() +{ + for (const IConnectableLayer* layer : m_LayersToCalibrate) + { + std::vector newDebugLayers = InsertDebugLayerAfter( + m_Graph, *boost::polymorphic_downcast(const_cast(layer))); + // record them so we can take them out again efficiently afterward + m_DebugLayers.insert(std::end(m_DebugLayers), std::begin(newDebugLayers), std::end(newDebugLayers)); + } +} + +void DynamicQuantizationVisitor::RemoveDebugLayers() +{ + for (DebugLayer* debugLayer : m_DebugLayers) + { + OutputSlot& proceedingOutputSlot = *debugLayer->GetInputSlot(0).GetConnectedOutputSlot(); + InputSlot& succeedingInputSlot = *debugLayer->GetOutputSlot(0).GetConnection(0); + proceedingOutputSlot.Disconnect(debugLayer->GetInputSlot(0)); + debugLayer->GetOutputSlot(0).Disconnect(succeedingInputSlot); + + m_Graph.EraseLayer(debugLayer); + + proceedingOutputSlot.Connect(succeedingInputSlot); + } + m_DebugLayers.clear(); +} + +void DynamicQuantizationVisitor::VisitNonCalibratedLayers() { + RemoveDebugLayers(); + for (const IConnectableLayer* layer : m_LayersNotToCalibrate) + { + ForwardParentParameters(layer); + } +} + +void DynamicQuantizationVisitor::VisitAdditionLayer(const IConnectableLayer* layer, const char* name) +{ + SetRange(layer, 0, -20.f, 20.f); + AddToCalibratedLayers(layer); +} + +void DynamicQuantizationVisitor::VisitBatchNormalizationLayer(const IConnectableLayer* layer, + const BatchNormalizationDescriptor& desc, + const ConstTensor& mean, + const ConstTensor& variance, + const ConstTensor& beta, + const ConstTensor& gamma, + const char* name) +{ + boost::ignore_unused(desc); + boost::ignore_unused(mean); + boost::ignore_unused(variance); + boost::ignore_unused(beta); + boost::ignore_unused(gamma); + boost::ignore_unused(name); + SetRange(layer, 0, -15.0f, 15.0f); + AddToCalibratedLayers(layer); +} + +void DynamicQuantizationVisitor::VisitConvolution2dLayer(const IConnectableLayer* layer, + const Convolution2dDescriptor& convolution2dDescriptor, 
+ const ConstTensor& weights, + const Optional& biases, + const char* name) +{ + boost::ignore_unused(convolution2dDescriptor); + boost::ignore_unused(weights); + boost::ignore_unused(biases); + boost::ignore_unused(name); + SetRange(layer, 0, -15.0f, 15.0f); + AddToCalibratedLayers(layer); +} + +void DynamicQuantizationVisitor::VisitDepthwiseConvolution2dLayer(const IConnectableLayer* layer, + const DepthwiseConvolution2dDescriptor& desc, + const ConstTensor& weights, + const Optional& biases, + const char* name) +{ + boost::ignore_unused(desc); + boost::ignore_unused(weights); + boost::ignore_unused(biases); + boost::ignore_unused(name); + SetRange(layer, 0, -15.0f, 15.0f); + AddToCalibratedLayers(layer); +} + +void DynamicQuantizationVisitor::VisitActivationLayer(const IConnectableLayer* layer, + const ActivationDescriptor& activationDescriptor, + const char* name) +{ + switch (activationDescriptor.m_Function) + { + // Range is 0, 15 for Abs, Linear, ReLu and Soft ReLu + case ActivationFunction::Abs: + case ActivationFunction::Linear: + case ActivationFunction::ReLu: + case ActivationFunction::SoftReLu: + SetRange(layer, 0, 0.f, 15.f); + break; + case ActivationFunction::BoundedReLu: + SetRange(layer, 0, 0.f, activationDescriptor.m_A); + break; + case ActivationFunction::TanH: + SetRange(layer, 0, -1.f, 1.f); + break; + case ActivationFunction::LeakyReLu: + SetRange(layer, 0, -5.f, 15.f); + break; + default: + SetRange(layer, 0, -15.f, 15.f); + break; + } + AddToCalibratedLayers(layer); +} + +void DynamicQuantizationVisitor::VisitFullyConnectedLayer(const IConnectableLayer *layer, + const FullyConnectedDescriptor& desc, + const ConstTensor& weights, + const Optional& biases, + const char *name) +{ + boost::ignore_unused(desc); + boost::ignore_unused(weights); + boost::ignore_unused(biases); + boost::ignore_unused(name); + SetRange(layer, 0, -15.0f, 15.0f); + AddToCalibratedLayers(layer); +} + +void DynamicQuantizationVisitor::VisitPermuteLayer(const 
IConnectableLayer* layer, + const PermuteDescriptor& permuteDescriptor, + const char* name) +{ + boost::ignore_unused(permuteDescriptor); + boost::ignore_unused(name); + AddToNonCalibratedLayers(layer); +} + +void DynamicQuantizationVisitor::VisitSpaceToBatchNdLayer(const IConnectableLayer* layer, + const SpaceToBatchNdDescriptor& spaceToBatchNdDescriptor, + const char* name) +{ + boost::ignore_unused(spaceToBatchNdDescriptor); + boost::ignore_unused(name); + AddToNonCalibratedLayers(layer); +} + +void DynamicQuantizationVisitor::VisitPooling2dLayer(const IConnectableLayer* layer, + const Pooling2dDescriptor& pooling2dDescriptor, + const char* name) +{ + boost::ignore_unused(pooling2dDescriptor); + boost::ignore_unused(name); + AddToNonCalibratedLayers(layer); +} + +void DynamicQuantizationVisitor::VisitSoftmaxLayer(const IConnectableLayer* layer, + const SoftmaxDescriptor& softmaxDescriptor, + const char* name) +{ + boost::ignore_unused(softmaxDescriptor); + boost::ignore_unused(name); + SetRange(layer, 0, 0.f, 1.f); + AddToCalibratedLayers(layer); +} + +void DynamicQuantizationVisitor::VisitConstantLayer(const IConnectableLayer* layer, + const ConstTensor& input, + const char* name) +{ + boost::ignore_unused(name); + + if (input.GetDataType() != DataType::Float32) + { + throw InvalidArgumentException("Quantization is supported only for FP32 tensors"); + } + + // Work out the range based on the input constants + unsigned int inputNumElements = input.GetNumElements(); + const float* inputData = reinterpret_cast(input.GetMemoryArea()); + + float min = std::numeric_limits::max(); + float max = std::numeric_limits::lowest(); + + for (unsigned int i = 0; i < inputNumElements; i++) + { + const float inputValue = inputData[i]; + + min = std::min(min, inputValue); + max = std::max(max, inputValue); + } + SetRange(layer, 0, min, max); +} + +void DynamicQuantizationVisitor::VisitMergerLayer(const IConnectableLayer* layer, + const OriginsDescriptor& mergerDescriptor, + const 
char* name) +{ + float min = std::numeric_limits::max(); + float max = std::numeric_limits::lowest(); + for (unsigned int i = 0; i < layer->GetNumInputSlots(); ++i) + { + const IOutputSlot* outputSlot = layer->GetInputSlot(i).GetConnection(); + LayerGuid layerId = outputSlot->GetOwningLayerGuid(); + unsigned int slotIndex = outputSlot->CalculateIndexOnOwner(); + RangeTracker::MinMaxRange range = m_RangeTracker.GetRange(layerId, slotIndex); + min = std::min(min, range.first); + max = std::max(max, range.second); + } + SetRange(layer, 0, min, max); + AddToCalibratedLayers(layer); +} + +void DynamicQuantizationVisitor::VisitReshapeLayer(const IConnectableLayer* layer, + const ReshapeDescriptor& reshapeDescriptor, + const char* name) +{ + boost::ignore_unused(reshapeDescriptor); + boost::ignore_unused(name); + AddToNonCalibratedLayers(layer); +} + +void DynamicQuantizationVisitor::VisitSplitterLayer(const IConnectableLayer* layer, + const SplitterDescriptor& splitterDescriptor, + const char* name) +{ + boost::ignore_unused(splitterDescriptor); + boost::ignore_unused(name); + AddToNonCalibratedLayers(layer); +} + +void DynamicQuantizationVisitor::VisitResizeBilinearLayer(const IConnectableLayer* layer, + const ResizeBilinearDescriptor& resizeDesc, + const char* name) +{ + boost::ignore_unused(resizeDesc); + boost::ignore_unused(name); + AddToNonCalibratedLayers(layer); +} + +void DynamicQuantizationVisitor::VisitStridedSliceLayer(const IConnectableLayer* layer, + const StridedSliceDescriptor& stridedSliceDescriptor, + const char* name) +{ + boost::ignore_unused(stridedSliceDescriptor); + boost::ignore_unused(name); + AddToNonCalibratedLayers(layer); +} + +void DynamicQuantizationVisitor::VisitBatchToSpaceNdLayer(const IConnectableLayer* layer, + const BatchToSpaceNdDescriptor& batchToSpaceNdDescriptor, + const char* name) +{ + boost::ignore_unused(batchToSpaceNdDescriptor); + boost::ignore_unused(name); + AddToNonCalibratedLayers(layer); +} + +void 
DynamicQuantizationVisitor::VisitInputLayer(const IConnectableLayer* layer, LayerBindingId id, const char* name) +{ + boost::ignore_unused(id); + boost::ignore_unused(name); + SetRange(layer, 0, -0.0f, 0.0f); + AddToCalibratedLayers(layer); +} + +void DynamicQuantizationVisitor::VisitOutputLayer(const IConnectableLayer* layer, LayerBindingId id, const char* name) +{ + boost::ignore_unused(id); + boost::ignore_unused(name); + AddToNonCalibratedLayers(layer); + m_OutputLayers.push_back(id); +} + +const std::vector& DynamicQuantizationVisitor::GetOutputLayers() +{ + return m_OutputLayers; +} + +} //namespace armnn diff --git a/src/armnn/DynamicQuantizationVisitor.hpp b/src/armnn/DynamicQuantizationVisitor.hpp new file mode 100644 index 0000000000..6d430f1142 --- /dev/null +++ b/src/armnn/DynamicQuantizationVisitor.hpp @@ -0,0 +1,137 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "armnn/LayerVisitorBase.hpp" +#include "RangeTracker.hpp" +#include "layers/DebugLayer.hpp" + +#include +#include + +namespace armnn +{ + +/// Visitor class to establish min/max ranges based on the type of the layer +class DynamicQuantizationVisitor : public LayerVisitorBase +{ +public: + DynamicQuantizationVisitor(RangeTracker& rangeTracker, Graph& graph); + ~DynamicQuantizationVisitor() = default; + + /// Functions to set the Range on a per-layer-type basis + void VisitAdditionLayer(const IConnectableLayer* layer, const char* name = nullptr) override; + + void VisitBatchNormalizationLayer(const IConnectableLayer* layer, + const BatchNormalizationDescriptor& desc, + const ConstTensor& mean, + const ConstTensor& variance, + const ConstTensor& beta, + const ConstTensor& gamma, + const char* name = nullptr) override; + + void VisitConvolution2dLayer(const IConnectableLayer* layer, + const Convolution2dDescriptor& convolution2dDescriptor, + const ConstTensor& weights, + const Optional& biases, + const char* name = 
nullptr) override; + + void VisitDepthwiseConvolution2dLayer(const IConnectableLayer* layer, + const DepthwiseConvolution2dDescriptor& desc, + const ConstTensor& weights, + const Optional& biases, + const char* name = nullptr) override; + + void VisitActivationLayer(const IConnectableLayer* layer, + const ActivationDescriptor& activationDescriptor, + const char* name = nullptr) override; + + void VisitFullyConnectedLayer(const IConnectableLayer *layer, + const FullyConnectedDescriptor& desc, + const ConstTensor& weights, + const Optional& biases, + const char *name) override; + + void VisitPermuteLayer(const IConnectableLayer* layer, + const PermuteDescriptor& permuteDescriptor, + const char* name) override; + + void VisitSpaceToBatchNdLayer(const IConnectableLayer* layer, + const SpaceToBatchNdDescriptor& spaceToBatchNdDescriptor, + const char* name = nullptr) override; + + void VisitPooling2dLayer(const IConnectableLayer* layer, + const Pooling2dDescriptor& pooling2dDescriptor, + const char* name) override; + + void VisitSoftmaxLayer(const IConnectableLayer* layer, + const SoftmaxDescriptor& softmaxDescriptor, + const char* name = nullptr) override; + + void VisitConstantLayer(const IConnectableLayer* layer, + const ConstTensor& input, + const char* name = nullptr) override; + + void VisitMergerLayer(const IConnectableLayer* layer, + const OriginsDescriptor& mergerDescriptor, + const char* name = nullptr) override; + + void VisitReshapeLayer(const IConnectableLayer* layer, + const ReshapeDescriptor& reshapeDescriptor, + const char* name = nullptr) override; + + void VisitSplitterLayer(const IConnectableLayer* layer, + const SplitterDescriptor& splitterDescriptor, + const char* name = nullptr) override; + + void VisitResizeBilinearLayer(const IConnectableLayer* layer, + const ResizeBilinearDescriptor& resizeDesc, + const char* name = nullptr) override; + + void VisitStridedSliceLayer(const IConnectableLayer* layer, + const StridedSliceDescriptor& 
stridedSliceDescriptor, + const char* name = nullptr) override; + + void VisitBatchToSpaceNdLayer(const IConnectableLayer* layer, + const BatchToSpaceNdDescriptor& batchToSpaceNdDescriptor, + const char* name = nullptr) override; + + void VisitInputLayer(const IConnectableLayer* layer, + LayerBindingId id, + const char* name = nullptr) override; + + void VisitOutputLayer(const IConnectableLayer* layer, + LayerBindingId id, + const char* name = nullptr) override; + + void FinishVisit() override; + void VisitNonCalibratedLayers(); + + const std::vector& GetOutputLayers(); + +private: + /// Set the range for an output slot on a layer + void SetRange(const IConnectableLayer* layer, unsigned int outputIdx, float min, float max); + + void ForwardParentParameters(const IConnectableLayer* layer); + + /// Mapping from a layer Guid to an array of ranges for outputs + RangeTracker& m_RangeTracker; + + Graph& m_Graph; + + std::vector m_LayersToCalibrate; + std::vector m_LayersNotToCalibrate; + std::vector m_DebugLayers; + + std::vector m_OutputLayers; + + void AddToCalibratedLayers(const IConnectableLayer* layer); + void AddToNonCalibratedLayers(const IConnectableLayer* layer); + void RemoveDebugLayers(); +}; + +} //namespace armnn \ No newline at end of file diff --git a/src/armnn/NetworkQuantizer.cpp b/src/armnn/NetworkQuantizer.cpp index 4692a6803f..12e459d276 100644 --- a/src/armnn/NetworkQuantizer.cpp +++ b/src/armnn/NetworkQuantizer.cpp @@ -7,6 +7,8 @@ #include #include #include +#include +#include #include "Graph.hpp" #include "Layer.hpp" @@ -14,6 +16,7 @@ #include "NetworkQuantizer.hpp" #include "NetworkQuantizerUtils.hpp" +#include "DynamicQuantizationVisitor.hpp" #include "StaticRangeVisitor.hpp" #include "QuantizerVisitor.hpp" #include "OverrideInputRangeVisitor.hpp" @@ -21,9 +24,15 @@ #include #include +#include + + namespace armnn { +using TContainer = boost::variant, std::vector, std::vector>; + + INetworkQuantizer* INetworkQuantizer::CreateRaw(INetwork* 
inputNetwork, const QuantizerOptions& options) { return new NetworkQuantizer(inputNetwork, options); @@ -51,16 +60,102 @@ void NetworkQuantizer::OverrideInputRange(LayerBindingId layerId, float min, flo void NetworkQuantizer::Refine(const InputTensors& inputTensors) { - //Implementation in a following commit + // The first time Refine is called the m_Runtime and the DynamicQuantizationVisitor + // will not have been created. Need to get the environment set up, Runtime loaded, + // DynamicQuantizationVisitor created and run over the network to initialise itself + // and the RangeTracker the Debug callback registered and an initial inference + // done to set up the first min/max values + if (!m_Runtime) + { + m_RefineCount = 0; + m_Ranges.SetDynamicMode(true); + const Graph& cGraph = boost::polymorphic_downcast(m_InputNetwork)->GetGraph().TopologicalSort(); + + // need to insert Debug layers in the DynamicQuantizationVisitor + Graph& graph = const_cast(cGraph); + + // Initialize RangeTracker to the default values for each layer. + // The default values are overwritten by the min/max that is + // recorded during the first dataset min/max calibration. This + // initialisation is only required for the first call of Refine(). 
+ m_DynamicQuantizationVisitor = DynamicQuantizationVisitor(m_Ranges, graph); + VisitLayers(cGraph, m_DynamicQuantizationVisitor.value()); + + IRuntime::CreationOptions options; + m_Runtime = IRuntime::Create(options); + + // Optimize network - debug already enabled for layers that require quantization + OptimizerOptions optimizerOptions(false, false); + std::vector backends = {"CpuRef"}; + IOptimizedNetworkPtr optimizedNet = Optimize(*m_InputNetwork, + backends, + m_Runtime->GetDeviceSpec(), + optimizerOptions); + + m_Runtime->LoadNetwork(m_NetworkId, std::move(optimizedNet)); + + // Debug callback function to refine min/max in RangeTracker + auto rangeTrackerCallback = [&](LayerGuid guid, unsigned int slotIndex, ITensorHandle *tensorHandle) { + // Get min/max pair from tensor data + std::pair minMax = armnnUtils::FindMinMax(tensorHandle); + + // For first calibration dataset, set min/max range in RangeTracker to + // min/max ranges gathered during inference + if (m_RefineCount == 0) + { + m_Ranges.ResetMinMax(guid, slotIndex, minMax.first, minMax.second); + } + else + { + // For every other calibration dataset, only set min/max range if the + // values gathered are less than / greater than originally recorded. 
+ m_Ranges.RefineMin(guid, slotIndex, minMax.first); + m_Ranges.RefineMax(guid, slotIndex, minMax.second); + } + }; + + m_Runtime->RegisterDebugCallback(m_NetworkId, rangeTrackerCallback); + } + + // Create output tensor for EnqueueWorkload + std::vector outputBindings; + auto outputLayers = m_DynamicQuantizationVisitor.value().GetOutputLayers(); + std::vector outputVectors; + for (auto outputLayerBindingId : outputLayers) + { + auto outputTensorInfo = m_Runtime->GetOutputTensorInfo(m_NetworkId, outputLayerBindingId); + outputBindings.push_back(std::make_pair(outputLayerBindingId, outputTensorInfo)); + outputVectors.push_back(std::vector(outputTensorInfo.GetNumElements(), 0)); + } + OutputTensors outputTensors = armnnUtils::MakeOutputTensors(outputBindings, outputVectors); + + // Execute EnqueueWorkload with calibration image + m_Runtime->EnqueueWorkload(m_NetworkId, inputTensors, outputTensors); + ++m_RefineCount; } INetworkPtr NetworkQuantizer::ExportNetwork() { const Graph& graph = boost::polymorphic_downcast(m_InputNetwork)->GetGraph().TopologicalSort(); - // Step 1) Walk the graph and register min/max values for intermediate tensors - StaticRangeVisitor rangeVisitor(m_Ranges); - VisitLayers(graph, rangeVisitor); + // Step 1) Walk the graph and populate default min/max values for + // intermediate tensors, only if Runtime does not exist (created + // if Refine has been called) + if (!m_Runtime) + { + m_Ranges.SetDynamicMode(false); + StaticRangeVisitor rangeVisitor(m_Ranges); + VisitLayers(graph, rangeVisitor); + } + else + { + // Set min/max range of non-calibrated layers to parent layer's range + m_DynamicQuantizationVisitor.value().VisitNonCalibratedLayers(); + // now tear down the runtime and the dynamic visitor. 
+ m_Runtime.reset(nullptr); + m_DynamicQuantizationVisitor = EmptyOptional(); + m_RefineCount = 0; + } // Step 2) Convert input InputNetwork to Quantized InputNetwork std::unique_ptr quantizationScheme; @@ -79,6 +174,9 @@ INetworkPtr NetworkQuantizer::ExportNetwork() QuantizerVisitor quantizerVisitor(m_Ranges, quantizationScheme.get()); VisitLayers(graph, quantizerVisitor); + // clear the ranges + m_Ranges.Reset(); + return quantizerVisitor.RetrieveFinalNetwork(); } diff --git a/src/armnn/NetworkQuantizer.hpp b/src/armnn/NetworkQuantizer.hpp index 4f6359f36d..d384bdc545 100644 --- a/src/armnn/NetworkQuantizer.hpp +++ b/src/armnn/NetworkQuantizer.hpp @@ -6,9 +6,12 @@ #pragma once #include -#include +#include +#include #include +#include +#include "DynamicQuantizationVisitor.hpp" #include "RangeTracker.hpp" namespace armnn @@ -18,21 +21,41 @@ class NetworkQuantizer : public INetworkQuantizer { public: NetworkQuantizer(INetwork* inputNetwork, const QuantizerOptions& options) - : m_InputNetwork(inputNetwork), m_Options(options) {} + : m_InputNetwork(inputNetwork), + m_NetworkId(0), + m_Runtime(nullptr, &IRuntime::Destroy), + m_RefineCount(0), + m_Options(options) {} void OverrideInputRange(LayerBindingId layerId, float min, float max) override; void Refine(const InputTensors& inputTensors) override; + + // Required for testing? Need some way to get min/max in RangeTracker (m_Ranges) + std::pair GetMinMaxRange(LayerGuid guid, unsigned int idx) { return m_Ranges.GetRange(guid, idx); } INetworkPtr ExportNetwork() override; private: /// Original input network to quantize INetwork* m_InputNetwork; + NetworkId m_NetworkId; + + // if we are run in dynamic mode this unique pointer will hold + // the runtime between invocations of the Refine method. 
+ IRuntimePtr m_Runtime; + + Optional m_DynamicQuantizationVisitor; + + // counts the number of times refine is called + unsigned int m_RefineCount; + /// Mapping from Guid to an array of ranges for outputs RangeTracker m_Ranges; /// Options for the NetworkQuantizer QuantizerOptions m_Options; + + std::pair FindMinMax(ITensorHandle* tensorHandle); }; } //namespace armnn diff --git a/src/armnn/NetworkQuantizerUtils.hpp b/src/armnn/NetworkQuantizerUtils.hpp index 26f67f95b2..6176a9c794 100644 --- a/src/armnn/NetworkQuantizerUtils.hpp +++ b/src/armnn/NetworkQuantizerUtils.hpp @@ -49,10 +49,12 @@ ConstTensor CreateQuantizedConst(const ConstTensor& tensor, std::vector template void VisitLayers(const LayerContainer& layerContainer, ILayerVisitor& visitor) { + visitor.StartVisit(); for (auto layer : layerContainer) { layer->Accept(visitor); } + visitor.FinishVisit(); } } // namespace armnn diff --git a/src/armnn/QuantizerVisitor.hpp b/src/armnn/QuantizerVisitor.hpp index 1751229435..eb9ebac3d9 100644 --- a/src/armnn/QuantizerVisitor.hpp +++ b/src/armnn/QuantizerVisitor.hpp @@ -11,7 +11,7 @@ #include #include -#include +#include #include diff --git a/src/armnn/RangeTracker.cpp b/src/armnn/RangeTracker.cpp index 202510346e..ae756fbb9c 100644 --- a/src/armnn/RangeTracker.cpp +++ b/src/armnn/RangeTracker.cpp @@ -4,17 +4,24 @@ // #include "RangeTracker.hpp" +#include "InternalTypes.hpp" namespace armnn { -void RangeTracker::SetRange(const armnn::IConnectableLayer *layer, unsigned int outputIdx, float min, float max) +void RangeTracker::SetRange(const armnn::IConnectableLayer* layer, unsigned int outputIdx, float min, float max) { auto& ranges = m_GuidToRangesMap[layer->GetGuid()]; - if (ranges.size() < layer->GetNumOutputSlots()) + unsigned int numOfOutputSlots = layer->GetNumOutputSlots(); + // output layers are a special case + if (numOfOutputSlots == 0) { - ranges.resize(layer->GetNumOutputSlots()); + ++numOfOutputSlots; + } + if (ranges.size() < numOfOutputSlots) + { + 
ranges.resize(numOfOutputSlots); } ranges[outputIdx] = std::make_pair(min, max); } @@ -24,9 +31,49 @@ RangeTracker::MinMaxRange RangeTracker::GetRange(armnn::LayerGuid guid, unsigned auto search = m_GuidToRangesMap.find(guid); if (search == m_GuidToRangesMap.end()) { - return DefaultRange(); + if (IsInDynamicMode()) + { + throw armnn::Exception("Have no entry for layer GUID [" + std::to_string(guid) + "]"); + } + else + { + return DefaultRange(); + } } return search->second.at(idx); } +void RangeTracker::RefineMin(LayerGuid guid, unsigned int idx, float newMin) +{ + auto& currentMin = m_GuidToRangesMap.find(guid)->second.at(idx).first; + if (newMin < currentMin) + { + currentMin = newMin; + } +} + +void RangeTracker::RefineMax(LayerGuid guid, unsigned int idx, float newMax) +{ + auto& currentMax = m_GuidToRangesMap.find(guid)->second.at(idx).second; + if (newMax > currentMax) + { + currentMax = newMax; + } +} + +void RangeTracker::ResetMinMax(LayerGuid guid, unsigned int idx, float newMin, float newMax) +{ + auto minMaxPair = m_GuidToRangesMap.find(guid); + auto& currentMin = minMaxPair->second.at(idx).first; + auto& currentMax = minMaxPair->second.at(idx).second; + + currentMin = newMin; + currentMax = newMax; +} + +void RangeTracker::Reset() +{ + m_GuidToRangesMap.clear(); +} + } //namespace armnn \ No newline at end of file diff --git a/src/armnn/RangeTracker.hpp b/src/armnn/RangeTracker.hpp index 2e8b33ad0f..6662775e38 100644 --- a/src/armnn/RangeTracker.hpp +++ b/src/armnn/RangeTracker.hpp @@ -31,6 +31,21 @@ public: /// Query that there is an entry for a layer bool HasRanges(LayerGuid guid) const { return m_GuidToRangesMap.find(guid) != m_GuidToRangesMap.end(); } + /// Update min in RangeTracker with new_min if it is lower than current value + void RefineMin(LayerGuid guid, unsigned int slotIndex, float newMin); + + /// Update max in RangeTracker with new_max if it is greater than current value + void RefineMax(LayerGuid guid, unsigned int slotIndex, float 
newMax); + + /// Overwrite min and max in RangeTracker with newMin and newMax + void ResetMinMax(LayerGuid guid, unsigned int idx, float newMin, float newMax); + + void Reset(); + + void SetDynamicMode(bool flag) { m_DynamicMode = flag; } + + bool IsInDynamicMode() const { return m_DynamicMode; } + private: using MinMaxRanges = std::vector; @@ -39,6 +54,8 @@ private: /// Mapping from a layer Guid to an array of ranges for outputs std::unordered_map m_GuidToRangesMap; + + bool m_DynamicMode = false; }; } //namespace armnn \ No newline at end of file diff --git a/src/armnn/StaticRangeVisitor.hpp b/src/armnn/StaticRangeVisitor.hpp index 5a16e184d6..8f2e698a7e 100644 --- a/src/armnn/StaticRangeVisitor.hpp +++ b/src/armnn/StaticRangeVisitor.hpp @@ -9,7 +9,7 @@ #include "RangeTracker.hpp" #include -#include +#include namespace armnn diff --git a/src/armnn/test/QuantizerTest.cpp b/src/armnn/test/QuantizerTest.cpp index 372d0ca002..259e90fcca 100644 --- a/src/armnn/test/QuantizerTest.cpp +++ b/src/armnn/test/QuantizerTest.cpp @@ -5,7 +5,7 @@ #include #include -#include +#include #include #include "armnn/LayerVisitorBase.hpp" @@ -15,6 +15,7 @@ #include "../OverrideInputRangeVisitor.hpp" #include "../RangeTracker.hpp" #include "../backends/backendsCommon/test/QuantizeHelper.hpp" +#include "../../armnnQuantizer/CommandLineProcessor.hpp" #include @@ -207,6 +208,95 @@ INetworkPtr CreateNetworkWithActivationLayer(const ActivationDescriptor& descrip return network; } +INetworkPtr CreateNetworkWithInputOutputLayers() +{ + INetworkPtr network = INetwork::Create(); + + // Add input/output layers + IConnectableLayer* inputLayer = network->AddInputLayer(0); + IConnectableLayer* output = network->AddOutputLayer(1); + + // Establish connections + inputLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0)); + + // Set TensorInfo + TensorShape shape{8U}; + TensorInfo info(shape, DataType::Float32); + inputLayer->GetOutputSlot(0).SetTensorInfo(info); + + return network; +} + +TensorInfo 
GetInputTensorInfo(const Network* network) +{ + for (auto&& inputLayer : network->GetGraph().GetInputLayers()) + { + BOOST_ASSERT_MSG(inputLayer->GetNumOutputSlots() == 1, "Input layer should have exactly 1 output slot"); + return inputLayer->GetOutputSlot(0).GetTensorInfo(); + } + throw InvalidArgumentException("Network has no input layers"); +} + +BOOST_AUTO_TEST_CASE(InputOutputLayerDynamicQuant) +{ + INetworkPtr network = CreateNetworkWithInputOutputLayers(); + + armnn::TensorInfo tensorInfo = GetInputTensorInfo(boost::polymorphic_downcast(network.get())); + + // Outliers -56 and 98 + std::vector inputData({0, 0, 0, -56, 98, 0, 0, 0}); + armnn::ConstTensor inputTensor(tensorInfo, inputData.data()); + + InputTensors inputTensors; + inputTensors.push_back(std::make_pair(0, inputTensor)); + + armnn::INetworkQuantizerPtr quantizer = armnn::INetworkQuantizer::Create(network.get()); + + quantizer->Refine(inputTensors); + + // Outliers -77 and 65 + std::vector inputData2({0, -77, 0, -56, 65, 0, 0, 0}); + armnn::ConstTensor inputTensor2(tensorInfo, inputData2.data()); + InputTensors inputTensors2; + inputTensors2.push_back(std::make_pair(0, inputTensor2)); + + quantizer->Refine(inputTensors2); + + INetworkPtr quantizedNetwork = quantizer->ExportNetwork(); + // Output Layer should be quantized for a min max of -77 and 98 + // according to QAsymm8 Quantization Scheme + std::unique_ptr quantizationScheme = std::make_unique(); + OffsetScalePair qParams = quantizationScheme->ComputeScheme(-77.0, 98.0); + + class TestOutputLayerVisitor : public LayerVisitorBase + { + public: + TestOutputLayerVisitor(const OffsetScalePair& offsetScalePair, const DataType& dataType) : + m_OffsetScalePair(offsetScalePair), m_DataType(dataType) {} + + void VisitOutputLayer(const IConnectableLayer* layer, + LayerBindingId id, + const char* name = nullptr) override + { + const TensorInfo& info = layer->GetInputSlot(0).GetConnection()->GetTensorInfo(); + BOOST_CHECK_MESSAGE(info.GetDataType() == 
m_DataType, +                            std::string(armnn::GetDataTypeName(info.GetDataType())) +                            .append(" == ").append(armnn::GetDataTypeName(m_DataType))); +        // int32_t +        BOOST_CHECK(info.GetQuantizationOffset() == m_OffsetScalePair.second); +        // float +        BOOST_TEST(info.GetQuantizationScale() == m_OffsetScalePair.first, boost::test_tools::tolerance(0.001)); +    } + +private: +    const OffsetScalePair m_OffsetScalePair; +    const DataType m_DataType; +    }; + +    TestOutputLayerVisitor visitor(qParams, quantizationScheme->GetDataType()); +    quantizedNetwork->Accept(visitor); +} + BOOST_AUTO_TEST_CASE(QuantizeAbsActivation) { ActivationDescriptor descriptor; diff --git a/src/armnnQuantizer/ArmNNQuantizerMain.cpp b/src/armnnQuantizer/ArmNNQuantizerMain.cpp index 103597a72d..96d6515ba0 100644 --- a/src/armnnQuantizer/ArmNNQuantizerMain.cpp +++ b/src/armnnQuantizer/ArmNNQuantizerMain.cpp @@ -5,7 +5,7 @@ #include "CommandLineProcessor.hpp" #include -#include +#include #include #include "QuantizationDataSet.hpp" #include "QuantizationInput.hpp" @@ -52,7 +52,7 @@ int main(int argc, char* argv[]) armnnQuantizer::InputLayerVisitor inputLayerVisitor; network->Accept(inputLayerVisitor); -    for(armnnQuantizer::QuantizationInput quantizationInput : dataSet) +    for (armnnQuantizer::QuantizationInput quantizationInput : dataSet) { armnn::InputTensors inputTensors; std::vector> inputData(quantizationInput.GetNumberOfInputs()); diff --git a/src/armnnQuantizer/QuantizationInput.hpp b/src/armnnQuantizer/QuantizationInput.hpp index ebabdd704f..1bfe84bc19 100644 --- a/src/armnnQuantizer/QuantizationInput.hpp +++ b/src/armnnQuantizer/QuantizationInput.hpp @@ -7,7 +7,7 @@ #include #include -#include +#include namespace armnnQuantizer { diff --git a/src/armnnUtils/TensorUtils.cpp b/src/armnnUtils/TensorUtils.cpp index 57f823fe13..c2fbbe0bcc 100644 --- a/src/armnnUtils/TensorUtils.cpp +++ b/src/armnnUtils/TensorUtils.cpp @@ -4,6 +4,7 @@ // #include "TensorUtils.hpp" +#include namespace armnnUtils { @@ -47,4 +48,31 @@ 
armnn::TensorInfo GetTensorInfo(unsigned int numberOfBatches, } } +std::pair FindMinMax(armnn::ITensorHandle* tensorHandle) +{ + auto tensor_data = static_cast(tensorHandle->Map(true)); + auto tensor_size = tensorHandle->GetShape().GetNumElements(); + + // Set min/max initially to first value in tensor + float min = tensor_data[0]; + float max = tensor_data[0]; + + // Loop over rest of tensor and update min/max if necessary + for (unsigned int val = 1; val < tensor_size; val++) + { + if (tensor_data[val] < min) + { + min = tensor_data[val]; + } + else if (tensor_data[val] > max) + { + max = tensor_data[val]; + } + } + + tensorHandle->Unmap(); + + return std::make_pair(min, max); +} + } diff --git a/src/armnnUtils/TensorUtils.hpp b/src/armnnUtils/TensorUtils.hpp index fb5e6eb10d..c273b497b3 100644 --- a/src/armnnUtils/TensorUtils.hpp +++ b/src/armnnUtils/TensorUtils.hpp @@ -22,4 +22,6 @@ armnn::TensorInfo GetTensorInfo(unsigned int numberOfBatches, const armnn::DataLayout dataLayout, const armnn::DataType dataType); +std::pair FindMinMax(armnn::ITensorHandle* tensorHandle); + } // namespace armnnUtils -- cgit v1.2.1