From 7ac07f355f4cb75a54ec423670b7078bd0ecb44d Mon Sep 17 00:00:00 2001 From: Nattapat Chaimanowong Date: Wed, 20 Mar 2019 11:51:14 +0000 Subject: IVGCVSW-2858 Add support for QSymm16 quantization Change-Id: Ia7c305c30c39ec0e9db447a461479be17fde250c Signed-off-by: Nattapat Chaimanowong --- src/armnn/NetworkQuantizationScheme.hpp | 80 +++++++++++++++++++++++++++++++++ src/armnn/NetworkQuantizer.cpp | 23 +++++++--- src/armnn/NetworkQuantizer.hpp | 6 ++- src/armnn/NetworkQuantizerUtils.cpp | 28 +++--------- src/armnn/NetworkQuantizerUtils.hpp | 10 +++-- src/armnn/QuantizerVisitor.cpp | 7 +-- src/armnn/QuantizerVisitor.hpp | 5 ++- src/armnn/test/QuantizerTest.cpp | 7 ++- 8 files changed, 127 insertions(+), 39 deletions(-) create mode 100644 src/armnn/NetworkQuantizationScheme.hpp (limited to 'src/armnn') diff --git a/src/armnn/NetworkQuantizationScheme.hpp b/src/armnn/NetworkQuantizationScheme.hpp new file mode 100644 index 0000000000..065205dada --- /dev/null +++ b/src/armnn/NetworkQuantizationScheme.hpp @@ -0,0 +1,80 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +#include +#include + +namespace armnn +{ + +using OffsetScalePair = std::pair<float, int>; + +struct IQuantizationScheme +{ + virtual OffsetScalePair ComputeScheme(double min, double max) const = 0; + + virtual int NumBits() const = 0; + + virtual DataType GetDataType() const = 0; + + virtual ~IQuantizationScheme() {} +}; + +struct QAsymm8QuantizationScheme : IQuantizationScheme +{ + OffsetScalePair ComputeScheme(double min, double max) const override + { + if (min >= max) + { + throw InvalidArgumentException("min >= max will result in invalid quantization."); + } + + double highest = (1 << NumBits()) - 1; + + min = std::min(0.0, min); // min <= 0.0 + max = std::max(0.0, max); // max >= 0.0 + + // Assumes quantization range [0-highest] + double scale = (max-min) / highest; + double offset = -min / scale; + + // Clamp offset [0-highest] + offset = std::max(0.0, std::min(highest, offset)); + + return std::make_pair(static_cast<float>(scale), static_cast<int>(std::round(offset))); + } + + int NumBits() const override { return 8; } + + DataType GetDataType() const override { return DataType::QuantisedAsymm8; } +}; + +struct QSymm16QuantizationScheme : IQuantizationScheme +{ + OffsetScalePair ComputeScheme(double min, double max) const override + { + if (min >= max) + { + throw InvalidArgumentException("min >= max will result in invalid quantization."); + } + + double highest = (1 << (NumBits()-1)) - 1; // (numbits-1) accounts for the sign bit + + double extent = std::max(std::abs(min), std::abs(max)); + double scale = extent / highest; + + return std::make_pair(static_cast<float>(scale), 0); + } + + int NumBits() const override { return 16; } + + DataType GetDataType() const override { return DataType::QuantisedSymm16; } +}; + +} // namespace armnn diff --git a/src/armnn/NetworkQuantizer.cpp b/src/armnn/NetworkQuantizer.cpp index bf5c9ef0f2..f577aea00e 100644 --- a/src/armnn/NetworkQuantizer.cpp +++ b/src/armnn/NetworkQuantizer.cpp 
@@ -24,14 +24,14 @@ namespace armnn { -INetworkQuantizer* INetworkQuantizer::CreateRaw(INetwork* inputNetwork) +INetworkQuantizer* INetworkQuantizer::CreateRaw(INetwork* inputNetwork, const QuantizerOptions& options) { - return new NetworkQuantizer(inputNetwork); + return new NetworkQuantizer(inputNetwork, options); } -INetworkQuantizerPtr INetworkQuantizer::Create(INetwork* inputNetwork) +INetworkQuantizerPtr INetworkQuantizer::Create(INetwork* inputNetwork, const QuantizerOptions& options) { - return INetworkQuantizerPtr(CreateRaw(inputNetwork), &INetworkQuantizer::Destroy); + return INetworkQuantizerPtr(CreateRaw(inputNetwork, options), &INetworkQuantizer::Destroy); } void INetworkQuantizer::Destroy(INetworkQuantizer *quantizer) @@ -58,7 +58,20 @@ INetworkPtr NetworkQuantizer::ExportNetwork() VisitLayers(graph, rangeVisitor); // Step 2) Convert input InputNetwork to Quantized InputNetwork - QuantizerVisitor quantizerVisitor(m_Ranges); + std::unique_ptr<IQuantizationScheme> quantizationScheme; + switch (m_Options.m_ActivationFormat) + { + case DataType::QuantisedAsymm8: + quantizationScheme = std::make_unique<QAsymm8QuantizationScheme>(); + break; + case DataType::QuantisedSymm16: + quantizationScheme = std::make_unique<QSymm16QuantizationScheme>(); + break; + default: + throw InvalidArgumentException("Unsupported quantization target"); + } + + QuantizerVisitor quantizerVisitor(m_Ranges, quantizationScheme.get()); VisitLayers(graph, quantizerVisitor); return quantizerVisitor.RetrieveFinalNetwork(); diff --git a/src/armnn/NetworkQuantizer.hpp b/src/armnn/NetworkQuantizer.hpp index 5b87851edf..5e93f70290 100644 --- a/src/armnn/NetworkQuantizer.hpp +++ b/src/armnn/NetworkQuantizer.hpp @@ -17,7 +17,8 @@ namespace armnn class NetworkQuantizer : public INetworkQuantizer { public: - NetworkQuantizer(INetwork* inputNetwork) : m_InputNetwork(inputNetwork) {} + NetworkQuantizer(INetwork* inputNetwork, const QuantizerOptions& options) + : m_InputNetwork(inputNetwork), m_Options(options) {} void OverrideInputRange(LayerBindingId layerId, float min, 
float max) override; INetworkPtr ExportNetwork() override; @@ -28,6 +29,9 @@ private: /// Mapping from Guid to an array of ranges for outputs RangeTracker m_Ranges; + + /// Options for the NetworkQuantizer + QuantizerOptions m_Options; }; } //namespace armnn diff --git a/src/armnn/NetworkQuantizerUtils.cpp b/src/armnn/NetworkQuantizerUtils.cpp index 551760f362..a6f9ebdc42 100644 --- a/src/armnn/NetworkQuantizerUtils.cpp +++ b/src/armnn/NetworkQuantizerUtils.cpp @@ -12,24 +12,6 @@ namespace armnn { -std::pair<float, int> ComputeQAsymmParams(int numBits, double min, double max) -{ - BOOST_ASSERT_MSG(min < max, "min >= max will result in invalid quantization."); - double highest = (1 << numBits) - 1; - - min = std::min(0.0, min); // min <= 0.0 - max = std::max(0.0, max); // max >= 0.0 - - // Assumes quantization range [0-highest] - double scale = (max-min) / highest; - double offset = -min / scale; - - // Clamp offset [0-highest] - offset = std::max(0.0, std::min(highest, offset)); - - return std::make_pair(static_cast<float>(scale), static_cast<int>(std::round(offset))); -} - ConstTensor CreateQuantizedConst(const ConstTensor& tensor, std::vector<uint8_t>& backing) { float scale = 0.0f; @@ -43,11 +25,11 @@ ConstTensor CreateQuantizedConst(const ConstTensor& tensor, std::vector { case DataType::Float32: { - Quantize(static_cast<const float*>(tensor.GetMemoryArea()), - backing.data(), - backing.size(), - scale, - offset); + QuantizeConstant(static_cast<const float*>(tensor.GetMemoryArea()), + backing.data(), + backing.size(), + scale, + offset); } break; default: diff --git a/src/armnn/NetworkQuantizerUtils.hpp b/src/armnn/NetworkQuantizerUtils.hpp index c23517e385..26f67f95b2 100644 --- a/src/armnn/NetworkQuantizerUtils.hpp +++ b/src/armnn/NetworkQuantizerUtils.hpp @@ -5,6 +5,8 @@ #pragma once +#include "NetworkQuantizationScheme.hpp" + #include #include #include @@ -17,10 +19,8 @@ namespace armnn { -std::pair<float, int> ComputeQAsymmParams(int numBits, double min, double max); - template<typename srcType> -void Quantize(const srcType* src, uint8_t* dst, 
size_t numElements, float& scale, int& offset) +void QuantizeConstant(const srcType* src, uint8_t* dst, size_t numElements, float& scale, int& offset) { BOOST_ASSERT(src); BOOST_ASSERT(dst); @@ -33,9 +33,11 @@ void Quantize(const srcType* src, uint8_t* dst, size_t numElements, float& scale max = std::max(max, src[i]); } - auto qParams = ComputeQAsymmParams(8, min, max); + QAsymm8QuantizationScheme quantizationScheme; + OffsetScalePair qParams = quantizationScheme.ComputeScheme(min, max); scale = qParams.first; offset = qParams.second; + for (size_t i = 0; i < numElements; ++i) { dst[i] = armnn::Quantize(src[i], scale, offset); diff --git a/src/armnn/QuantizerVisitor.cpp b/src/armnn/QuantizerVisitor.cpp index 110594c1ab..95f7c50735 100644 --- a/src/armnn/QuantizerVisitor.cpp +++ b/src/armnn/QuantizerVisitor.cpp @@ -11,9 +11,10 @@ namespace armnn { -QuantizerVisitor::QuantizerVisitor(const RangeTracker& rangeTracker) +QuantizerVisitor::QuantizerVisitor(const RangeTracker& rangeTracker, const IQuantizationScheme* quantizationScheme) : m_Ranges(rangeTracker) , m_QuantizedNetwork(INetwork::Create()) + , m_QuantizationScheme(quantizationScheme) { } @@ -45,11 +46,11 @@ void QuantizerVisitor::SetQuantizedInputConnections(const IConnectableLayer* src // Fetch the min/max ranges that were computed earlier auto range = m_Ranges.GetRange(layerToFind.GetGuid(), slotIdx); - auto qParams = ComputeQAsymmParams(8, range.first, range.second); + OffsetScalePair qParams = m_QuantizationScheme->ComputeScheme(range.first, range.second); // Set the quantization params TensorInfo info(newOutputSlot.GetTensorInfo()); - info.SetDataType(DataType::QuantisedAsymm8); + info.SetDataType(m_QuantizationScheme->GetDataType()); info.SetQuantizationOffset(qParams.second); info.SetQuantizationScale(qParams.first); newOutputSlot.SetTensorInfo(info); diff --git a/src/armnn/QuantizerVisitor.hpp b/src/armnn/QuantizerVisitor.hpp index 2682663047..1751229435 100644 --- a/src/armnn/QuantizerVisitor.hpp +++ 
b/src/armnn/QuantizerVisitor.hpp @@ -7,6 +7,7 @@ #include "armnn/LayerVisitorBase.hpp" #include "StaticRangeVisitor.hpp" +#include "NetworkQuantizationScheme.hpp" #include #include @@ -24,7 +25,7 @@ class StaticRangeVisitor; class QuantizerVisitor : public LayerVisitorBase { public: - QuantizerVisitor(const RangeTracker& rangeTracker); + QuantizerVisitor(const RangeTracker& rangeTracker, const IQuantizationScheme* quantizationScheme); ~QuantizerVisitor() = default; /// Functions to quantize the individual layers, overridden from ILayerVisitor @@ -129,6 +130,8 @@ private: /// Mapping from guid to layer in quantized network std::unordered_map m_QuantizedGuidToLayerMap; + + const IQuantizationScheme* m_QuantizationScheme; }; } //namespace armnn diff --git a/src/armnn/test/QuantizerTest.cpp b/src/armnn/test/QuantizerTest.cpp index f7723bd0c0..eead9b7f36 100644 --- a/src/armnn/test/QuantizerTest.cpp +++ b/src/armnn/test/QuantizerTest.cpp @@ -9,8 +9,9 @@ #include #include "armnn/LayerVisitorBase.hpp" -#include "../Network.hpp" #include "../Graph.hpp" +#include "../Network.hpp" +#include "../NetworkQuantizationScheme.hpp" #include "../NetworkQuantizerUtils.hpp" #include "../OverrideInputRangeVisitor.hpp" #include "../RangeTracker.hpp" @@ -997,7 +998,9 @@ BOOST_AUTO_TEST_CASE(QuantizeMerger) const OriginsDescriptor& mergerDescriptor, const char* name = nullptr) { - std::pair expectedValues = ComputeQAsymmParams(8, m_Min, m_Max); + + QAsymm8QuantizationScheme quantizationScheme; + OffsetScalePair expectedValues = quantizationScheme.ComputeScheme(m_Min, m_Max); TensorInfo info = layer->GetOutputSlot(0).GetTensorInfo(); -- cgit v1.2.1