author    Nattapat Chaimanowong <nattapat.chaimanowong@arm.com>  2019-03-20 11:51:14 +0000
committer nattapat.chaimanowong <nattapat.chaimanowong@arm.com>  2019-03-20 14:49:03 +0000
commit    7ac07f355f4cb75a54ec423670b7078bd0ecb44d (patch)
tree      5f28c73decbfe0221c2ecedc204f48a7c00884f0
parent    2a434a8a23d75fb62ac0cb3ecb83ba7aab89b8c6 (diff)
download  armnn-7ac07f355f4cb75a54ec423670b7078bd0ecb44d.tar.gz
IVGCVSW-2858 Add support for QSymm16 quantization
Change-Id: Ia7c305c30c39ec0e9db447a461479be17fde250c
Signed-off-by: Nattapat Chaimanowong <nattapat.chaimanowong@arm.com>
-rw-r--r--  CMakeLists.txt                            1
-rw-r--r--  include/armnn/INetworkQuantizer.hpp      20
-rw-r--r--  src/armnn/NetworkQuantizationScheme.hpp  80
-rw-r--r--  src/armnn/NetworkQuantizer.cpp           23
-rw-r--r--  src/armnn/NetworkQuantizer.hpp            6
-rw-r--r--  src/armnn/NetworkQuantizerUtils.cpp      28
-rw-r--r--  src/armnn/NetworkQuantizerUtils.hpp      10
-rw-r--r--  src/armnn/QuantizerVisitor.cpp            7
-rw-r--r--  src/armnn/QuantizerVisitor.hpp            5
-rw-r--r--  src/armnn/test/QuantizerTest.cpp          7
10 files changed, 145 insertions, 42 deletions
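
For context, a minimal usage sketch of the extended API (not part of this patch): the new QuantizerOptions struct selects the activation data type, defaulting to QuantisedAsymm8 so existing callers are unaffected. The floatNetwork pointer below is a hypothetical INetwork* holding a trained float32 model.

    #include <armnn/INetworkQuantizer.hpp>

    // Request symmetric 16-bit quantization for activations.
    armnn::QuantizerOptions options(armnn::DataType::QuantisedSymm16);
    armnn::INetworkQuantizerPtr quantizer =
        armnn::INetworkQuantizer::Create(floatNetwork, options);

    // Optionally refine the range of an input layer before exporting.
    quantizer->OverrideInputRange(0, -1.0f, 1.0f);

    armnn::INetworkPtr quantizedNetwork = quantizer->ExportNetwork();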
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 99f00d8e9c..fe1c1a71c9 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -297,6 +297,7 @@ list(APPEND armnn_sources
src/armnn/LoadedNetwork.hpp
src/armnn/Network.cpp
src/armnn/Network.hpp
+ src/armnn/NetworkQuantizationScheme.hpp
src/armnn/NetworkQuantizer.cpp
src/armnn/NetworkQuantizer.hpp
src/armnn/NetworkQuantizerUtils.cpp
diff --git a/include/armnn/INetworkQuantizer.hpp b/include/armnn/INetworkQuantizer.hpp
index 5969fa4edf..54c1c889d3 100644
--- a/include/armnn/INetworkQuantizer.hpp
+++ b/include/armnn/INetworkQuantizer.hpp
@@ -6,19 +6,33 @@
#pragma once
#include <armnn/INetwork.hpp>
+#include <armnn/Types.hpp>
namespace armnn
{
+struct QuantizerOptions
+{
+ QuantizerOptions() : m_ActivationFormat(DataType::QuantisedAsymm8) {}
+ QuantizerOptions(DataType activationFormat) : m_ActivationFormat(activationFormat) {}
+
+ DataType m_ActivationFormat;
+};
+
using INetworkQuantizerPtr = std::unique_ptr<class INetworkQuantizer, void(*)(INetworkQuantizer* quantizer)>;
/// Quantizer class Quantizes a float32 InputNetwork
class INetworkQuantizer
{
public:
- static INetworkQuantizer* CreateRaw(INetwork* inputNetwork); ///< Create Quantizer object and return raw pointer
- static INetworkQuantizerPtr Create(INetwork* inputNetwork); ///< Create Quantizer object wrapped in unique_ptr
- static void Destroy(INetworkQuantizer* quantizer); ///< Destroy Quantizer object
+ /// Create Quantizer object and return raw pointer
+ static INetworkQuantizer* CreateRaw(INetwork* inputNetwork, const QuantizerOptions& options = QuantizerOptions());
+
+ /// Create Quantizer object wrapped in unique_ptr
+ static INetworkQuantizerPtr Create(INetwork* inputNetwork, const QuantizerOptions& options = QuantizerOptions());
+
+ /// Destroy Quantizer object
+ static void Destroy(INetworkQuantizer* quantizer);
/// Overrides the default quantization values for the input layer with the given id
virtual void OverrideInputRange(LayerBindingId layerId, float min, float max) = 0;
diff --git a/src/armnn/NetworkQuantizationScheme.hpp b/src/armnn/NetworkQuantizationScheme.hpp
new file mode 100644
index 0000000000..065205dada
--- /dev/null
+++ b/src/armnn/NetworkQuantizationScheme.hpp
@@ -0,0 +1,80 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <armnn/Types.hpp>
+
+#include <cmath>
+#include <algorithm>
+
+namespace armnn
+{
+
+using OffsetScalePair = std::pair<float, int>;
+
+struct IQuantizationScheme
+{
+ virtual OffsetScalePair ComputeScheme(double min, double max) const = 0;
+
+ virtual int NumBits() const = 0;
+
+ virtual DataType GetDataType() const = 0;
+
+ virtual ~IQuantizationScheme() {}
+};
+
+struct QAsymm8QuantizationScheme : IQuantizationScheme
+{
+ OffsetScalePair ComputeScheme(double min, double max) const override
+ {
+ if (min >= max)
+ {
+ throw InvalidArgumentException("min >= max will result in invalid quantization.");
+ }
+
+ double highest = (1 << NumBits()) - 1;
+
+ min = std::min(0.0, min); // min <= 0.0
+ max = std::max(0.0, max); // max >= 0.0
+
+ // Assumes quantization range [0-highest]
+ double scale = (max-min) / highest;
+ double offset = -min / scale;
+
+ // Clamp offset [0-highest]
+ offset = std::max(0.0, std::min(highest, offset));
+
+ return std::make_pair(static_cast<float>(scale), static_cast<int>(std::round(offset)));
+ }
+
+ int NumBits() const override { return 8; }
+
+ DataType GetDataType() const override { return DataType::QuantisedAsymm8; }
+};
+
+struct QSymm16QuantizationScheme : IQuantizationScheme
+{
+ OffsetScalePair ComputeScheme(double min, double max) const override
+ {
+ if (min >= max)
+ {
+ throw InvalidArgumentException("min >= max will result in invalid quantization.");
+ }
+
+ double highest = (1 << (NumBits()-1)) - 1; // (numbits-1) accounts for the sign bit
+
+ double extent = std::max(std::abs(min), std::abs(max));
+ double scale = extent / highest;
+
+ return std::make_pair(static_cast<float>(scale), 0);
+ }
+
+ int NumBits() const override { return 16; }
+
+ DataType GetDataType() const override { return DataType::QuantisedSymm16; }
+};
+
+} // namespace armnn
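
To illustrate the two schemes, a worked sketch (not part of the patch) with an assumed observed range of [-1.0, 2.0]: the asymmetric 8-bit scheme maps the full range onto [0, 255] with a zero-point offset, while the symmetric 16-bit scheme scales the larger absolute extent onto the signed range and always uses offset 0.

    #include "NetworkQuantizationScheme.hpp"

    armnn::QAsymm8QuantizationScheme asymm8;
    auto p8 = asymm8.ComputeScheme(-1.0, 2.0);
    // scale  = (2.0 - (-1.0)) / 255 = 3/255 ~ 0.01176
    // offset = round(1.0 / 0.01176) = 85

    armnn::QSymm16QuantizationScheme symm16;
    auto p16 = symm16.ComputeScheme(-1.0, 2.0);
    // scale  = max(|-1.0|, |2.0|) / 32767 = 2/32767 ~ 6.1e-05
    // offset = 0 (the symmetric scheme has no zero-point shift)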
diff --git a/src/armnn/NetworkQuantizer.cpp b/src/armnn/NetworkQuantizer.cpp
index bf5c9ef0f2..f577aea00e 100644
--- a/src/armnn/NetworkQuantizer.cpp
+++ b/src/armnn/NetworkQuantizer.cpp
@@ -24,14 +24,14 @@
namespace armnn
{
-INetworkQuantizer* INetworkQuantizer::CreateRaw(INetwork* inputNetwork)
+INetworkQuantizer* INetworkQuantizer::CreateRaw(INetwork* inputNetwork, const QuantizerOptions& options)
{
- return new NetworkQuantizer(inputNetwork);
+ return new NetworkQuantizer(inputNetwork, options);
}
-INetworkQuantizerPtr INetworkQuantizer::Create(INetwork* inputNetwork)
+INetworkQuantizerPtr INetworkQuantizer::Create(INetwork* inputNetwork, const QuantizerOptions& options)
{
- return INetworkQuantizerPtr(CreateRaw(inputNetwork), &INetworkQuantizer::Destroy);
+ return INetworkQuantizerPtr(CreateRaw(inputNetwork, options), &INetworkQuantizer::Destroy);
}
void INetworkQuantizer::Destroy(INetworkQuantizer *quantizer)
@@ -58,7 +58,20 @@ INetworkPtr NetworkQuantizer::ExportNetwork()
VisitLayers(graph, rangeVisitor);
// Step 2) Convert input InputNetwork to Quantized InputNetwork
- QuantizerVisitor quantizerVisitor(m_Ranges);
+ std::unique_ptr<IQuantizationScheme> quantizationScheme;
+ switch (m_Options.m_ActivationFormat)
+ {
+ case DataType::QuantisedAsymm8:
+ quantizationScheme = std::make_unique<QAsymm8QuantizationScheme>();
+ break;
+ case DataType::QuantisedSymm16:
+ quantizationScheme = std::make_unique<QSymm16QuantizationScheme>();
+ break;
+ default:
+ throw InvalidArgumentException("Unsupported quantization target");
+ }
+
+ QuantizerVisitor quantizerVisitor(m_Ranges, quantizationScheme.get());
VisitLayers(graph, quantizerVisitor);
return quantizerVisitor.RetrieveFinalNetwork();
diff --git a/src/armnn/NetworkQuantizer.hpp b/src/armnn/NetworkQuantizer.hpp
index 5b87851edf..5e93f70290 100644
--- a/src/armnn/NetworkQuantizer.hpp
+++ b/src/armnn/NetworkQuantizer.hpp
@@ -17,7 +17,8 @@ namespace armnn
class NetworkQuantizer : public INetworkQuantizer
{
public:
- NetworkQuantizer(INetwork* inputNetwork) : m_InputNetwork(inputNetwork) {}
+ NetworkQuantizer(INetwork* inputNetwork, const QuantizerOptions& options)
+ : m_InputNetwork(inputNetwork), m_Options(options) {}
void OverrideInputRange(LayerBindingId layerId, float min, float max) override;
INetworkPtr ExportNetwork() override;
@@ -28,6 +29,9 @@ private:
/// Mapping from Guid to an array of ranges for outputs
RangeTracker m_Ranges;
+
+ /// Options for the NetworkQuantizer
+ QuantizerOptions m_Options;
};
} //namespace armnn
diff --git a/src/armnn/NetworkQuantizerUtils.cpp b/src/armnn/NetworkQuantizerUtils.cpp
index 551760f362..a6f9ebdc42 100644
--- a/src/armnn/NetworkQuantizerUtils.cpp
+++ b/src/armnn/NetworkQuantizerUtils.cpp
@@ -12,24 +12,6 @@
namespace armnn
{
-std::pair<float, int> ComputeQAsymmParams(int numBits, double min, double max)
-{
- BOOST_ASSERT_MSG(min < max, "min >= max will result in invalid quantization.");
- double highest = (1 << numBits) - 1;
-
- min = std::min(0.0, min); // min <= 0.0
- max = std::max(0.0, max); // max >= 0.0
-
- // Assumes quantization range [0-highest]
- double scale = (max-min) / highest;
- double offset = -min / scale;
-
- // Clamp offset [0-highest]
- offset = std::max(0.0, std::min(highest, offset));
-
- return std::make_pair(static_cast<float>(scale), static_cast<int>(std::round(offset)));
-}
-
ConstTensor CreateQuantizedConst(const ConstTensor& tensor, std::vector<uint8_t>& backing)
{
float scale = 0.0f;
@@ -43,11 +25,11 @@ ConstTensor CreateQuantizedConst(const ConstTensor& tensor, std::vector<uint8_t>
{
case DataType::Float32:
{
- Quantize(static_cast<const float*>(tensor.GetMemoryArea()),
- backing.data(),
- backing.size(),
- scale,
- offset);
+ QuantizeConstant(static_cast<const float*>(tensor.GetMemoryArea()),
+ backing.data(),
+ backing.size(),
+ scale,
+ offset);
}
break;
default:
diff --git a/src/armnn/NetworkQuantizerUtils.hpp b/src/armnn/NetworkQuantizerUtils.hpp
index c23517e385..26f67f95b2 100644
--- a/src/armnn/NetworkQuantizerUtils.hpp
+++ b/src/armnn/NetworkQuantizerUtils.hpp
@@ -5,6 +5,8 @@
#pragma once
+#include "NetworkQuantizationScheme.hpp"
+
#include <armnn/Tensor.hpp>
#include <armnn/TypesUtils.hpp>
#include <armnn/ILayerVisitor.hpp>
@@ -17,10 +19,8 @@
namespace armnn
{
-std::pair<float, int> ComputeQAsymmParams(int numBits, double min, double max);
-
template<typename srcType>
-void Quantize(const srcType* src, uint8_t* dst, size_t numElements, float& scale, int& offset)
+void QuantizeConstant(const srcType* src, uint8_t* dst, size_t numElements, float& scale, int& offset)
{
BOOST_ASSERT(src);
BOOST_ASSERT(dst);
@@ -33,9 +33,11 @@ void Quantize(const srcType* src, uint8_t* dst, size_t numElements, float& scale
max = std::max(max, src[i]);
}
- auto qParams = ComputeQAsymmParams(8, min, max);
+ QAsymm8QuantizationScheme quantizationScheme;
+ OffsetScalePair qParams = quantizationScheme.ComputeScheme(min, max);
scale = qParams.first;
offset = qParams.second;
+
for (size_t i = 0; i < numElements; ++i)
{
dst[i] = armnn::Quantize<uint8_t>(src[i], scale, offset);
diff --git a/src/armnn/QuantizerVisitor.cpp b/src/armnn/QuantizerVisitor.cpp
index 110594c1ab..95f7c50735 100644
--- a/src/armnn/QuantizerVisitor.cpp
+++ b/src/armnn/QuantizerVisitor.cpp
@@ -11,9 +11,10 @@
namespace armnn
{
-QuantizerVisitor::QuantizerVisitor(const RangeTracker& rangeTracker)
+QuantizerVisitor::QuantizerVisitor(const RangeTracker& rangeTracker, const IQuantizationScheme* quantizationScheme)
: m_Ranges(rangeTracker)
, m_QuantizedNetwork(INetwork::Create())
+ , m_QuantizationScheme(quantizationScheme)
{
}
@@ -45,11 +46,11 @@ void QuantizerVisitor::SetQuantizedInputConnections(const IConnectableLayer* src
// Fetch the min/max ranges that were computed earlier
auto range = m_Ranges.GetRange(layerToFind.GetGuid(), slotIdx);
- auto qParams = ComputeQAsymmParams(8, range.first, range.second);
+ OffsetScalePair qParams = m_QuantizationScheme->ComputeScheme(range.first, range.second);
// Set the quantization params
TensorInfo info(newOutputSlot.GetTensorInfo());
- info.SetDataType(DataType::QuantisedAsymm8);
+ info.SetDataType(m_QuantizationScheme->GetDataType());
info.SetQuantizationOffset(qParams.second);
info.SetQuantizationScale(qParams.first);
newOutputSlot.SetTensorInfo(info);
diff --git a/src/armnn/QuantizerVisitor.hpp b/src/armnn/QuantizerVisitor.hpp
index 2682663047..1751229435 100644
--- a/src/armnn/QuantizerVisitor.hpp
+++ b/src/armnn/QuantizerVisitor.hpp
@@ -7,6 +7,7 @@
#include "armnn/LayerVisitorBase.hpp"
#include "StaticRangeVisitor.hpp"
+#include "NetworkQuantizationScheme.hpp"
#include <armnn/INetwork.hpp>
#include <armnn/Types.hpp>
@@ -24,7 +25,7 @@ class StaticRangeVisitor;
class QuantizerVisitor : public LayerVisitorBase<VisitorNoThrowPolicy>
{
public:
- QuantizerVisitor(const RangeTracker& rangeTracker);
+ QuantizerVisitor(const RangeTracker& rangeTracker, const IQuantizationScheme* quantizationScheme);
~QuantizerVisitor() = default;
/// Functions to quantize the individual layers, overridden from ILayerVisitor
@@ -129,6 +130,8 @@ private:
/// Mapping from guid to layer in quantized network
std::unordered_map<LayerGuid, IConnectableLayer*> m_QuantizedGuidToLayerMap;
+
+ const IQuantizationScheme* m_QuantizationScheme;
};
} //namespace armnn
diff --git a/src/armnn/test/QuantizerTest.cpp b/src/armnn/test/QuantizerTest.cpp
index f7723bd0c0..eead9b7f36 100644
--- a/src/armnn/test/QuantizerTest.cpp
+++ b/src/armnn/test/QuantizerTest.cpp
@@ -9,8 +9,9 @@
#include <armnn/Types.hpp>
#include "armnn/LayerVisitorBase.hpp"
-#include "../Network.hpp"
#include "../Graph.hpp"
+#include "../Network.hpp"
+#include "../NetworkQuantizationScheme.hpp"
#include "../NetworkQuantizerUtils.hpp"
#include "../OverrideInputRangeVisitor.hpp"
#include "../RangeTracker.hpp"
@@ -997,7 +998,9 @@ BOOST_AUTO_TEST_CASE(QuantizeMerger)
const OriginsDescriptor& mergerDescriptor,
const char* name = nullptr)
{
- std::pair<float, int> expectedValues = ComputeQAsymmParams(8, m_Min, m_Max);
+
+ QAsymm8QuantizationScheme quantizationScheme;
+ OffsetScalePair expectedValues = quantizationScheme.ComputeScheme(m_Min, m_Max);
TensorInfo info = layer->GetOutputSlot(0).GetTensorInfo();