diff options
author | Ryan OShea <Ryan.OShea2@arm.com> | 2020-02-07 10:06:33 +0000 |
---|---|---|
committer | Jan Eilers <jan.eilers@arm.com> | 2020-02-07 17:51:59 +0000 |
commit | 9add1200f5840e263115b48e17a6397ce3ae2d74 (patch) | |
tree | 6de2960e9d8c1f701f05cae6240abba674226416 /src/armnn/NetworkQuantizationScheme.hpp | |
parent | 13b59a4fbb0cc719f9388bcb7093a85e4741deb9 (diff) | |
download | armnn-9add1200f5840e263115b48e17a6397ce3ae2d74.tar.gz |
IVGCVSW-4386 Add ArmNN reference support for QAsymmS8
* Added Quantization Scheme for QAsymmS8
* Added Unit Tests for QAsymmS8
* Renamed QAsymm8 calls to QAsymmU8
Signed-off-by: Ryan OShea <Ryan.OShea2@arm.com>
Change-Id: I897b4e018ba1d808cc3f8c113f2be2dbad49c8db
Diffstat (limited to 'src/armnn/NetworkQuantizationScheme.hpp')
-rw-r--r-- | src/armnn/NetworkQuantizationScheme.hpp | 42 |
1 files changed, 37 insertions, 5 deletions
diff --git a/src/armnn/NetworkQuantizationScheme.hpp b/src/armnn/NetworkQuantizationScheme.hpp
index a5b7542748..a78fd725b4 100644
--- a/src/armnn/NetworkQuantizationScheme.hpp
+++ b/src/armnn/NetworkQuantizationScheme.hpp
@@ -26,7 +26,7 @@ struct IQuantizationScheme
     virtual ~IQuantizationScheme() {}
 };
 
-struct QAsymm8QuantizationScheme : IQuantizationScheme
+struct QAsymmU8QuantizationScheme : IQuantizationScheme
 {
     OffsetScalePair ComputeScheme(double min, double max) const override
     {
@@ -61,6 +61,42 @@ struct QAsymm8QuantizationScheme : IQuantizationScheme
     DataType GetDataType() const override { return DataType::QAsymmU8; }
 };
 
+struct QAsymmS8QuantizationScheme : IQuantizationScheme
+{
+    OffsetScalePair ComputeScheme(double min, double max) const override
+    {
+        if (min > max)
+        {
+            throw InvalidArgumentException("min > max will result in invalid quantization.");
+        }
+
+        double highest = (1 << NumBits()) - 1;
+
+        min = std::min(0.0, min); // min <= 0.0
+        max = std::max(0.0, max); // max >= 0.0
+
+        // To avoid dividing by zero when quantizing a zero filled tensor
+        if (min == 0.0 && max == 0.0)
+        {
+            max = 1.0;
+        }
+
+        // Assumes quantization range [0-255]
+        double scale = (max-min) / highest ;
+        double offset = - min / scale;
+
+        //Clamp 0 to Highest
+        offset = std::max(0.0, std::min(highest, offset));
+
+        //-128 on offset to cast to signed range
+        return std::make_pair(static_cast<float>(scale), static_cast<int>(std::round(offset)-128));
+    }
+
+    int NumBits() const override { return 8; }
+
+    DataType GetDataType() const override { return DataType::QAsymmS8; }
+};
+
 struct QSymmS8QuantizationScheme : IQuantizationScheme
 {
     OffsetScalePair ComputeScheme(double min, double max) const override
@@ -109,10 +145,6 @@ struct QSymm16QuantizationScheme : IQuantizationScheme
         double extent = std::max(std::abs(min), std::abs(max));
         double scale = extent / highest;
 
-        if(scale == 0.000457777642)
-        {
-            return std::make_pair(static_cast<float>(scale), 0);
-        }
         return std::make_pair(static_cast<float>(scale), 0);
     }
 