diff options
author | Finn Williams <Finn.Williams@arm.com> | 2019-12-04 14:27:27 +0000 |
---|---|---|
committer | Jim Flynn Arm <jim.flynn@arm.com> | 2019-12-09 15:39:16 +0000 |
commit | fd2710651ada27fc82f28c07fb1e09effc3bda2d (patch) | |
tree | 7c2200489c7a3f845b91362c2c8d66ab9c6101e8 /src/armnn/NetworkQuantizationScheme.hpp | |
parent | 6a5e5e8b7e56f927d70ced3203d6e16df3fdd189 (diff) | |
download | armnn-fd2710651ada27fc82f28c07fb1e09effc3bda2d.tar.gz |
IVGCVSW-4211 Add Signed 8 bit Quantisation support into the Reference backend
!android-nn-driver:2435
Signed-off-by: Finn Williams <Finn.Williams@arm.com>
Change-Id: I10ecd4a8937725953396805f33a3562a5384c4d4
Diffstat (limited to 'src/armnn/NetworkQuantizationScheme.hpp')
-rw-r--r-- | src/armnn/NetworkQuantizationScheme.hpp | 33 |
1 files changed, 33 insertions, 0 deletions
diff --git a/src/armnn/NetworkQuantizationScheme.hpp b/src/armnn/NetworkQuantizationScheme.hpp
index 0effa1fd64..ea3c29102b 100644
--- a/src/armnn/NetworkQuantizationScheme.hpp
+++ b/src/armnn/NetworkQuantizationScheme.hpp
@@ -61,6 +61,34 @@ struct QAsymm8QuantizationScheme : IQuantizationScheme
     DataType GetDataType() const override { return DataType::QuantisedAsymm8; }
 };
 
+struct QSymmS8QuantizationScheme : IQuantizationScheme
+{
+    OffsetScalePair ComputeScheme(double min, double max) const override
+    {
+        if (min > max)
+        {
+            throw InvalidArgumentException("min > max will result in invalid quantization.");
+        }
+
+        // To avoid dividing by zero when quantizing a zero filled tensor
+        if (min == 0.0 && max == 0.0)
+        {
+            max = 1.0;
+        }
+
+        double highest = (1 << (NumBits()-1)) - 1; // (numbits-1) accounts for the sign bit
+
+        double extent = std::max(std::abs(min), std::abs(max));
+        double scale = extent / highest;
+
+        return std::make_pair(static_cast<float>(scale), 0);
+    }
+
+    int NumBits() const override { return 8; }
+
+    DataType GetDataType() const override { return DataType::QSymmS8; }
+};
+
 struct QSymm16QuantizationScheme : IQuantizationScheme
 {
     OffsetScalePair ComputeScheme(double min, double max) const override
@@ -81,7 +109,12 @@ struct QSymm16QuantizationScheme : IQuantizationScheme
         double extent = std::max(std::abs(min), std::abs(max));
         double scale = extent / highest;
 
+        if(scale == 0.000457777642)
+        {
+            return std::make_pair(static_cast<float>(scale), 0);
+        }
         return std::make_pair(static_cast<float>(scale), 0);
+
     }
 
     int NumBits() const override { return 16; }