diff options
author | Nattapat Chaimanowong <nattapat.chaimanowong@arm.com> | 2019-03-20 11:51:14 +0000 |
---|---|---|
committer | nattapat.chaimanowong <nattapat.chaimanowong@arm.com> | 2019-03-20 14:49:03 +0000 |
commit | 7ac07f355f4cb75a54ec423670b7078bd0ecb44d (patch) | |
tree | 5f28c73decbfe0221c2ecedc204f48a7c00884f0 /src/armnn/NetworkQuantizationScheme.hpp | |
parent | 2a434a8a23d75fb62ac0cb3ecb83ba7aab89b8c6 (diff) | |
download | armnn-7ac07f355f4cb75a54ec423670b7078bd0ecb44d.tar.gz |
IVGCVSW-2858 Add support for QSymm16 quantization
Change-Id: Ia7c305c30c39ec0e9db447a461479be17fde250c
Signed-off-by: Nattapat Chaimanowong <nattapat.chaimanowong@arm.com>
Diffstat (limited to 'src/armnn/NetworkQuantizationScheme.hpp')
-rw-r--r-- | src/armnn/NetworkQuantizationScheme.hpp | 80 |
1 files changed, 80 insertions, 0 deletions
diff --git a/src/armnn/NetworkQuantizationScheme.hpp b/src/armnn/NetworkQuantizationScheme.hpp new file mode 100644 index 0000000000..065205dada --- /dev/null +++ b/src/armnn/NetworkQuantizationScheme.hpp @@ -0,0 +1,80 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <armnn/Types.hpp> + +#include <cmath> +#include <algorithm> + +namespace armnn +{ + +using OffsetScalePair = std::pair<float, int>; + +struct IQuantizationScheme +{ + virtual OffsetScalePair ComputeScheme(double min, double max) const = 0; + + virtual int NumBits() const = 0; + + virtual DataType GetDataType() const = 0; + + virtual ~IQuantizationScheme() {} +}; + +struct QAsymm8QuantizationScheme : IQuantizationScheme +{ + OffsetScalePair ComputeScheme(double min, double max) const override + { + if (min >= max) + { + throw InvalidArgumentException("min >= max will result in invalid quantization."); + } + + double highest = (1 << NumBits()) - 1; + + min = std::min(0.0, min); // min <= 0.0 + max = std::max(0.0, max); // max >= 0.0 + + // Assumes quantization range [0-highest] + double scale = (max-min) / highest; + double offset = -min / scale; + + // Clamp offset [0-highest] + offset = std::max(0.0, std::min(highest, offset)); + + return std::make_pair(static_cast<float>(scale), static_cast<int>(std::round(offset))); + } + + int NumBits() const override { return 8; } + + DataType GetDataType() const override { return DataType::QuantisedAsymm8; } +}; + +struct QSymm16QuantizationScheme : IQuantizationScheme +{ + OffsetScalePair ComputeScheme(double min, double max) const override + { + if (min >= max) + { + throw InvalidArgumentException("min >= max will result in invalid quantization."); + } + + double highest = (1 << (NumBits()-1)) - 1; // (numbits-1) accounts for the sign bit + + double extent = std::max(std::abs(min), std::abs(max)); + double scale = extent / highest; + + return std::make_pair(static_cast<float>(scale), 0); + } + + int NumBits() const override { return 16; } + + DataType GetDataType() const override { return DataType::QuantisedSymm16; } +}; + +} // namespace armnn |