diff options
Diffstat (limited to 'arm_compute/core/QuantizationInfo.h')
-rw-r--r-- | arm_compute/core/QuantizationInfo.h | 145 |
1 files changed, 108 insertions, 37 deletions
diff --git a/arm_compute/core/QuantizationInfo.h b/arm_compute/core/QuantizationInfo.h index 52ef149e9b..aecba3712e 100644 --- a/arm_compute/core/QuantizationInfo.h +++ b/arm_compute/core/QuantizationInfo.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 ARM Limited. + * Copyright (c) 2019-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,15 +21,14 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_QUANTIZATION_INFO_H -#define ARM_COMPUTE_QUANTIZATION_INFO_H +#ifndef ACL_ARM_COMPUTE_CORE_QUANTIZATIONINFO_H +#define ACL_ARM_COMPUTE_CORE_QUANTIZATIONINFO_H #include "arm_compute/core/Rounding.h" -#include "utils/misc/Utility.h" -#include "arm_compute/core/Error.h" +#include "arm_compute/core/utils/misc/Utility.h" + +#include "support/ToolchainSupport.h" -#include <cstddef> -#include <type_traits> #include <vector> namespace arm_compute @@ -43,8 +42,7 @@ using qasymm16_t = uint16_t; /**< 16 bit quantized asymmetric scalar value struct UniformQuantizationInfo { /** Default constructor */ - UniformQuantizationInfo() - : scale(0.f), offset(0) + UniformQuantizationInfo() : scale(0.f), offset(0) { } /** Constructor @@ -52,8 +50,7 @@ struct UniformQuantizationInfo * @param[in] scale Quantization scale * @param[in] offset Quantization offset */ - UniformQuantizationInfo(float scale, int32_t offset) - : scale(scale), offset(offset) + UniformQuantizationInfo(float scale, int32_t offset) : scale(scale), offset(offset) { } /** Checks if the scale and offset are both zero */ @@ -71,9 +68,7 @@ class QuantizationInfo { public: /** Default constructor */ - QuantizationInfo() noexcept - : _scale(), - _offset() + QuantizationInfo() noexcept : _scale(), _offset() { } /** Construct quantization info. @@ -82,19 +77,19 @@ public: * * @param[in] scale Scale. */ - QuantizationInfo(float scale) - : _scale(1, scale), _offset() + QuantizationInfo(float scale) : _scale(1, scale), _offset() { } /** Construct quantization info. * * @note Used for asymmetric quantization * - * @param[in] scale Scale. - * @param[in] offset Offset. + * @param[in] scale Scale. + * @param[in] offset Offset. + * @param[in] is_dynamic Whether this QuantizationInfo is dynamic, i.e. the scale and offset may change. */ - QuantizationInfo(float scale, int offset) - : _scale(1, scale), _offset(1, offset) + QuantizationInfo(float scale, int offset, bool is_dynamic = false) + : _scale(1, scale), _offset(1, offset), _is_dynamic(is_dynamic) { } /** Construct quantization info. @@ -103,19 +98,19 @@ public: * * @param[in] scale Scale. */ - QuantizationInfo(std::vector<float> scale) - : _scale(scale), _offset() + QuantizationInfo(std::vector<float> scale) : _scale(scale), _offset() { } /** Construct quantization info. * * @note Used for asymmetric per channel quantization * - * @param[in] scale Scale. - * @param[in] offset Offset. + * @param[in] scale Scale. + * @param[in] offset Offset. + * @param[in] is_dynamic Whether this QuantizationInfo is dynamic, i.e. the scale and offset may change. */ - QuantizationInfo(std::vector<float> scale, std::vector<int32_t> offset) - : _scale(scale), _offset(offset) + QuantizationInfo(std::vector<float> scale, std::vector<int32_t> offset, bool is_dynamic = false) + : _scale(scale), _offset(offset), _is_dynamic(is_dynamic) { } /** Scale vector accessor @@ -134,6 +129,14 @@ public: { return _offset; } + /** is_dynamic accessor + * + * @return If true, the scale and offset may change, so operators will need to read on every run + */ + bool is_dynamic() const + { + return _is_dynamic; + } /** Indicates whether this QuantizationInfo has valid settings or not * * @return True if the this has invalid settings. @@ -158,6 +161,8 @@ public: private: std::vector<float> _scale; /**< Vector containing scaling factors */ std::vector<int32_t> _offset; /**< Vector containing zero offsets */ + bool _is_dynamic = + false; /**< If true, the scale and offset may change, so operators will need to read on every run */ }; /** Check whether two quantization info are equal. @@ -210,20 +215,39 @@ inline bool operator!=(const UniformQuantizationInfo &lhs, const UniformQuantiza template <typename QUANTIZED_TYPE = uint8_t> struct Qasymm8QuantizationHelper { - static_assert(std::is_same<QUANTIZED_TYPE, uint8_t>::value - || std::is_same<QUANTIZED_TYPE, int8_t>::value, + static_assert(std::is_same<QUANTIZED_TYPE, uint8_t>::value || std::is_same<QUANTIZED_TYPE, int8_t>::value, "quantized type should be either uint8_t or int8_t."); /** Quantize a value given a 8-bit asymmetric quantization scheme * + * @param[in] value Value to quantize + * @param[in] qinfo Quantization information to use for quantizing + * + * @return Quantized value + */ + static inline QUANTIZED_TYPE quantize(float value, const UniformQuantizationInfo &qinfo) + { + ARM_COMPUTE_ERROR_ON(qinfo.scale == 0); + const int quantized = support::cpp11::lround(value / qinfo.scale) + qinfo.offset; + return static_cast<QUANTIZED_TYPE>(arm_compute::utility::clamp<decltype(quantized), QUANTIZED_TYPE>(quantized)); + } + + /** Quantize a value given a 8-bit asymmetric quantization scheme using a specific rounding policy + * * @param[in] value Value to quantize * @param[in] qinfo Quantization information to use for quantizing - * @param[in] rounding_policy (Optional) Rounding policy to use. Default: nearest up + * @param[in] rounding_policy Rounding policy to use * * @return Quantized value */ - static inline QUANTIZED_TYPE quantize(float value, const UniformQuantizationInfo &qinfo, RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP) + static inline QUANTIZED_TYPE + quantize(float value, const UniformQuantizationInfo &qinfo, RoundingPolicy rounding_policy) { + if (rounding_policy == RoundingPolicy::TO_NEAREST_UP) + { + return quantize(value, qinfo); + } + ARM_COMPUTE_ERROR_ON(qinfo.scale == 0); const int quantized = arm_compute::round(value / qinfo.scale, rounding_policy) + qinfo.offset; return static_cast<QUANTIZED_TYPE>(arm_compute::utility::clamp<decltype(quantized), QUANTIZED_TYPE>(quantized)); @@ -237,7 +261,8 @@ struct Qasymm8QuantizationHelper * * @return Quantized value */ - static inline QUANTIZED_TYPE quantize(float value, const QuantizationInfo &qinfo, RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP) + static inline QUANTIZED_TYPE + quantize(float value, const QuantizationInfo &qinfo, RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP) { const UniformQuantizationInfo uqinfo = qinfo.uniform(); ARM_COMPUTE_ERROR_ON(uqinfo.scale == 0); @@ -280,7 +305,8 @@ struct Qasymm8QuantizationHelper * @return Quantized value */ template <typename INFO_TYPE> -inline uint8_t quantize_qasymm8(float value, const INFO_TYPE &qinfo, RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP) +inline uint8_t +quantize_qasymm8(float value, const INFO_TYPE &qinfo, RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP) { return Qasymm8QuantizationHelper<uint8_t>::quantize(value, qinfo, rounding_policy); } @@ -294,7 +320,9 @@ inline uint8_t quantize_qasymm8(float value, const INFO_TYPE &qinfo, RoundingPol * @return Quantized value */ template <typename INFO_TYPE> -inline int8_t quantize_qasymm8_signed(float value, const INFO_TYPE &qinfo, RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP) +inline int8_t quantize_qasymm8_signed(float value, + const INFO_TYPE &qinfo, + RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP) { return Qasymm8QuantizationHelper<int8_t>::quantize(value, qinfo, rounding_policy); } @@ -416,6 +444,19 @@ inline float dequantize(uint16_t value, float scale, int32_t offset) return (static_cast<int>(value) - offset) * scale; } +/** Dequantize a value given a 32-bit asymmetric quantization scheme + * + * @param[in] value Value to dequantize + * @param[in] scale Scale to use for dequantization + * @param[in] offset Zero-offset to use for dequantization + * + * @return Dequantized value + */ +inline float dequantize(int32_t value, float scale, int32_t offset) +{ + return (static_cast<int>(value) - offset) * scale; +} + /** Quantize a value given a 16-bit symmetric quantization scheme * * @param[in] value Value to quantize @@ -424,7 +465,9 @@ inline float dequantize(uint16_t value, float scale, int32_t offset) * * @return Quantized value */ -inline int16_t quantize_qsymm16(float value, const UniformQuantizationInfo &qinfo, RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP) +inline int16_t quantize_qsymm16(float value, + const UniformQuantizationInfo &qinfo, + RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP) { int quantized = arm_compute::round(value / qinfo.scale, rounding_policy); quantized = arm_compute::utility::clamp<int, int16_t>(quantized); @@ -475,7 +518,9 @@ inline float dequantize_qsymm16(int16_t value, const QuantizationInfo &qinfo) * * @return Quantized value */ -inline uint16_t quantize_qasymm16(float value, const UniformQuantizationInfo &qinfo, RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP) +inline uint16_t quantize_qasymm16(float value, + const UniformQuantizationInfo &qinfo, + RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP) { int quantized = arm_compute::round(value / qinfo.scale, rounding_policy) + qinfo.offset; quantized = arm_compute::utility::clamp<int, uint16_t>(quantized); @@ -518,6 +563,31 @@ inline float dequantize_qasymm16(uint16_t value, const QuantizationInfo &qinfo) return dequantize_qasymm16(value, qinfo.uniform()); } +/** Dequantize a value given a 32-bit asymmetric quantization scheme + * + * @param[in] value Value to dequantize + * @param[in] qinfo Quantization information to use for dequantizing + * + * @return Dequantized value + */ +inline float dequantize_s32(int32_t value, const UniformQuantizationInfo &qinfo) +{ + return (static_cast<int>(value) - qinfo.offset) * qinfo.scale; +} + +/** Dequantize a value given a 32-bit asymmetric quantization scheme + * + * @param[in] value Value to dequantize + * @param[in] qinfo Quantization information to use for dequantizing + * + * @return Dequantized value + */ + +inline float dequantize_s32(int32_t value, const QuantizationInfo &qinfo) +{ + return dequantize_s32(value, qinfo.uniform()); +} + /* * In case of requantization of a quantized input tensor to an output tensor with another quantization * instead of applying dequantization and then a quantization functions, we just compute new scale and @@ -548,7 +618,8 @@ inline float dequantize_qasymm16(uint16_t value, const QuantizationInfo &qinfo) * z_n = - z_i * s_i / s_o + z_o * */ -inline UniformQuantizationInfo compute_requantization_scale_offset(const UniformQuantizationInfo &uqinfo_in, const UniformQuantizationInfo &uqinfo_out) +inline UniformQuantizationInfo compute_requantization_scale_offset(const UniformQuantizationInfo &uqinfo_in, + const UniformQuantizationInfo &uqinfo_out) { float scale_to_apply = uqinfo_out.scale; int32_t offset_to_apply = uqinfo_out.offset; @@ -562,4 +633,4 @@ inline UniformQuantizationInfo compute_requantization_scale_offset(const Uniform } } // namespace arm_compute -#endif /* ARM_COMPUTE_QUANTIZATION_INFO_H */ +#endif // ACL_ARM_COMPUTE_CORE_QUANTIZATIONINFO_H |