// // Copyright © 2017, 2024 Arm Ltd. All rights reserved. // SPDX-License-Identifier: MIT // #include #include #include namespace { /// Workaround for std:isnan() not being implemented correctly for integral types in MSVC. /// https://stackoverflow.com/a/56356405 /// @{ template ::value, T>::type* = nullptr> inline int IsNan(T x) { // The spec defines integral types to be handled as if they were casted to doubles. return std::isnan(static_cast(x)); } template ::value, T>::type * = nullptr> inline int IsNan(T x) { return std::isnan(x); } /// @} } // namespace std template QuantizedType armnn::Quantize(float value, float scale, int32_t offset) { static_assert(IsQuantizedType(), "Not an integer type."); constexpr QuantizedType max = std::numeric_limits::max(); constexpr QuantizedType min = std::numeric_limits::lowest(); if (std::isnan(value)) { throw armnn::InvalidArgumentException("Quantize: Value is NaN"); } float clampedValue = std::min(std::max((static_cast(offset) + static_cast(round(value/scale))), static_cast(min)), static_cast(max)); auto quantizedBits = static_cast(clampedValue); return quantizedBits; } template float armnn::Dequantize(QuantizedType value, float scale, int32_t offset) { static_assert(IsQuantizedType(), "Not an integer type."); if (std::isnan(value)) { throw armnn::InvalidArgumentException("Dequantize: Value is NaN"); } return (armnn::numeric_cast(value - offset)) * scale; } /// Explicit specialization of Quantize for int8_t template int8_t armnn::Quantize(float value, float scale, int32_t offset); /// Explicit specialization of Quantize for uint8_t template uint8_t armnn::Quantize(float value, float scale, int32_t offset); /// Explicit specialization of Quantize for int16_t template int16_t armnn::Quantize(float value, float scale, int32_t offset); /// Explicit specialization of Quantize for int32_t template int32_t armnn::Quantize(float value, float scale, int32_t offset); /// Explicit specialization of Dequantize for int8_t template float armnn::Dequantize(int8_t value, float scale, int32_t offset); /// Explicit specialization of Dequantize for uint8_t template float armnn::Dequantize(uint8_t value, float scale, int32_t offset); /// Explicit specialization of Dequantize for int16_t template float armnn::Dequantize(int16_t value, float scale, int32_t offset); /// Explicit specialization of Dequantize for int32_t template float armnn::Dequantize(int32_t value, float scale, int32_t offset); /// Explicit specialization of Dequantize for int64_t template float armnn::Dequantize(int64_t value, float scale, int32_t offset);