// // Copyright © 2017 Arm Ltd. All rights reserved. // SPDX-License-Identifier: MIT // #include #include #include namespace { /// Workaround for std:isnan() not being implemented correctly for integral types in MSVC. /// https://stackoverflow.com/a/56356405 /// @{ template ::value, T>::type* = nullptr> inline int IsNan(T x) { // The spec defines integral types to be handled as if they were casted to doubles. return std::isnan(static_cast(x)); } template ::value, T>::type * = nullptr> inline int IsNan(T x) { return std::isnan(x); } /// @} } // namespace std template QuantizedType armnn::Quantize(float value, float scale, int32_t offset) { static_assert(IsQuantizedType(), "Not an integer type."); constexpr QuantizedType max = std::numeric_limits::max(); constexpr QuantizedType min = std::numeric_limits::lowest(); ARMNN_ASSERT(scale != 0.f); ARMNN_ASSERT(!std::isnan(value)); float clampedValue = std::min(std::max(static_cast(round(value/scale) + offset), static_cast(min)), static_cast(max)); auto quantizedBits = static_cast(clampedValue); return quantizedBits; } template float armnn::Dequantize(QuantizedType value, float scale, int32_t offset) { static_assert(IsQuantizedType(), "Not an integer type."); ARMNN_ASSERT(scale != 0.f); ARMNN_ASSERT(!IsNan(value)); return (armnn::numeric_cast(value - offset)) * scale; } /// Explicit specialization of Quantize for int8_t template int8_t armnn::Quantize(float value, float scale, int32_t offset); /// Explicit specialization of Quantize for uint8_t template uint8_t armnn::Quantize(float value, float scale, int32_t offset); /// Explicit specialization of Quantize for int16_t template int16_t armnn::Quantize(float value, float scale, int32_t offset); /// Explicit specialization of Quantize for int32_t template int32_t armnn::Quantize(float value, float scale, int32_t offset); /// Explicit specialization of Dequantize for int8_t template float armnn::Dequantize(int8_t value, float scale, int32_t offset); /// Explicit specialization of Dequantize for uint8_t template float armnn::Dequantize(uint8_t value, float scale, int32_t offset); /// Explicit specialization of Dequantize for int16_t template float armnn::Dequantize(int16_t value, float scale, int32_t offset); /// Explicit specialization of Dequantize for int32_t template float armnn::Dequantize(int32_t value, float scale, int32_t offset);