// Template that participates in overload resolution only when T is an integral
// type: the defaulted, unnamed non-type template parameter is formed through
// std::enable_if on std::is_integral<T>::value.
// NOTE(review): the function signature and braces are not visible in this
// excerpt (the listing jumps from line 14 to line 18) - confirm the name and
// return type against the original file.
14 template <typename T, typename std::enable_if<std::is_integral<T>::value, T>::type* =
nullptr>
// Convert the integral argument to double first, so the NaN test is performed
// on a floating point value rather than directly on the integral one.
18 return std::isnan(static_cast<double>(x));
// Template that participates in overload resolution only when T is NOT an
// integral type (the enable_if condition is !std::is_integral<T>::value).
// NOTE(review): the signature and body are not visible in this excerpt;
// presumably this is the floating point counterpart of an integral-only
// overload - confirm against the original file.
21 template <typename T, typename std::enable_if<!std::is_integral<T>::value, T>::type * =
nullptr>
// Body fragment of a function template parameterised on the quantized storage
// type. It reads `value`, `scale` and `offset` and produces a QuantizedType.
// NOTE(review): the signature line is not visible in this excerpt; presumably
// this is armnn::Quantize(float value, float scale, int32_t offset) - confirm.
29 template<
typename QuantizedType>
// Compile-time guard: reject any QuantizedType for which
// IsQuantizedType<QuantizedType>() is false.
32 static_assert(IsQuantizedType<QuantizedType>(),
"Not an integer type.");
// Upper and lower bounds of the target type, taken from std::numeric_limits.
33 constexpr QuantizedType max = std::numeric_limits<QuantizedType>::max();
34 constexpr QuantizedType min = std::numeric_limits<QuantizedType>::lowest();
// Divide by scale, round, add offset, then clamp the result to [min, max]
// with both bounds converted to float for the comparison.
// NOTE(review): nothing visible here guards against scale == 0 or a NaN value;
// any such checks would be on lines missing from this excerpt.
// NOTE(review): for QuantizedType = int32_t, static_cast<float>(max) is not
// exactly representable and rounds up to 2^31, so the clamp alone does not
// keep the value inside the range the cast below can represent - confirm.
38 float clampedValue = std::min(std::max(static_cast<float>(round(value/scale) + offset), static_cast<float>(min)),
39 static_cast<float>(max));
// Convert the clamped float to the quantized storage type.
40 auto quantizedBits =
static_cast<QuantizedType
>(clampedValue);
// Body fragment of a function template parameterised on the quantized storage
// type. It reads `value`, `scale` and `offset` and returns a float.
// NOTE(review): the signature line is not visible in this excerpt; presumably
// this is armnn::Dequantize(QuantizedType value, float scale, int32_t offset)
// - confirm against the original file.
45 template <
typename QuantizedType>
// Compile-time guard: reject any QuantizedType for which
// IsQuantizedType<QuantizedType>() is false.
48 static_assert(IsQuantizedType<QuantizedType>(),
"Not an integer type.");
// Subtract offset from the quantized value, convert the difference to float
// through armnn::numeric_cast, then multiply by scale.
51 return (armnn::numeric_cast<float>(value - offset)) * scale;
// Declarations of armnn::Quantize for each supported storage type. All four
// take (float value, float scale, int32_t offset) and return the storage type.
// NOTE(review): the `template` keyword that would make these explicit
// instantiations is not visible in this excerpt - presumably it sits on the
// omitted preceding lines; confirm against the original file.

// Quantize to signed 8-bit.
56 int8_t armnn::Quantize<int8_t>(
float value,
float scale, int32_t offset);
// Quantize to unsigned 8-bit.
60 uint8_t armnn::Quantize<uint8_t>(
float value,
float scale, int32_t offset);
// Quantize to signed 16-bit.
64 int16_t armnn::Quantize<int16_t>(
float value,
float scale, int32_t offset);
// Quantize to signed 32-bit.
68 int32_t armnn::Quantize<int32_t>(
float value,
float scale, int32_t offset);
// Declarations of armnn::Dequantize for each supported storage type. All four
// take (storage-type value, float scale, int32_t offset) and return float.
// NOTE(review): the `template` keyword that would make these explicit
// instantiations is not visible in this excerpt - presumably it sits on the
// omitted preceding lines; confirm against the original file.

// Dequantize from signed 8-bit.
72 float armnn::Dequantize<int8_t>(int8_t value,
float scale, int32_t offset);
// Dequantize from unsigned 8-bit.
76 float armnn::Dequantize<uint8_t>(uint8_t value,
float scale, int32_t offset);
// Dequantize from signed 16-bit.
80 float armnn::Dequantize<int16_t>(int16_t value,
float scale, int32_t offset);
// Dequantize from signed 32-bit.
84 float armnn::Dequantize<int32_t>(int32_t value,
float scale, int32_t offset);
float Dequantize(QuantizedType value, float scale, int32_t offset)
Dequantize a quantized integer data type (8-, 16- or 32-bit) into a floating point data type.
#define ARMNN_ASSERT(COND)
QuantizedType Quantize(float value, float scale, int32_t offset)
Quantize a floating point data type into a quantized integer data type (8-, 16- or 32-bit).