7 #include <boost/assert.hpp> 8 #include <boost/numeric/conversion/cast.hpp> 15 template <typename T, typename std::enable_if<std::is_integral<T>::value, T>::type* =
nullptr>
19 return std::isnan(static_cast<double>(x));
22 template <typename T, typename std::enable_if<!std::is_integral<T>::value, T>::type * =
nullptr>
30 template<
typename QuantizedType>
33 static_assert(IsQuantizedType<QuantizedType>(),
"Not an integer type.");
34 constexpr QuantizedType max = std::numeric_limits<QuantizedType>::max();
35 constexpr QuantizedType min = std::numeric_limits<QuantizedType>::lowest();
36 BOOST_ASSERT(scale != 0.f);
37 BOOST_ASSERT(!std::isnan(value));
39 float clampedValue = std::min(std::max(static_cast<float>(round(value/scale) + offset), static_cast<float>(min)),
40 static_cast<float>(max));
41 auto quantizedBits =
static_cast<QuantizedType
>(clampedValue);
46 template <
typename QuantizedType>
49 static_assert(IsQuantizedType<QuantizedType>(),
"Not an integer type.");
50 BOOST_ASSERT(scale != 0.f);
51 BOOST_ASSERT(!IsNan(value));
58 int8_t armnn::Quantize<int8_t>(
float value,
float scale, int32_t offset);
62 uint8_t armnn::Quantize<uint8_t>(
float value,
float scale, int32_t offset);
66 int16_t armnn::Quantize<int16_t>(
float value,
float scale, int32_t offset);
70 int32_t armnn::Quantize<int32_t>(
float value,
float scale, int32_t offset);
74 float armnn::Dequantize<int8_t>(int8_t value,
float scale, int32_t offset);
78 float armnn::Dequantize<uint8_t>(uint8_t value,
float scale, int32_t offset);
82 float armnn::Dequantize<int16_t>(int16_t value,
float scale, int32_t offset);
86 float armnn::Dequantize<int32_t>(int32_t value,
float scale, int32_t offset);
float Dequantize(QuantizedType value, float scale, int32_t offset)
Dequantize a quantized integer data type (int8_t, uint8_t, int16_t or int32_t) into a floating point data type.
QuantizedType Quantize(float value, float scale, int32_t offset)
Quantize a floating point data type into a quantized integer data type (int8_t, uint8_t, int16_t or int32_t).
std::enable_if_t< std::is_unsigned< Source >::value &&std::is_unsigned< Dest >::value, Dest > numeric_cast(Source source)