From e66448491b836049df62e63e1e5151eefe3bfcf8 Mon Sep 17 00:00:00 2001
From: Narumol Prangnawarat
Date: Thu, 5 Mar 2020 17:27:45 +0000
Subject: IVGCVSW-4517 Add BFloat16 class and unit tests

Signed-off-by: Narumol Prangnawarat
Change-Id: Ie2e9e617b9210d79a26e7ba58ecc874d1202e599
---
 src/armnn/test/UtilsTests.cpp |  67 ++++++++++++++++++++++++-
 src/armnnUtils/BFloat16.hpp   | 134 +++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 199 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/armnn/test/UtilsTests.cpp b/src/armnn/test/UtilsTests.cpp
index faf4480029..27b7eded3e 100644
--- a/src/armnn/test/UtilsTests.cpp
+++ b/src/armnn/test/UtilsTests.cpp
@@ -87,10 +87,27 @@ BOOST_AUTO_TEST_CASE(HalfType)
 
 BOOST_AUTO_TEST_CASE(BFloatType)
 {
-    armnn::BFloat16 a = 16256;
+    uint16_t v = 16256;
+    armnn::BFloat16 a(v);
+    armnn::BFloat16 b(1.0f);
+    armnn::BFloat16 zero;
 
     // Test BFloat16 type
     BOOST_CHECK_EQUAL(sizeof(a), 2);
+    BOOST_CHECK_EQUAL(a, b);
+    BOOST_CHECK_EQUAL(a.val(), v);
+    BOOST_CHECK_EQUAL(a, 1.0f);
+    BOOST_CHECK_EQUAL(zero, 0.0f);
+
+    // Infinity
+    float infFloat = std::numeric_limits<float>::infinity();
+    armnn::BFloat16 infBF(infFloat);
+    BOOST_CHECK_EQUAL(infBF, armnn::BFloat16::inf());
+
+    // NaN
+    float nan = std::numeric_limits<float>::quiet_NaN();
+    armnn::BFloat16 nanBF(nan);
+    BOOST_CHECK_EQUAL(nanBF, armnn::BFloat16::nan());
 
     // Test utility function returns correct type.
     using ResolvedType = armnn::ResolveType<armnn::DataType::BFloat16>;
@@ -104,6 +121,54 @@ BOOST_AUTO_TEST_CASE(BFloatType)
     BOOST_CHECK((GetDataTypeName(armnn::DataType::BFloat16) == std::string("BFloat16")));
 }
 
+BOOST_AUTO_TEST_CASE(Float32ToBFloat16Test)
+{
+    // LSB = 0, R = 0 -> round down
+    armnn::BFloat16 roundDown0 = armnn::BFloat16::float32ToBFloat16(1.704735E38f); // 0x7F004000
+    BOOST_CHECK_EQUAL(roundDown0.val(), 0x7F00);
+    // LSB = 1, R = 0 -> round down
+    armnn::BFloat16 roundDown1 = armnn::BFloat16::float32ToBFloat16(9.18355E-41f); // 0x00010000
+    BOOST_CHECK_EQUAL(roundDown1.val(), 0x0001);
+    // LSB = 0, R = 1 all 0 -> round down
+    armnn::BFloat16 roundDown2 = armnn::BFloat16::float32ToBFloat16(4.59177E-41f); // 0x00008000
+    BOOST_CHECK_EQUAL(roundDown2.val(), 0x0000);
+    // LSB = 1, R = 1 -> round up
+    armnn::BFloat16 roundUp = armnn::BFloat16::float32ToBFloat16(-2.0234377f); // 0xC0018001
+    BOOST_CHECK_EQUAL(roundUp.val(), 0xC002);
+    // LSB = 0, R = 1 -> round up
+    armnn::BFloat16 roundUp1 = armnn::BFloat16::float32ToBFloat16(4.843037E-35f); // 0x0680C000
+    BOOST_CHECK_EQUAL(roundUp1.val(), 0x0681);
+    // Max positive value -> infinity
+    armnn::BFloat16 maxPositive = armnn::BFloat16::float32ToBFloat16(3.4028235E38f); // 0x7F7FFFFF
+    BOOST_CHECK_EQUAL(maxPositive, armnn::BFloat16::inf());
+    // Max negative value -> -infinity
+    armnn::BFloat16 maxNeg = armnn::BFloat16::float32ToBFloat16(-3.4028235E38f); // 0xFF7FFFFF
+    BOOST_CHECK_EQUAL(maxNeg.val(), 0xFF80);
+    // Min positive value
+    armnn::BFloat16 minPositive = armnn::BFloat16::float32ToBFloat16(1.1754942E-38f); // 0x007FFFFF
+    BOOST_CHECK_EQUAL(minPositive.val(), 0x0080);
+    // Min negative value
+    armnn::BFloat16 minNeg = armnn::BFloat16::float32ToBFloat16(-1.1754942E-38f); // 0x807FFFFF
+    BOOST_CHECK_EQUAL(minNeg.val(), 0x8080);
+}
+
+BOOST_AUTO_TEST_CASE(BFloat16ToFloat32Test)
+{
+    armnn::BFloat16 bf0(1.5f);
+    BOOST_CHECK_EQUAL(bf0.toFloat32(), 1.5f);
+    armnn::BFloat16 bf1(-5.525308E-25f);
+    BOOST_CHECK_EQUAL(bf1.toFloat32(), -5.525308E-25f);
+    armnn::BFloat16 bf2(-2.0625f);
+    BOOST_CHECK_EQUAL(bf2.toFloat32(), -2.0625f);
+    uint16_t v = 32639;
+    armnn::BFloat16 bf3(v);
+    BOOST_CHECK_EQUAL(bf3.toFloat32(), 3.3895314E38f);
+    // Infinity
+    BOOST_CHECK_EQUAL(armnn::BFloat16::inf().toFloat32(), std::numeric_limits<float>::infinity());
+    // NaN
+    BOOST_CHECK(std::isnan(armnn::BFloat16::nan().toFloat32()));
+}
+
 BOOST_AUTO_TEST_CASE(GraphTopologicalSortSimpleTest)
 {
     std::map<int, std::vector<int>> graph;
diff --git a/src/armnnUtils/BFloat16.hpp b/src/armnnUtils/BFloat16.hpp
index bce45aa1ff..bb56b7d37c 100644
--- a/src/armnnUtils/BFloat16.hpp
+++ b/src/armnnUtils/BFloat16.hpp
@@ -5,9 +5,141 @@
 
 #pragma once
 
+#include <ostream>
+#include <cmath>
+#include <cstring>
 #include <stdint.h>
 
 namespace armnn
 {
-    using BFloat16 = uint16_t;
+/// 16-bit brain floating point value: the upper 16 bits of an IEEE-754 float32
+/// (1 sign bit, 8 exponent bits, 7 mantissa bits), stored as a raw uint16_t.
+class BFloat16
+{
+public:
+    /// Constructs a value whose bit pattern is 0x0000.
+    BFloat16()
+    : value(0)
+    {}
+
+    /// Constructs from a raw BFloat16 bit pattern.
+    explicit BFloat16(uint16_t v)
+    : value(v)
+    {}
+
+    /// Constructs from a float32 using float32ToBFloat16().
+    explicit BFloat16(float v)
+    {
+        value = float32ToBFloat16(v).val();
+    }
+
+    /// Assigns a float32 converted with float32ToBFloat16().
+    BFloat16& operator=(float v)
+    {
+        value = float32ToBFloat16(v).val();
+        return *this;
+    }
+
+    /// Compares raw bit patterns, so nan() == nan() is true and +0 != -0.
+    bool operator==(const BFloat16& r) const
+    {
+        return value == r.val();
+    }
+
+    /// Compares the widened float32 value with r using float equality.
+    bool operator==(const float& r) const
+    {
+        return toFloat32() == r;
+    }
+
+    /// Converts a float32 to BFloat16, rounding to nearest with ties to even.
+    /// Any NaN input yields nan(); values that round past max() become infinity.
+    static BFloat16 float32ToBFloat16(const float v)
+    {
+        if (std::isnan(v))
+        {
+            return nan();
+        }
+        else
+        {
+            // Round value to the nearest even
+            // Float32
+            // S EEEEEEEE MMMMMMLRMMMMMMMMMMMMMMM
+            // BFloat16
+            // S EEEEEEEE MMMMMML
+            // LSB (L): Least significant bit of BFloat16 (last bit of the Mantissa of BFloat16)
+            // R: Rounding bit
+            // LSB = 0, R = 0 -> round down
+            // LSB = 1, R = 0 -> round down
+            // LSB = 0, R = 1, all the rest = 0 -> round down
+            // LSB = 1, R = 1 -> round up
+            // LSB = 0, R = 1 -> round up
+            // Copy the bits with memcpy; reading a float through a uint32_t* violates strict aliasing.
+            uint32_t u32 = 0;
+            std::memcpy(&u32, &v, sizeof(u32));
+            uint16_t u16 = static_cast<uint16_t>(u32 >> 16u);
+            // Mark the LSB
+            const uint16_t lsb = u16 & 0x0001;
+            // Mark the error to be truncated (the lower 16 bits of the FP32)
+            const uint16_t error = static_cast<uint16_t>(u32 & 0x0000FFFF);
+            if ((error > 0x8000 || (error == 0x8000 && lsb == 1)))
+            {
+                u16++;
+            }
+            BFloat16 b(u16);
+            return b;
+        }
+    }
+
+    /// Widens to float32 by placing the 16 stored bits in the upper half; the lower 16 bits are zero.
+    float toFloat32() const
+    {
+        // Cast before shifting: a uint16_t promotes to int, and shifting into the sign bit is not portable.
+        const uint32_t u32 = static_cast<uint32_t>(value) << 16u;
+        float f32 = 0.0f;
+        std::memcpy(&f32, &u32, sizeof(f32));
+        return f32;
+    }
+
+    /// Returns the raw 16-bit pattern.
+    uint16_t val() const
+    {
+        return value;
+    }
+
+    /// Largest finite BFloat16 (0x7F7F).
+    static BFloat16 max()
+    {
+        uint16_t max = 0x7F7F;
+        return BFloat16(max);
+    }
+
+    /// Quiet NaN (0x7FC0).
+    static BFloat16 nan()
+    {
+        uint16_t nan = 0x7FC0;
+        return BFloat16(nan);
+    }
+
+    /// Positive infinity (0x7F80).
+    static BFloat16 inf()
+    {
+        uint16_t infVal = 0x7F80;
+        return BFloat16(infVal);
+    }
+
+private:
+    uint16_t value;
+};
+
+/// Streams the float32 value followed by the raw bit pattern in hex, e.g. 1(0x3f80).
+inline std::ostream& operator<<(std::ostream& os, const BFloat16& b)
+{
+    // std::hex is sticky; restore the caller's flags so later integer output is unaffected.
+    const std::ios_base::fmtflags savedFlags = os.flags();
+    os << b.toFloat32() << "(0x" << std::hex << b.val() << ")";
+    os.flags(savedFlags);
+    return os;
+}
+
 } //namespace armnn
-- 
cgit v1.2.1