ArmNN
 20.08
BFloat16 Class Reference

#include <BFloat16.hpp>

Public Member Functions

 BFloat16 ()
 
 BFloat16 (const BFloat16 &v)=default
 
 BFloat16 (uint16_t v)
 
 BFloat16 (float v)
 
 operator float () const
 
BFloat16 & operator= (const BFloat16 &other)=default
 
BFloat16 & operator= (float v)
 
bool operator== (const BFloat16 &r) const
 
float ToFloat32 () const
 
uint16_t Val () const
 

Static Public Member Functions

static BFloat16 Float32ToBFloat16 (const float v)
 
static BFloat16 Max ()
 
static BFloat16 Nan ()
 
static BFloat16 Inf ()
 

Detailed Description

Definition at line 14 of file BFloat16.hpp.

Constructor & Destructor Documentation

◆ BFloat16() [1/4]

BFloat16 ( )
inline

Definition at line 17 of file BFloat16.hpp.

Referenced by BFloat16::Inf(), BFloat16::Max(), and BFloat16::Nan().

18  : m_Value(0)
19  {}

◆ BFloat16() [2/4]

BFloat16 ( const BFloat16 & v)
default

◆ BFloat16() [3/4]

BFloat16 ( uint16_t  v)
inline explicit

Definition at line 23 of file BFloat16.hpp.

24  : m_Value(v)
25  {}

◆ BFloat16() [4/4]

BFloat16 ( float  v)
inline explicit

Definition at line 27 of file BFloat16.hpp.

References BFloat16::Float32ToBFloat16(), and BFloat16::Val().

28  {
29  m_Value = Float32ToBFloat16(v).Val();
30  }
uint16_t Val() const
Definition: BFloat16.hpp:92
static BFloat16 Float32ToBFloat16(const float v)
Definition: BFloat16.hpp:50

Member Function Documentation

◆ Float32ToBFloat16()

static BFloat16 Float32ToBFloat16 ( const float  v)
inline static

Definition at line 50 of file BFloat16.hpp.

References armnn::error, and BFloat16::Nan().

Referenced by BFloat16::BFloat16(), BOOST_AUTO_TEST_CASE(), and BFloat16::operator=().

51  {
52  if (std::isnan(v))
53  {
54  return Nan();
55  }
56  else
57  {
58  // Round value to the nearest even
59  // Float32
60  // S EEEEEEEE MMMMMMLRMMMMMMMMMMMMMMM
61  // BFloat16
62  // S EEEEEEEE MMMMMML
63  // LSB (L): Least significant bit of BFloat16 (last bit of the Mantissa of BFloat16)
64  // R: Rounding bit
65  // LSB = 0, R = 0 -> round down
66  // LSB = 1, R = 0 -> round down
67  // LSB = 0, R = 1, all the rest = 0 -> round down
68  // LSB = 1, R = 1 -> round up
69  // LSB = 0, R = 1, any of the rest != 0 -> round up
70  const uint32_t* u32 = reinterpret_cast<const uint32_t*>(&v);
71  uint16_t u16 = static_cast<uint16_t>(*u32 >> 16u);
72  // Mark the LSB
73  const uint16_t lsb = u16 & 0x0001;
74  // Mark the error to be truncated (the remaining low 16 bits of the FP32 value)
75  const uint16_t error = static_cast<uint16_t>((*u32 & 0x0000FFFF));
76  if ((error > 0x8000 || (error == 0x8000 && lsb == 1)))
77  {
78  u16++;
79  }
80  BFloat16 b(u16);
81  return b;
82  }
83  }
static BFloat16 Nan()
Definition: BFloat16.hpp:103

◆ Inf()

static BFloat16 Inf ( )
inline static

Definition at line 109 of file BFloat16.hpp.

References BFloat16::BFloat16().

Referenced by BOOST_AUTO_TEST_CASE().

110  {
111  uint16_t infVal = 0x7F80;
112  return BFloat16(infVal);
113  }

◆ Max()

static BFloat16 Max ( )
inline static

Definition at line 97 of file BFloat16.hpp.

References BFloat16::BFloat16().

98  {
99  uint16_t max = 0x7F7F;
100  return BFloat16(max);
101  }

◆ Nan()

static BFloat16 Nan ( )
inline static

Definition at line 103 of file BFloat16.hpp.

References BFloat16::BFloat16().

Referenced by BOOST_AUTO_TEST_CASE(), and BFloat16::Float32ToBFloat16().

104  {
105  uint16_t nan = 0x7FC0;
106  return BFloat16(nan);
107  }

◆ operator float()

operator float ( ) const
inline

Definition at line 32 of file BFloat16.hpp.

References BFloat16::operator=(), and BFloat16::ToFloat32().

33  {
34  return ToFloat32();
35  }
float ToFloat32() const
Definition: BFloat16.hpp:85

◆ operator=() [1/2]

BFloat16& operator= ( const BFloat16 & other)
default

◆ operator=() [2/2]

BFloat16& operator= ( float  v)
inline

Definition at line 39 of file BFloat16.hpp.

References BFloat16::Float32ToBFloat16(), and BFloat16::Val().

40  {
41  m_Value = Float32ToBFloat16(v).Val();
42  return *this;
43  }
uint16_t Val() const
Definition: BFloat16.hpp:92
static BFloat16 Float32ToBFloat16(const float v)
Definition: BFloat16.hpp:50

◆ operator==()

bool operator== ( const BFloat16 & r) const
inline

Definition at line 45 of file BFloat16.hpp.

References BFloat16::Val().

46  {
47  return m_Value == r.Val();
48  }

◆ ToFloat32()

float ToFloat32 ( ) const
inline

Definition at line 85 of file BFloat16.hpp.

Referenced by BOOST_AUTO_TEST_CASE(), FloatingPointConverter::ConvertBFloat16ToFloat32(), BFloat16::operator float(), and armnn::operator<<().

86  {
87  const uint32_t u32 = static_cast<uint32_t>(m_Value << 16u);
88  const float* f32 = reinterpret_cast<const float*>(&u32);
89  return *f32;
90  }

◆ Val()

uint16_t Val ( ) const
inline

Definition at line 92 of file BFloat16.hpp.

Referenced by BFloat16::BFloat16(), BOOST_AUTO_TEST_CASE(), armnn::operator<<(), BFloat16::operator=(), and BFloat16::operator==().

93  {
94  return m_Value;
95  }

The documentation for this class was generated from the following file: