ArmNN
 21.11
FloatingPointConverterTest.cpp File Reference
#include <armnnUtils/FloatingPointConverter.hpp>
#include <BFloat16.hpp>
#include <Half.hpp>
#include <vector>
#include <doctest/doctest.h>

Go to the source code of this file.

Functions

 TEST_SUITE ("TestFPConversion")
 

Function Documentation

◆ TEST_SUITE()

TEST_SUITE ( "TestFPConversion"  )

Definition at line 15 of file FloatingPointConverterTest.cpp.

References FloatingPointConverter::ConvertBFloat16ToFloat32(), FloatingPointConverter::ConvertFloat16To32(), FloatingPointConverter::ConvertFloat32To16(), FloatingPointConverter::ConvertFloat32ToBFloat16(), and BFloat16::Val().

16 {
// Checks FloatingPointConverter::ConvertFloat32To16 on a fixed buffer of ten FP32 values.
// Each converted element must (a) compare equal to an armnn::Half constructed directly
// from the same float, and (b) convert back to a float that matches the original input
// within doctest::Approx's epsilon of 0.07.
17 TEST_CASE("TestConvertFp32ToFp16")
18 {
19  using namespace half_float::literal; // provides the _h literal suffix used below
20 
21  float floatArray[] = { 1.0f, 2.0f, 0.5f, 3.1f, 2.4f,
22  5.666f, 6.444f, 7.1f, 432.121f, 12.22f };
23  size_t numFloats = sizeof(floatArray) / sizeof(floatArray[0]); // element count of the fixed array
24  std::vector<armnn::Half> convertedBuffer(numFloats, 0.0_h); // destination, pre-filled with half zero
25 
26  armnnUtils::FloatingPointConverter::ConvertFloat32To16(floatArray, numFloats, convertedBuffer.data());
27 
28  for (size_t i = 0; i < numFloats; i++)
29  {
   // Reference value: the half produced by armnn::Half's own constructor from the float.
30  armnn::Half expected(floatArray[i]);
31  armnn::Half actual = convertedBuffer[i];
32  CHECK_EQ(expected, actual);
33 
   // Round-trip check: FP16 loses precision, so compare approximately rather than exactly.
34  float convertedHalf = actual;
35  CHECK_EQ(floatArray[i], doctest::Approx(convertedHalf).epsilon(0.07));
36  }
37 }
38 
// Checks FloatingPointConverter::ConvertFloat16To32 on a fixed buffer of ten half values.
// Each converted float must compare exactly equal to the float obtained by converting
// the same armnn::Half element directly (float expected(halfArray[i])).
39 TEST_CASE("TestConvertFp16ToFp32")
40 {
41  using namespace half_float::literal; // provides the _h literal suffix used below
42 
43  armnn::Half halfArray[] = { 1.0_h, 2.0_h, 0.5_h, 3.1_h, 2.4_h,
44  5.666_h, 6.444_h, 7.1_h, 432.121_h, 12.22_h };
45  size_t numFloats = sizeof(halfArray) / sizeof(halfArray[0]); // element count of the fixed array
46  std::vector<float> convertedBuffer(numFloats, 0.0f); // destination, pre-filled with 0.0f
47 
48  armnnUtils::FloatingPointConverter::ConvertFloat16To32(halfArray, numFloats, convertedBuffer.data());
49 
50  for (size_t i = 0; i < numFloats; i++)
51  {
   // Exact comparison: the reference is the half's own conversion to float.
52  float expected(halfArray[i]);
53  float actual = convertedBuffer[i];
54  CHECK_EQ(expected, actual);
55  }
56 }
57 
// Checks FloatingPointConverter::ConvertFloat32ToBFloat16 against hand-computed 16-bit results.
// Each input is annotated with its FP32 bit pattern and the expected rounding direction;
// expectedResult[i] is the raw 16-bit value that BFloat16::Val() must return for input i.
// In these vectors the result is the upper 16 bits of the FP32 pattern, adjusted by the
// discarded lower 16 bits: below 0x8000 rounds down, above rounds up, and the exact
// halfway cases (lower half == 0x8000) land on the even result
// (0x00008000 -> 0x0000, 0xFF008000 -> 0xFF00, 0xFE018000 -> 0xFE02).
// Entries whose lower 16 bits are already zero (0.0f, 0x00010000) are exact.
// NOTE(review): std::numeric_limits is used here; confirm <limits> is reachable via the
// listed includes.
58 TEST_CASE("TestConvertFloat32ToBFloat16")
59 {
60  float floatArray[] = { 1.704735E38f, // 0x7F004000 round down
61  0.0f, // 0x00000000 round down
62  2.2959E-41f, // 0x00004000 round down
63  1.7180272E38f, // 0x7F014000 round down
64  9.18355E-41f, // 0x00010000 round down
65  1.14794E-40f, // 0x00014000 round down
66  4.5918E-41f, // 0x00008000 round down
67  -1.708058E38f, // 0xFF008000 round down
68  -4.3033756E37f, // 0xFE018000 round up
69  1.60712E-40f, // 0x0001C000 round up
70  -2.0234377f, // 0xC0018001 round up
71  -1.1800863E-38f,// 0x80808001 round up
72  4.843037E-35f, // 0x0680C000 round up
73  3.9999998f, // 0x407FFFFF round up
74  std::numeric_limits<float>::max(), // 0x7F7FFFFF max positive finite value
75  std::numeric_limits<float>::lowest(), // 0xFF7FFFFF lowest (most negative) finite value
76  1.1754942E-38f, // 0x007FFFFF largest positive subnormal (just below the smallest normal)
77  -1.1754942E-38f // 0x807FFFFF largest-magnitude negative subnormal
78  };
   // Expected raw bfloat16 bit patterns, in the same order as floatArray.
79  uint16_t expectedResult[] = { 0x7F00,
80  0x0000,
81  0x0000,
82  0x7F01,
83  0x0001,
84  0x0001,
85  0x0000,
86  0xFF00,
87  0xFE02,
88  0x0002,
89  0xC002,
90  0x8081,
91  0x0681,
92  0x4080,
93  0x7F80,
94  0xFF80,
95  0x0080,
96  0x8080
97  };
98  size_t numFloats = sizeof(floatArray) / sizeof(floatArray[0]); // element count of the input array
99 
100  std::vector<armnn::BFloat16> convertedBuffer(numFloats);
101 
102  armnnUtils::FloatingPointConverter::ConvertFloat32ToBFloat16(floatArray, numFloats, convertedBuffer.data());
103 
104  for (size_t i = 0; i < numFloats; i++)
105  {
   // Compare raw 16-bit encodings rather than floating-point values.
106  armnn::BFloat16 actual = convertedBuffer[i];
107  CHECK_EQ(expectedResult[i], actual.Val());
108  }
109 }
110 
// Checks FloatingPointConverter::ConvertBFloat16ToFloat32 on six raw 16-bit bfloat16 patterns.
// The inputs are given as decimal uint16_t values and passed as an untyped source buffer;
// each converted float must compare exactly equal to the matching expectedResult entry.
// In hex the inputs are 0x3F80, 0x3FC0, 0x972B, 0x4000, 0xC004, 0x7F7F; the expected floats
// match those 16 bits placed in the upper half of an FP32 pattern with the lower half zero.
111 TEST_CASE("TestConvertBFloat16ToFloat32")
112 {
113  uint16_t bf16Array[] = { 16256, 16320, 38699, 16384, 49156, 32639 };
114  size_t numFloats = sizeof(bf16Array) / sizeof(bf16Array[0]); // element count of the input array
115  float expectedResult[] = { 1.0f, 1.5f, -5.525308E-25f, 2.0f, -2.0625f, 3.3895314E38f };
116  std::vector<float> convertedBuffer(numFloats, 0.0f); // destination, pre-filled with 0.0f
117 
118  armnnUtils::FloatingPointConverter::ConvertBFloat16ToFloat32(bf16Array, numFloats, convertedBuffer.data());
119 
120  for (size_t i = 0; i < numFloats; i++)
121  {
   // Exact comparison: widening bfloat16 to FP32 is expected to be lossless here.
122  float actual = convertedBuffer[i];
123  CHECK_EQ(expectedResult[i], actual);
124  }
125 }
126 
127 }
static void ConvertBFloat16ToFloat32(const void *srcBFloat16Buffer, size_t numElements, float *dstFloat32Buffer)
static void ConvertFloat32To16(const float *srcFloat32Buffer, size_t numElements, void *dstFloat16Buffer)
Converts a buffer of FP32 values to FP16, and stores in the given dstFloat16Buffer.
uint16_t Val() const
Definition: BFloat16.hpp:95
static void ConvertFloat16To32(const void *srcFloat16Buffer, size_t numElements, float *dstFloat32Buffer)
static void ConvertFloat32ToBFloat16(const float *srcFloat32Buffer, size_t numElements, void *dstBFloat16Buffer)
half_float::half Half
Definition: Half.hpp:18