aboutsummaryrefslogtreecommitdiff
path: root/src/armnn
diff options
context:
space:
mode:
author Narumol Prangnawarat <narumol.prangnawarat@arm.com> 2020-03-06 14:45:57 +0000
committer Jan Eilers <jan.eilers@arm.com> 2020-03-09 16:13:56 +0000
commit 8832522f47b701f5f042069e7bf8deae9b75d449 (patch)
tree f217ab7fbda860a947eba88c9508eb1ac1b1d670 /src/armnn
parent 97da5e2e6c8aaaf4249af60e8305431315226f15 (diff)
download armnn-8832522f47b701f5f042069e7bf8deae9b75d449.tar.gz
IVGCVSW-4517 Implement BFloat16 Encoder and Decoder
* Add ConvertFloat32ToBFloat16
* Add ConvertBFloat16ToFloat32
* Add BFloat16Encoder
* Add BFloat16Decoder
* Unit tests

Signed-off-by: Narumol Prangnawarat <narumol.prangnawarat@arm.com>
Change-Id: I198888384c923aba28cfbed09a02edc6f8194b3e
Diffstat (limited to 'src/armnn')
-rw-r--r-- src/armnn/test/FloatingPointConverterTest.cpp 70
1 files changed, 70 insertions, 0 deletions
diff --git a/src/armnn/test/FloatingPointConverterTest.cpp b/src/armnn/test/FloatingPointConverterTest.cpp
index 4497ca70a8..4a9e216e70 100644
--- a/src/armnn/test/FloatingPointConverterTest.cpp
+++ b/src/armnn/test/FloatingPointConverterTest.cpp
@@ -5,6 +5,7 @@
#include <armnnUtils/FloatingPointConverter.hpp>
+#include <BFloat16.hpp>
#include <Half.hpp>
#include <boost/test/unit_test.hpp>
@@ -52,4 +53,73 @@ BOOST_AUTO_TEST_CASE(TestConvertFp16ToFp32)
}
}
+BOOST_AUTO_TEST_CASE(TestConvertFloat32ToBFloat16) // Converts a table of floats to BFloat16 and compares each result with a hand-written 16-bit pattern.
+{
+ float floatArray[] = { 1.704735E38f, // 0x7F004000 round down (low 16 bits 0x4000 are below the 0x8000 midpoint)
+ 0.0f, // 0x00000000 exact, low 16 bits are zero so nothing is rounded
+ 2.2959E-41f, // 0x00004000 round down
+ 1.7180272E38f, // 0x7F014000 round down
+ 9.18355E-41f, // 0x00010000 exact, low 16 bits are zero so nothing is rounded
+ 1.14794E-40f, // 0x00014000 round down
+ 4.5918E-41f, // 0x00008000 exact tie, expected to stay at the even pattern 0x0000
+ -1.708058E38f, // 0xFF008000 exact tie, expected to stay at the even pattern 0xFF00
+ -4.3033756E37f, // 0xFE018000 exact tie, expected to round up to the even pattern 0xFE02
+ 1.60712E-40f, // 0x0001C000 round up
+ -2.0234377f, // 0xC0018001 round up (just above the midpoint)
+ -1.1800863E-38f,// 0x80808001 round up (just above the midpoint)
+ 4.843037E-35f, // 0x0680C000 round up
+ 3.9999998f, // 0x407FFFFF round up, the carry propagates into the exponent bits (0x4080)
+ 3.4028235E38f, // 0x7F7FFFFF largest finite positive float, expected 0x7F80 (all exponent bits set)
+ -3.4028235E38f, // 0xFF7FFFFF most negative finite float, expected 0xFF80 (all exponent bits set)
+ 1.1754942E-38f, // 0x007FFFFF largest positive subnormal float, expected to round up to 0x0080
+ -1.1754942E-38f // 0x807FFFFF negative counterpart of the line above, expected 0x8080
+ };
+ uint16_t expectedResult[] = { 0x7F00, // expected 16-bit patterns, index-aligned with floatArray
+ 0x0000,
+ 0x0000,
+ 0x7F01,
+ 0x0001,
+ 0x0001,
+ 0x0000,
+ 0xFF00,
+ 0xFE02,
+ 0x0002,
+ 0xC002,
+ 0x8081,
+ 0x0681,
+ 0x4080,
+ 0x7F80,
+ 0xFF80,
+ 0x0080,
+ 0x8080
+ };
+ size_t numFloats = sizeof(floatArray) / sizeof(floatArray[0]); // element count of floatArray; expectedResult is indexed with the same bound
+
+ std::vector<armnn::BFloat16> convertedBuffer(numFloats); // destination, one BFloat16 per input float
+
+ armnnUtils::FloatingPointConverter::ConvertFloat32ToBFloat16(floatArray, numFloats, convertedBuffer.data());
+
+ for (size_t i = 0; i < numFloats; i++)
+ {
+ armnn::BFloat16 actual = convertedBuffer[i];
+ BOOST_CHECK_EQUAL(expectedResult[i], actual.val()); // compares the value returned by val() with the expected uint16_t pattern
+ }
+}
+
+BOOST_AUTO_TEST_CASE(TestConvertBFloat16ToFloat32) // Converts a table of 16-bit patterns to float and compares each result with an expected float.
+{
+ uint16_t bf16Array[] = { 16256, 16320, 38699, 16384, 49156, 32639 }; // in hex: 0x3F80, 0x3FC0, 0x972B, 0x4000, 0xC004, 0x7F7F
+ size_t numFloats = sizeof(bf16Array) / sizeof(bf16Array[0]); // element count of bf16Array; expectedResult is indexed with the same bound
+ float expectedResult[] = { 1.0f, 1.5f, -5.525308E-25f, 2.0f, -2.0625f, 3.3895314E38f }; // index-aligned with bf16Array
+ std::vector<float> convertedBuffer(numFloats, 0.0f); // destination, pre-filled with 0.0f
+
+ armnnUtils::FloatingPointConverter::ConvertBFloat16ToFloat32(bf16Array, numFloats, convertedBuffer.data());
+
+ for (size_t i = 0; i < numFloats; i++)
+ {
+ float actual = convertedBuffer[i];
+ BOOST_CHECK_EQUAL(expectedResult[i], actual); // exact float equality; NOTE(review): presumably safe because widening 16 bits to 32 loses nothing - confirm
+ }
+}
+
BOOST_AUTO_TEST_SUITE_END()