diff options
author | Ryan OShea <ryan.oshea3@arm.com> | 2022-11-07 16:20:48 +0000 |
---|---|---|
committer | ryan.oshea3 <ryan.oshea3@arm.com> | 2022-11-16 15:22:50 +0000 |
commit | 31441595009182c985dacbedc70c41ee6664d070 (patch) | |
tree | 248a85295aeff4022c9b395fc97748b0a0aa6b35 /src/armnn/test/FloatingPointConverterTest.cpp | |
parent | bd18eab07a8f30492de1e462b1815189014cb8d5 (diff) | |
download | armnn-31441595009182c985dacbedc70c41ee6664d070.tar.gz |
IVGCVSW-7214 Disable BF16-Turbo-Mode and remove conversion layers
- Remove Bf16ToFp32 Conversion Layer
- Remove Fp32ToBf16 Conversion Layer
- Remove BF16 Conversion tests
* Throw exception if m_ReduceFp32ToBf16 optimizer option is set to true
* Provide comments to enable fast math in order to use bf16
* Update docs to inform users to enable fast math for bf16
Execute Network Changes
* Require bf16_turbo_mode to also have fast_math_enabled set to true
- Remove setting m_ReduceFp32ToBf16 optimizer option
Signed-off-by: Ryan OShea <ryan.oshea3@arm.com>
Change-Id: Ibaa6da9d29c96a1ce32ff5196b0847fde9f04a1c
Diffstat (limited to 'src/armnn/test/FloatingPointConverterTest.cpp')
-rw-r--r-- | src/armnn/test/FloatingPointConverterTest.cpp | 70 |
1 files changed, 0 insertions, 70 deletions
diff --git a/src/armnn/test/FloatingPointConverterTest.cpp b/src/armnn/test/FloatingPointConverterTest.cpp index 21a16a3cc0..81384cefae 100644 --- a/src/armnn/test/FloatingPointConverterTest.cpp +++ b/src/armnn/test/FloatingPointConverterTest.cpp @@ -5,7 +5,6 @@ #include <armnnUtils/FloatingPointConverter.hpp> -#include <BFloat16.hpp> #include <Half.hpp> #include <vector> @@ -55,73 +54,4 @@ TEST_CASE("TestConvertFp16ToFp32") } } -TEST_CASE("TestConvertFloat32ToBFloat16") -{ - float floatArray[] = { 1.704735E38f, // 0x7F004000 round down - 0.0f, // 0x00000000 round down - 2.2959E-41f, // 0x00004000 round down - 1.7180272E38f, // 0x7F014000 round down - 9.18355E-41f, // 0x00010000 round down - 1.14794E-40f, // 0x00014000 round down - 4.5918E-41f, // 0x00008000 round down - -1.708058E38f, // 0xFF008000 round down - -4.3033756E37f, // 0xFE018000 round up - 1.60712E-40f, // 0x0001C000 round up - -2.0234377f, // 0xC0018001 round up - -1.1800863E-38f,// 0x80808001 round up - 4.843037E-35f, // 0x0680C000 round up - 3.9999998f, // 0x407FFFFF round up - std::numeric_limits<float>::max(), // 0x7F7FFFFF max positive value - std::numeric_limits<float>::lowest(), // 0xFF7FFFFF max negative value - 1.1754942E-38f, // 0x007FFFFF min positive value - -1.1754942E-38f // 0x807FFFFF min negative value - }; - uint16_t expectedResult[] = { 0x7F00, - 0x0000, - 0x0000, - 0x7F01, - 0x0001, - 0x0001, - 0x0000, - 0xFF00, - 0xFE02, - 0x0002, - 0xC002, - 0x8081, - 0x0681, - 0x4080, - 0x7F80, - 0xFF80, - 0x0080, - 0x8080 - }; - size_t numFloats = sizeof(floatArray) / sizeof(floatArray[0]); - - std::vector<armnn::BFloat16> convertedBuffer(numFloats); - - armnnUtils::FloatingPointConverter::ConvertFloat32ToBFloat16(floatArray, numFloats, convertedBuffer.data()); - - for (size_t i = 0; i < numFloats; i++) - { - armnn::BFloat16 actual = convertedBuffer[i]; - CHECK_EQ(expectedResult[i], actual.Val()); - } -} - -TEST_CASE("TestConvertBFloat16ToFloat32") -{ - uint16_t bf16Array[] = { 16256, 16320, 
38699, 16384, 49156, 32639 }; - size_t numFloats = sizeof(bf16Array) / sizeof(bf16Array[0]); - float expectedResult[] = { 1.0f, 1.5f, -5.525308E-25f, 2.0f, -2.0625f, 3.3895314E38f }; - std::vector<float> convertedBuffer(numFloats, 0.0f); - - armnnUtils::FloatingPointConverter::ConvertBFloat16ToFloat32(bf16Array, numFloats, convertedBuffer.data()); - - for (size_t i = 0; i < numFloats; i++) - { - float actual = convertedBuffer[i]; - CHECK_EQ(expectedResult[i], actual); - } -} - } |