IVGCVSW-7214 Disable BF16-Turbo-Mode and remove conversion layers

- Remove Bf16ToFp32 Conversion Layer
- Remove Fp32ToBf16 Conversion Layer
- Remove BF16 Conversion tests
* Throw exception if m_ReduceFp32ToBf16 optimizer option is set to true
* Provide comments to enable fast math in order to use bf16
* Update docs to inform users to enable fast math for bf16

Execute Network Changes
* Require bf16_turbo_mode to also have fast_math_enabled set to true
- Remove setting m_ReduceFp32ToBf16 optimizer option

Signed-off-by: Ryan OShea <ryan.oshea3@arm.com>
Change-Id: Ibaa6da9d29c96a1ce32ff5196b0847fde9f04a1c
author: Ryan OShea <ryan.oshea3@arm.com> 2022-11-07 16:20:48 +0000
committer: ryan.oshea3 <ryan.oshea3@arm.com> 2022-11-16 15:22:50 +0000
commit: 31441595009182c985dacbedc70c41ee6664d070 (patch)
tree: 248a85295aeff4022c9b395fc97748b0a0aa6b35 /src/armnn/test/FloatingPointConverterTest.cpp
parent: bd18eab07a8f30492de1e462b1815189014cb8d5 (diff)
download: armnn-31441595009182c985dacbedc70c41ee6664d070.tar.gz
1 files changed, 0 insertions, 70 deletions
diff --git a/src/armnn/test/FloatingPointConverterTest.cpp b/src/armnn/test/FloatingPointConverterTest.cpp
index 21a16a3cc0..81384cefae 100644
--- a/src/armnn/test/FloatingPointConverterTest.cpp
+++ b/src/armnn/test/FloatingPointConverterTest.cpp
@@ -5,7 +5,6 @@
 
 #include <armnnUtils/FloatingPointConverter.hpp>
 
-#include <BFloat16.hpp>
 #include <Half.hpp>
 
 #include <vector>
@@ -55,73 +54,4 @@ TEST_CASE("TestConvertFp16ToFp32")
     }
 }
 
-TEST_CASE("TestConvertFloat32ToBFloat16")
-{
-    float floatArray[] = { 1.704735E38f,   // 0x7F004000 round down
-                           0.0f,           // 0x00000000 round down
-                           2.2959E-41f,    // 0x00004000 round down
-                           1.7180272E38f,  // 0x7F014000 round down
-                           9.18355E-41f,   // 0x00010000 round down
-                           1.14794E-40f,   // 0x00014000 round down
-                           4.5918E-41f,    // 0x00008000 round down
-                           -1.708058E38f,  // 0xFF008000 round down
-                           -4.3033756E37f, // 0xFE018000 round up
-                           1.60712E-40f,   // 0x0001C000 round up
-                           -2.0234377f,    // 0xC0018001 round up
-                           -1.1800863E-38f,// 0x80808001 round up
-                           4.843037E-35f,  // 0x0680C000 round up
-                           3.9999998f,     // 0x407FFFFF round up
-                           std::numeric_limits<float>::max(),    // 0x7F7FFFFF max positive value
-                           std::numeric_limits<float>::lowest(), // 0xFF7FFFFF max negative value
-                           1.1754942E-38f, // 0x007FFFFF min positive value
-                           -1.1754942E-38f // 0x807FFFFF min negative value
-                          };
-    uint16_t expectedResult[] = { 0x7F00,
-                                  0x0000,
-                                  0x0000,
-                                  0x7F01,
-                                  0x0001,
-                                  0x0001,
-                                  0x0000,
-                                  0xFF00,
-                                  0xFE02,
-                                  0x0002,
-                                  0xC002,
-                                  0x8081,
-                                  0x0681,
-                                  0x4080,
-                                  0x7F80,
-                                  0xFF80,
-                                  0x0080,
-                                  0x8080
-                                 };
-    size_t numFloats = sizeof(floatArray) / sizeof(floatArray[0]);
-
-    std::vector<armnn::BFloat16> convertedBuffer(numFloats);
-
-    armnnUtils::FloatingPointConverter::ConvertFloat32ToBFloat16(floatArray, numFloats, convertedBuffer.data());
-
-    for (size_t i = 0; i < numFloats; i++)
-    {
-        armnn::BFloat16 actual = convertedBuffer[i];
-        CHECK_EQ(expectedResult[i], actual.Val());
-    }
-}
-
-TEST_CASE("TestConvertBFloat16ToFloat32")
-{
-    uint16_t bf16Array[] = { 16256, 16320, 38699, 16384, 49156, 32639 };
-    size_t numFloats = sizeof(bf16Array) / sizeof(bf16Array[0]);
-    float expectedResult[] = { 1.0f, 1.5f, -5.525308E-25f, 2.0f, -2.0625f, 3.3895314E38f };
-    std::vector<float> convertedBuffer(numFloats, 0.0f);
-
-    armnnUtils::FloatingPointConverter::ConvertBFloat16ToFloat32(bf16Array, numFloats, convertedBuffer.data());
-
-    for (size_t i = 0; i < numFloats; i++)
-    {
-        float actual = convertedBuffer[i];
-        CHECK_EQ(expectedResult[i], actual);
-    }
-}
-
 }
author	Ryan OShea <ryan.oshea3@arm.com>	2022-11-07 16:20:48 +0000
committer	ryan.oshea3 <ryan.oshea3@arm.com>	2022-11-16 15:22:50 +0000
commit	31441595009182c985dacbedc70c41ee6664d070 (patch)
tree	248a85295aeff4022c9b395fc97748b0a0aa6b35 /src/armnn/test/FloatingPointConverterTest.cpp
parent	bd18eab07a8f30492de1e462b1815189014cb8d5 (diff)
download	armnn-31441595009182c985dacbedc70c41ee6664d070.tar.gz