aboutsummaryrefslogtreecommitdiff
path: root/src/armnn/test/FloatingPointConverterTest.cpp
diff options
context:
space:
mode:
author: Ryan OShea <ryan.oshea3@arm.com> 2022-11-07 16:20:48 +0000
committer: ryan.oshea3 <ryan.oshea3@arm.com> 2022-11-16 15:22:50 +0000
commit: 31441595009182c985dacbedc70c41ee6664d070 (patch)
tree: 248a85295aeff4022c9b395fc97748b0a0aa6b35 /src/armnn/test/FloatingPointConverterTest.cpp
parent: bd18eab07a8f30492de1e462b1815189014cb8d5 (diff)
download: armnn-31441595009182c985dacbedc70c41ee6664d070.tar.gz
IVGCVSW-7214 Disable BF16-Turbo-Mode and remove conversion layers
 - Remove Bf16ToFp32 Conversion Layer
 - Remove Fp32ToBf16 Conversion Layer
 - Remove Bf16 Conversion tests
 * Throw exception if m_ReduceFp32ToBf16 optimizer option is set to true
 * Provide comments to enable fast math in order to use bf16
 * Update docs to inform users to enable fast math for bf16

Execute Network Changes
 * Require bf16_turbo_mode to also have fast_math_enabled set to true
 - Remove setting m_ReduceFp32ToBf16 optimizer option

Signed-off-by: Ryan OShea <ryan.oshea3@arm.com>
Change-Id: Ibaa6da9d29c96a1ce32ff5196b0847fde9f04a1c
Diffstat (limited to 'src/armnn/test/FloatingPointConverterTest.cpp')
-rw-r--r-- src/armnn/test/FloatingPointConverterTest.cpp 70
1 files changed, 0 insertions, 70 deletions
diff --git a/src/armnn/test/FloatingPointConverterTest.cpp b/src/armnn/test/FloatingPointConverterTest.cpp
index 21a16a3cc0..81384cefae 100644
--- a/src/armnn/test/FloatingPointConverterTest.cpp
+++ b/src/armnn/test/FloatingPointConverterTest.cpp
@@ -5,7 +5,6 @@
#include <armnnUtils/FloatingPointConverter.hpp>
-#include <BFloat16.hpp>
#include <Half.hpp>
#include <vector>
@@ -55,73 +54,4 @@ TEST_CASE("TestConvertFp16ToFp32")
}
}
-TEST_CASE("TestConvertFloat32ToBFloat16")
-{
- float floatArray[] = { 1.704735E38f, // 0x7F004000 round down
- 0.0f, // 0x00000000 round down
- 2.2959E-41f, // 0x00004000 round down
- 1.7180272E38f, // 0x7F014000 round down
- 9.18355E-41f, // 0x00010000 round down
- 1.14794E-40f, // 0x00014000 round down
- 4.5918E-41f, // 0x00008000 round down
- -1.708058E38f, // 0xFF008000 round down
- -4.3033756E37f, // 0xFE018000 round up
- 1.60712E-40f, // 0x0001C000 round up
- -2.0234377f, // 0xC0018001 round up
- -1.1800863E-38f,// 0x80808001 round up
- 4.843037E-35f, // 0x0680C000 round up
- 3.9999998f, // 0x407FFFFF round up
- std::numeric_limits<float>::max(), // 0x7F7FFFFF max positive value
- std::numeric_limits<float>::lowest(), // 0xFF7FFFFF max negative value
- 1.1754942E-38f, // 0x007FFFFF min positive value
- -1.1754942E-38f // 0x807FFFFF min negative value
- };
- uint16_t expectedResult[] = { 0x7F00,
- 0x0000,
- 0x0000,
- 0x7F01,
- 0x0001,
- 0x0001,
- 0x0000,
- 0xFF00,
- 0xFE02,
- 0x0002,
- 0xC002,
- 0x8081,
- 0x0681,
- 0x4080,
- 0x7F80,
- 0xFF80,
- 0x0080,
- 0x8080
- };
- size_t numFloats = sizeof(floatArray) / sizeof(floatArray[0]);
-
- std::vector<armnn::BFloat16> convertedBuffer(numFloats);
-
- armnnUtils::FloatingPointConverter::ConvertFloat32ToBFloat16(floatArray, numFloats, convertedBuffer.data());
-
- for (size_t i = 0; i < numFloats; i++)
- {
- armnn::BFloat16 actual = convertedBuffer[i];
- CHECK_EQ(expectedResult[i], actual.Val());
- }
-}
-
-TEST_CASE("TestConvertBFloat16ToFloat32")
-{
- uint16_t bf16Array[] = { 16256, 16320, 38699, 16384, 49156, 32639 };
- size_t numFloats = sizeof(bf16Array) / sizeof(bf16Array[0]);
- float expectedResult[] = { 1.0f, 1.5f, -5.525308E-25f, 2.0f, -2.0625f, 3.3895314E38f };
- std::vector<float> convertedBuffer(numFloats, 0.0f);
-
- armnnUtils::FloatingPointConverter::ConvertBFloat16ToFloat32(bf16Array, numFloats, convertedBuffer.data());
-
- for (size_t i = 0; i < numFloats; i++)
- {
- float actual = convertedBuffer[i];
- CHECK_EQ(expectedResult[i], actual);
- }
-}
-
}