From 8832522f47b701f5f042069e7bf8deae9b75d449 Mon Sep 17 00:00:00 2001 From: Narumol Prangnawarat Date: Fri, 6 Mar 2020 14:45:57 +0000 Subject: IVGCVSW-4517 Implement BFloat16 Encoder and Decoder * Add ConvertFloat32ToBFloat16 * Add ConvertBFloat16ToFloat32 * Add BFloat16Encoder * Add BFloat16Decoder * Unit tests Signed-off-by: Narumol Prangnawarat Change-Id: I198888384c923aba28cfbed09a02edc6f8194b3e --- include/armnnUtils/FloatingPointConverter.hpp | 6 ++ src/armnn/test/FloatingPointConverterTest.cpp | 70 +++++++++++++++++++++++ src/armnnUtils/BFloat16.hpp | 2 +- src/armnnUtils/FloatingPointConverter.cpp | 31 ++++++++++ src/backends/reference/workloads/BaseIterator.hpp | 39 +++++++++++++ src/backends/reference/workloads/Decoders.hpp | 4 ++ src/backends/reference/workloads/Encoders.hpp | 4 ++ 7 files changed, 155 insertions(+), 1 deletion(-) diff --git a/include/armnnUtils/FloatingPointConverter.hpp b/include/armnnUtils/FloatingPointConverter.hpp index cf573a2ee8..a2244735dc 100644 --- a/include/armnnUtils/FloatingPointConverter.hpp +++ b/include/armnnUtils/FloatingPointConverter.hpp @@ -18,6 +18,12 @@ public: static void ConvertFloat32To16(const float *srcFloat32Buffer, size_t numElements, void *dstFloat16Buffer); static void ConvertFloat16To32(const void *srcFloat16Buffer, size_t numElements, float *dstFloat32Buffer); + + // Converts a buffer of FP32 values to BFloat16, and stores in the given dstBFloat16Buffer. + static void ConvertFloat32ToBFloat16(const float* srcFloat32Buffer, size_t numElements, void* dstBFloat16Buffer); + + // Converts a buffer of BFloat16 to FP32 value, and stores in the given dstFloat32Buffer. + static void ConvertBFloat16ToFloat32(const void* srcBFloat16Buffer, size_t numElements, float* dstFloat32Buffer); }; } // namespace armnnUtils diff --git a/src/armnn/test/FloatingPointConverterTest.cpp b/src/armnn/test/FloatingPointConverterTest.cpp index 4497ca70a8..4a9e216e70 100644 --- a/src/armnn/test/FloatingPointConverterTest.cpp +++ b/src/armnn/test/FloatingPointConverterTest.cpp @@ -5,6 +5,7 @@ #include +#include #include #include @@ -52,4 +53,73 @@ BOOST_AUTO_TEST_CASE(TestConvertFp16ToFp32) } } +BOOST_AUTO_TEST_CASE(TestConvertFloat32ToBFloat16) +{ + float floatArray[] = { 1.704735E38f, // 0x7F004000 round down + 0.0f, // 0x00000000 round down + 2.2959E-41f, // 0x00004000 round down + 1.7180272E38f, // 0x7F014000 round down + 9.18355E-41f, // 0x00010000 round down + 1.14794E-40f, // 0x00014000 round down + 4.5918E-41f, // 0x00008000 round down + -1.708058E38f, // 0xFF008000 round down + -4.3033756E37f, // 0xFE018000 round up + 1.60712E-40f, // 0x0001C000 round up + -2.0234377f, // 0xC0018001 round up + -1.1800863E-38f,// 0x80808001 round up + 4.843037E-35f, // 0x0680C000 round up + 3.9999998f, // 0x407FFFFF round up + 3.4028235E38f, // 0x7F7FFFFF max positive value + -3.4028235E38f, // 0xFF7FFFFF max negative value + 1.1754942E-38f, // 0x007FFFFF min positive value + -1.1754942E-38f // 0x807FFFFF min negative value + }; + uint16_t expectedResult[] = { 0x7F00, + 0x0000, + 0x0000, + 0x7F01, + 0x0001, + 0x0001, + 0x0000, + 0xFF00, + 0xFE02, + 0x0002, + 0xC002, + 0x8081, + 0x0681, + 0x4080, + 0x7F80, + 0xFF80, + 0x0080, + 0x8080 + }; + size_t numFloats = sizeof(floatArray) / sizeof(floatArray[0]); + + std::vector convertedBuffer(numFloats); + + armnnUtils::FloatingPointConverter::ConvertFloat32ToBFloat16(floatArray, numFloats, convertedBuffer.data()); + + for (size_t i = 0; i < numFloats; i++) + { + armnn::BFloat16 actual = convertedBuffer[i]; + BOOST_CHECK_EQUAL(expectedResult[i], actual.val()); + } +} + +BOOST_AUTO_TEST_CASE(TestConvertBFloat16ToFloat32) +{ + uint16_t bf16Array[] = { 16256, 16320, 38699, 16384, 49156, 32639 }; + size_t numFloats = sizeof(bf16Array) / sizeof(bf16Array[0]); + float expectedResult[] = { 1.0f, 1.5f, -5.525308E-25f, 2.0f, -2.0625f, 3.3895314E38f }; + std::vector convertedBuffer(numFloats, 0.0f); + + armnnUtils::FloatingPointConverter::ConvertBFloat16ToFloat32(bf16Array, numFloats, convertedBuffer.data()); + + for (size_t i = 0; i < numFloats; i++) + { + float actual = convertedBuffer[i]; + BOOST_CHECK_EQUAL(expectedResult[i], actual); + } +} + BOOST_AUTO_TEST_SUITE_END() diff --git a/src/armnnUtils/BFloat16.hpp b/src/armnnUtils/BFloat16.hpp index bb56b7d37c..965fc31c17 100644 --- a/src/armnnUtils/BFloat16.hpp +++ b/src/armnnUtils/BFloat16.hpp @@ -6,7 +6,7 @@ #pragma once #include -#include +#include #include namespace armnn diff --git a/src/armnnUtils/FloatingPointConverter.cpp b/src/armnnUtils/FloatingPointConverter.cpp index 3bdde11eb8..e9b338ac7c 100644 --- a/src/armnnUtils/FloatingPointConverter.cpp +++ b/src/armnnUtils/FloatingPointConverter.cpp @@ -5,6 +5,7 @@ #include +#include "BFloat16.hpp" #include "Half.hpp" #include @@ -42,4 +43,34 @@ void FloatingPointConverter::ConvertFloat16To32(const void* srcFloat16Buffer, } } +void FloatingPointConverter::ConvertFloat32ToBFloat16(const float* srcFloat32Buffer, + size_t numElements, + void* dstBFloat16Buffer) +{ + BOOST_ASSERT(srcFloat32Buffer != nullptr); + BOOST_ASSERT(dstBFloat16Buffer != nullptr); + + armnn::BFloat16* bf16 = reinterpret_cast(dstBFloat16Buffer); + + for (size_t i = 0; i < numElements; i++) + { + bf16[i] = armnn::BFloat16(srcFloat32Buffer[i]); + } +} + +void FloatingPointConverter::ConvertBFloat16ToFloat32(const void* srcBFloat16Buffer, + size_t numElements, + float* dstFloat32Buffer) +{ + BOOST_ASSERT(srcBFloat16Buffer != nullptr); + BOOST_ASSERT(dstFloat32Buffer != nullptr); + + const armnn::BFloat16* bf16 = reinterpret_cast(srcBFloat16Buffer); + + for (size_t i = 0; i < numElements; i++) + { + dstFloat32Buffer[i] = bf16[i].toFloat32(); + } +} + } //namespace armnnUtils diff --git a/src/backends/reference/workloads/BaseIterator.hpp b/src/backends/reference/workloads/BaseIterator.hpp index c48201837b..3f0144670f 100644 --- a/src/backends/reference/workloads/BaseIterator.hpp +++ b/src/backends/reference/workloads/BaseIterator.hpp @@ -194,6 +194,23 @@ private: const int32_t m_Offset; }; +class BFloat16Decoder : public TypedIterator> +{ +public: + BFloat16Decoder(const BFloat16* data) + : TypedIterator(data) {} + + BFloat16Decoder() + : BFloat16Decoder(nullptr) {} + + float Get() const override + { + float val = 0.f; + armnnUtils::FloatingPointConverter::ConvertBFloat16ToFloat32(m_Iterator, 1, &val); + return val; + } +}; + class Float16Decoder : public TypedIterator> { public: @@ -355,6 +372,28 @@ private: const int32_t m_Offset; }; +class BFloat16Encoder : public TypedIterator> +{ +public: + BFloat16Encoder(armnn::BFloat16* data) + : TypedIterator(data) {} + + BFloat16Encoder() + : BFloat16Encoder(nullptr) {} + + void Set(float right) override + { + armnnUtils::FloatingPointConverter::ConvertFloat32ToBFloat16(&right, 1, m_Iterator); + } + + float Get() const override + { + float val = 0.f; + armnnUtils::FloatingPointConverter::ConvertBFloat16ToFloat32(m_Iterator, 1, &val); + return val; + } +}; + class Float16Encoder : public TypedIterator> { public: diff --git a/src/backends/reference/workloads/Decoders.hpp b/src/backends/reference/workloads/Decoders.hpp index 6a8c756048..83c57c1169 100644 --- a/src/backends/reference/workloads/Decoders.hpp +++ b/src/backends/reference/workloads/Decoders.hpp @@ -102,6 +102,10 @@ inline std::unique_ptr> MakeDecoder(const TensorInfo& info, const info.GetQuantizationScale(), info.GetQuantizationOffset()); } + case DataType::BFloat16: + { + return std::make_unique(static_cast(data)); + } case DataType::Float16: { return std::make_unique(static_cast(data)); diff --git a/src/backends/reference/workloads/Encoders.hpp b/src/backends/reference/workloads/Encoders.hpp index f52297602f..e93987da31 100644 --- a/src/backends/reference/workloads/Encoders.hpp +++ b/src/backends/reference/workloads/Encoders.hpp @@ -75,6 +75,10 @@ inline std::unique_ptr> MakeEncoder(const TensorInfo& info, void* { return std::make_unique(static_cast(data)); } + case armnn::DataType::BFloat16: + { + return std::make_unique(static_cast(data)); + } case armnn::DataType::Float16: { return std::make_unique(static_cast(data)); -- cgit v1.2.1