diff options
author | Narumol Prangnawarat <narumol.prangnawarat@arm.com> | 2020-03-06 14:45:57 +0000 |
---|---|---|
committer | Jan Eilers <jan.eilers@arm.com> | 2020-03-09 16:13:56 +0000 |
commit | 8832522f47b701f5f042069e7bf8deae9b75d449 (patch) | |
tree | f217ab7fbda860a947eba88c9508eb1ac1b1d670 /src/armnnUtils | |
parent | 97da5e2e6c8aaaf4249af60e8305431315226f15 (diff) | |
download | armnn-8832522f47b701f5f042069e7bf8deae9b75d449.tar.gz |
IVGCVSW-4517 Implement BFloat16 Encoder and Decoder
* Add ConvertFloat32ToBFloat16
* Add ConvertBFloat16ToFloat32
* Add BFloat16Encoder
* Add BFloat16Decoder
* Unit tests
Signed-off-by: Narumol Prangnawarat <narumol.prangnawarat@arm.com>
Change-Id: I198888384c923aba28cfbed09a02edc6f8194b3e
Diffstat (limited to 'src/armnnUtils')
-rw-r--r-- | src/armnnUtils/BFloat16.hpp | 2 | ||||
-rw-r--r-- | src/armnnUtils/FloatingPointConverter.cpp | 31 |
2 files changed, 32 insertions, 1 deletions
diff --git a/src/armnnUtils/BFloat16.hpp b/src/armnnUtils/BFloat16.hpp
index bb56b7d37c..965fc31c17 100644
--- a/src/armnnUtils/BFloat16.hpp
+++ b/src/armnnUtils/BFloat16.hpp
@@ -6,7 +6,7 @@
 #pragma once
 
 #include <ostream>
-#include <math.h>
+#include <cmath>
 #include <stdint.h>
 
 namespace armnn
diff --git a/src/armnnUtils/FloatingPointConverter.cpp b/src/armnnUtils/FloatingPointConverter.cpp
index 3bdde11eb8..e9b338ac7c 100644
--- a/src/armnnUtils/FloatingPointConverter.cpp
+++ b/src/armnnUtils/FloatingPointConverter.cpp
@@ -5,6 +5,7 @@
 
 #include <armnnUtils/FloatingPointConverter.hpp>
 
+#include "BFloat16.hpp"
 #include "Half.hpp"
 
 #include <boost/assert.hpp>
@@ -42,4 +43,34 @@ void FloatingPointConverter::ConvertFloat16To32(const void* srcFloat16Buffer,
     }
 }
 
+void FloatingPointConverter::ConvertFloat32ToBFloat16(const float* srcFloat32Buffer,
+                                                      size_t numElements,
+                                                      void* dstBFloat16Buffer)
+{
+    BOOST_ASSERT(srcFloat32Buffer != nullptr);
+    BOOST_ASSERT(dstBFloat16Buffer != nullptr);
+
+    armnn::BFloat16* bf16 = reinterpret_cast<armnn::BFloat16*>(dstBFloat16Buffer);
+
+    for (size_t i = 0; i < numElements; i++)
+    {
+        bf16[i] = armnn::BFloat16(srcFloat32Buffer[i]);
+    }
+}
+
+void FloatingPointConverter::ConvertBFloat16ToFloat32(const void* srcBFloat16Buffer,
+                                                      size_t numElements,
+                                                      float* dstFloat32Buffer)
+{
+    BOOST_ASSERT(srcBFloat16Buffer != nullptr);
+    BOOST_ASSERT(dstFloat32Buffer != nullptr);
+
+    const armnn::BFloat16* bf16 = reinterpret_cast<const armnn::BFloat16*>(srcBFloat16Buffer);
+
+    for (size_t i = 0; i < numElements; i++)
+    {
+        dstFloat32Buffer[i] = bf16[i].toFloat32();
+    }
+}
+
 } //namespace armnnUtils