From 8832522f47b701f5f042069e7bf8deae9b75d449 Mon Sep 17 00:00:00 2001
From: Narumol Prangnawarat
Date: Fri, 6 Mar 2020 14:45:57 +0000
Subject: IVGCVSW-4517 Implement BFloat16 Encoder and Decoder

 * Add ConvertFloat32ToBFloat16
 * Add ConvertBFloat16ToFloat32
 * Add BFloat16Encoder
 * Add BFloat16Decoder
 * Unit tests

Signed-off-by: Narumol Prangnawarat
Change-Id: I198888384c923aba28cfbed09a02edc6f8194b3e
---
 src/armnnUtils/BFloat16.hpp               |  2 +-
 src/armnnUtils/FloatingPointConverter.cpp | 31 +++++++++++++++++++++++++++++++
 2 files changed, 32 insertions(+), 1 deletion(-)

(limited to 'src/armnnUtils')

diff --git a/src/armnnUtils/BFloat16.hpp b/src/armnnUtils/BFloat16.hpp
index bb56b7d37c..965fc31c17 100644
--- a/src/armnnUtils/BFloat16.hpp
+++ b/src/armnnUtils/BFloat16.hpp
@@ -6,7 +6,7 @@
 #pragma once
 
 #include
-#include
+#include
 #include
 
 namespace armnn
diff --git a/src/armnnUtils/FloatingPointConverter.cpp b/src/armnnUtils/FloatingPointConverter.cpp
index 3bdde11eb8..e9b338ac7c 100644
--- a/src/armnnUtils/FloatingPointConverter.cpp
+++ b/src/armnnUtils/FloatingPointConverter.cpp
@@ -5,6 +5,7 @@
 
 #include
 
+#include "BFloat16.hpp"
 #include "Half.hpp"
 
 #include
@@ -42,4 +43,34 @@ void FloatingPointConverter::ConvertFloat16To32(const void* srcFloat16Buffer,
     }
 }
 
+void FloatingPointConverter::ConvertFloat32ToBFloat16(const float* srcFloat32Buffer,
+                                                      size_t numElements,
+                                                      void* dstBFloat16Buffer)
+{
+    BOOST_ASSERT(srcFloat32Buffer != nullptr);
+    BOOST_ASSERT(dstBFloat16Buffer != nullptr);
+
+    armnn::BFloat16* bf16 = reinterpret_cast<armnn::BFloat16*>(dstBFloat16Buffer);
+
+    for (size_t i = 0; i < numElements; i++)
+    {
+        bf16[i] = armnn::BFloat16(srcFloat32Buffer[i]);
+    }
+}
+
+void FloatingPointConverter::ConvertBFloat16ToFloat32(const void* srcBFloat16Buffer,
+                                                      size_t numElements,
+                                                      float* dstFloat32Buffer)
+{
+    BOOST_ASSERT(srcBFloat16Buffer != nullptr);
+    BOOST_ASSERT(dstFloat32Buffer != nullptr);
+
+    const armnn::BFloat16* bf16 = reinterpret_cast<const armnn::BFloat16*>(srcBFloat16Buffer);
+
+    for (size_t i = 0; i < numElements; i++)
+    {
+        dstFloat32Buffer[i] = bf16[i].toFloat32();
+    }
+}
+
 } //namespace armnnUtils
-- 
cgit v1.2.1