diff options
author | Narumol Prangnawarat <narumol.prangnawarat@arm.com> | 2020-03-11 14:51:27 +0000 |
---|---|---|
committer | Narumol Prangnawarat <narumol.prangnawarat@arm.com> | 2020-03-13 09:49:42 +0000 |
commit | 44179c372eea9f17c96cbf50ee383e57e14d70a6 (patch) | |
tree | 2a2971c2db67426107b21d9a045cfa46a4a1663a /src/armnnUtils | |
parent | e9b5d2989abc8008df7ff3ea287ee896ee1121a6 (diff) | |
download | armnn-44179c372eea9f17c96cbf50ee383e57e14d70a6.tar.gz |
IVGCVSW-4511 Add BFloat16 to RefLayerSupport and unit tests
Signed-off-by: Narumol Prangnawarat <narumol.prangnawarat@arm.com>
Change-Id: Ifaae4d5aac468ba927b2c6a4bf31b8c8522aeb2e
Diffstat (limited to 'src/armnnUtils')
-rw-r--r-- | src/armnnUtils/BFloat16.hpp | 16 | ||||
-rw-r--r-- | src/armnnUtils/QuantizeHelper.hpp | 17 |
2 files changed, 28 insertions, 5 deletions
```diff
diff --git a/src/armnnUtils/BFloat16.hpp b/src/armnnUtils/BFloat16.hpp
index 5da4da559f..16ceb524c3 100644
--- a/src/armnnUtils/BFloat16.hpp
+++ b/src/armnnUtils/BFloat16.hpp
@@ -27,6 +27,17 @@ public:
         m_Value = Float32ToBFloat16(v).Val();
     }
 
+    operator float() const
+    {
+        return ToFloat32();
+    }
+
+    BFloat16& operator=(const BFloat16& other)
+    {
+        m_Value = other.Val();
+        return *this;
+    }
+
     BFloat16& operator=(float v)
     {
         m_Value = Float32ToBFloat16(v).Val();
@@ -38,11 +49,6 @@ public:
         return m_Value == r.Val();
     }
 
-    bool operator==(const float& r) const
-    {
-        return ToFloat32() == r;
-    }
-
     static BFloat16 Float32ToBFloat16(const float v)
     {
         if (std::isnan(v))
diff --git a/src/armnnUtils/QuantizeHelper.hpp b/src/armnnUtils/QuantizeHelper.hpp
index 6fd13fda98..596ec98f64 100644
--- a/src/armnnUtils/QuantizeHelper.hpp
+++ b/src/armnnUtils/QuantizeHelper.hpp
@@ -8,6 +8,7 @@
 #include <armnn/utility/IgnoreUnused.hpp>
 #include <armnn/TypesUtils.hpp>
 
+#include <BFloat16.hpp>
 #include <Half.hpp>
 
 #include <initializer_list>
@@ -65,6 +66,22 @@ struct SelectiveQuantizer<armnn::Half, false>
     }
 };
 
+template<>
+struct SelectiveQuantizer<armnn::BFloat16, false>
+{
+    static armnn::BFloat16 Quantize(float value, float scale, int32_t offset)
+    {
+        armnn::IgnoreUnused(scale, offset);
+        return armnn::BFloat16(value);
+    }
+
+    static float Dequantize(armnn::BFloat16 value, float scale, int32_t offset)
+    {
+        armnn::IgnoreUnused(scale, offset);
+        return value;
+    }
+};
+
 template<typename T>
 T SelectiveQuantize(float value, float scale, int32_t offset)
 {
```