author      Narumol Prangnawarat <narumol.prangnawarat@arm.com>    2020-03-20 15:01:01 +0000
committer   Narumol Prangnawarat <narumol.prangnawarat@arm.com>    2020-03-20 19:09:07 +0000
commit      bc7ffb5e9e5f4c86280b20c65772eb12d8bb140e (patch)
tree        5187f34326414e7dfea80e0f4efaae5cbeb05d1d
parent      cf2ad554502830804e991aca2e5b0741623119b2 (diff)
download    armnn-bc7ffb5e9e5f4c86280b20c65772eb12d8bb140e.tar.gz
IVGCVSW-4520 Implement BFloat16 Optimizer
* Add ReduceFp32ToBf16 to OptimizerOptions
* Add ConvertFp32NetworkToBf16
* Add utility functions to insert conversion layers
* Add constant conversion BF16 <-> FP32
* Unit tests

Signed-off-by: Narumol Prangnawarat <narumol.prangnawarat@arm.com>
Change-Id: Iaff77e20c721400b052cb37eb9ef6fe16d7abaff
-rw-r--r--  Android.mk                                                            2
-rw-r--r--  CMakeLists.txt                                                        3
-rw-r--r--  include/armnn/INetwork.hpp                                           14
-rw-r--r--  src/armnn/CompatibleTypes.hpp                                        12
-rw-r--r--  src/armnn/Network.cpp                                                92
-rw-r--r--  src/armnn/NetworkUtils.cpp                                           87
-rw-r--r--  src/armnn/NetworkUtils.hpp                                            6
-rw-r--r--  src/armnn/optimizations/All.hpp                                       1
-rw-r--r--  src/armnn/optimizations/ConvertConstants.hpp                         54
-rw-r--r--  src/armnn/optimizations/ConvertFp32NetworkToBf16.hpp                 81
-rw-r--r--  src/armnn/test/optimizations/ConvertConstantsBFloatTests.cpp        127
-rw-r--r--  src/armnn/test/optimizations/Fp32NetworkToBf16ConverterTests.cpp     45
12 files changed, 518 insertions, 6 deletions
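
From the application side, the new option is consumed through the existing armnn::Optimize entry point. A minimal sketch of enabling the BFloat16 reduction, assuming the usual runtime setup (the backend choice and the empty network are illustrative only, not part of this patch):

#include <armnn/ArmNN.hpp>

int main()
{
    using namespace armnn;

    // Illustrative network; in practice this is built via the INetwork API or a parser.
    INetworkPtr network = INetwork::Create();

    IRuntime::CreationOptions runtimeOptions;
    IRuntimePtr runtime = IRuntime::Create(runtimeOptions);

    // The third argument is the new m_ReduceFp32ToBf16 flag introduced by this patch:
    // reduceFp32ToFp16 = false, debug = false, reduceFp32ToBf16 = true.
    OptimizerOptions optimizerOptions(false, false, true);

    // FP32 layers and constants are reduced to BF16 where supported; layers that only
    // support FP32 get ConvertBf16ToFp32/ConvertFp32ToBf16 layers inserted around them.
    IOptimizedNetworkPtr optNet = Optimize(*network,
                                           { Compute::CpuRef },
                                           runtime->GetDeviceSpec(),
                                           optimizerOptions);
    (void)optNet; // the optimized network would normally be loaded into the runtime
    return 0;
}
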
diff --git a/Android.mk b/Android.mk
index 0c2a420f4b..87b1f9ac1a 100644
--- a/Android.mk
+++ b/Android.mk
@@ -347,8 +347,10 @@ LOCAL_SRC_FILES := \
src/armnn/test/ModelAccuracyCheckerTest.cpp \
src/armnn/test/NetworkTests.cpp \
src/armnn/test/ObservableTest.cpp \
+ src/armnn/test/optimizations/ConvertConstantsBFloatTests.cpp \
src/armnn/test/optimizations/ConvertConstantsFloatToHalfTests.cpp \
src/armnn/test/optimizations/ConvertConstantsHalfToFloatTests.cpp \
+ src/armnn/test/optimizations/Fp32NetworkToBf16ConverterTests.cpp \
src/armnn/test/optimizations/Fp32NetworkToFp16ConverterTests.cpp \
src/armnn/test/optimizations/InsertDebugLayerTests.cpp \
src/armnn/test/optimizations/MovePermuteUpTests.cpp \
diff --git a/CMakeLists.txt b/CMakeLists.txt
index e13b132bba..605e0421ed 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -450,6 +450,7 @@ list(APPEND armnn_sources
src/armnn/optimizations/AddDebug.hpp
src/armnn/optimizations/All.hpp
src/armnn/optimizations/ConvertConstants.hpp
+ src/armnn/optimizations/ConvertFp32NetworkToBf16.hpp
src/armnn/optimizations/ConvertFp32NetworkToFp16.hpp
src/armnn/optimizations/FoldPadIntoConvolution2d.hpp
src/armnn/optimizations/MovePermuteUp.hpp
@@ -626,8 +627,10 @@ if(BUILD_UNIT_TESTS)
src/armnn/test/NetworkTests.cpp
src/armnn/test/ObservableTest.cpp
src/armnn/test/OptimizerTests.cpp
+ src/armnn/test/optimizations/ConvertConstantsBFloatTests.cpp
src/armnn/test/optimizations/ConvertConstantsFloatToHalfTests.cpp
src/armnn/test/optimizations/ConvertConstantsHalfToFloatTests.cpp
+ src/armnn/test/optimizations/Fp32NetworkToBf16ConverterTests.cpp
src/armnn/test/optimizations/Fp32NetworkToFp16ConverterTests.cpp
src/armnn/test/optimizations/InsertDebugLayerTests.cpp
src/armnn/test/optimizations/MovePermuteUpTests.cpp
diff --git a/include/armnn/INetwork.hpp b/include/armnn/INetwork.hpp
index 84ecaebfb9..b840dd58e5 100644
--- a/include/armnn/INetwork.hpp
+++ b/include/armnn/INetwork.hpp
@@ -591,18 +591,28 @@ struct OptimizerOptions
OptimizerOptions()
: m_ReduceFp32ToFp16(false)
, m_Debug(false)
+ , m_ReduceFp32ToBf16(false)
{}
- OptimizerOptions(bool reduceFp32ToFp16, bool debug)
+ OptimizerOptions(bool reduceFp32ToFp16, bool debug, bool reduceFp32ToBf16 = false)
: m_ReduceFp32ToFp16(reduceFp32ToFp16)
, m_Debug(debug)
- {}
+ , m_ReduceFp32ToBf16(reduceFp32ToBf16)
+ {
+ if (m_ReduceFp32ToFp16 && m_ReduceFp32ToBf16)
+ {
+ throw InvalidArgumentException("BFloat16 and Float16 optimization cannot be enabled at the same time.");
+ }
+ }
// Reduce Fp32 data to Fp16 for faster processing
bool m_ReduceFp32ToFp16;
// Add debug data for easier troubleshooting
bool m_Debug;
+
+ // Reduce Fp32 data to Bf16 for faster processing
+ bool m_ReduceFp32ToBf16;
};
/// Create an optimized version of the network
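
The added constructor rejects enabling both reductions at once; the same guard is repeated inside Optimize() further down. A minimal sketch of the resulting behaviour (the exception message is quoted verbatim from the change above):

#include <armnn/INetwork.hpp>
#include <armnn/Exceptions.hpp>
#include <iostream>

int main()
{
    // Valid: reduce FP32 to BF16 only.
    armnn::OptimizerOptions bf16Only(/*reduceFp32ToFp16=*/false, /*debug=*/false, /*reduceFp32ToBf16=*/true);
    std::cout << std::boolalpha << bf16Only.m_ReduceFp32ToBf16 << std::endl; // true

    // Invalid: FP16 and BF16 reduction are mutually exclusive, so the constructor throws.
    try
    {
        armnn::OptimizerOptions both(/*reduceFp32ToFp16=*/true, /*debug=*/false, /*reduceFp32ToBf16=*/true);
    }
    catch (const armnn::InvalidArgumentException& e)
    {
        std::cout << e.what() << std::endl;
        // "BFloat16 and Float16 optimization cannot be enabled at the same time."
    }
    return 0;
}
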
diff --git a/src/armnn/CompatibleTypes.hpp b/src/armnn/CompatibleTypes.hpp
index 4332f74b23..1a663d3e27 100644
--- a/src/armnn/CompatibleTypes.hpp
+++ b/src/armnn/CompatibleTypes.hpp
@@ -5,8 +5,10 @@
#pragma once
-#include "armnn/Types.hpp"
-#include "Half.hpp"
+#include <armnn/Types.hpp>
+
+#include <BFloat16.hpp>
+#include <Half.hpp>
namespace armnn
{
@@ -30,6 +32,12 @@ inline bool CompatibleTypes<Half>(DataType dataType)
}
template<>
+inline bool CompatibleTypes<BFloat16>(DataType dataType)
+{
+ return dataType == DataType::BFloat16;
+}
+
+template<>
inline bool CompatibleTypes<uint8_t>(DataType dataType)
{
return dataType == DataType::Boolean || dataType == DataType::QAsymmU8;
diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp
index 7a6fa8f78c..5f7719730b 100644
--- a/src/armnn/Network.cpp
+++ b/src/armnn/Network.cpp
@@ -248,6 +248,86 @@ OptimizationResult AttemptBackendAssignment(BackendSettings& backendSettings,
return result;
}
}
+ else if (dataTypeIn == DataType::BFloat16 || dataTypeOut == DataType::BFloat16)
+ {
+ if (IWorkloadFactory::IsLayerSupported(*layer, DataType::Float32, reasonIfUnsupported)
+ && layer->GetType() != LayerType::ConvertFp32ToBf16
+ && layer->GetType() != LayerType::ConvertBf16ToFp32)
+ {
+ // Insert BF16 -> FP32 conversion layer before current layer
+ std::vector<ConvertBf16ToFp32Layer*> convertBf16ToFp32Layers;
+ if (dataTypeIn == DataType::BFloat16)
+ {
+ convertBf16ToFp32Layers =
+ InsertConvertBf16ToFp32LayersBefore(graph, *layer);
+ }
+
+ // Insert FP32 -> BF16 conversion layer after current layer
+ std::vector<ConvertFp32ToBf16Layer*> convertFp32ToBf16Layers;
+ if (dataTypeOut == DataType::BFloat16)
+ {
+ convertFp32ToBf16Layers =
+ InsertConvertFp32ToBf16LayersAfter(graph, *layer);
+ }
+
+ // Assign a supported backend to the newly introduced conversion layers
+ auto AssignFirstSupportedBackend = [&](Layer* layer, BackendId preferredBackend)
+ {
+ bool supportedBackendFound = false;
+ std::string reasonIfUnsupported;
+
+ // Try preferred backend first
+ layer->SetBackendId(preferredBackend);
+ if (IWorkloadFactory::IsLayerSupported(*layer,
+ EmptyOptional(),
+ reasonIfUnsupported))
+ {
+ supportedBackendFound = true;
+ }
+ else
+ {
+ for (const auto& backend : availablePreferredBackends)
+ {
+ // Skip preferred backend (we already determined that it is not supported)
+ if (backend == preferredBackend)
+ {
+ continue;
+ }
+
+ layer->SetBackendId(backend);
+ if (IWorkloadFactory::IsLayerSupported(*layer,
+ EmptyOptional(),
+ reasonIfUnsupported))
+ {
+ supportedBackendFound = true;
+ break;
+ }
+ }
+ }
+
+ return supportedBackendFound;
+ };
+
+ for (ConvertBf16ToFp32Layer* convertLayer : convertBf16ToFp32Layers)
+ {
+ if (!AssignFirstSupportedBackend(convertLayer, backend))
+ {
+ return ReturnError(convertLayer);
+ }
+ }
+
+ for (ConvertFp32ToBf16Layer* convertLayer : convertFp32ToBf16Layers)
+ {
+ if (!AssignFirstSupportedBackend(convertLayer, backend))
+ {
+ return ReturnError(convertLayer);
+ }
+ }
+
+ return result;
+ }
+ }
+
std::stringstream warningMsg;
warningMsg << "Layer of type " << GetLayerTypeAsCString(layer->GetType())
<< " is not supported on requested backend " << layer->GetBackendId().Get()
@@ -898,6 +978,11 @@ IOptimizedNetworkPtr Optimize(const INetwork& inNetwork,
throw armnn::InvalidArgumentException("Invoked Optimize with no backends specified");
}
+ if (options.m_ReduceFp32ToFp16 && options.m_ReduceFp32ToBf16)
+ {
+ throw InvalidArgumentException("BFloat16 and Float16 optimization cannot be enabled at the same time.");
+ }
+
const Network& network = *boost::polymorphic_downcast<const Network*>(&inNetwork);
std::unique_ptr<Graph> graph = std::make_unique<Graph>(network.GetGraph());
@@ -934,6 +1019,13 @@ IOptimizedNetworkPtr Optimize(const INetwork& inNetwork,
Optimizer::Pass(optGraph, MakeOptimizations(ConvertConstantsFloatToHalf()));
}
+ // If Fp32 to Bf16 optimization is set convert Fp32 network to Bf16
+ if (options.m_ReduceFp32ToBf16)
+ {
+ Optimizer::Pass(optGraph, MakeOptimizations(Fp32NetworkToBf16Converter()));
+ Optimizer::Pass(optGraph, MakeOptimizations(ConvertConstantsFloatToBFloat()));
+ }
+
// Initialize backend settings
BackendSettings backendSettings(backendPreferences, deviceSpec);
if (backendSettings.GetAvailablePreferredBackends().empty())
diff --git a/src/armnn/NetworkUtils.cpp b/src/armnn/NetworkUtils.cpp
index 1bbeaac005..8653a08510 100644
--- a/src/armnn/NetworkUtils.cpp
+++ b/src/armnn/NetworkUtils.cpp
@@ -16,7 +16,7 @@ namespace armnn
namespace
{
-void UpdateOutputSlotFp16ToFp32(OutputSlot& outputSlot)
+void UpdateOutputSlotToFp32(OutputSlot& outputSlot)
{
const TensorInfo& origTensorInfo = outputSlot.GetTensorInfo();
TensorInfo newTensorInfo(origTensorInfo);
@@ -24,19 +24,69 @@ void UpdateOutputSlotFp16ToFp32(OutputSlot& outputSlot)
outputSlot.SetTensorInfo(newTensorInfo);
}
+void ChangeOutputBf16ToFp32(Layer& layer)
+{
+ for (auto&& outputSlot = layer.BeginOutputSlots(); outputSlot != layer.EndOutputSlots(); ++outputSlot)
+ {
+ if (outputSlot->GetTensorInfo().GetDataType() == DataType::BFloat16)
+ {
+ UpdateOutputSlotToFp32(*outputSlot);
+ }
+ }
+}
+
void ChangeOutputFp16ToFp32(Layer& layer)
{
for (auto&& outputSlot = layer.BeginOutputSlots(); outputSlot != layer.EndOutputSlots(); ++outputSlot)
{
if (outputSlot->GetTensorInfo().GetDataType() == DataType::Float16)
{
- UpdateOutputSlotFp16ToFp32(*outputSlot);
+ UpdateOutputSlotToFp32(*outputSlot);
}
}
}
} // anonymous namespace
+std::vector<ConvertBf16ToFp32Layer*> InsertConvertBf16ToFp32LayersBefore(Graph& graph,
+ Layer& layer,
+ bool expectCorrectInputType)
+{
+ std::vector<ConvertBf16ToFp32Layer*> convertLayers;
+ convertLayers.reserve(layer.GetNumInputSlots());
+
+ // Insert a ConvertBf16ToFp32Layer before each input slot
+ for (auto&& inputSlot = layer.BeginInputSlots(); inputSlot != layer.EndInputSlots(); ++inputSlot)
+ {
+ bool allowInsert = true;
+ if (expectCorrectInputType)
+ {
+ // Only insert ConvertBf16ToFp32Layer before BF16 input slots
+ OutputSlot* connectedOutputSlot = inputSlot->GetConnectedOutputSlot();
+ allowInsert =
+ connectedOutputSlot && connectedOutputSlot->GetTensorInfo().GetDataType() == DataType::BFloat16;
+ }
+
+ if (allowInsert)
+ {
+ const std::string name =
+ std::string("convert_bf16_to_fp32-" + std::to_string(inputSlot->GetSlotIndex()) + "-") +
+ layer.GetName();
+ ConvertBf16ToFp32Layer* convertLayer =
+ graph.InsertNewLayer<ConvertBf16ToFp32Layer>(*inputSlot, name.c_str());
+
+ TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
+ convertInfo.SetDataType(DataType::Float32);
+
+ convertLayer->GetOutputSlot().SetTensorInfo(convertInfo);
+
+ convertLayers.emplace_back(convertLayer);
+ }
+ }
+
+ return convertLayers;
+}
+
std::vector<ConvertFp16ToFp32Layer*> InsertConvertFp16ToFp32LayersBefore(Graph& graph,
Layer& layer,
bool expectCorrectInputType)
@@ -76,6 +126,39 @@ std::vector<ConvertFp16ToFp32Layer*> InsertConvertFp16ToFp32LayersBefore(Graph&
return convertLayers;
}
+std::vector<ConvertFp32ToBf16Layer*> InsertConvertFp32ToBf16LayersAfter(Graph& graph, Layer& layer)
+{
+ const unsigned int numOutputSlots = layer.GetNumOutputSlots();
+
+ std::vector<ConvertFp32ToBf16Layer*> convertLayers;
+ convertLayers.reserve(numOutputSlots);
+
+ // Update Bf16 output slots to FP32 on current layer
+ ChangeOutputBf16ToFp32(layer);
+
+ // Insert a ConvertFp32ToBf16Layer after each FP32 output slot
+ for (unsigned int slotIndex = 0u; slotIndex < numOutputSlots; ++slotIndex)
+ {
+ OutputSlot& outputSlot = layer.GetOutputSlot(slotIndex);
+ if(outputSlot.GetTensorInfo().GetDataType() == DataType::Float32)
+ {
+ const std::string name =
+ std::string("convert_fp32_to_bf16-" + std::to_string(slotIndex) + "-") + layer.GetName();
+ ConvertFp32ToBf16Layer* convertLayer =
+ graph.InsertNewLayer<ConvertFp32ToBf16Layer>(outputSlot, name.c_str());
+
+ TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
+ convertInfo.SetDataType(DataType::BFloat16);
+
+ convertLayer->GetOutputSlot().SetTensorInfo(convertInfo);
+
+ convertLayers.emplace_back(convertLayer);
+ }
+ }
+
+ return convertLayers;
+}
+
std::vector<ConvertFp32ToFp16Layer*> InsertConvertFp32ToFp16LayersAfter(Graph& graph, Layer& layer)
{
const unsigned int numOutputSlots = layer.GetNumOutputSlots();
diff --git a/src/armnn/NetworkUtils.hpp b/src/armnn/NetworkUtils.hpp
index 38fb22350d..064545aac5 100644
--- a/src/armnn/NetworkUtils.hpp
+++ b/src/armnn/NetworkUtils.hpp
@@ -11,6 +11,12 @@
namespace armnn
{
+std::vector<ConvertBf16ToFp32Layer*> InsertConvertBf16ToFp32LayersBefore(Graph& graph,
+ Layer& layer,
+ bool expectCorrectInputType = true);
+
+std::vector<ConvertFp32ToBf16Layer*> InsertConvertFp32ToBf16LayersAfter(Graph& graph, Layer& layer);
+
std::vector<ConvertFp16ToFp32Layer*> InsertConvertFp16ToFp32LayersBefore(Graph& graph,
Layer& layer,
bool expectCorrectInputType = true);
diff --git a/src/armnn/optimizations/All.hpp b/src/armnn/optimizations/All.hpp
index 273c337665..9fc284213d 100644
--- a/src/armnn/optimizations/All.hpp
+++ b/src/armnn/optimizations/All.hpp
@@ -6,6 +6,7 @@
#include "AddDebug.hpp"
#include "ConvertConstants.hpp"
+#include "ConvertFp32NetworkToBf16.hpp"
#include "ConvertFp32NetworkToFp16.hpp"
#include "FoldPadIntoConvolution2d.hpp"
#include "MovePermuteUp.hpp"
diff --git a/src/armnn/optimizations/ConvertConstants.hpp b/src/armnn/optimizations/ConvertConstants.hpp
index 5e19c7bd05..f3ebcdf5d9 100644
--- a/src/armnn/optimizations/ConvertConstants.hpp
+++ b/src/armnn/optimizations/ConvertConstants.hpp
@@ -13,6 +13,7 @@
#include <armnn/utility/IgnoreUnused.hpp>
+#include <BFloat16.hpp>
#include <Half.hpp>
namespace armnn
@@ -20,6 +21,27 @@ namespace armnn
namespace optimizations
{
+struct BFloat16ToFloat32
+{
+ static void Func(std::unique_ptr<ScopedCpuTensorHandle>& handle)
+ {
+ const TensorInfo& info = handle->GetTensorInfo();
+
+ if (info.GetDataType() == DataType::BFloat16)
+ {
+ std::vector<float> newValues(info.GetNumElements());
+
+ armnnUtils::FloatingPointConverter::ConvertBFloat16ToFloat32(handle->GetTensor<BFloat16>(),
+ info.GetNumElements(),
+ newValues.data());
+
+ TensorInfo newInfo(info.GetShape(), DataType::Float32);
+ ConstTensor newInput(newInfo, newValues);
+ handle.reset(new ScopedCpuTensorHandle(newInput));
+ }
+ }
+};
+
struct Float16ToFloat32
{
static void Func(std::unique_ptr<ScopedCpuTensorHandle>& handle)
@@ -41,6 +63,27 @@ struct Float16ToFloat32
}
};
+struct Float32ToBFloat16
+{
+ static void Func(std::unique_ptr<ScopedCpuTensorHandle>& handle)
+ {
+ const TensorInfo& info = handle->GetTensorInfo();
+
+ if (info.GetDataType() == DataType::Float32)
+ {
+ std::vector<BFloat16> newValues(info.GetNumElements());
+
+ armnnUtils::FloatingPointConverter::ConvertFloat32ToBFloat16(handle->GetTensor<float>(),
+ info.GetNumElements(),
+ newValues.data());
+
+ TensorInfo newInfo(info.GetShape(), DataType::BFloat16);
+ ConstTensor newInput(newInfo, newValues);
+ handle.reset(new ScopedCpuTensorHandle(newInput));
+ }
+ }
+};
+
struct Float32ToFloat16
{
static void Func(std::unique_ptr<ScopedCpuTensorHandle>& handle)
@@ -97,6 +140,17 @@ struct IsFloat16Layer
}
};
+struct IsBFloat16Layer
+{
+ static bool Test(const Layer& layer)
+ {
+ return layer.GetDataType() == DataType::BFloat16;
+ }
+};
+
+using ConvertConstantsBFloatToFloat = ConvertConstants<BFloat16ToFloat32, IsFloat32Layer>;
+using ConvertConstantsFloatToBFloat = ConvertConstants<Float32ToBFloat16, IsBFloat16Layer>;
+
using ConvertConstantsHalfToFloat = ConvertConstants<Float16ToFloat32, IsFloat32Layer>;
using ConvertConstantsFloatToHalf = ConvertConstants<Float32ToFloat16, IsFloat16Layer>;
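
For reference, the expected values in the new ConvertConstantsBFloatTests further down are consistent with rounding the FP32 bit pattern to the nearest bfloat16, ties to even. A standalone sketch of that scheme (illustrative only; it is an assumption that armnnUtils::FloatingPointConverter::ConvertFloat32ToBFloat16 behaves equivalently):

#include <cstdint>
#include <cstring>
#include <cstdio>

// Convert one float to a bfloat16 bit pattern using round-to-nearest-even.
// NaN handling is omitted to keep the sketch short.
static uint16_t FloatToBf16Bits(float value)
{
    uint32_t bits = 0;
    std::memcpy(&bits, &value, sizeof(bits));  // type-pun without undefined behaviour
    const uint32_t lsb = (bits >> 16) & 1u;    // lowest bit that survives truncation
    bits += 0x7FFFu + lsb;                     // round half to even
    return static_cast<uint16_t>(bits >> 16);  // keep sign, exponent and top 7 mantissa bits
}

int main()
{
    // Sample values taken from the unit test below.
    const float samples[] = { 3.8f, 3.1055e+29f, 9.149516e-10f };
    for (float f : samples)
    {
        std::printf("%.7g -> 0x%04X\n", static_cast<double>(f),
                    static_cast<unsigned int>(FloatToBf16Bits(f)));
    }
    // Expected: 3.8 -> 0x4073, 3.1055e+29 -> 0x707B, 9.149516e-10 -> 0x307B
    return 0;
}
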
diff --git a/src/armnn/optimizations/ConvertFp32NetworkToBf16.hpp b/src/armnn/optimizations/ConvertFp32NetworkToBf16.hpp
new file mode 100644
index 0000000000..d6350c3af3
--- /dev/null
+++ b/src/armnn/optimizations/ConvertFp32NetworkToBf16.hpp
@@ -0,0 +1,81 @@
+//
+// Copyright © 2020 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#pragma once
+
+#include "Optimization.hpp"
+#include "NetworkUtils.hpp"
+
+namespace armnn
+{
+namespace optimizations
+{
+
+class ConvertFp32NetworkToBf16Impl
+{
+public:
+ void Run(Graph& graph, Layer& layer) const
+ {
+ if(layer.GetType() == LayerType::Input)
+ {
+ // if the outputs of this layer are DataType::Float32
+ // add a ConvertFloat32ToBFloat16 layer after each of the outputs
+ if (layer.GetDataType() == DataType::Float32)
+ {
+ InsertConvertFp32ToBf16LayersAfter(graph, layer);
+ }
+ }
+ else if (layer.GetType() == LayerType::Output)
+ {
+ // if the inputs of this layer are DataType::Float32
+ // add a ConvertBFloat16ToFloat32 layer before each of the inputs
+ if (layer.GetDataType() == DataType::Float32)
+ {
+ // NOTE: We need to call InsertConvertBf16ToFp32LayersBefore with expectCorrectInputType = false
+ // here, otherwise it will expect the inputs to be DataType::BFloat16
+ InsertConvertBf16ToFp32LayersBefore(graph, layer, false);
+ }
+ }
+ else if (layer.GetType() != LayerType::ConvertFp32ToBf16 && layer.GetType() != LayerType::ConvertBf16ToFp32)
+ {
+ // if the inputs/outputs of this layer are DataType::Float32
+ // change the data type for all inputs and outputs to DataType::BFloat16
+ for (auto&& input = layer.BeginInputSlots(); input != layer.EndInputSlots(); ++input)
+ {
+ // if it is connected to OutputSlot of the InputLayer do not change the DataType of connection
+ // InputSlots of the current layer will be updated when conversion layer is inserted after InputLayer
+ Layer& base = input->GetConnectedOutputSlot()->GetOwningLayer();
+ if (base.GetType() != LayerType::Input)
+ {
+ TensorInfo convertInfo = input->GetConnection()->GetTensorInfo();
+ if (convertInfo.GetDataType() == DataType::Float32)
+ {
+ convertInfo.SetDataType(DataType::BFloat16);
+ input->GetConnection()->SetTensorInfo(convertInfo);
+ }
+ }
+ }
+
+ // change outputs to DataType::BFloat16
+ for (auto&& output = layer.BeginOutputSlots(); output != layer.EndOutputSlots(); ++output)
+ {
+ TensorInfo convertInfo = output->GetTensorInfo();
+ if (convertInfo.GetDataType() == DataType::Float32)
+ {
+ convertInfo.SetDataType(DataType::BFloat16);
+ output->SetTensorInfo(convertInfo);
+ }
+ }
+ }
+ }
+
+protected:
+ ConvertFp32NetworkToBf16Impl() = default;
+ ~ConvertFp32NetworkToBf16Impl() = default;
+};
+
+using Fp32NetworkToBf16Converter = OptimizeForType<Layer, ConvertFp32NetworkToBf16Impl>;
+
+} // namespace optimizations
+} // namespace armnn
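
Inside Optimize() (see the Network.cpp hunk above), this pass is always followed by ConvertConstantsFloatToBFloat, so FP32 constants such as weights feeding layers that are now BF16 are converted as well. A minimal sketch of applying both passes directly to a Graph, mirroring the unit tests below (this relies on internal headers, so it is assumed to compile only inside the Arm NN source tree):

#include <Graph.hpp>
#include <Optimizer.hpp>
#include <optimizations/All.hpp>

void ReduceGraphToBf16(armnn::Graph& graph)
{
    using namespace armnn;
    using namespace armnn::optimizations;

    // Rewrite FP32 tensors of eligible layers to BF16 and insert ConvertFp32ToBf16 /
    // ConvertBf16ToFp32 layers at the network boundaries...
    Optimizer::Pass(graph, MakeOptimizations(Fp32NetworkToBf16Converter()));

    // ...then convert FP32 constant tensors owned by layers that are now BF16.
    Optimizer::Pass(graph, MakeOptimizations(ConvertConstantsFloatToBFloat()));
}
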
diff --git a/src/armnn/test/optimizations/ConvertConstantsBFloatTests.cpp b/src/armnn/test/optimizations/ConvertConstantsBFloatTests.cpp
new file mode 100644
index 0000000000..5cb89daafd
--- /dev/null
+++ b/src/armnn/test/optimizations/ConvertConstantsBFloatTests.cpp
@@ -0,0 +1,127 @@
+//
+// Copyright © 2020 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "../TestUtils.hpp"
+
+#include <BFloat16.hpp>
+#include <Optimizer.hpp>
+
+#include <boost/test/unit_test.hpp>
+
+using namespace armnn;
+
+BOOST_AUTO_TEST_SUITE(Optimizer)
+using namespace armnn::optimizations;
+
+BOOST_AUTO_TEST_CASE(ConvertConstantsFloatToBFloatTest)
+{
+ armnn::Graph graph;
+
+ const armnn::TensorInfo info({ 1, 1, 1, 2 }, armnn::DataType::BFloat16);
+
+ // Create const tensor from fp32 data
+ unsigned int dims[] = { 4, 2, 1, 1 };
+ std::vector<float> floatWeights{ 0.0f, -1.0f,
+ 3.8f, // 0x40733333 Round down
+ 3.1055E+29f, // 0x707ADC3C Round up
+ 9.149516E-10f, // 0x307B7FFF Round down
+ -3.8f, // 0xC0733333 Round down
+ -3.1055E+29f, // 0xF07ADC3C Round up
+ -9.149516E-10f // 0xB07B7FFF Round down
+ };
+ armnn::ConstTensor weights(armnn::TensorInfo(4, dims, armnn::DataType::Float32), floatWeights);
+
+ // Create simple test network
+ auto input = graph.AddLayer<armnn::InputLayer>(0, "input");
+ input->GetOutputSlot().SetTensorInfo(info);
+
+ auto fc = graph.AddLayer<armnn::FullyConnectedLayer>(armnn::FullyConnectedDescriptor(), "fc");
+ fc->m_Weight = std::make_unique<armnn::ScopedCpuTensorHandle>(weights);
+ fc->GetOutputSlot().SetTensorInfo(info);
+
+ auto output = graph.AddLayer<armnn::OutputLayer>(1, "output");
+
+ // Connect up the layers
+ input->GetOutputSlot().Connect(fc->GetInputSlot(0));
+ fc->GetOutputSlot().Connect(output->GetInputSlot(0));
+
+ // Check tensor data type before conversion
+ BOOST_CHECK(fc->m_Weight->GetTensorInfo().GetDataType() == armnn::DataType::Float32);
+
+ // Run the optimizer
+ armnn::Optimizer::Pass(graph, armnn::MakeOptimizations(ConvertConstantsFloatToBFloat()));
+
+ // Check tensor data type after conversion
+ BOOST_CHECK(fc->m_Weight->GetTensorInfo().GetDataType() == armnn::DataType::BFloat16);
+
+ // Check whether data matches expected Bf16 data
+ BFloat16* data = fc->m_Weight->GetTensor<BFloat16>();
+ BOOST_CHECK(data[0] == BFloat16(0.0f));
+ BOOST_CHECK(data[1] == BFloat16(-1.0f));
+ BOOST_CHECK(data[2] == BFloat16(3.796875f)); // 0x4073
+ BOOST_CHECK(data[3] == BFloat16(3.1072295E29f)); // 0x707B
+ BOOST_CHECK(data[4] == BFloat16(9.131327E-10f)); // 0x307B
+ BOOST_CHECK(data[5] == BFloat16(-3.796875f)); // 0xC073
+ BOOST_CHECK(data[6] == BFloat16(-3.1072295E29f)); // 0xF07B
+ BOOST_CHECK(data[7] == BFloat16(-9.131327E-10f)); // 0xB07B
+}
+
+BOOST_AUTO_TEST_CASE(ConvertConstantsBFloatToFloatTest)
+{
+ armnn::Graph graph;
+
+ const armnn::TensorInfo info({ 1, 1, 1, 2 }, armnn::DataType::Float32);
+
+ // Create the BFloat16 precision input data
+ unsigned int dims[] = { 4, 2, 1, 1 };
+ std::vector<float> convWeightsData{ 0.f, -1.f,
+ 3.796875f, // 0x4073
+ 3.1072295E29f, // 0x707B
+ 9.131327E-10f, // 0x307B
+ -3.796875f, // 0xC073
+ -3.1072295E29f, // 0xF07B
+ -9.131327E-10f // 0xB07B
+ };
+ std::vector<uint16_t> bfWeights(8);
+ armnnUtils::FloatingPointConverter::ConvertFloat32ToBFloat16(convWeightsData.data(), convWeightsData.size(),
+ bfWeights.data());
+ armnn::ConstTensor weights(armnn::TensorInfo(4, dims, armnn::DataType::BFloat16), bfWeights);
+
+ //Create the simple test network
+ auto input = graph.AddLayer<armnn::InputLayer>(0, "input");
+ input->GetOutputSlot().SetTensorInfo(info);
+
+ auto fc = graph.AddLayer<armnn::FullyConnectedLayer>(armnn::FullyConnectedDescriptor(), "fc");
+ fc->m_Weight = std::make_unique<armnn::ScopedCpuTensorHandle>(weights);
+ fc->GetOutputSlot().SetTensorInfo(info);
+
+ auto output = graph.AddLayer<armnn::OutputLayer>(1, "output");
+
+ //Connect up the layers
+ input->GetOutputSlot().Connect(fc->GetInputSlot(0));
+ fc->GetOutputSlot().Connect(output->GetInputSlot(0));
+
+ //Test the tensor info is correct.
+ BOOST_CHECK(fc->m_Weight->GetTensorInfo().GetDataType() == armnn::DataType::BFloat16);
+
+ // Run the optimizer
+ armnn::Optimizer::Pass(graph, armnn::MakeOptimizations(ConvertConstantsBFloatToFloat()));
+
+ //Test the tensor info is correct.
+ BOOST_CHECK(fc->m_Weight->GetTensorInfo().GetDataType() == armnn::DataType::Float32);
+
+ // Now test the data matches float32 data
+ float* data = fc->m_Weight->GetTensor<float>();
+ BOOST_CHECK(data[0] == 0.0f);
+ BOOST_CHECK(data[1] == -1.0f);
+ BOOST_CHECK(data[2] == 3.796875f);
+ BOOST_CHECK(data[3] == 3.1072295E29f);
+ BOOST_CHECK(data[4] == 9.131327E-10f);
+ BOOST_CHECK(data[5] == -3.796875f);
+ BOOST_CHECK(data[6] == -3.1072295E29f);
+ BOOST_CHECK(data[7] == -9.131327E-10f);
+}
+
+BOOST_AUTO_TEST_SUITE_END()
\ No newline at end of file
diff --git a/src/armnn/test/optimizations/Fp32NetworkToBf16ConverterTests.cpp b/src/armnn/test/optimizations/Fp32NetworkToBf16ConverterTests.cpp
new file mode 100644
index 0000000000..90a15487ac
--- /dev/null
+++ b/src/armnn/test/optimizations/Fp32NetworkToBf16ConverterTests.cpp
@@ -0,0 +1,45 @@
+//
+// Copyright © 2020 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "../TestUtils.hpp"
+
+#include <Optimizer.hpp>
+
+#include <boost/test/unit_test.hpp>
+
+BOOST_AUTO_TEST_SUITE(Optimizer)
+using namespace armnn::optimizations;
+
+BOOST_AUTO_TEST_CASE(Fp32NetworkToBf16OptimizationTest)
+{
+ armnn::Graph graph;
+
+ const armnn::TensorInfo infoFP32({ 2, 2, 1, 3 }, armnn::DataType::Float32);
+
+ // Create the simple test network
+ auto input = graph.AddLayer<armnn::InputLayer>(0, "input");
+ input->GetOutputSlot().SetTensorInfo(infoFP32);
+
+ auto floor = graph.AddLayer<armnn::FloorLayer>("floor");
+ floor->GetOutputSlot().SetTensorInfo(infoFP32);
+
+ auto output = graph.AddLayer<armnn::OutputLayer>(1, "output");
+
+ // Connect up the layers
+ input->GetOutputSlot().Connect(floor->GetInputSlot(0));
+ floor->GetOutputSlot().Connect(output->GetInputSlot(0));
+
+ BOOST_TEST(CheckSequence(graph.cbegin(), graph.cend(), &IsLayerOfType<armnn::InputLayer>,
+ &IsLayerOfType<armnn::FloorLayer>, &IsLayerOfType<armnn::OutputLayer>));
+
+ // Run the optimizer
+ armnn::Optimizer::Pass(graph, armnn::MakeOptimizations(Fp32NetworkToBf16Converter()));
+
+ BOOST_TEST(CheckSequence(graph.cbegin(), graph.cend(), &IsLayerOfType<armnn::InputLayer>,
+ &IsLayerOfType<armnn::ConvertFp32ToBf16Layer>, &IsLayerOfType<armnn::FloorLayer>,
+ &IsLayerOfType<armnn::ConvertBf16ToFp32Layer>, &IsLayerOfType<armnn::OutputLayer>));
+}
+
+BOOST_AUTO_TEST_SUITE_END()
\ No newline at end of file