diff options
-rw-r--r-- | Android.mk | 2 | ||||
-rw-r--r-- | CMakeLists.txt | 3 | ||||
-rw-r--r-- | include/armnn/INetwork.hpp | 14 | ||||
-rw-r--r-- | src/armnn/CompatibleTypes.hpp | 12 | ||||
-rw-r--r-- | src/armnn/Network.cpp | 92 | ||||
-rw-r--r-- | src/armnn/NetworkUtils.cpp | 87 | ||||
-rw-r--r-- | src/armnn/NetworkUtils.hpp | 6 | ||||
-rw-r--r-- | src/armnn/optimizations/All.hpp | 1 | ||||
-rw-r--r-- | src/armnn/optimizations/ConvertConstants.hpp | 54 | ||||
-rw-r--r-- | src/armnn/optimizations/ConvertFp32NetworkToBf16.hpp | 81 | ||||
-rw-r--r-- | src/armnn/test/optimizations/ConvertConstantsBFloatTests.cpp | 127 | ||||
-rw-r--r-- | src/armnn/test/optimizations/Fp32NetworkToBf16ConverterTests.cpp | 45 |
12 files changed, 518 insertions, 6 deletions
diff --git a/Android.mk b/Android.mk index 0c2a420f4b..87b1f9ac1a 100644 --- a/Android.mk +++ b/Android.mk @@ -347,8 +347,10 @@ LOCAL_SRC_FILES := \ src/armnn/test/ModelAccuracyCheckerTest.cpp \ src/armnn/test/NetworkTests.cpp \ src/armnn/test/ObservableTest.cpp \ + src/armnn/test/optimizations/ConvertConstantsBFloatTests.cpp \ src/armnn/test/optimizations/ConvertConstantsFloatToHalfTests.cpp \ src/armnn/test/optimizations/ConvertConstantsHalfToFloatTests.cpp \ + src/armnn/test/optimizations/Fp32NetworkToBf16ConverterTests.cpp \ src/armnn/test/optimizations/Fp32NetworkToFp16ConverterTests.cpp \ src/armnn/test/optimizations/InsertDebugLayerTests.cpp \ src/armnn/test/optimizations/MovePermuteUpTests.cpp \ diff --git a/CMakeLists.txt b/CMakeLists.txt index e13b132bba..605e0421ed 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -450,6 +450,7 @@ list(APPEND armnn_sources src/armnn/optimizations/AddDebug.hpp src/armnn/optimizations/All.hpp src/armnn/optimizations/ConvertConstants.hpp + src/armnn/optimizations/ConvertFp32NetworkToBf16.hpp src/armnn/optimizations/ConvertFp32NetworkToFp16.hpp src/armnn/optimizations/FoldPadIntoConvolution2d.hpp src/armnn/optimizations/MovePermuteUp.hpp @@ -626,8 +627,10 @@ if(BUILD_UNIT_TESTS) src/armnn/test/NetworkTests.cpp src/armnn/test/ObservableTest.cpp src/armnn/test/OptimizerTests.cpp + src/armnn/test/optimizations/ConvertConstantsBFloatTests.cpp src/armnn/test/optimizations/ConvertConstantsFloatToHalfTests.cpp src/armnn/test/optimizations/ConvertConstantsHalfToFloatTests.cpp + src/armnn/test/optimizations/Fp32NetworkToBf16ConverterTests.cpp src/armnn/test/optimizations/Fp32NetworkToFp16ConverterTests.cpp src/armnn/test/optimizations/InsertDebugLayerTests.cpp src/armnn/test/optimizations/MovePermuteUpTests.cpp diff --git a/include/armnn/INetwork.hpp b/include/armnn/INetwork.hpp index 84ecaebfb9..b840dd58e5 100644 --- a/include/armnn/INetwork.hpp +++ b/include/armnn/INetwork.hpp @@ -591,18 +591,28 @@ struct OptimizerOptions OptimizerOptions() : m_ReduceFp32ToFp16(false) , m_Debug(false) + , m_ReduceFp32ToBf16(false) {} - OptimizerOptions(bool reduceFp32ToFp16, bool debug) + OptimizerOptions(bool reduceFp32ToFp16, bool debug, bool reduceFp32ToBf16 = false) : m_ReduceFp32ToFp16(reduceFp32ToFp16) , m_Debug(debug) - {} + , m_ReduceFp32ToBf16(reduceFp32ToBf16) + { + if (m_ReduceFp32ToFp16 && m_ReduceFp32ToBf16) + { + throw InvalidArgumentException("BFloat16 and Float16 optimization cannot be enabled at the same time."); + } + } // Reduce Fp32 data to Fp16 for faster processing bool m_ReduceFp32ToFp16; // Add debug data for easier troubleshooting bool m_Debug; + + // Reduce Fp32 data to Bf16 for faster processing + bool m_ReduceFp32ToBf16; }; /// Create an optimized version of the network diff --git a/src/armnn/CompatibleTypes.hpp b/src/armnn/CompatibleTypes.hpp index 4332f74b23..1a663d3e27 100644 --- a/src/armnn/CompatibleTypes.hpp +++ b/src/armnn/CompatibleTypes.hpp @@ -5,8 +5,10 @@ #pragma once -#include "armnn/Types.hpp" -#include "Half.hpp" +#include <armnn/Types.hpp> + +#include <BFloat16.hpp> +#include <Half.hpp> namespace armnn { @@ -30,6 +32,12 @@ inline bool CompatibleTypes<Half>(DataType dataType) } template<> +inline bool CompatibleTypes<BFloat16>(DataType dataType) +{ + return dataType == DataType::BFloat16; +} + +template<> inline bool CompatibleTypes<uint8_t>(DataType dataType) { return dataType == DataType::Boolean || dataType == DataType::QAsymmU8; diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp index 7a6fa8f78c..5f7719730b 100644 --- a/src/armnn/Network.cpp +++ b/src/armnn/Network.cpp @@ -248,6 +248,86 @@ OptimizationResult AttemptBackendAssignment(BackendSettings& backendSettings, return result; } } + else if (dataTypeIn == DataType::BFloat16 || dataTypeOut == DataType::BFloat16) + { + if (IWorkloadFactory::IsLayerSupported(*layer, DataType::Float32, reasonIfUnsupported) + && layer->GetType() != LayerType::ConvertFp32ToBf16 + && layer->GetType() != LayerType::ConvertBf16ToFp32) + { + // Insert BF16 -> FP32 conversion layer before current layer + std::vector<ConvertBf16ToFp32Layer*> convertBf16ToFp32Layers; + if (dataTypeIn == DataType::BFloat16) + { + convertBf16ToFp32Layers = + InsertConvertBf16ToFp32LayersBefore(graph, *layer); + } + + // Insert FP32 -> BF16 conversion layer after current layer + std::vector<ConvertFp32ToBf16Layer*> convertFp32ToBf16Layers; + if (dataTypeOut == DataType::BFloat16) + { + convertFp32ToBf16Layers = + InsertConvertFp32ToBf16LayersAfter(graph, *layer); + } + + // Assign a supported backend to the newly introduced conversion layers + auto AssignFirstSupportedBackend = [&](Layer* layer, BackendId preferredBackend) + { + bool supportedBackendFound = false; + std::string reasonIfUnsupported; + + // Try preferred backend first + layer->SetBackendId(preferredBackend); + if (IWorkloadFactory::IsLayerSupported(*layer, + EmptyOptional(), + reasonIfUnsupported)) + { + supportedBackendFound = true; + } + else + { + for (const auto& backend : availablePreferredBackends) + { + // Skip preferred backend (we already determined that it is not supported) + if (backend == preferredBackend) + { + continue; + } + + layer->SetBackendId(backend); + if (IWorkloadFactory::IsLayerSupported(*layer, + EmptyOptional(), + reasonIfUnsupported)) + { + supportedBackendFound = true; + break; + } + } + } + + return supportedBackendFound; + }; + + for (ConvertBf16ToFp32Layer* convertLayer : convertBf16ToFp32Layers) + { + if (!AssignFirstSupportedBackend(convertLayer, backend)) + { + return ReturnError(convertLayer); + } + } + + for (ConvertFp32ToBf16Layer* convertLayer : convertFp32ToBf16Layers) + { + if (!AssignFirstSupportedBackend(convertLayer, backend)) + { + return ReturnError(convertLayer); + } + } + + return result; + } + } + std::stringstream warningMsg; warningMsg << "Layer of type " << GetLayerTypeAsCString(layer->GetType()) << " is not supported on requested backend " << layer->GetBackendId().Get() @@ -898,6 +978,11 @@ IOptimizedNetworkPtr Optimize(const INetwork& inNetwork, throw armnn::InvalidArgumentException("Invoked Optimize with no backends specified"); } + if (options.m_ReduceFp32ToFp16 && options.m_ReduceFp32ToBf16) + { + throw InvalidArgumentException("BFloat16 and Float16 optimization cannot be enabled at the same time."); + } + const Network& network = *boost::polymorphic_downcast<const Network*>(&inNetwork); std::unique_ptr<Graph> graph = std::make_unique<Graph>(network.GetGraph()); @@ -934,6 +1019,13 @@ IOptimizedNetworkPtr Optimize(const INetwork& inNetwork, Optimizer::Pass(optGraph, MakeOptimizations(ConvertConstantsFloatToHalf())); } + // If Fp32 to Bf16 optimization is set convert Fp32 network to Bf16 + if (options.m_ReduceFp32ToBf16) + { + Optimizer::Pass(optGraph, MakeOptimizations(Fp32NetworkToBf16Converter())); + Optimizer::Pass(optGraph, MakeOptimizations(ConvertConstantsFloatToBFloat())); + } + // Initialize backend settings BackendSettings backendSettings(backendPreferences, deviceSpec); if (backendSettings.GetAvailablePreferredBackends().empty()) diff --git a/src/armnn/NetworkUtils.cpp b/src/armnn/NetworkUtils.cpp index 1bbeaac005..8653a08510 100644 --- a/src/armnn/NetworkUtils.cpp +++ b/src/armnn/NetworkUtils.cpp @@ -16,7 +16,7 @@ namespace armnn namespace { -void UpdateOutputSlotFp16ToFp32(OutputSlot& outputSlot) +void UpdateOutputSlotToFp32(OutputSlot& outputSlot) { const TensorInfo& origTensorInfo = outputSlot.GetTensorInfo(); TensorInfo newTensorInfo(origTensorInfo); @@ -24,19 +24,69 @@ void UpdateOutputSlotFp16ToFp32(OutputSlot& outputSlot) outputSlot.SetTensorInfo(newTensorInfo); } +void ChangeOutputBf16ToFp32(Layer& layer) +{ + for (auto&& outputSlot = layer.BeginOutputSlots(); outputSlot != layer.EndOutputSlots(); ++outputSlot) + { + if (outputSlot->GetTensorInfo().GetDataType() == DataType::BFloat16) + { + UpdateOutputSlotToFp32(*outputSlot); + } + } +} + void ChangeOutputFp16ToFp32(Layer& layer) { for (auto&& outputSlot = layer.BeginOutputSlots(); outputSlot != layer.EndOutputSlots(); ++outputSlot) { if (outputSlot->GetTensorInfo().GetDataType() == DataType::Float16) { - UpdateOutputSlotFp16ToFp32(*outputSlot); + UpdateOutputSlotToFp32(*outputSlot); } } } } // anonymous namespace +std::vector<ConvertBf16ToFp32Layer*> InsertConvertBf16ToFp32LayersBefore(Graph& graph, + Layer& layer, + bool expectCorrectInputType) +{ + std::vector<ConvertBf16ToFp32Layer*> convertLayers; + convertLayers.reserve(layer.GetNumInputSlots()); + + // Insert a ConvertBf16ToFp32Layer before each input slot + for (auto&& inputSlot = layer.BeginInputSlots(); inputSlot != layer.EndInputSlots(); ++inputSlot) + { + bool allowInsert = true; + if (expectCorrectInputType) + { + // Only insert ConvertBf16ToFp32Layer before BF16 input slots + OutputSlot* connectedOutputSlot = inputSlot->GetConnectedOutputSlot(); + allowInsert = + connectedOutputSlot && connectedOutputSlot->GetTensorInfo().GetDataType() == DataType::BFloat16; + } + + if (allowInsert) + { + const std::string name = + std::string("convert_bf16_to_fp32-" + std::to_string(inputSlot->GetSlotIndex()) + "-") + + layer.GetName(); + ConvertBf16ToFp32Layer* convertLayer = + graph.InsertNewLayer<ConvertBf16ToFp32Layer>(*inputSlot, name.c_str()); + + TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(); + convertInfo.SetDataType(DataType::Float32); + + convertLayer->GetOutputSlot().SetTensorInfo(convertInfo); + + convertLayers.emplace_back(convertLayer); + } + } + + return convertLayers; +} + std::vector<ConvertFp16ToFp32Layer*> InsertConvertFp16ToFp32LayersBefore(Graph& graph, Layer& layer, bool expectCorrectInputType) @@ -76,6 +126,39 @@ std::vector<ConvertFp16ToFp32Layer*> InsertConvertFp16ToFp32LayersBefore(Graph& return convertLayers; } +std::vector<ConvertFp32ToBf16Layer*> InsertConvertFp32ToBf16LayersAfter(Graph& graph, Layer& layer) +{ + const unsigned int numOutputSlots = layer.GetNumOutputSlots(); + + std::vector<ConvertFp32ToBf16Layer*> convertLayers; + convertLayers.reserve(numOutputSlots); + + // Update Bf16 output slots to FP32 on current layer + ChangeOutputBf16ToFp32(layer); + + // Insert a ConvertFp32ToBf16Layer after each FP32 output slot + for (unsigned int slotIndex = 0u; slotIndex < numOutputSlots; ++slotIndex) + { + OutputSlot& outputSlot = layer.GetOutputSlot(slotIndex); + if(outputSlot.GetTensorInfo().GetDataType() == DataType::Float32) + { + const std::string name = + std::string("convert_fp32_to_bf16-" + std::to_string(slotIndex) + "-") + layer.GetName(); + ConvertFp32ToBf16Layer* convertLayer = + graph.InsertNewLayer<ConvertFp32ToBf16Layer>(outputSlot, name.c_str()); + + TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(); + convertInfo.SetDataType(DataType::BFloat16); + + convertLayer->GetOutputSlot().SetTensorInfo(convertInfo); + + convertLayers.emplace_back(convertLayer); + } + } + + return convertLayers; +} + std::vector<ConvertFp32ToFp16Layer*> InsertConvertFp32ToFp16LayersAfter(Graph& graph, Layer& layer) { const unsigned int numOutputSlots = layer.GetNumOutputSlots(); diff --git a/src/armnn/NetworkUtils.hpp b/src/armnn/NetworkUtils.hpp index 38fb22350d..064545aac5 100644 --- a/src/armnn/NetworkUtils.hpp +++ b/src/armnn/NetworkUtils.hpp @@ -11,6 +11,12 @@ namespace armnn { +std::vector<ConvertBf16ToFp32Layer*> InsertConvertBf16ToFp32LayersBefore(Graph& graph, + Layer& layer, + bool expectCorrectInputType = true); + +std::vector<ConvertFp32ToBf16Layer*> InsertConvertFp32ToBf16LayersAfter(Graph& graph, Layer& layer); + std::vector<ConvertFp16ToFp32Layer*> InsertConvertFp16ToFp32LayersBefore(Graph& graph, Layer& layer, bool expectCorrectInputType = true); diff --git a/src/armnn/optimizations/All.hpp b/src/armnn/optimizations/All.hpp index 273c337665..9fc284213d 100644 --- a/src/armnn/optimizations/All.hpp +++ b/src/armnn/optimizations/All.hpp @@ -6,6 +6,7 @@ #include "AddDebug.hpp" #include "ConvertConstants.hpp" +#include "ConvertFp32NetworkToBf16.hpp" #include "ConvertFp32NetworkToFp16.hpp" #include "FoldPadIntoConvolution2d.hpp" #include "MovePermuteUp.hpp" diff --git a/src/armnn/optimizations/ConvertConstants.hpp b/src/armnn/optimizations/ConvertConstants.hpp index 5e19c7bd05..f3ebcdf5d9 100644 --- a/src/armnn/optimizations/ConvertConstants.hpp +++ b/src/armnn/optimizations/ConvertConstants.hpp @@ -13,6 +13,7 @@ #include <armnn/utility/IgnoreUnused.hpp> +#include <BFloat16.hpp> #include <Half.hpp> namespace armnn @@ -20,6 +21,27 @@ namespace armnn namespace optimizations { +struct BFloat16ToFloat32 +{ + static void Func(std::unique_ptr<ScopedCpuTensorHandle>& handle) + { + const TensorInfo& info = handle->GetTensorInfo(); + + if (info.GetDataType() == DataType::BFloat16) + { + std::vector<float> newValues(info.GetNumElements()); + + armnnUtils::FloatingPointConverter::ConvertBFloat16ToFloat32(handle->GetTensor<BFloat16>(), + info.GetNumElements(), + newValues.data()); + + TensorInfo newInfo(info.GetShape(), DataType::Float32); + ConstTensor newInput(newInfo, newValues); + handle.reset(new ScopedCpuTensorHandle(newInput)); + } + } +}; + struct Float16ToFloat32 { static void Func(std::unique_ptr<ScopedCpuTensorHandle>& handle) @@ -41,6 +63,27 @@ struct Float16ToFloat32 } }; +struct Float32ToBFloat16 +{ + static void Func(std::unique_ptr<ScopedCpuTensorHandle>& handle) + { + const TensorInfo& info = handle->GetTensorInfo(); + + if (info.GetDataType() == DataType::Float32) + { + std::vector<BFloat16> newValues(info.GetNumElements()); + + armnnUtils::FloatingPointConverter::ConvertFloat32ToBFloat16(handle->GetTensor<float>(), + info.GetNumElements(), + newValues.data()); + + TensorInfo newInfo(info.GetShape(), DataType::BFloat16); + ConstTensor newInput(newInfo, newValues); + handle.reset(new ScopedCpuTensorHandle(newInput)); + } + } +}; + struct Float32ToFloat16 { static void Func(std::unique_ptr<ScopedCpuTensorHandle>& handle) @@ -97,6 +140,17 @@ struct IsFloat16Layer } }; +struct IsBFloat16Layer +{ + static bool Test(const Layer& layer) + { + return layer.GetDataType() == DataType::BFloat16; + } +}; + +using ConvertConstantsBFloatToFloat = ConvertConstants<BFloat16ToFloat32, IsFloat32Layer>; +using ConvertConstantsFloatToBFloat = ConvertConstants<Float32ToBFloat16, IsBFloat16Layer>; + using ConvertConstantsHalfToFloat = ConvertConstants<Float16ToFloat32, IsFloat32Layer>; using ConvertConstantsFloatToHalf = ConvertConstants<Float32ToFloat16, IsFloat16Layer>; diff --git a/src/armnn/optimizations/ConvertFp32NetworkToBf16.hpp b/src/armnn/optimizations/ConvertFp32NetworkToBf16.hpp new file mode 100644 index 0000000000..d6350c3af3 --- /dev/null +++ b/src/armnn/optimizations/ConvertFp32NetworkToBf16.hpp @@ -0,0 +1,81 @@ +// +// Copyright © 2020 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#include "Optimization.hpp" +#include "NetworkUtils.hpp" + +namespace armnn +{ +namespace optimizations +{ + +class ConvertFp32NetworkToBf16Impl +{ +public: + void Run(Graph& graph, Layer& layer) const + { + if(layer.GetType() == LayerType::Input) + { + // if the outputs of this layer are DataType::Float32 + // add a ConvertFloat32ToBFloat16 layer after each of the outputs + if (layer.GetDataType() == DataType::Float32) + { + InsertConvertFp32ToBf16LayersAfter(graph, layer); + } + } + else if (layer.GetType() == LayerType::Output) + { + // if the inputs of this layer are DataType::Float32 + // add a ConvertBFloat16ToFloat32 layer before each of the inputs + if (layer.GetDataType() == DataType::Float32) + { + // NOTE: We need to call InsertConvertBf16ToFp32LayersBefore with expectCorrectInputType = false + // here, otherwise it will expect the inputs to be DataType::BFloat16 + InsertConvertBf16ToFp32LayersBefore(graph, layer, false); + } + } + else if (layer.GetType() != LayerType::ConvertFp32ToBf16 && layer.GetType() != LayerType::ConvertBf16ToFp32) + { + // if the inputs/outputs of this layer are DataType::Float32 + // change the data type for all inputs and outputs to DataType::BFloat16 + for (auto&& input = layer.BeginInputSlots(); input != layer.EndInputSlots(); ++input) + { + // if it is connected to OutputSlot of the InputLayer do not change the DataType of connection + // InputSlots of the current layer will be updated when conversion layer is inserted after InputLayer + Layer& base = input->GetConnectedOutputSlot()->GetOwningLayer(); + if (base.GetType() != LayerType::Input) + { + TensorInfo convertInfo = input->GetConnection()->GetTensorInfo(); + if (convertInfo.GetDataType() == DataType::Float32) + { + convertInfo.SetDataType(DataType::BFloat16); + input->GetConnection()->SetTensorInfo(convertInfo); + } + } + } + + // change outputs to DataType::BFloat16 + for (auto&& output = layer.BeginOutputSlots(); output != layer.EndOutputSlots(); ++output) + { + TensorInfo convertInfo = output->GetTensorInfo(); + if (convertInfo.GetDataType() == DataType::Float32) + { + convertInfo.SetDataType(DataType::BFloat16); + output->SetTensorInfo(convertInfo); + } + } + } + } + +protected: + ConvertFp32NetworkToBf16Impl() = default; + ~ConvertFp32NetworkToBf16Impl() = default; +}; + +using Fp32NetworkToBf16Converter = OptimizeForType<Layer, ConvertFp32NetworkToBf16Impl>; + +} // namespace optimizations +} // namespace armnn diff --git a/src/armnn/test/optimizations/ConvertConstantsBFloatTests.cpp b/src/armnn/test/optimizations/ConvertConstantsBFloatTests.cpp new file mode 100644 index 0000000000..5cb89daafd --- /dev/null +++ b/src/armnn/test/optimizations/ConvertConstantsBFloatTests.cpp @@ -0,0 +1,127 @@ +// +// Copyright © 2020 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "../TestUtils.hpp" + +#include <BFloat16.hpp> +#include <Optimizer.hpp> + +#include <boost/test/unit_test.hpp> + +using namespace armnn; + +BOOST_AUTO_TEST_SUITE(Optimizer) +using namespace armnn::optimizations; + +BOOST_AUTO_TEST_CASE(ConvertConstantsFloatToBFloatTest) +{ + armnn::Graph graph; + + const armnn::TensorInfo info({ 1, 1, 1, 2 }, armnn::DataType::BFloat16); + + // Create const tensor from fp32 data + unsigned int dims[] = { 4, 2, 1, 1 }; + std::vector<float> floatWeights{ 0.0f, -1.0f, + 3.8f, // 0x40733333 Round down + 3.1055E+29f, // 0x707ADC3C Round up + 9.149516E-10f, // 0x307B7FFF Round down + -3.8f, // 0xC0733333 Round down + -3.1055E+29f, // 0xF07ADC3C Round up + -9.149516E-10f // 0xB07B7FFF Round down + }; + armnn::ConstTensor weights(armnn::TensorInfo(4, dims, armnn::DataType::Float32), floatWeights); + + // Create simple test network + auto input = graph.AddLayer<armnn::InputLayer>(0, "input"); + input->GetOutputSlot().SetTensorInfo(info); + + auto fc = graph.AddLayer<armnn::FullyConnectedLayer>(armnn::FullyConnectedDescriptor(), "fc"); + fc->m_Weight = std::make_unique<armnn::ScopedCpuTensorHandle>(weights); + fc->GetOutputSlot().SetTensorInfo(info); + + auto output = graph.AddLayer<armnn::OutputLayer>(1, "output"); + + // Connect up the layers + input->GetOutputSlot().Connect(fc->GetInputSlot(0)); + fc->GetOutputSlot().Connect(output->GetInputSlot(0)); + + // Check tensor data type before conversion + BOOST_CHECK(fc->m_Weight->GetTensorInfo().GetDataType() == armnn::DataType::Float32); + + // Run the optimizer + armnn::Optimizer::Pass(graph, armnn::MakeOptimizations(ConvertConstantsFloatToBFloat())); + + // Check tensor data type after conversion + BOOST_CHECK(fc->m_Weight->GetTensorInfo().GetDataType() == armnn::DataType::BFloat16); + + // Check whether data matches expected Bf16 data + BFloat16* data = fc->m_Weight->GetTensor<BFloat16>(); + BOOST_CHECK(data[0] == BFloat16(0.0f)); + BOOST_CHECK(data[1] == BFloat16(-1.0f)); + BOOST_CHECK(data[2] == BFloat16(3.796875f)); // 0x4073 + BOOST_CHECK(data[3] == BFloat16(3.1072295E29f)); // 0x707B + BOOST_CHECK(data[4] == BFloat16(9.131327E-10f)); // 0x307B + BOOST_CHECK(data[5] == BFloat16(-3.796875f)); // 0xC073 + BOOST_CHECK(data[6] == BFloat16(-3.1072295E29f)); // 0xF07B + BOOST_CHECK(data[7] == BFloat16(-9.131327E-10f)); // 0xB07B +} + +BOOST_AUTO_TEST_CASE(ConvertConstantsBFloatToFloatTest) +{ + armnn::Graph graph; + + const armnn::TensorInfo info({ 1, 1, 1, 2 }, armnn::DataType::Float32); + + // Create the BFloat16 precision input data + unsigned int dims[] = { 4, 2, 1, 1 }; + std::vector<float> convWeightsData{ 0.f, -1.f, + 3.796875f, // 0x4073 + 3.1072295E29f, // 0x707B + 9.131327E-10f, // 0x307B + -3.796875f, // 0xC073 + -3.1072295E29f, // 0xF07B + -9.131327E-10f // 0xB07B + }; + std::vector<uint16_t> bfWeights(8); + armnnUtils::FloatingPointConverter::ConvertFloat32ToBFloat16(convWeightsData.data(), convWeightsData.size(), + bfWeights.data()); + armnn::ConstTensor weights(armnn::TensorInfo(4, dims, armnn::DataType::BFloat16), bfWeights); + + //Create the simple test network + auto input = graph.AddLayer<armnn::InputLayer>(0, "input"); + input->GetOutputSlot().SetTensorInfo(info); + + auto fc = graph.AddLayer<armnn::FullyConnectedLayer>(armnn::FullyConnectedDescriptor(), "fc"); + fc->m_Weight = std::make_unique<armnn::ScopedCpuTensorHandle>(weights); + fc->GetOutputSlot().SetTensorInfo(info); + + auto output = graph.AddLayer<armnn::OutputLayer>(1, "output"); + + //Connect up the layers + input->GetOutputSlot().Connect(fc->GetInputSlot(0)); + fc->GetOutputSlot().Connect(output->GetInputSlot(0)); + + //Test the tensor info is correct. + BOOST_CHECK(fc->m_Weight->GetTensorInfo().GetDataType() == armnn::DataType::BFloat16); + + // Run the optimizer + armnn::Optimizer::Pass(graph, armnn::MakeOptimizations(ConvertConstantsBFloatToFloat())); + + //Test the tensor info is correct. + BOOST_CHECK(fc->m_Weight->GetTensorInfo().GetDataType() == armnn::DataType::Float32); + + // Now test the data matches float32 data + float* data = fc->m_Weight->GetTensor<float>(); + BOOST_CHECK(data[0] == 0.0f); + BOOST_CHECK(data[1] == -1.0f); + BOOST_CHECK(data[2] == 3.796875f); + BOOST_CHECK(data[3] == 3.1072295E29f); + BOOST_CHECK(data[4] == 9.131327E-10f); + BOOST_CHECK(data[5] == -3.796875f); + BOOST_CHECK(data[6] == -3.1072295E29f); + BOOST_CHECK(data[7] == -9.131327E-10f); +} + +BOOST_AUTO_TEST_SUITE_END()
\ No newline at end of file diff --git a/src/armnn/test/optimizations/Fp32NetworkToBf16ConverterTests.cpp b/src/armnn/test/optimizations/Fp32NetworkToBf16ConverterTests.cpp new file mode 100644 index 0000000000..90a15487ac --- /dev/null +++ b/src/armnn/test/optimizations/Fp32NetworkToBf16ConverterTests.cpp @@ -0,0 +1,45 @@ +// +// Copyright © 2020 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "../TestUtils.hpp" + +#include <Optimizer.hpp> + +#include <boost/test/unit_test.hpp> + +BOOST_AUTO_TEST_SUITE(Optimizer) +using namespace armnn::optimizations; + +BOOST_AUTO_TEST_CASE(Fp32NetworkToBf16OptimizationTest) +{ + armnn::Graph graph; + + const armnn::TensorInfo infoFP32({ 2, 2, 1, 3 }, armnn::DataType::Float32); + + // Create the simple test network + auto input = graph.AddLayer<armnn::InputLayer>(0, "input"); + input->GetOutputSlot().SetTensorInfo(infoFP32); + + auto floor = graph.AddLayer<armnn::FloorLayer>("floor"); + floor->GetOutputSlot().SetTensorInfo(infoFP32); + + auto output = graph.AddLayer<armnn::OutputLayer>(1, "output"); + + // Connect up the layers + input->GetOutputSlot().Connect(floor->GetInputSlot(0)); + floor->GetOutputSlot().Connect(output->GetInputSlot(0)); + + BOOST_TEST(CheckSequence(graph.cbegin(), graph.cend(), &IsLayerOfType<armnn::InputLayer>, + &IsLayerOfType<armnn::FloorLayer>, &IsLayerOfType<armnn::OutputLayer>)); + + // Run the optimizer + armnn::Optimizer::Pass(graph, armnn::MakeOptimizations(Fp32NetworkToBf16Converter())); + + BOOST_TEST(CheckSequence(graph.cbegin(), graph.cend(), &IsLayerOfType<armnn::InputLayer>, + &IsLayerOfType<armnn::ConvertFp32ToBf16Layer>, &IsLayerOfType<armnn::FloorLayer>, + &IsLayerOfType<armnn::ConvertBf16ToFp32Layer>, &IsLayerOfType<armnn::OutputLayer>)); +} + +BOOST_AUTO_TEST_SUITE_END()
\ No newline at end of file |