From 9f62d33156376c040dac9a4063dc733b95c5ad27 Mon Sep 17 00:00:00 2001 From: Narumol Prangnawarat Date: Tue, 11 Jul 2023 16:49:00 +0100 Subject: IVGCVSW-7783 Add check for FP16 infinity values * Check to round to closest finite FP16 value when convert FP32 to FP16 * Unit tests to be added Signed-off-by: Narumol Prangnawarat Change-Id: If3b982ff3030379ac33c47d4be13edb0bda679f6 --- .../ConvertConstantsFloatToHalfTests.cpp | 54 ++++++++++++++++++++-- src/armnnUtils/FloatingPointConverter.cpp | 5 ++ 2 files changed, 56 insertions(+), 3 deletions(-) diff --git a/src/armnn/test/optimizations/ConvertConstantsFloatToHalfTests.cpp b/src/armnn/test/optimizations/ConvertConstantsFloatToHalfTests.cpp index 118907e703..5961c04dc3 100644 --- a/src/armnn/test/optimizations/ConvertConstantsFloatToHalfTests.cpp +++ b/src/armnn/test/optimizations/ConvertConstantsFloatToHalfTests.cpp @@ -63,7 +63,6 @@ TEST_CASE("ConvertConstantsFloatToHalfTest") CHECK(data[3] == Half(4.0f)); } - TEST_CASE("ConvertConstantsFloatToHalfTest_constant") { armnn::Graph graph; @@ -80,7 +79,7 @@ TEST_CASE("ConvertConstantsFloatToHalfTest_constant") auto output = graph.AddLayer(1, "Output"); float expectedWeightsData[] = { 1.0f, 2.0f, 3.0f, 4.0f }; - float expectedBiasesData[] = { 2.0f, 2.0f }; + float expectedBiasesData[] = { 0.0f, 2.0f }; const armnn::TensorInfo inputInfo ({ 1, 2, 2, 3 }, armnn::DataType::Float16); const armnn::TensorInfo outputInfo ({ 1, 2, 2, 3 }, armnn::DataType::Float16); @@ -124,9 +123,58 @@ TEST_CASE("ConvertConstantsFloatToHalfTest_constant") // Check whether bias data matches expected fp16 data const Half* biasData = biases->m_LayerOutput->GetConstTensor(); - CHECK(biasData[0] == Half(2.0f)); + CHECK(biasData[0] == Half(0.0f)); CHECK(biasData[1] == Half(2.0f)); } +TEST_CASE("ConvertConstantsFloatToHalfInfinityTest") +{ + armnn::Graph graph; + + const armnn::TensorInfo info({ 1, 1, 1, 2 }, armnn::DataType::Float16); + + // Create const tensor from fp32 data + unsigned int dims[] = { 4, 1, 1, 1 }; + std::vector floatWeights{ std::numeric_limits::infinity(), + -std::numeric_limits::infinity(), + std::numeric_limits::max(), + std::numeric_limits::lowest() }; + armnn::TensorInfo weightsInfo = armnn::TensorInfo(4, dims, armnn::DataType::Float32, 0.0f, 0, true); + armnn::ConstTensor weights(weightsInfo, floatWeights); + + // Create simple test network + auto input = graph.AddLayer(0, "input"); + input->GetOutputSlot().SetTensorInfo(info); + + auto fc = graph.AddLayer(armnn::FullyConnectedDescriptor(), "fc"); + fc->GetOutputSlot().SetTensorInfo(info); + + auto weightsLayer = graph.AddLayer("weights"); + weightsLayer->m_LayerOutput = std::make_unique(weights); + weightsLayer->GetOutputSlot().SetTensorInfo(weightsInfo); + + auto output = graph.AddLayer(1, "output"); + + // Connect up the layers + input->GetOutputSlot().Connect(fc->GetInputSlot(0)); + weightsLayer->GetOutputSlot().Connect(fc->GetInputSlot(1)); + fc->GetOutputSlot().Connect(output->GetInputSlot(0)); + + // Check tensor data type before conversion + CHECK(weightsLayer->m_LayerOutput->GetTensorInfo().GetDataType() == armnn::DataType::Float32); + + // Run the optimizer + armnn::Optimizer::Pass(graph, armnn::MakeOptimizations(ConvertConstantsFloatToHalf())); + + // Check tensor data type after conversion + CHECK(weightsLayer->m_LayerOutput->GetTensorInfo().GetDataType() == armnn::DataType::Float16); + + // Check whether data matches expected fp16 data + const Half* data = weightsLayer->m_LayerOutput->GetConstTensor(); + CHECK(data[0] == std::numeric_limits::max()); + CHECK(data[1] == std::numeric_limits::lowest()); + CHECK(data[2] == std::numeric_limits::max()); + CHECK(data[3] == std::numeric_limits::lowest()); +} } \ No newline at end of file diff --git a/src/armnnUtils/FloatingPointConverter.cpp b/src/armnnUtils/FloatingPointConverter.cpp index 7a684f1eb0..024b74eea4 100644 --- a/src/armnnUtils/FloatingPointConverter.cpp +++ b/src/armnnUtils/FloatingPointConverter.cpp @@ -25,6 +25,11 @@ void FloatingPointConverter::ConvertFloat32To16(const float* srcFloat32Buffer, for (size_t i = 0; i < numElements; i++) { pHalf[i] = armnn::Half(srcFloat32Buffer[i]); + if (isinf(pHalf[i])) + { + // If the value of converted Fp16 is infinity, round to the closest finite Fp16 value. + pHalf[i] = copysign(std::numeric_limits::max(), pHalf[i]); + } } } -- cgit v1.2.1