From 5841c740ba6bc6c8c3e96d24156dc47907af6430 Mon Sep 17 00:00:00 2001
From: Teresa Charlin
Date: Sun, 15 May 2022 14:07:05 +0100
Subject: IVGCVSW-6455 Support Const + Dequantize layer and optimize it.

* Support Float16 as input to Dequantize layer
* Add Optimization to substitute Const+Dequantize layers with Const layer

Signed-off-by: Teresa Charlin
Change-Id: I58bb7e3871ca480c7b6fca93c4efb2de84e09e64
Signed-off-by: David
---
 CMakeLists.txt                                      |   1 +
 src/armnn/Network.cpp                               |   1 +
 src/armnn/optimizations/All.hpp                     |   1 +
 ...nvertConstDequantisationLayersToConstLayers.hpp  | 119 +++++++++++++++++++++
 ...tConstDequantisationLayersToConstLayersTest.cpp  | 105 ++++++++++++++++++
 src/backends/backendsCommon/WorkloadData.cpp        |  15 ++-
 6 files changed, 237 insertions(+), 5 deletions(-)
 create mode 100644 src/armnn/optimizations/ConvertConstDequantisationLayersToConstLayers.hpp
 create mode 100644 src/armnn/test/optimizations/ConvertConstDequantisationLayersToConstLayersTest.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index e67c389f3d..903f06c86c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -528,6 +528,7 @@ if(BUILD_UNIT_TESTS)
         src/armnn/test/ObservableTest.cpp
         src/armnn/test/OptimizerTests.cpp
         src/armnn/test/optimizations/AddBroadcastReshapeLayerTests.cpp
+        src/armnn/test/optimizations/ConvertConstDequantisationLayersToConstLayersTest.cpp
         src/armnn/test/optimizations/ConvertConstantsBFloatTests.cpp
         src/armnn/test/optimizations/ConvertConstantsFloatToHalfTests.cpp
         src/armnn/test/optimizations/ConvertConstantsHalfToFloatTests.cpp
diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp
index 6a646d3cc8..9da28ceeea 100644
--- a/src/armnn/Network.cpp
+++ b/src/armnn/Network.cpp
@@ -1739,6 +1739,7 @@ IOptimizedNetworkPtr Optimize(const Graph& inGraph,
                                                 FuseBatchNormIntoConvolution2DFloat16(),
                                                 FuseBatchNormIntoDepthwiseConvolution2DFloat32(),
                                                 FuseBatchNormIntoDepthwiseConvolution2DFloat16(),
+                                                ConvertConstDequantisationLayersToConstLayers(),
                                                 RedirectMembersToConstantInputs()));
 
     // If Fp32 to Fp16 optimization is set convert Fp32 network to Fp16
diff --git a/src/armnn/optimizations/All.hpp b/src/armnn/optimizations/All.hpp
index 38c4ac9462..e4a1f33e08 100644
--- a/src/armnn/optimizations/All.hpp
+++ b/src/armnn/optimizations/All.hpp
@@ -7,6 +7,7 @@
 #include "AddBroadcastReshapeLayer.hpp"
 #include "AddDebug.hpp"
 #include "ConvertConstants.hpp"
+#include "ConvertConstDequantisationLayersToConstLayers.hpp"
 #include "ConvertFp32NetworkToBf16.hpp"
 #include "ConvertFp32NetworkToFp16.hpp"
 #include "FoldPadIntoLayer2d.hpp"
diff --git a/src/armnn/optimizations/ConvertConstDequantisationLayersToConstLayers.hpp b/src/armnn/optimizations/ConvertConstDequantisationLayersToConstLayers.hpp
new file mode 100644
index 0000000000..16314dc0d0
--- /dev/null
+++ b/src/armnn/optimizations/ConvertConstDequantisationLayersToConstLayers.hpp
@@ -0,0 +1,119 @@
+//
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#pragma once
+
+#include "Optimization.hpp"
+#include "NetworkUtils.hpp"
+
+namespace armnn
+{
+namespace optimizations
+{
+
+class ConvertConstDequantisationLayersToConstLayersImpl
+{
+public:
+    void Run(Graph& graph, InputSlot& connection) const
+    {
+        Layer& base = connection.GetConnectedOutputSlot()->GetOwningLayer();
+        Layer& child = connection.GetOwningLayer();
+
+        ARMNN_ASSERT(base.GetType() == LayerType::Constant);
+        ARMNN_ASSERT(child.GetType() == LayerType::Dequantize);
+
+        ReplaceConstDequantisationLayer(graph,
+                                        PolymorphicDowncast<ConstantLayer*>(&base),
+                                        PolymorphicDowncast<DequantizeLayer*>(&child));
+
+    }
+protected:
+    ConvertConstDequantisationLayersToConstLayersImpl() = default;
+    ~ConvertConstDequantisationLayersToConstLayersImpl() = default;
+private:
+
+    static void ReplaceConstDequantisationLayer(Graph& graph,
+                                                ConstantLayer* constantLayer,
+                                                DequantizeLayer* dequantizeLayer)
+    {
+        IgnoreUnused(graph);
+        /**
+         * This optimisation finds situations where a constant set of inputs is being provided to a Dequantize
+         * layer. In this case we don't want the overhead of Dequantizing the values on every inference; instead we
+         * want to Dequantize them once and store them in a Const layer to be reused every time, as they will not change.
+         */
+        TensorInfo constantInfo = constantLayer->GetOutputSlot(0).GetTensorInfo();
+        TensorInfo inputDequantizeInfo = dequantizeLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
+        TensorInfo outputDequantizeInfo = dequantizeLayer->GetOutputSlot(0).GetTensorInfo();
+
+        ARMNN_ASSERT(constantLayer->GetNumOutputSlots() == 1);
+        auto numConnections = constantLayer->GetOutputSlot(0).GetNumConnections();
+
+        std::vector<float> newValues(outputDequantizeInfo.GetNumElements());
+        if (constantInfo.GetDataType() == DataType::Float16 &&
+            inputDequantizeInfo.GetDataType() == DataType::Float16 &&
+            outputDequantizeInfo.GetDataType() == DataType::Float32)
+        {
+            armnnUtils::FloatingPointConverter::ConvertFloat16To32(constantLayer->m_LayerOutput->Map(true),
+                                                                   outputDequantizeInfo.GetNumElements(),
+                                                                   newValues.data());
+        }
+        else if (constantInfo.GetDataType() == DataType::QAsymmS8 &&
+                 inputDequantizeInfo.GetDataType() == DataType::QAsymmS8 &&
+                 outputDequantizeInfo.GetDataType() == DataType::Float32)
+        {
+            ConvertInt8To32(constantLayer->m_LayerOutput->Map(true),
+                            outputDequantizeInfo.GetNumElements(),
+                            newValues.data());
+        }
+
+        TensorInfo newInfo = outputDequantizeInfo;
+        newInfo.SetConstant(true);
+        ConstTensor newInput(newInfo, newValues);
+        constantLayer->m_LayerOutput.reset(new ScopedTensorHandle(newInput));
+
+        // Moves connections in dequantize output to the constant layer.
+        // Dequantize layer will be removed if left unconnected.
+        dequantizeLayer->GetOutputSlot().MoveAllConnections(constantLayer->GetOutputSlot());
+
+        // Update the output tensor info
+        constantLayer->GetOutputSlot(0).SetTensorInfo(newInfo);
+        ARMNN_ASSERT(constantLayer->GetOutputSlot(0).GetTensorInfo().IsConstant() == true);
+
+        // Set isConstant to true in all input tensor infos that constantLayer is now connected to
+        for (unsigned int i = numConnections; i < constantLayer->GetOutputSlot(0).GetNumConnections(); ++i)
+        {
+            auto info = constantLayer->GetOutputSlot(0).GetConnection(i)->GetOwningLayer().GetInputSlot(0)
+                                      .GetConnectedOutputSlot()->GetTensorInfo();
+            info.SetConstant();
+            constantLayer->GetOutputSlot(0).GetConnection(i)->GetOwningLayer().GetInputSlot(0)
+                          .GetConnectedOutputSlot()->SetTensorInfo(info);
+        }
+    }
+
+
+static void ConvertInt8To32(const void* srcInt8Buffer,
+                            size_t numElements,
+                            float* dstFloat32Buffer)
+{
+    ARMNN_ASSERT(srcInt8Buffer != nullptr);
+    ARMNN_ASSERT(dstFloat32Buffer != nullptr);
+
+    const auto* pInt8 = static_cast<const int8_t*>(srcInt8Buffer);
+
+    for (size_t i = 0; i < numElements; ++i)
+    {
+        dstFloat32Buffer[i] = pInt8[i];
+    }
+}
+
+};
+
+using ConvertConstDequantisationLayersToConstLayers
+    = OptimizeForConnection<ConstantLayer, DequantizeLayer, ConvertConstDequantisationLayersToConstLayersImpl>;
+
+} // namespace optimizations
+} // namespace armnn
\ No newline at end of file
diff --git a/src/armnn/test/optimizations/ConvertConstDequantisationLayersToConstLayersTest.cpp b/src/armnn/test/optimizations/ConvertConstDequantisationLayersToConstLayersTest.cpp
new file mode 100644
index 0000000000..926ac2d26d
--- /dev/null
+++ b/src/armnn/test/optimizations/ConvertConstDequantisationLayersToConstLayersTest.cpp
@@ -0,0 +1,105 @@
+//
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "LayersFwd.hpp"
+#include <Network.hpp>
+#include <Optimizer.hpp>
+#include <TestUtils.hpp>
+#include <armnn/backends/TensorHandle.hpp>
+#include <doctest/doctest.h>
+
+TEST_SUITE("Optimizer")
+{
+using namespace armnn;
+using namespace armnn::optimizations;
+
+TEST_CASE("ConvertConstFloat16DequantizeToConstFloat32")
+{
+    Graph graph;
+    const unsigned int shape[] = {1, 2, 2, 3};
+
+    const TensorInfo constTensorInfo(4, shape, DataType::Float16, 1.0, 0, true);
+    const TensorInfo outputDequantizeInfo(4, shape, DataType::Float32, 1.0, 0, true);
+
+    ConstantLayer *constantLayer = graph.AddLayer<ConstantLayer>("constant");
+    std::vector<float> constantValues(constTensorInfo.GetNumElements(), 4.5f);
+    ConstTensor constTensor(constTensorInfo, constantValues.data());
+    constantLayer->m_LayerOutput = std::make_shared<ScopedTensorHandle>(constTensor);
+    constantLayer->GetOutputSlot().SetTensorInfo(constTensorInfo);
+
+    DequantizeLayer *dequantizeLayer = graph.AddLayer<DequantizeLayer>("dequantize");
+    dequantizeLayer->GetOutputSlot().SetTensorInfo(outputDequantizeInfo);
+
+    OutputLayer *output = graph.AddLayer<OutputLayer>(0, "output");
+
+    // Connect up constant -> dequantize -> output
+    constantLayer->GetOutputSlot().Connect(dequantizeLayer->GetInputSlot(0));
+    dequantizeLayer->GetOutputSlot().Connect(output->GetInputSlot(0));
+
+    auto checkConstantFloat16 = [](const armnn::Layer *const layer) -> bool {
+        return IsLayerOfType<ConstantLayer>(layer) &&
+               (layer->GetDataType() == DataType::Float16);
+    };
+    auto checkConstantFloat32 = [](const armnn::Layer *const layer) -> bool {
+        return IsLayerOfType<ConstantLayer>(layer) &&
+               (layer->GetDataType() == DataType::Float32);
+    };
+
+    CHECK(CheckSequence(graph.cbegin(), graph.cend(),
+                        checkConstantFloat16,
+                        &IsLayerOfType<DequantizeLayer>,
+                        &IsLayerOfType<OutputLayer>));
+
+    armnn::Optimizer::Pass(graph, MakeOptimizations(ConvertConstDequantisationLayersToConstLayers()));
+
+    CHECK(CheckSequence(graph.cbegin(), graph.cend(),
+                        checkConstantFloat32,
+                        &IsLayerOfType<OutputLayer>));
+}
+
+TEST_CASE("ConvertConstInt8DequantizeToConstFloat32")
+{
+    Graph graph;
+    const unsigned int shape[] = {1, 2, 2, 3};
+
+    const TensorInfo constTensorInfo(4, shape, DataType::QAsymmS8, 1.0, 0, true);
+    const TensorInfo outputDequantizeInfo(4, shape, DataType::Float32, 1.0, 0, true);
+
+    ConstantLayer *constantLayer = graph.AddLayer<ConstantLayer>("constant");
+    std::vector<int8_t> constantValues(constTensorInfo.GetNumElements(), 5);
+    ConstTensor constTensor(constTensorInfo, constantValues.data());
+    constantLayer->m_LayerOutput = std::make_shared<ScopedTensorHandle>(constTensor);
+    constantLayer->GetOutputSlot().SetTensorInfo(constTensorInfo);
+
+    DequantizeLayer *dequantizeLayer = graph.AddLayer<DequantizeLayer>("dequantize");
+    dequantizeLayer->GetOutputSlot().SetTensorInfo(outputDequantizeInfo);
+
+    OutputLayer *output = graph.AddLayer<OutputLayer>(0, "output");
+
+    // Connect up constant -> dequantize -> output
+    constantLayer->GetOutputSlot().Connect(dequantizeLayer->GetInputSlot(0));
+    dequantizeLayer->GetOutputSlot().Connect(output->GetInputSlot(0));
+
+    auto checkConstantQAsymmS8 = [](const armnn::Layer *const layer) -> bool {
+        return IsLayerOfType<ConstantLayer>(layer) &&
+               (layer->GetDataType() == DataType::QAsymmS8);
+    };
+    auto checkConstantFloat32 = [](const armnn::Layer *const layer) -> bool {
+        return IsLayerOfType<ConstantLayer>(layer) &&
+               (layer->GetDataType() == DataType::Float32);
+    };
+
+    CHECK(CheckSequence(graph.cbegin(), graph.cend(),
+                        checkConstantQAsymmS8,
+                        &IsLayerOfType<DequantizeLayer>,
+                        &IsLayerOfType<OutputLayer>));
+
+    armnn::Optimizer::Pass(graph, MakeOptimizations(ConvertConstDequantisationLayersToConstLayers()));
+
+    CHECK(CheckSequence(graph.cbegin(), graph.cend(),
+                        checkConstantFloat32,
+                        &IsLayerOfType<OutputLayer>));
+}
+}
diff --git a/src/backends/backendsCommon/WorkloadData.cpp b/src/backends/backendsCommon/WorkloadData.cpp
index 2194b487d3..606821b5e5 100644
--- a/src/backends/backendsCommon/WorkloadData.cpp
+++ b/src/backends/backendsCommon/WorkloadData.cpp
@@ -2903,19 +2903,24 @@ void DequantizeQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const
     const TensorInfo& inputTensorInfo = workloadInfo.m_InputTensorInfos[0];
     const TensorInfo& outputTensorInfo = workloadInfo.m_OutputTensorInfos[0];
 
-    if (!IsQuantizedType(inputTensorInfo.GetDataType()))
+    std::vector<DataType> inputSupportedTypes =
     {
-        throw InvalidArgumentException(descriptorName + ": Input to dequantize layer must be quantized type.");
-    }
+        DataType::QAsymmS8,
+        DataType::QAsymmU8,
+        DataType::QSymmS8,
+        DataType::QSymmS16,
+        DataType::Float16
+    };
+    ValidateDataTypes(inputTensorInfo, inputSupportedTypes, descriptorName);
 
-    std::vector<DataType> supportedTypes =
+    std::vector<DataType> outputSupportedTypes =
     {
         DataType::BFloat16,
         DataType::Float32,
         DataType::Float16
     };
 
-    ValidateDataTypes(outputTensorInfo, supportedTypes, descriptorName);
+    ValidateDataTypes(outputTensorInfo, outputSupportedTypes, descriptorName);
 }
 
 void MergeQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const
--
cgit v1.2.1
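
Note on usage: the unit tests above drive the new pass directly via Optimizer::Pass. As a minimal
sketch (not part of the patch; it assumes an armnn::Graph that already contains a Constant layer
feeding a Dequantize layer, uses the internal src/armnn include paths, and the helper name
FoldConstDequantize is purely illustrative), the optimization can be applied on its own like this:

    // Sketch only: apply the Const + Dequantize folding pass outside of armnn::Optimize().
    // Assumed include paths follow the internal src/armnn layout added/used by this patch.
    #include "Graph.hpp"
    #include "Optimizer.hpp"
    #include "optimizations/All.hpp"

    void FoldConstDequantize(armnn::Graph& graph)
    {
        using namespace armnn::optimizations;
        // Substitutes each Constant + Dequantize pair with a single Float32 Constant layer;
        // the Dequantize layer is left unconnected and can then be pruned from the graph.
        armnn::Optimizer::Pass(graph,
                               armnn::MakeOptimizations(ConvertConstDequantisationLayersToConstLayers()));
    }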