diff options
Diffstat (limited to 'src/armnn/optimizations/ConvertConstDequantisationLayersToConstLayers.hpp')
-rw-r--r-- | src/armnn/optimizations/ConvertConstDequantisationLayersToConstLayers.hpp | 119 |
1 file changed, 119 insertions, 0 deletions
diff --git a/src/armnn/optimizations/ConvertConstDequantisationLayersToConstLayers.hpp b/src/armnn/optimizations/ConvertConstDequantisationLayersToConstLayers.hpp new file mode 100644 index 0000000000..16314dc0d0 --- /dev/null +++ b/src/armnn/optimizations/ConvertConstDequantisationLayersToConstLayers.hpp @@ -0,0 +1,119 @@ +// +// Copyright © 2022 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#include "Optimization.hpp" +#include "NetworkUtils.hpp" + +namespace armnn +{ +namespace optimizations +{ + +class ConvertConstDequantisationLayersToConstLayersImpl +{ +public: + void Run(Graph& graph, InputSlot& connection) const + { + Layer& base = connection.GetConnectedOutputSlot()->GetOwningLayer(); + Layer& child = connection.GetOwningLayer(); + + ARMNN_ASSERT(base.GetType() == LayerType::Constant); + ARMNN_ASSERT(child.GetType() == LayerType::Dequantize); + + ReplaceConstDequantisationLayer(graph, + PolymorphicDowncast<ConstantLayer*>(&base), + PolymorphicDowncast<DequantizeLayer*>(&child)); + + } +protected: + ConvertConstDequantisationLayersToConstLayersImpl() = default; + ~ConvertConstDequantisationLayersToConstLayersImpl() = default; +private: + + static void ReplaceConstDequantisationLayer(Graph& graph, + ConstantLayer* constantLayer, + DequantizeLayer* dequantizeLayer) + { + IgnoreUnused(graph); + /** + * This optimisation is to find situations where a constant set of inputs is being provided to a Dequantization + * layer. In this case we don't want the overhead of Dequantizing the values on every inference, instead we + * want to Dequantize them once and store them in a Const layer to be used everytime as they will not change. 
+ */ + TensorInfo constantInfo = constantLayer->GetOutputSlot(0).GetTensorInfo(); + TensorInfo inputDequantizeInfo = dequantizeLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(); + TensorInfo outputDequantizeInfo = dequantizeLayer->GetOutputSlot(0).GetTensorInfo(); + + ARMNN_ASSERT(constantLayer->GetNumOutputSlots() == 1); + auto numConnections = constantLayer->GetOutputSlot(0).GetNumConnections(); + + std::vector<float> newValues(outputDequantizeInfo.GetNumElements()); + if (constantInfo.GetDataType() == DataType::Float16 && + inputDequantizeInfo.GetDataType() == DataType::Float16 && + outputDequantizeInfo.GetDataType() == DataType::Float32) + { + armnnUtils::FloatingPointConverter::ConvertFloat16To32(constantLayer->m_LayerOutput->Map(true), + outputDequantizeInfo.GetNumElements(), + newValues.data()); + } + else if (constantInfo.GetDataType() == DataType::QAsymmS8 && + inputDequantizeInfo.GetDataType() == DataType::QAsymmS8 && + outputDequantizeInfo.GetDataType() == DataType::Float32) + { + ConvertInt8To32(constantLayer->m_LayerOutput->Map(true), + outputDequantizeInfo.GetNumElements(), + newValues.data()); + } + + TensorInfo newInfo = outputDequantizeInfo; + newInfo.SetConstant(true); + ConstTensor newInput(newInfo, newValues); + constantLayer->m_LayerOutput.reset(new ScopedTensorHandle(newInput)); + + // Moves connections in dequantize output to the constant layer. + // Dequantize layer will be removed if left unconnected. 
+ dequantizeLayer->GetOutputSlot().MoveAllConnections(constantLayer->GetOutputSlot()); + + // Updating the output tensor + constantLayer->GetOutputSlot(0).SetTensorInfo(newInfo); + ARMNN_ASSERT(constantLayer->GetOutputSlot(0).GetTensorInfo().IsConstant() == true); + + // Set isConstant to true in all input tensor infos where constantLayer is now connected to + for (unsigned int i = numConnections; i < constantLayer->GetOutputSlot(0).GetNumConnections(); ++i) + { + auto info = constantLayer->GetOutputSlot(0).GetConnection(i)->GetOwningLayer().GetInputSlot(0) + .GetConnectedOutputSlot()->GetTensorInfo(); + info.SetConstant(); + constantLayer->GetOutputSlot(0).GetConnection(i)->GetOwningLayer().GetInputSlot(0) + .GetConnectedOutputSlot()->SetTensorInfo(info); + } + } + + +static void ConvertInt8To32(const void* srcInt8Buffer, + size_t numElements, + float* dstFloat32Buffer) +{ + ARMNN_ASSERT(srcInt8Buffer != nullptr); + ARMNN_ASSERT(dstFloat32Buffer != nullptr); + + const auto* pInt8 = static_cast<const int8_t*>(srcInt8Buffer); + + for (size_t i = 0; i < numElements; ++i) + { + dstFloat32Buffer[i] = pInt8[i]; + } +} + +}; + +using ConvertConstDequantisationLayersToConstLayers + = OptimizeForConnection<ConstantLayer, + DequantizeLayer, + ConvertConstDequantisationLayersToConstLayersImpl>; + +} // namespace optimizations +} // namespace armnn
\ No newline at end of file |