//
// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#pragma once

#include "Optimization.hpp"
#include "NetworkUtils.hpp"

namespace armnn
{
namespace optimizations
{

class ConvertConstDequantisationLayersToConstLayersImpl
{
public:
    void Run(Graph& graph, InputSlot& connection) const
    {
        Layer& base = connection.GetConnectedOutputSlot()->GetOwningLayer();
        Layer& child = connection.GetOwningLayer();

        ARMNN_ASSERT(base.GetType() == LayerType::Constant);
        ARMNN_ASSERT(child.GetType() == LayerType::Dequantize);

        ReplaceConstDequantisationLayer(graph,
                                        PolymorphicDowncast<ConstantLayer*>(&base),
                                        PolymorphicDowncast<DequantizeLayer*>(&child));
    }

protected:
    ConvertConstDequantisationLayersToConstLayersImpl() = default;
    ~ConvertConstDequantisationLayersToConstLayersImpl() = default;

private:
    static void ReplaceConstDequantisationLayer(Graph& graph,
                                                ConstantLayer* constantLayer,
                                                DequantizeLayer* dequantizeLayer)
    {
        IgnoreUnused(graph);
        /**
         * This optimisation finds situations where a constant set of inputs is fed into a Dequantize
         * layer. In that case we don't want the overhead of dequantizing the values on every inference;
         * instead we dequantize them once and store them in a Constant layer, to be reused every time,
         * as they will not change.
         */
        TensorInfo constantInfo = constantLayer->GetOutputSlot(0).GetTensorInfo();
        TensorInfo inputDequantizeInfo = dequantizeLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
        TensorInfo outputDequantizeInfo = dequantizeLayer->GetOutputSlot(0).GetTensorInfo();

        ARMNN_ASSERT(constantLayer->GetNumOutputSlots() == 1);
        auto numConnections = constantLayer->GetOutputSlot(0).GetNumConnections();

        std::vector<float> newValues(outputDequantizeInfo.GetNumElements());
        if (constantInfo.GetDataType() == DataType::Float16 &&
            inputDequantizeInfo.GetDataType() == DataType::Float16 &&
            outputDequantizeInfo.GetDataType() == DataType::Float32)
        {
            armnnUtils::FloatingPointConverter::ConvertFloat16To32(constantLayer->m_LayerOutput->Map(true),
                                                                   outputDequantizeInfo.GetNumElements(),
                                                                   newValues.data());
        }
        else if (constantInfo.GetDataType() == DataType::QAsymmS8 &&
                 inputDequantizeInfo.GetDataType() == DataType::QAsymmS8 &&
                 outputDequantizeInfo.GetDataType() == DataType::Float32)
        {
            // Dequantize the stored int8 values once, using the constant tensor's
            // quantization parameters, so the stored floats match what the
            // Dequantize layer would have produced at inference time.
            ConvertInt8To32(constantLayer->m_LayerOutput->Map(true),
                            outputDequantizeInfo.GetNumElements(),
                            constantInfo.GetQuantizationScale(),
                            constantInfo.GetQuantizationOffset(),
                            newValues.data());
        }

        TensorInfo newInfo = outputDequantizeInfo;
        newInfo.SetConstant(true);
        ConstTensor newInput(newInfo, newValues);
        constantLayer->m_LayerOutput.reset(new ScopedTensorHandle(newInput));

        // Moves connections in dequantize output to the constant layer.
        // Dequantize layer will be removed if left unconnected.
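        //
        // Illustrative sketch of the rewiring below (the consumer layer is an
        // example, not taken from any particular network):
        //
        //   Before:  Constant (QAsymmS8) ---> Dequantize ---> e.g. FullyConnected
        //   After:   Constant (Float32)  -------------------> e.g. FullyConnected
        //            (Dequantize is left unconnected, so it will be removed)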
        dequantizeLayer->GetOutputSlot().MoveAllConnections(constantLayer->GetOutputSlot());

        // Update the constant layer's output tensor info to the dequantized (Float32) one.
        constantLayer->GetOutputSlot(0).SetTensorInfo(newInfo);
        ARMNN_ASSERT(constantLayer->GetOutputSlot(0).GetTensorInfo().IsConstant() == true);

        // Set isConstant to true in all input tensor infos that constantLayer is now connected to
        for (unsigned int i = numConnections; i < constantLayer->GetOutputSlot(0).GetNumConnections(); ++i)
        {
            auto info = constantLayer->GetOutputSlot(0).GetConnection(i)->GetOwningLayer().GetInputSlot(0)
                            .GetConnectedOutputSlot()->GetTensorInfo();
            info.SetConstant();
            constantLayer->GetOutputSlot(0).GetConnection(i)->GetOwningLayer().GetInputSlot(0)
                .GetConnectedOutputSlot()->SetTensorInfo(info);
        }
    }

    static void ConvertInt8To32(const void* srcInt8Buffer,
                                size_t numElements,
                                float scale,
                                int32_t offset,
                                float* dstFloat32Buffer)
    {
        ARMNN_ASSERT(srcInt8Buffer != nullptr);
        ARMNN_ASSERT(dstFloat32Buffer != nullptr);

        const auto* pInt8 = static_cast<const int8_t*>(srcInt8Buffer);

        for (size_t i = 0; i < numElements; ++i)
        {
            // Standard dequantization: real = scale * (quantized - offset).
            // A plain cast would only be correct for scale == 1.0f and offset == 0.
            dstFloat32Buffer[i] = scale * static_cast<float>(pInt8[i] - offset);
        }
    }
};

using ConvertConstDequantisationLayersToConstLayers =
    OptimizeForConnection<ConstantLayer,
                          DequantizeLayer,
                          ConvertConstDequantisationLayersToConstLayersImpl>;

} // namespace optimizations
} // namespace armnn
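
// A minimal usage sketch, assuming this pass is registered during network
// optimisation the same way as Arm NN's other connection optimizations
// (illustrative only; not part of this header's API):
//
//   Optimizer::Pass(graph, MakeOptimizations(ConvertConstDequantisationLayersToConstLayers()));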