// plain/23.05/_convert_const_dequantisation_layers_to_const_layers_8hpp_source.xhtml

//

// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.

// SPDX-License-Identifier: MIT

//

#pragma once


#include "Optimization.hpp"

#include "NetworkUtils.hpp"


#include <armnn/Logging.hpp>

#include <armnnUtils/Permute.hpp>


namespace armnn

{

namespace optimizations

{


/// Folds a Constant -> Dequantize pair into a single Constant layer holding Float32 data.
///
/// The constructor and destructor are protected, so the class cannot be instantiated directly;
/// presumably it is only used through a wrapper such as OptimizeForConnection.
class ConvertConstDequantisationLayersToConstLayersImpl
{
public:
    /// Runs the optimisation on one Constant -> Dequantize connection.
    ///
    /// @param graph      Graph being optimised; forwarded to the replacement helper.
    /// @param connection Input slot owned by the Dequantize layer and fed by the Constant layer.
    ///                   Both layer types are asserted.
    void Run(Graph& graph, InputSlot& connection) const
    {
        Layer& base = connection.GetConnectedOutputSlot()->GetOwningLayer();
        Layer& child = connection.GetOwningLayer();

        ARMNN_ASSERT(base.GetType() == LayerType::Constant);
        ARMNN_ASSERT(child.GetType() == LayerType::Dequantize);

        ReplaceConstDequantisationLayer(graph,
                                        PolymorphicDowncast<ConstantLayer*>(&base),
                                        PolymorphicDowncast<DequantizeLayer*>(&child));
    }

protected:
    ConvertConstDequantisationLayersToConstLayersImpl() = default;
    ~ConvertConstDequantisationLayersToConstLayersImpl() = default;

private:
    /// Dequantizes the constant data once and stores the result back in the Constant layer.
    ///
    /// This optimisation is to find situations where a constant set of inputs is being provided to a
    /// Dequantization layer. In this case we don't want the overhead of Dequantizing the values on every
    /// inference, instead we want to Dequantize them once and store them in a Const layer to be used every
    /// time as they will not change.
    ///
    /// Supported conversions are Float16 -> Float32 and QAsymmS8 / QSymmS8 -> Float32. For any other
    /// combination the function logs a warning and returns without modifying either layer, so the
    /// Dequantize layer stays in the graph and keeps doing the conversion at runtime.
    ///
    /// @param constantLayer   Constant layer feeding the Dequantize layer; its payload and output
    ///                        TensorInfo are replaced on success.
    /// @param dequantizeLayer Dequantize layer whose output connections are moved onto constantLayer.
    static void ReplaceConstDequantisationLayer(Graph&,
                                                ConstantLayer* constantLayer,
                                                DequantizeLayer* dequantizeLayer)
    {
        ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl::ReplaceConstDequantisationLayer()";

        TensorInfo constantInfo = constantLayer->GetOutputSlot(0).GetTensorInfo();
        TensorInfo inputDequantizeInfo = dequantizeLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
        TensorInfo outputDequantizeInfo = dequantizeLayer->GetOutputSlot(0).GetTensorInfo();

        bool requiresPermute = false;

        // Only the first consumer of the Dequantize output is inspected here.
        auto connection = dequantizeLayer->GetOutputSlot(0).GetConnection(0);
        if (connection)
        {
            if (connection->GetOwningLayer().GetType() == LayerType::Convolution2d)
            {
                /**
                 * ArmNN does not currently support non-fixed weights or bias
                 * The NNAPI filter is always OHWI [depth_out, filter_height, filter_width, depth_in]
                 * but ArmNN expects the filter's height and width indices to match the input's height
                 * and width indices so we permute it to OIHW if the DataLayout is NCHW
                 */
                ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: Connected to "
                                   "Convolution layer.";
                auto conv2dLayer = PolymorphicDowncast<Convolution2dLayer*>(&connection->GetOwningLayer());
                if (conv2dLayer->GetParameters().m_DataLayout == DataLayout::NCHW)
                {
                    ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: Connected to "
                                        "Convolution layer and requires permute on weights. ";
                    requiresPermute = true;
                }
            }
        }

        ARMNN_ASSERT(constantLayer->GetNumOutputSlots() == 1);
        // Remember how many consumers the constant had before the Dequantize consumers are moved over.
        // NOTE(review): if the constant has consumers other than this Dequantize layer they will also see
        // the converted payload after this function runs - confirm that this cannot happen in practice.
        auto numConnections = constantLayer->GetOutputSlot(0).GetNumConnections();

        ARMNN_LOG(info) << "constantInfo datatype:" << armnn::GetDataTypeName(constantInfo.GetDataType())
           << " inputDequantizeInfo datatype:" << armnn::GetDataTypeName(inputDequantizeInfo.GetDataType())
           << " outputDequantizeInfo datatype:" << armnn::GetDataTypeName(outputDequantizeInfo.GetDataType());

        // Work out which conversion applies before touching the graph.
        const DataType constantType = constantInfo.GetDataType();
        const DataType inputType = inputDequantizeInfo.GetDataType();
        const bool outputIsFloat32 = outputDequantizeInfo.GetDataType() == DataType::Float32;

        const bool isFloat16ToFloat32 = outputIsFloat32 &&
                                        constantType == DataType::Float16 &&
                                        inputType == DataType::Float16;
        const bool isInt8ToFloat32 = outputIsFloat32 &&
                                     ((constantType == DataType::QAsymmS8 && inputType == DataType::QAsymmS8) ||
                                      (constantType == DataType::QSymmS8 && inputType == DataType::QSymmS8));

        if (!isFloat16ToFloat32 && !isInt8ToFloat32)
        {
            // Without this guard the constant would be overwritten with a zero-filled buffer and the
            // Dequantize layer dropped, silently discarding the real data.
            ARMNN_LOG(warning) << "ConvertConstDequantisationLayersToConstLayersImpl:: Unsupported data type "
                                  "combination, leaving Constant and Dequantize layers unchanged.";
            return;
        }

        std::vector<float> newValues(outputDequantizeInfo.GetNumElements());
        if (isFloat16ToFloat32)
        {
            ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: Converting FP16 -> FP32";
            armnnUtils::FloatingPointConverter::ConvertFloat16To32(constantLayer->m_LayerOutput->Map(true),
                                                                   outputDequantizeInfo.GetNumElements(),
                                                                   newValues.data());
        }
        else
        {
            ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: Converting INT8 -> FP32";
            ConvertInt8To32(constantLayer->m_LayerOutput->Map(true),
                            outputDequantizeInfo.GetNumElements(),
                            inputDequantizeInfo.GetQuantizationScale(),
                            inputDequantizeInfo.GetQuantizationOffset(),
                            newValues.data());
        }

        TensorInfo newInfo = outputDequantizeInfo;
        newInfo.SetConstant(true);
        if (requiresPermute)
        {
            ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: Permuting the constant data.";
            const PermutationVector OHWIToOIHW = {0, 2, 3, 1};
            std::vector<float> permutedValues(outputDequantizeInfo.GetNumElements());
            armnnUtils::Permute(outputDequantizeInfo.GetShape(), OHWIToOIHW,
                                newValues.data(), permutedValues.data(),
                                GetDataTypeSize(outputDequantizeInfo.GetDataType()));
            // From here on newValues holds the permuted payload.
            newValues.swap(permutedValues);
        }

        // ConstTensor only references newValues; ScopedTensorHandle is constructed from it while
        // newValues is still alive.
        ConstTensor newInput(newInfo, newValues);
        constantLayer->m_LayerOutput.reset(new ScopedTensorHandle(newInput));

        // Moves connections in dequantize output to the constant layer.
        // Dequantize layer will be removed if left unconnected.
        dequantizeLayer->GetOutputSlot().MoveAllConnections(constantLayer->GetOutputSlot());

        // Updating the output tensor
        constantLayer->GetOutputSlot(0).SetTensorInfo(newInfo);
        ARMNN_ASSERT(constantLayer->GetOutputSlot(0).GetTensorInfo().IsConstant() == true);

        // Set isConstant to true in all input tensor infos where constantLayer is now connected to.
        // Only the connections added by MoveAllConnections (index >= numConnections) are visited.
        // NOTE(review): this updates the producer of input slot 0 of each consumer layer, which is not
        // necessarily the slot the constant was attached to (e.g. a weights input) - confirm intent.
        for (unsigned int i = numConnections; i < constantLayer->GetOutputSlot(0).GetNumConnections(); ++i)
        {
            auto* producerSlot = constantLayer->GetOutputSlot(0).GetConnection(i)->GetOwningLayer().GetInputSlot(0)
                    .GetConnectedOutputSlot();
            auto info = producerSlot->GetTensorInfo();
            info.SetConstant();
            producerSlot->SetTensorInfo(info);
        }
    }

    /// Dequantizes a buffer of signed 8-bit values into floats: dst[i] = (src[i] - offset) * scale.
    ///
    /// @param srcInt8Buffer    Source buffer, read as numElements int8_t values; must not be null.
    /// @param numElements      Number of elements to convert.
    /// @param scale            Quantization scale applied after the offset is subtracted.
    /// @param offset           Quantization offset (zero point) subtracted from every source value.
    /// @param dstFloat32Buffer Destination buffer receiving numElements floats; must not be null.
    static void ConvertInt8To32(const void* srcInt8Buffer,
                                size_t numElements,
                                const float scale,
                                const int32_t offset,
                                float* dstFloat32Buffer)
    {
        ARMNN_ASSERT(srcInt8Buffer != nullptr);
        ARMNN_ASSERT(dstFloat32Buffer != nullptr);

        ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: scale: " << scale;
        ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: offset: " << offset;

        const auto* pInt8 = static_cast<const int8_t*>(srcInt8Buffer);

        for (size_t i = 0; i < numElements; ++i)
        {
            // The subtraction is done in integer arithmetic before converting to float.
            dstFloat32Buffer[i] = static_cast<float>(pInt8[i] - offset) * scale;
        }
    }
};


/// Public name of the optimisation: ConvertConstDequantisationLayersToConstLayersImpl wrapped in
/// OptimizeForConnection with ConstantLayer as the base type and DequantizeLayer as the child type.
using ConvertConstDequantisationLayersToConstLayers =
    OptimizeForConnection<ConstantLayer, DequantizeLayer, ConvertConstDequantisationLayersToConstLayersImpl>;


} // namespace optimizations

} // namespace armnn