aboutsummaryrefslogtreecommitdiff
path: root/src/armnn/optimizations/ConvertConstDequantisationLayersToConstLayers.hpp
blob: 27acf78d3ea24402569aa2c76bc4e2a5bd74740e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
//
// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#pragma once

#include "Optimization.hpp"
#include "NetworkUtils.hpp"

#include <armnn/Logging.hpp>
#include <armnnUtils/Permute.hpp>

namespace armnn
{
namespace optimizations
{

/// Graph optimization that folds a Constant layer feeding a Dequantize layer
/// into a single Constant layer whose data is already dequantized to Float32.
/// The dequantization cost is then paid once, at optimization time, rather
/// than on every inference.
class ConvertConstDequantisationLayersToConstLayersImpl
{
public:
    /// Entry point called by OptimizeForConnection for each matching
    /// Constant -> Dequantize connection.
    /// @param graph      the graph being optimized (forwarded, not used directly here)
    /// @param connection the Dequantize layer's input slot that is fed by the Constant layer
    void Run(Graph& graph, InputSlot& connection) const
    {
        Layer& base = connection.GetConnectedOutputSlot()->GetOwningLayer();
        Layer& child = connection.GetOwningLayer();

        // The OptimizeForConnection<ConstantLayer, DequantizeLayer, ...> wrapper
        // guarantees this layer pairing, so the downcasts below are safe.
        ARMNN_ASSERT(base.GetType() == LayerType::Constant);
        ARMNN_ASSERT(child.GetType() == LayerType::Dequantize);

        ReplaceConstDequantisationLayer(graph,
                                        PolymorphicDowncast<ConstantLayer*>(&base),
                                        PolymorphicDowncast<DequantizeLayer*>(&child));

    }
protected:
    // Instantiated only by the OptimizeForConnection machinery.
    ConvertConstDequantisationLayersToConstLayersImpl() = default;
    ~ConvertConstDequantisationLayersToConstLayersImpl() = default;
private:

    /// Dequantizes the constant data in place and re-routes the Dequantize
    /// layer's outputs to the Constant layer, leaving the Dequantize layer
    /// unconnected (and therefore removable).
    static void ReplaceConstDequantisationLayer(Graph&,
                                                ConstantLayer* constantLayer,
                                                DequantizeLayer* dequantizeLayer)
    {
        ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl::ReplaceConstDequantisationLayer()";
        /**
         * This optimisation is to find situations where a constant set of inputs is being provided to a Dequantization
         * layer. In this case we don't want the overhead of Dequantizing the values on every inference, instead we
         * want to Dequantize them once and store them in a Const layer to be used everytime as they will not change.
         */
        TensorInfo constantInfo = constantLayer->GetOutputSlot(0).GetTensorInfo();
        TensorInfo inputDequantizeInfo = dequantizeLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
        TensorInfo outputDequantizeInfo = dequantizeLayer->GetOutputSlot(0).GetTensorInfo();

        bool requiresPermute = false;

        // Only the first consumer of the Dequantize output is inspected here;
        // it decides whether the constant data must be permuted for NCHW Conv2d.
        auto connection = dequantizeLayer->GetOutputSlot(0).GetConnection(0);
        if (connection)
        {
            if (connection->GetOwningLayer().GetType() == LayerType::Convolution2d)
            {
                /**
                 * ArmNN does not currently support non-fixed weights or bias
                 * The NNAPI filter is always OHWI [depth_out, filter_height, filter_width, depth_in]
                 * but ArmNN expects the filter's height and width indices to match the input's height
                 * and width indices so we permute it to OIHW if the DataLayout is NCHW
                 */
                ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: Connected to "
                                   "Convolution layer.";
                auto conv2dLayer = PolymorphicDowncast<Convolution2dLayer*>(&connection->GetOwningLayer());
                if (conv2dLayer->GetParameters().m_DataLayout == DataLayout::NCHW)
                {
                    ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: Connected to "
                                        "Convolution layer and requires permute on weights. ";
                    requiresPermute = true;
                }
            }
        }

        ARMNN_ASSERT(constantLayer->GetNumOutputSlots() == 1);
        // Captured BEFORE MoveAllConnections below: the tensor-info fix-up loop at
        // the end starts at this index so it only touches the connections that were
        // moved over from the Dequantize layer, not the Constant layer's own.
        auto numConnections = constantLayer->GetOutputSlot(0).GetNumConnections();

        ARMNN_LOG(info) << "constantInfo datatype:" << armnn::GetDataTypeName(constantInfo.GetDataType())
           << "inputDequantizeInfo datatype:" << armnn::GetDataTypeName(inputDequantizeInfo.GetDataType())
           << "outputDequantizeInfo datatype:" << armnn::GetDataTypeName(outputDequantizeInfo.GetDataType());

        // Destination buffer for the dequantized values (zero-initialized).
        std::vector<float> newValues(outputDequantizeInfo.GetNumElements());
        if (constantInfo.GetDataType() == DataType::Float16 &&
            inputDequantizeInfo.GetDataType() == DataType::Float16 &&
            outputDequantizeInfo.GetDataType() == DataType::Float32)
        {
            ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: Converting FP16 -> FP32";
            armnnUtils::FloatingPointConverter::ConvertFloat16To32(constantLayer->m_LayerOutput->Map(true),
                                                                   outputDequantizeInfo.GetNumElements(),
                                                                   newValues.data());
        }
        else if (((constantInfo.GetDataType() == DataType::QAsymmS8
                  && inputDequantizeInfo.GetDataType() == DataType::QAsymmS8)
                  || (constantInfo.GetDataType() == DataType::QSymmS8
                      && inputDequantizeInfo.GetDataType() == DataType::QSymmS8)) &&
                outputDequantizeInfo.GetDataType() == DataType::Float32)
        {
            ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: Converting INT8 -> FP32";
            ConvertInt8To32(constantLayer->m_LayerOutput->Map(true),
                            outputDequantizeInfo.GetNumElements(),
                            inputDequantizeInfo.GetQuantizationScale(),
                            inputDequantizeInfo.GetQuantizationOffset(),
                            newValues.data());
        }
        // NOTE(review): if neither datatype combination above matches, newValues
        // stays zero-filled and the constant data below is silently replaced with
        // zeros — consider guarding with an assert/throw for unsupported types.

        // The replacement constant takes the Dequantize output's (Float32) info.
        TensorInfo newInfo = outputDequantizeInfo;
        newInfo.SetConstant(true);
        if (requiresPermute)
        {
            ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: Permuting the constant data.";
            // Maps NNAPI OHWI filter layout to the OIHW layout ArmNN expects for NCHW.
            const PermutationVector OHWIToOIHW = {0, 2, 3, 1};
            std::vector<float> permutedValues(outputDequantizeInfo.GetNumElements());
            armnnUtils::Permute(outputDequantizeInfo.GetShape(), OHWIToOIHW,
                                newValues.data(), permutedValues.data(),
                                GetDataTypeSize(outputDequantizeInfo.GetDataType()));
            ConstTensor newInput(newInfo, permutedValues);
            constantLayer->m_LayerOutput.reset(new ScopedTensorHandle(newInput));
        }
        else
        {
            ConstTensor newInput(newInfo, newValues);
            constantLayer->m_LayerOutput.reset(new ScopedTensorHandle(newInput));
        }

        // Moves connections in dequantize output to the constant layer.
        // Dequantize layer will be removed if left unconnected.
        dequantizeLayer->GetOutputSlot().MoveAllConnections(constantLayer->GetOutputSlot());

        // Updating the output tensor
        constantLayer->GetOutputSlot(0).SetTensorInfo(newInfo);
        ARMNN_ASSERT(constantLayer->GetOutputSlot(0).GetTensorInfo().IsConstant() == true);

        // Set isConstant to true in all input tensor infos where constantLayer is now connected to
        // NOTE(review): this hardcodes GetInputSlot(0) on each consuming layer; if the
        // constant feeds a different slot (e.g. Conv2d weights on slot 1) the info
        // updated here belongs to another connection — verify against callers.
        for (unsigned int i = numConnections; i < constantLayer->GetOutputSlot(0).GetNumConnections(); ++i)
        {
            auto info = constantLayer->GetOutputSlot(0).GetConnection(i)->GetOwningLayer().GetInputSlot(0)
                    .GetConnectedOutputSlot()->GetTensorInfo();
            info.SetConstant();
            constantLayer->GetOutputSlot(0).GetConnection(i)->GetOwningLayer().GetInputSlot(0)
                    .GetConnectedOutputSlot()->SetTensorInfo(info);
        }
    }


/// Dequantizes a buffer of int8 values into float32:
///   dst[i] = (src[i] - offset) * scale
/// @param srcInt8Buffer    source buffer of numElements int8 values (non-null)
/// @param numElements      number of elements to convert
/// @param scale            quantization scale of the source tensor
/// @param offset           quantization (zero-point) offset of the source tensor
/// @param dstFloat32Buffer destination buffer of numElements floats (non-null)
static void ConvertInt8To32(const void* srcInt8Buffer,
                            size_t numElements,
                            const float scale,
                            const int32_t offset,
                            float* dstFloat32Buffer)
{
    ARMNN_ASSERT(srcInt8Buffer != nullptr);
    ARMNN_ASSERT(dstFloat32Buffer != nullptr);

    ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: scale: " << scale;
    ARMNN_LOG(info) << "ConvertConstDequantisationLayersToConstLayersImpl:: offset: " << offset;

    const auto* pInt8 = static_cast<const int8_t*>(srcInt8Buffer);

    for (size_t i = 0; i < numElements; ++i)
    {
        dstFloat32Buffer[i] = static_cast<float>(pInt8[i] - offset) * scale;
    }
}

};

/// Optimization pass: for every Constant layer directly feeding a Dequantize
/// layer, runs ConvertConstDequantisationLayersToConstLayersImpl to fold the
/// pair into a single pre-dequantized Constant layer.
using ConvertConstDequantisationLayersToConstLayers
    = OptimizeForConnection<ConstantLayer,
                            DequantizeLayer,
                            ConvertConstDequantisationLayersToConstLayersImpl>;

} // namespace optimizations
} // namespace armnn