src/armnn/QuantizerVisitor.hpp


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182

//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//

#pragma once

#include "armnn/LayerVisitorBase.hpp"
#include "StaticRangeVisitor.hpp"
#include "NetworkQuantizationScheme.hpp"

#include <armnn/INetwork.hpp>
#include <armnn/Types.hpp>
#include <armnnQuantizer/INetworkQuantizer.hpp>

#include <unordered_map>

namespace armnn
{

// Forward declaration
class StaticRangeVisitor;

/// Visitor object for quantizing layers in a network
class QuantizerVisitor : public LayerVisitorBase<VisitorThrowingPolicy>
{
public:
    QuantizerVisitor(const RangeTracker& rangeTracker,
                     const IQuantizationScheme* quantizationScheme,
                     bool preserveType = false);

    ~QuantizerVisitor() = default;

    /// Functions to quantize the individual layers, overridden from ILayerVisitor
    void VisitActivationLayer(const IConnectableLayer* layer,
                              const ActivationDescriptor& activationDescriptor,
                              const char* name = nullptr) override;

    void VisitAdditionLayer(const IConnectableLayer* layer, const char* name = nullptr) override;

    void VisitBatchNormalizationLayer(const IConnectableLayer* layer,
                                      const BatchNormalizationDescriptor& desc,
                                      const ConstTensor& mean,
                                      const ConstTensor& variance,
                                      const ConstTensor& beta,
                                      const ConstTensor& gamma,
                                      const char* name = nullptr) override;

    void VisitBatchToSpaceNdLayer(const IConnectableLayer* layer,
                                  const BatchToSpaceNdDescriptor& batchToSpaceNdDescriptor,
                                  const char* name = nullptr) override;

    void VisitConcatLayer(const IConnectableLayer* layer,
                          const OriginsDescriptor& originsDescriptor,
                          const char* name = nullptr) override;

    void VisitConstantLayer(const IConnectableLayer* layer,
                            const ConstTensor& input,
                            const char* name = nullptr) override;

    void VisitConvolution2dLayer(const IConnectableLayer* layer,
                                 const Convolution2dDescriptor& convolution2dDescriptor,
                                 const ConstTensor& weights,
                                 const Optional<ConstTensor>& biases,
                                 const char* name = nullptr) override;

    void VisitDepthwiseConvolution2dLayer(const IConnectableLayer* layer,
                                          const DepthwiseConvolution2dDescriptor& desc,
                                          const ConstTensor& weights,
                                          const Optional<ConstTensor>& biases,
                                          const char* name = nullptr) override;

    void VisitFullyConnectedLayer(const IConnectableLayer *layer,
                                  const FullyConnectedDescriptor& desc,
                                  const ConstTensor& weights,
                                  const Optional<ConstTensor>& biases,
                                  const char *name = nullptr)  override;

    void VisitInputLayer(const IConnectableLayer* layer, LayerBindingId id, const char* name = nullptr) override;

    void VisitMeanLayer(const IConnectableLayer* layer,
                        const MeanDescriptor& meanDescriptor,
                        const char* name = nullptr) override;

    void VisitMultiplicationLayer(const IConnectableLayer* layer,
                                  const char* name = nullptr) override;

    void VisitNormalizationLayer(const armnn::IConnectableLayer* layer,
                                 const armnn::NormalizationDescriptor& normalizationDescriptor,
                                 const char* name = nullptr) override;

    void VisitOutputLayer(const IConnectableLayer* layer, LayerBindingId id, const char* name = nullptr)  override;

    void VisitPadLayer(const IConnectableLayer*,
                       const PadDescriptor&,
                       const char* name = nullptr) override;

    void VisitPermuteLayer(const IConnectableLayer* layer,
                           const PermuteDescriptor& permuteDescriptor,
                           const char* name = nullptr) override;

    void VisitPooling2dLayer(const IConnectableLayer* layer,
                             const Pooling2dDescriptor& pooling2dDescriptor,
                             const char* name = nullptr) override;

    void VisitPreluLayer(const IConnectableLayer* layer,
                         const char* name = nullptr) override;

    void VisitReshapeLayer(const IConnectableLayer* layer,
                           const ReshapeDescriptor& reshapeDescriptor,
                           const char* name = nullptr) override;

    void VisitResizeBilinearLayer(const IConnectableLayer* layer,
                                  const ResizeBilinearDescriptor& resizeDesc,
                                  const char* name = nullptr) override;

    void VisitRsqrtLayer(const IConnectableLayer*,
                         const char* name = nullptr) override;

    void VisitSoftmaxLayer(const IConnectableLayer* layer,
                           const SoftmaxDescriptor& softmaxDescriptor,
                           const char* name = nullptr) override;

    void VisitSpaceToBatchNdLayer(const IConnectableLayer* layer,
                                  const SpaceToBatchNdDescriptor& spaceToBatchNdDescriptor,
                                  const char* name = nullptr) override;

    void VisitSpaceToDepthLayer(const IConnectableLayer* layer,
                                const SpaceToDepthDescriptor& spaceToDepthDescriptor,
                                const char* name = nullptr) override;

    void VisitSplitterLayer(const IConnectableLayer* layer,
                            const SplitterDescriptor& splitterDescriptor,
                            const char* name = nullptr) override;

    void VisitStridedSliceLayer(const IConnectableLayer* layer,
                                const StridedSliceDescriptor& stridedSliceDescriptor,
                                const char* name = nullptr) override;

    void VisitSubtractionLayer(const IConnectableLayer* layer,
                               const char* name = nullptr) override;

    void VisitTransposeConvolution2dLayer(const IConnectableLayer* layer,
                                          const TransposeConvolution2dDescriptor& descriptor,
                                          const ConstTensor& weights,
                                          const Optional<ConstTensor>& biases,
                                          const char* name = nullptr) override;

    /// Extract the quantized network
    INetworkPtr RetrieveFinalNetwork() { return std::move(m_QuantizedNetwork); }

private:
    /// Connects the layer to preceeding layers and sets the quantization parameters based on recorded ranges
    void SetQuantizedInputConnections(const IConnectableLayer* srcLayer, IConnectableLayer* quantizedLayer);

    /// Record the guids so we can easily find the layers later
    void RecordLayer(const IConnectableLayer* srcLayer, IConnectableLayer* qLayer);

    /// Sets the bias quantization scale based on input and weight scales
    ConstTensor CreateQuantizedBias(const IConnectableLayer* srcLayer,
                                    const ConstTensor& weights,
                                    const Optional<ConstTensor>& biases,
                                    std::vector<int32_t>& weightsBacking);

    /// Reference to the static range visitor used to retrieve the quantization ranges
    const RangeTracker& m_Ranges;

    /// Quantized version of the model we are building up
    INetworkPtr m_QuantizedNetwork;

    /// Mapping from input network guids to quantized network guids
    std::unordered_map<LayerGuid, LayerGuid> m_OriginalToQuantizedGuidMap;

    /// Mapping from guid to layer in quantized network
    std::unordered_map<LayerGuid, IConnectableLayer*> m_QuantizedGuidToLayerMap;

    const IQuantizationScheme* m_QuantizationScheme;

    const bool m_PreserveType;
};

} //namespace armnn