src/backends/tosaCommon/operatorMappings/QuantizeOperator.cpp


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171

//
// Copyright © 2023-2024 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
// Copyright © 2020 The TensorFlow Authors. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
//

#include "QuantizeOperator.hpp"

#include "TosaRescaleOperatorUtils.hpp"

// This function is paraphrased from:
// tensorflow/compiler/mlir/tosa/transforms/legalize_common.cc from function convertQuantizeOp
TosaSerializationBasicBlock* ConvertQuantizeToTosaOperator(const Layer* layer,
                                                           const std::vector<const TensorInfo*>& inputs,
                                                           const std::vector<const TensorInfo*>& outputs)
{
    ARMNN_THROW_INVALIDARG_MSG_IF_FALSE( inputs.size() == 1,
                                         "ConvertQuantizeToTosaOperator: Quantize must have only one input" );
    ARMNN_THROW_INVALIDARG_MSG_IF_FALSE( outputs.size() == 1,
                                         "ConvertQuantizeToTosaOperator: Quantize must have only one output" );

    std::string inputName           = std::string("input0_");
    std::string outputName          = std::string("output0_");
    std::string blockName           = std::string("Op_QUANTIZE_block_") + GetUniqueTosaMappingID();

    // If a layer is present then the block will be used for execution, so input and output names need to be determined
    // using the previous and following layers so the graph is connected correctly. For validation this doesn't matter.
    if(layer != nullptr)
    {
        // Get the layers connected to the input slots and determine unique tensor names.
        Layer& connectedLayer = layer->GetInputSlot(0).GetConnectedOutputSlot()->GetOwningLayer();
        inputName = GenerateUniqueName(connectedLayer, 0);

        // Determine unique output tensor name.
        outputName = GenerateUniqueOutputName(*layer, 0);
    }

    const TensorInfo inputInfo = *inputs[0];
    const TensorInfo outputInfo = *outputs[0];

    // Extract quantization detail from Tensor
    float zeroPoint = static_cast<float>(outputInfo.GetQuantizationOffset());
    // No per axis support in Tensorflow TOSA code
    float scale = outputInfo.GetQuantizationScale();

    // As per the Tensorflow quantization specification
    // Tensorflow TOSA code calculates quantization using multiplication by scale
    // Armnn code calculates quantization using division by scale
    // Invert scale factor passed from Armnn for tf TOSA code
    scale = (scale != 0) ?  (1 / scale) : scale;

    std::vector<TosaSerializationTensor*> tensors;

    std::vector<int32_t> inputShape0 = GetTosaTensorShape(inputInfo.GetShape());
    DType inputDType0 = ArmNNToDType(inputInfo.GetDataType());
    bool isFloatInput = inputDType0 == DType::DType_FP16 || inputDType0 == DType::DType_FP32;

    // Only add input tensors if connected layer is an input layer.
    // As intermediate or constant tensors will be created separately.
    // There also can't be duplicate tensor.
    if(inputName.find("input0_") != std::string::npos)
    {
        tensors.push_back(new TosaSerializationTensor(inputName, inputShape0, inputDType0, {}));
    }

    std::vector<int32_t> outputShape0 = GetTosaTensorShape(outputInfo.GetShape());
    DType outputDType0 = ArmNNToDType(outputInfo.GetDataType());

    if (isFloatInput)
    {
        // quantize:
        // const_zeroPoint = constant(zeroPoint)
        // const_scale = constant(scale)
        // out_mul = mul(input, const_scale)
        // out_add = add(out_mul, const_zeroPoint)
        // output = cast<output_type>(out_add)

        std::string outputNameScale     = std::string("input1_") + GetUniqueTosaMappingID();
        std::string outputNameZeroPoint = std::string("input2_") + GetUniqueTosaMappingID();
        std::string outputNameMul       = std::string("intermediate0_") + GetUniqueTosaMappingID();
        std::string outputNameAdd       = std::string("intermediate1_") + GetUniqueTosaMappingID();

        // const_zeroPoint
        TosaSerializationOperator* zeroPointOp = nullptr;
        TosaSerializationTensor* zeroPointTensor = nullptr;
        CreateConstTosaOperator<float>(outputNameZeroPoint,
                                       zeroPoint,
                                       inputDType0,
                                       inputShape0,
                                       zeroPointOp,
                                       zeroPointTensor);
        tensors.push_back(zeroPointTensor);

        // const_scale
        TosaSerializationOperator *scaleOp = nullptr;
        TosaSerializationTensor* scaleTensor = nullptr;
        CreateConstTosaOperator<float>(outputNameScale,
                                       scale,
                                       inputDType0,
                                       inputShape0,
                                       scaleOp,
                                       scaleTensor);
        tensors.push_back(scaleTensor);

        // mul
        int32_t shift = 0;
        TosaMulAttribute mulAttribute(shift);
        TosaSerializationOperator* mulOp = new TosaSerializationOperator(Op_MUL,
                                                                         Attribute_MulAttribute,
                                                                         &mulAttribute,
                                                                         {inputName, outputNameScale},
                                                                         {outputNameMul});
        tensors.push_back(new TosaSerializationTensor(outputNameMul, inputShape0, inputDType0, {}));

        // add
        TosaSerializationOperator* addOp = new TosaSerializationOperator(Op_ADD,
                                                                         Attribute_NONE,
                                                                         nullptr,
                                                                         {outputNameMul, outputNameZeroPoint},
                                                                         {outputNameAdd});
        tensors.push_back(new TosaSerializationTensor(outputNameAdd, inputShape0, inputDType0, {}));

        // cast
        TosaSerializationOperator* castOp = new TosaSerializationOperator(Op_CAST,
                                                                          Attribute_NONE,
                                                                          nullptr,
                                                                          {outputNameAdd},
                                                                          {outputName});

        tensors.push_back(new TosaSerializationTensor(outputName, outputShape0, outputDType0, {}));

        // operatorInputNames/operatorOutputNames ends up being the same as
        // blockInputNames/blockOutputNames for one-to-one ArmNN to TOSA mappings
        return new TosaSerializationBasicBlock(blockName,                                       // name
                                               mainName,                                        // region name
                                               {zeroPointOp, scaleOp, mulOp, addOp, castOp},    // operators
                                               tensors,                                         // tensors
                                               {inputName},                                     // inputs
                                               {outputName});                                   // outputs
    }
    else
    {
        double scale_alpha      = inputs[0]->GetQuantizationScale() / outputs[0]->GetQuantizationScale();
        int32_t input_zp        = inputs[0]->GetQuantizationOffset();
        int32_t output_zp       = outputs[0]->GetQuantizationOffset();

        TosaSerializationOperator* rescaleOp = nullptr;
        CreateRescaleTosaOperator(inputName,
                                  outputName,
                                  scale_alpha,
                                  input_zp,
                                  output_zp,
                                  true,
                                  true,
                                  &rescaleOp);
        tensors.push_back(new TosaSerializationTensor(outputName,
                                                      inputShape0,
                                                      outputDType0, {}));

        // operatorInputNames/operatorOutputNames ends up being the same as
        // blockInputNames/blockOutputNames for one-to-one ArmNN to TOSA mappings
        return new TosaSerializationBasicBlock(blockName,      // name
                                               mainName,       // region name
                                               {rescaleOp},    // operators
                                               tensors,        // tensors
                                               {inputName},    // inputs
                                               {outputName});  // outputs
    }
}