ArmNN 24.02
QuantizeOperator.cpp
//
// Copyright © 2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
// Copyright © 2020 The TensorFlow Authors. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
//

#include "QuantizeOperator.hpp"

// This function is paraphrased from:
// tensorflow/compiler/mlir/tosa/transforms/legalize_common.cc, function convertQuantizeOp
TosaSerializationBasicBlock* ConvertQuantizeToTosaOperator(const Layer* layer,
                                                           const std::vector<const TensorInfo*>& inputs,
                                                           const std::vector<const TensorInfo*>& outputs)
{
    ARMNN_THROW_INVALIDARG_MSG_IF_FALSE( inputs.size() == 1,
                                         "ConvertQuantizeToTosaOperator: Quantize must have only one input" );
    ARMNN_THROW_INVALIDARG_MSG_IF_FALSE( outputs.size() == 1,
                                         "ConvertQuantizeToTosaOperator: Quantize must have only one output" );

    std::string inputName           = std::string("input0_");
    std::string outputNameZeroPoint = std::string("intermediate0_") + GetUniqueTosaMappingID();
    std::string outputNameScale     = std::string("intermediate1_") + GetUniqueTosaMappingID();
    std::string outputNameMul       = std::string("intermediate2_") + GetUniqueTosaMappingID();
    std::string outputNameAdd       = std::string("intermediate3_") + GetUniqueTosaMappingID();
    std::string outputName          = std::string("output0_");
    std::string blockName           = std::string("Op_QUANTIZE_block_") + GetUniqueTosaMappingID();

    // If a layer is present then the block will be used for execution, so the input and output names need to be
    // determined from the previous and following layers so that the graph is connected correctly.
    // For validation this does not matter.
    if (layer != nullptr)
    {
        // Get the layer connected to the input slot and determine a unique input tensor name.
        Layer& connectedLayer = layer->GetInputSlot(0).GetConnectedOutputSlot()->GetOwningLayer();
        inputName = GenerateUniqueName(connectedLayer, 0);

        // Determine a unique output tensor name.
        outputName = GenerateUniqueOutputName(*layer, 0);
    }

    const TensorInfo inputInfo  = *inputs[0];
    const TensorInfo outputInfo = *outputs[0];

    // Extract the quantization details from the output tensor.
    float zeroPoint = static_cast<float>(outputInfo.GetQuantizationOffset());
    // There is no per-axis quantization support in the TensorFlow TOSA code.
    float scale = outputInfo.GetQuantizationScale();

    // As per the TensorFlow quantization specification, the TensorFlow TOSA code quantizes by multiplying by the
    // scale, whereas ArmNN quantizes by dividing by the scale, so the scale factor passed from ArmNN is inverted
    // for the TOSA graph.
    scale = (scale != 0) ? (1 / scale) : scale;
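    // Worked example (illustrative values): for an output scale of 0.25f and a zero point of 12,
    // ArmNN would quantize x = 0.5f as round(0.5f / 0.25f) + 12 = 14, while the TOSA graph built
    // below computes round(0.5f * 4.0f) + 12 = 14, so inverting the scale keeps the two
    // formulations equivalent.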

    std::vector<TosaSerializationTensor*> tensors;

    // Only add an input tensor if the connected layer is an input layer, as intermediate and constant tensors
    // will be created separately. There also can't be duplicate tensors.
    std::vector<int32_t> inputShape0;
    DType inputDType0 = DType::DType_UNKNOWN;
    if (inputName.find("input0_") != std::string::npos)
    {
        inputShape0 = GetTosaTensorShape(inputInfo.GetShape());
        inputDType0 = ArmNNToDType(inputInfo.GetDataType());
        ARMNN_THROW_INVALIDARG_MSG_IF_FALSE( inputDType0 == DType::DType_FP16 || inputDType0 == DType::DType_FP32,
                                             "ConvertQuantizeToTosaOperator: Quantize input must be of type Float" );
        tensors.push_back(new TosaSerializationTensor(inputName, inputShape0, inputDType0, {}));
    }

    std::vector<int32_t> outputShape0 = GetTosaTensorShape(outputInfo.GetShape());
    DType outputDType0 = ArmNNToDType(outputInfo.GetDataType());

    // quantize:
    // const_zeroPoint = constant(zeroPoint)
    // const_scale = constant(scale)
    // out_mul = mul(input, const_scale)
    // out_add = add(out_mul, const_zeroPoint)
    // output = cast<output_type>(out_add)
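    // Tracing the same illustrative values through this graph: with the inverted scale 4.0f and
    // zero point 12.0f held in the two constants, an input element of 0.5f becomes
    // out_mul = 0.5f * 4.0f = 2.0f, then out_add = 2.0f + 12.0f = 14.0f, and the final cast to
    // the quantized output type yields 14.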

    // const_zeroPoint
    TosaSerializationOperator* zeroPointOp = nullptr;
    TosaSerializationTensor* zeroPointTensor = nullptr;
    CreateConstTosaOperator<float>(outputNameZeroPoint,
                                   zeroPoint,
                                   inputDType0,
                                   inputShape0,
                                   zeroPointOp,
                                   zeroPointTensor);
    tensors.push_back(zeroPointTensor);

    // const_scale
    TosaSerializationOperator* scaleOp = nullptr;
    TosaSerializationTensor* scaleTensor = nullptr;
    CreateConstTosaOperator<float>(outputNameScale,
                                   scale,
                                   inputDType0,
                                   inputShape0,
                                   scaleOp,
                                   scaleTensor);
    tensors.push_back(scaleTensor);

    // mul
    int32_t shift = 0;
    TosaMulAttribute mulAttribute(shift);
    TosaSerializationOperator* mulOp = new TosaSerializationOperator(Op_MUL,
                                                                     Attribute_MulAttribute,
                                                                     &mulAttribute,
                                                                     {inputName, outputNameScale},
                                                                     {outputNameMul});
    tensors.push_back(new TosaSerializationTensor(outputNameMul, inputShape0, inputDType0, {}));
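    // Note on the MUL above: TOSA's MUL shift attribute only applies to integer (int32) multiplies;
    // it has no effect on the floating-point multiply emitted here, hence shift = 0.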

    // add
    TosaSerializationOperator* addOp = new TosaSerializationOperator(Op_ADD,
                                                                     Attribute_NONE,
                                                                     nullptr,
                                                                     {outputNameMul, outputNameZeroPoint},
                                                                     {outputNameAdd});
    tensors.push_back(new TosaSerializationTensor(outputNameAdd, inputShape0, inputDType0, {}));

    // cast
    TosaSerializationOperator* castOp = new TosaSerializationOperator(Op_CAST,
                                                                      Attribute_NONE,
                                                                      nullptr,
                                                                      {outputNameAdd},
                                                                      {outputName});

    tensors.push_back(new TosaSerializationTensor(outputName, outputShape0, outputDType0, {}));

    // operatorInputNames/operatorOutputNames end up being the same as
    // blockInputNames/blockOutputNames for one-to-one ArmNN to TOSA mappings.
    return new TosaSerializationBasicBlock(blockName,                                    // name
                                           mainName,                                     // region name
                                           {zeroPointOp, scaleOp, mulOp, addOp, castOp}, // operators
                                           tensors,                                      // tensors
                                           {inputName},                                  // inputs
                                           {outputName});                                // outputs
}
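A minimal usage sketch, assuming QuantizeOperator.hpp and the ArmNN headers are available; the tensor shape and quantization parameters are illustrative only, and passing a null layer builds a standalone block in the same way the validation path does.

armnn::TensorInfo inputInfo({ 1, 8 }, armnn::DataType::Float32);
armnn::TensorInfo outputInfo({ 1, 8 }, armnn::DataType::QAsymmS8,
                             /*quantizationScale=*/0.25f, /*quantizationOffset=*/12);

// With no layer attached the block's boundary tensors keep the names "input0_" and "output0_",
// and the block holds the two constants plus the MUL, ADD and CAST operators built above.
TosaSerializationBasicBlock* quantizeBlock =
    ConvertQuantizeToTosaOperator(nullptr, { &inputInfo }, { &outputInfo });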
Referenced symbols:
QuantizeOperator.hpp
armnn::TensorInfo::GetQuantizationScale: float GetQuantizationScale() const (Definition: Tensor.cpp:461)
armnn::TensorInfo (Definition: Tensor.hpp:152)
armnn::Layer::GetInputSlot: const InputSlot& GetInputSlot(unsigned int index) const override - get a const input slot handle by slot index (Definition: Layer.hpp:337)
armnn::Layer (Definition: Layer.hpp:230)
mainName: const std::string mainName (Definition: TosaOperatorUtils.hpp:19)
armnn::OutputSlot::GetOwningLayer: Layer& GetOwningLayer() const (Definition: Layer.hpp:132)
ArmNNToDType: DType ArmNNToDType(const DataType& type) (Definition: TosaOperatorUtils.hpp:22)
GenerateUniqueOutputName: std::string GenerateUniqueOutputName(const Layer& layer, uint32_t layerSlot) (Definition: TosaOperatorUtils.hpp:82)
armnn::TensorInfo::GetDataType: DataType GetDataType() const (Definition: Tensor.hpp:200)
ConvertQuantizeToTosaOperator: TosaSerializationBasicBlock* ConvertQuantizeToTosaOperator(const Layer* layer, const std::vector<const TensorInfo*>& inputs, const std::vector<const TensorInfo*>& outputs) (Definition: QuantizeOperator.cpp:13)
armnn::TensorInfo::GetShape: const TensorShape& GetShape() const (Definition: Tensor.hpp:193)
GenerateUniqueName: std::string GenerateUniqueName(const Layer& layer, uint32_t layerSlot) (Definition: TosaOperatorUtils.hpp:63)
GetTosaTensorShape: std::vector<int32_t> GetTosaTensorShape(const TensorShape& shape) (Definition: TosaOperatorUtils.hpp:52)
armnn::InputSlot::GetConnectedOutputSlot: const OutputSlot* GetConnectedOutputSlot() const (Definition: Layer.hpp:56)
armnn::TensorInfo::GetQuantizationOffset: int32_t GetQuantizationOffset() const (Definition: Tensor.cpp:478)
GetUniqueTosaMappingID: std::string GetUniqueTosaMappingID() (Definition: TosaOperatorUtils.hpp:100)
ARMNN_THROW_INVALIDARG_MSG_IF_FALSE: #define ARMNN_THROW_INVALIDARG_MSG_IF_FALSE(_cond, _str) (Definition: Exceptions.hpp:210)