Diffstat (limited to 'src/backends/tosaCommon/operatorMappings/QuantizeOperator.cpp')
-rw-r--r--  src/backends/tosaCommon/operatorMappings/QuantizeOperator.cpp  196
1 file changed, 112 insertions(+), 84 deletions(-)
diff --git a/src/backends/tosaCommon/operatorMappings/QuantizeOperator.cpp b/src/backends/tosaCommon/operatorMappings/QuantizeOperator.cpp
index 1107add6e9..a4d7d0ed28 100644
--- a/src/backends/tosaCommon/operatorMappings/QuantizeOperator.cpp
+++ b/src/backends/tosaCommon/operatorMappings/QuantizeOperator.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2023 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2023-2024 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
// Copyright © 2020 The TensorFlow Authors. All Rights Reserved.
@@ -8,6 +8,8 @@
#include "QuantizeOperator.hpp"
+#include "TosaRescaleOperatorUtils.hpp"
+
// This function is paraphrased from:
// tensorflow/compiler/mlir/tosa/transforms/legalize_common.cc from function convertQuantizeOp
TosaSerializationBasicBlock* ConvertQuantizeToTosaOperator(const Layer* layer,
@@ -19,11 +21,7 @@ TosaSerializationBasicBlock* ConvertQuantizeToTosaOperator(const Layer* layer,
ARMNN_THROW_INVALIDARG_MSG_IF_FALSE( outputs.size() == 1,
"ConvertQuantizeToTosaOperator: Quantize must have only one output" );
- std::string inputName = std::string("input0_");
- std::string outputNameZeroPoint = std::string("intermediate0_") + GetUniqueTosaMappingID();
- std::string outputNameScale = std::string("intermediate1_") + GetUniqueTosaMappingID();
- std::string outputNameMul = std::string("intermediate2_") + GetUniqueTosaMappingID();
- std::string outputNameAdd = std::string("intermediate3_") + GetUniqueTosaMappingID();
+ std::string inputName = std::string("input_");
std::string outputName = std::string("output0_");
std::string blockName = std::string("Op_QUANTIZE_block_") + GetUniqueTosaMappingID();
@@ -31,12 +29,8 @@ TosaSerializationBasicBlock* ConvertQuantizeToTosaOperator(const Layer* layer,
// using the previous and following layers so the graph is connected correctly. For validation this doesn't matter.
if(layer != nullptr)
{
- // Get the layers connected to the input slots and determine unique tensor names.
- Layer& connectedLayer = layer->GetInputSlot(0).GetConnectedOutputSlot()->GetOwningLayer();
- inputName = GenerateUniqueName(connectedLayer, 0);
-
- // Determine unique output tensor name.
- outputName = GenerateUniqueOutputName(*layer, 0);
+ inputName = GenerateUniqueInputName(layer->GetInputSlot(0));
+ outputName = GenerateUniqueOutputName(*layer);
}
const TensorInfo inputInfo = *inputs[0];
@@ -55,85 +49,119 @@ TosaSerializationBasicBlock* ConvertQuantizeToTosaOperator(const Layer* layer,
std::vector<TosaSerializationTensor*> tensors;
+ std::vector<int32_t> inputShape0 = GetTosaTensorShape(inputInfo.GetShape());
+ DType inputDType0 = ArmNNToDType(inputInfo.GetDataType());
+ bool isFloatInput = inputDType0 == DType::DType_FP16 || inputDType0 == DType::DType_FP32;
+
// Only add input tensors if connected layer is an input layer.
// As intermediate or constant tensors will be created separately.
// There also can't be duplicate tensors.
- std::vector<int32_t> inputShape0;
- DType inputDType0 = DType::DType_UNKNOWN;
- if(inputName.find("input0_") != std::string::npos)
+ if(inputName.find("input_") != std::string::npos)
{
- inputShape0 = GetTosaTensorShape(inputInfo.GetShape());
- inputDType0 = ArmNNToDType(inputInfo.GetDataType());
- ARMNN_THROW_INVALIDARG_MSG_IF_FALSE( inputDType0 == DType::DType_FP16 || inputDType0 == DType::DType_FP32,
- "ConvertQuantizeToTosaOperator: Quantize input must be of type Float" );
tensors.push_back(new TosaSerializationTensor(inputName, inputShape0, inputDType0, {}));
}
std::vector<int32_t> outputShape0 = GetTosaTensorShape(outputInfo.GetShape());
DType outputDType0 = ArmNNToDType(outputInfo.GetDataType());
- // quantize:
- // const_zeroPoint = constant(zeroPoint)
- // const_scale = constant(scale)
- // out_mul = mul(input, const_scale)
- // out_add = add(out_mul, const_zeroPoint)
- // output = cast<output_type>(out_add)
-
- // const_zeroPoint
- TosaSerializationOperator* zeroPointOp = nullptr;
- TosaSerializationTensor* zeroPointTensor = nullptr;
- CreateConstTosaOperator<float>(outputNameZeroPoint,
- zeroPoint,
- inputDType0,
- inputShape0,
- zeroPointOp,
- zeroPointTensor);
- tensors.push_back(zeroPointTensor);
-
- // const_scale
- TosaSerializationOperator *scaleOp = nullptr;
- TosaSerializationTensor* scaleTensor = nullptr;
- CreateConstTosaOperator<float>(outputNameScale,
- scale,
- inputDType0,
- inputShape0,
- scaleOp,
- scaleTensor);
- tensors.push_back(scaleTensor);
-
- // mul
- int32_t shift = 0;
- TosaMulAttribute mulAttribute(shift);
- TosaSerializationOperator* mulOp = new TosaSerializationOperator(Op_MUL,
- Attribute_MulAttribute,
- &mulAttribute,
- {inputName, outputNameScale},
- {outputNameMul});
- tensors.push_back(new TosaSerializationTensor(outputNameMul, inputShape0, inputDType0, {}));
-
- // add
- TosaSerializationOperator* addOp = new TosaSerializationOperator(Op_ADD,
- Attribute_NONE,
- nullptr,
- {outputNameMul, outputNameZeroPoint},
- {outputNameAdd});
- tensors.push_back(new TosaSerializationTensor(outputNameAdd, inputShape0, inputDType0, {}));
-
- // cast
- TosaSerializationOperator* castOp = new TosaSerializationOperator(Op_CAST,
- Attribute_NONE,
- nullptr,
- {outputNameAdd},
- {outputName});
-
- tensors.push_back(new TosaSerializationTensor(outputName, outputShape0, outputDType0, {}));
-
- // operatorInputNames/operatorOutputNames ends up being the same as
- // blockInputNames/blockOutputNames for one-to-one ArmNN to TOSA mappings
- return new TosaSerializationBasicBlock(blockName, // name
- mainName, // region name
- {zeroPointOp, scaleOp, mulOp, addOp, castOp}, // operators
- tensors, // tensors
- {inputName}, // inputs
- {outputName}); // outputs
-}
+ if (isFloatInput)
+ {
+ // quantize:
+ // const_zeroPoint = constant(zeroPoint)
+ // const_scale = constant(scale)
+ // out_mul = mul(input, const_scale)
+ // out_add = add(out_mul, const_zeroPoint)
+ // output = cast<output_type>(out_add)
+
+ std::string outputNameScale = std::string("input1_") + GetUniqueTosaMappingID();
+ std::string outputNameZeroPoint = std::string("input2_") + GetUniqueTosaMappingID();
+ std::string outputNameMul = std::string("intermediate0_") + GetUniqueTosaMappingID();
+ std::string outputNameAdd = std::string("intermediate1_") + GetUniqueTosaMappingID();
+
+ // const_zeroPoint
+ TosaSerializationOperator* zeroPointOp = nullptr;
+ TosaSerializationTensor* zeroPointTensor = nullptr;
+ CreateConstTosaOperator<float>(outputNameZeroPoint,
+ zeroPoint,
+ inputDType0,
+ inputShape0,
+ zeroPointOp,
+ zeroPointTensor);
+ tensors.push_back(zeroPointTensor);
+
+ // const_scale
+ TosaSerializationOperator *scaleOp = nullptr;
+ TosaSerializationTensor* scaleTensor = nullptr;
+ CreateConstTosaOperator<float>(outputNameScale,
+ scale,
+ inputDType0,
+ inputShape0,
+ scaleOp,
+ scaleTensor);
+ tensors.push_back(scaleTensor);
+
+ // mul
+ int32_t shift = 0;
+ TosaMulAttribute mulAttribute(shift);
+ TosaSerializationOperator* mulOp = new TosaSerializationOperator(Op_MUL,
+ Attribute_MulAttribute,
+ &mulAttribute,
+ {inputName, outputNameScale},
+ {outputNameMul});
+ tensors.push_back(new TosaSerializationTensor(outputNameMul, inputShape0, inputDType0, {}));
+
+ // add
+ TosaSerializationOperator* addOp = new TosaSerializationOperator(Op_ADD,
+ Attribute_NONE,
+ nullptr,
+ {outputNameMul, outputNameZeroPoint},
+ {outputNameAdd});
+ tensors.push_back(new TosaSerializationTensor(outputNameAdd, inputShape0, inputDType0, {}));
+
+ // cast
+ TosaSerializationOperator* castOp = new TosaSerializationOperator(Op_CAST,
+ Attribute_NONE,
+ nullptr,
+ {outputNameAdd},
+ {outputName});
+
+ tensors.push_back(new TosaSerializationTensor(outputName, outputShape0, outputDType0, {}));
+
+ // operatorInputNames/operatorOutputNames ends up being the same as
+ // blockInputNames/blockOutputNames for one-to-one ArmNN to TOSA mappings
+ return new TosaSerializationBasicBlock(blockName, // name
+ mainName, // region name
+ {zeroPointOp, scaleOp, mulOp, addOp, castOp}, // operators
+ tensors, // tensors
+ {inputName}, // inputs
+ {outputName}); // outputs
+ }
+ else
+ {
+ double scale_alpha = inputs[0]->GetQuantizationScale() / outputs[0]->GetQuantizationScale();
+ int32_t input_zp = inputs[0]->GetQuantizationOffset();
+ int32_t output_zp = outputs[0]->GetQuantizationOffset();
+
+ TosaSerializationOperator* rescaleOp = nullptr;
+ CreateRescaleTosaOperator(inputName,
+ outputName,
+ scale_alpha,
+ input_zp,
+ output_zp,
+ true,
+ true,
+ &rescaleOp);
+ tensors.push_back(new TosaSerializationTensor(outputName,
+ inputShape0,
+ outputDType0, {}));
+
+ // operatorInputNames/operatorOutputNames ends up being the same as
+ // blockInputNames/blockOutputNames for one-to-one ArmNN to TOSA mappings
+ return new TosaSerializationBasicBlock(blockName, // name
+ mainName, // region name
+ {rescaleOp}, // operators
+ tensors, // tensors
+ {inputName}, // inputs
+ {outputName}); // outputs
+ }
+}
\ No newline at end of file
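
For reference, below is a minimal standalone sketch of the arithmetic that the two code paths in this patch serialize as TOSA operators. It is illustrative only: the function names are hypothetical, the int8 output type, saturation bounds and rounding are assumptions, and `scale` in the float path is assumed to be the reciprocal of the ArmNN quantization scale (matching the MUL-by-constant in the diff). The actual implementation is the TOSA basic block built above, not these functions.

#include <algorithm>
#include <cmath>
#include <cstdint>

// Float input path: out_mul = mul(input, const_scale); out_add = add(out_mul, const_zeroPoint);
// output = cast<output_type>(out_add). Rounding and saturation here stand in for TOSA CAST semantics.
int8_t QuantizeFloatReference(float x, float scale, float zeroPoint)
{
    float y = x * scale + zeroPoint;             // mul then add, carried out in the float input type
    y = std::min(std::max(y, -128.0f), 127.0f);  // assumed int8 saturation bounds
    return static_cast<int8_t>(std::round(y));   // cast to the quantized output type
}

// Quantized input path: a single RESCALE with scale_alpha = inputScale / outputScale,
// removing the input zero point and applying the output zero point.
int8_t RequantizeReference(int8_t in, double inScale, int32_t inZp,
                           double outScale, int32_t outZp)
{
    double scaleAlpha = inScale / outScale;       // matches scale_alpha in the else-branch above
    double y = (static_cast<int32_t>(in) - inZp) * scaleAlpha + outZp;
    y = std::min(std::max(y, -128.0), 127.0);     // assumed int8 saturation bounds
    return static_cast<int8_t>(std::lround(y));   // round to nearest, approximating RESCALE
}

For example, QuantizeFloatReference(0.5f, 1.0f / 0.05f, 10.0f) yields 20, while RequantizeReference re-expresses an already quantized value under new scale and zero-point parameters, which is what the single RESCALE operator in the else-branch does.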