author     John Mcloughlin <john.mcloughlin@arm.com>   2024-04-23 16:47:04 +0100
committer  TeresaARM <teresa.charlinreyes@arm.com>     2024-04-23 17:14:09 +0000
commit     ceb442825b8c19e2450fa7bd43341d571a9b2eeb (patch)
tree       228146ac7d94099d1c9c81d426ef4526977129be
parent     5b664a84a593e7bbfc6961cd65b24f39b1b60b06 (diff)
download   armnn-ceb442825b8c19e2450fa7bd43341d571a9b2eeb.tar.gz
IVGCVSW-8294 Fix quantized Conv2d TOSA mapping
* TosaConv2d
* TosaQuantization
* TosaRescale

Signed-off-by: John Mcloughlin <john.mcloughlin@arm.com>
Signed-off-by: Teresa Charlin <teresa.charlinreyes@arm.com>
Change-Id: I6c7ceca1f7df62896b41a84e6a6448afd8c32b74
-rw-r--r--  src/backends/backendsCommon/test/Convolution2dEndToEndTestImpl.hpp    |  81
-rw-r--r--  src/backends/backendsCommon/test/QuantizationEndToEndTestImpl.hpp     |  22
-rw-r--r--  src/backends/reference/test/RefEndToEndTests.cpp                      |  13
-rw-r--r--  src/backends/tosaCommon/operatorMappings/Conv2dOperator.cpp           |  73
-rw-r--r--  src/backends/tosaCommon/operatorMappings/QuantizeOperator.cpp         | 186
-rw-r--r--  src/backends/tosaCommon/operatorMappings/TosaOperatorUtils.hpp        |  27
-rw-r--r--  src/backends/tosaCommon/operatorMappings/TosaRescaleOperatorUtils.hpp | 211
-rw-r--r--  src/backends/tosaReference/test/TosaRefEndToEndTests.cpp              |  14
8 files changed, 449 insertions, 178 deletions
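
In outline, the fix maps quantized (INT8) Conv2d to TOSA as a CONV2D that accumulates into INT32 followed by a per-channel RESCALE back to INT8, and maps INT8 Quantize to a single RESCALE. A rough sketch of the resulting TOSA pattern for Conv2d (names are illustrative, not taken from the patch):

    // input:   INT8,  scale Si, zero point Zi
    // weights: INT8,  per-channel scales Sw[c], zero point Zw
    // bias:    INT32 (zero-filled constant when no bias is given)
    //
    // acc    = CONV2D(input, weights, bias)   // INT32 accumulator; input_zp = Zi, weight_zp = Zw
    // output = RESCALE(acc)                   // per-channel scale (Si * Sw[c]) / So, output_zp = Zo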
diff --git a/src/backends/backendsCommon/test/Convolution2dEndToEndTestImpl.hpp b/src/backends/backendsCommon/test/Convolution2dEndToEndTestImpl.hpp
index bc9a94289b..f53f97ae88 100644
--- a/src/backends/backendsCommon/test/Convolution2dEndToEndTestImpl.hpp
+++ b/src/backends/backendsCommon/test/Convolution2dEndToEndTestImpl.hpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2022, 2024 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#pragma once
@@ -49,46 +49,51 @@ armnn::INetworkPtr CreateConstConvolution2dNetwork(const armnn::Convolution2dDes
return network;
}
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+template<DataType ArmnnIType, DataType ArmnnWType = ArmnnIType, DataType ArmnnBType = ArmnnIType,
+ DataType ArmnnOType = ArmnnIType>
void Convolution2dEndToEnd(const std::vector<armnn::BackendId>& backends,
armnn::DataLayout dataLayout,
bool biasEnabled = true)
{
using namespace armnn;
+ using IT = ResolveType<ArmnnIType>;
+ using WT = ResolveType<ArmnnWType>;
+ using BT = ResolveType<ArmnnBType>;
+ using OT = ResolveType<ArmnnOType>;
- const float qScale = IsQuantizedType<T>() ? 0.25f : 1.0f;
- const int32_t qOffset = IsQuantizedType<T>() ? 50 : 0;
+ const float qScale = 1.0f;
+ const int32_t qOffset = IsQuantizedType<IT>() ? 10 : 0; // offset must be zero for non-quantized types
- TensorInfo inputInfo({ 1, 5, 5, 1 }, ArmnnType, qScale, qOffset, true);
- TensorInfo outputInfo({ 1, 3, 3, 1 }, ArmnnType, qScale, qOffset);
- TensorInfo weightsInfo({ 1, 3, 3, 1 }, ArmnnType, qScale, qOffset, true);
- TensorInfo biasesInfo({ 1 }, ArmnnType, qScale * qScale, 0, true);
+ TensorInfo inputInfo( { 1, 5, 5, 1 }, ArmnnIType, qScale, qOffset, true);
+ TensorInfo weightsInfo({ 1, 3, 3, 1 }, ArmnnWType, qScale, qOffset, true);
+ TensorInfo biasesInfo( { 1 }, ArmnnBType, qScale * qScale, 0, true);
+ TensorInfo outputInfo( { 1, 3, 3, 1 }, ArmnnOType, qScale, qOffset);
std::vector<float> inputData =
- {
- 1.0f, 5.0f, 2.0f, 3.0f, 5.0f,
- 8.0f, 7.0f, 3.0f, 6.0f, 3.0f,
- 3.0f, 3.0f, 9.0f, 1.0f, 9.0f,
- 4.0f, 1.0f, 8.0f, 1.0f, 3.0f,
- 6.0f, 8.0f, 1.0f, 9.0f, 2.0f
- };
+ {
+ 1, 5, 2, 3, 5,
+ 8, 7, 3, 6, 3,
+ 3, 3, 9, 1, 9,
+ 4, 1, 8, 1, 3,
+ 6, 8, 1, 9, 2
+ };
std::vector<float> weightsData =
- {
- 4.0f, 5.0f, 6.0f,
- 0.0f, 0.0f, 0.0f,
- 3.0f, 2.0f, 1.0f
- };
+ {
+ 4, 5, 6,
+ 0, 0, 0,
+ 3, 2, 1
+ };
- std::vector<float> biasesData = { 1.0f };
- float bias = biasEnabled ? biasesData[0] : 0.0f;
+ std::vector<float> biasesData = { 1 };
+ float bias = biasEnabled ? biasesData[0] : 0;
std::vector<float> expectedOutputData =
- {
- 65.0f + bias, 76.0f + bias, 91.0f + bias,
- 107.0f + bias, 99.0f + bias, 89.0f + bias,
- 116.0f + bias, 98.0f + bias, 118.0f + bias,
- };
+ {
+ 65 + bias, 76 + bias, 91 + bias,
+ 107 + bias, 99 + bias, 89 + bias,
+ 116 + bias, 98 + bias, 118 + bias
+ };
Convolution2dDescriptor descriptor;
descriptor.m_PadLeft = 0;
@@ -102,16 +107,16 @@ void Convolution2dEndToEnd(const std::vector<armnn::BackendId>& backends,
if (dataLayout == DataLayout::NCHW)
{
- PermuteTensorNhwcToNchw(inputInfo, inputData);
+ PermuteTensorNhwcToNchw(inputInfo, inputData);
PermuteTensorNhwcToNchw(weightsInfo, weightsData);
- PermuteTensorNhwcToNchw(outputInfo, expectedOutputData);
+ PermuteTensorNhwcToNchw(outputInfo, expectedOutputData);
}
- // Quantize data
- std::vector<T> qInputData = armnnUtils::QuantizedVector<T>(inputData, qScale, qOffset);
- std::vector<T> qWeightsData = armnnUtils::QuantizedVector<T>(weightsData, qScale, qOffset);
- std::vector<T> qExpectedOutputData = armnnUtils::QuantizedVector<T>(expectedOutputData, qScale, qOffset);
- std::vector<T> qBiasesData = armnnUtils::QuantizedVector<T>(biasesData, qScale * qScale, 0);
+ // Convert data
+ std::vector<IT> qInputData = armnnUtils::QuantizedVector<IT>(inputData, qScale, qOffset);
+ std::vector<WT> qWeightsData = armnnUtils::QuantizedVector<WT>(weightsData, qScale, qOffset);
+ std::vector<BT> qBiasesData = armnnUtils::QuantizedVector<BT>(biasesData, qScale * qScale, 0);
+ std::vector<OT> qExpectedOutputData = armnnUtils::QuantizedVector<OT>(expectedOutputData, qScale, qOffset);
ConstTensor weights(weightsInfo, qWeightsData);
ConstTensor biases(biasesInfo, qBiasesData);
@@ -125,10 +130,10 @@ void Convolution2dEndToEnd(const std::vector<armnn::BackendId>& backends,
biases,
biasEnabled);
- EndToEndLayerTestImpl<ArmnnType, ArmnnType>(std::move(network),
- {{ 0, qInputData }},
- {{ 0, qExpectedOutputData }},
- backends);
+ EndToEndLayerTestImpl<ArmnnIType, ArmnnOType>(std::move(network),
+ {{ 0, qInputData }},
+ {{ 0, qExpectedOutputData }},
+ backends);
}
} // anonymous namespace
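
With the widened template, callers choose the input, weight, bias and output types independently (each defaults to the input type). For example, mirroring the tests added below:

    // Float path: all four types resolve to Float32.
    Convolution2dEndToEnd<armnn::DataType::Float32>(backends, armnn::DataLayout::NHWC);

    // Quantized path: INT8 input/weights/output with an INT32 bias
    // (ArmnnOType falls back to ArmnnIType, i.e. QSymmS8).
    Convolution2dEndToEnd<armnn::DataType::QSymmS8,
                          armnn::DataType::QSymmS8,
                          armnn::DataType::Signed32>(backends, armnn::DataLayout::NHWC);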
diff --git a/src/backends/backendsCommon/test/QuantizationEndToEndTestImpl.hpp b/src/backends/backendsCommon/test/QuantizationEndToEndTestImpl.hpp
index f5c2eea601..3039b9b5a3 100644
--- a/src/backends/backendsCommon/test/QuantizationEndToEndTestImpl.hpp
+++ b/src/backends/backendsCommon/test/QuantizationEndToEndTestImpl.hpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2023 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2023-2024 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -105,4 +105,24 @@ void QuantizationEndToEndFloat16(const std::vector<armnn::BackendId>& backends)
qOffset);
};
+inline void QuantizationEndToEndInt8(const std::vector<armnn::BackendId>& backends)
+{
+ using namespace armnn;
+
+ const TensorShape tensorShape({ 1, 1, 1, 5 });
+
+ std::vector<int8_t> inputData = { 113, 16, 13, 101, 13 };
+ std::vector<int8_t> expectedOutputData = { 127, 45, 41, 127, 41 };
+
+ float qScale = 0.75f;
+ int32_t qOffset = 24;
+
+ QuantizeEndToEndLayerTestImpl<DataType::QSymmS8, DataType::QSymmS8>(backends,
+ tensorShape,
+ inputData,
+ expectedOutputData,
+ qScale,
+ qOffset);
+};
+
}
\ No newline at end of file
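
The expected values follow the usual affine quantization rule; assuming the INT8 input is read back with unit scale and zero offset, the reference output is q = clamp(round(x / qScale) + qOffset, -128, 127):

    // qScale = 0.75, qOffset = 24
    // 113 -> round(113 / 0.75) + 24 = 151 + 24 = 175 -> clamped to 127
    //  16 -> round( 16 / 0.75) + 24 =  21 + 24 =  45
    //  13 -> round( 13 / 0.75) + 24 =  17 + 24 =  41
    // 101 -> round(101 / 0.75) + 24 = 135 + 24 = 159 -> clamped to 127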
diff --git a/src/backends/reference/test/RefEndToEndTests.cpp b/src/backends/reference/test/RefEndToEndTests.cpp
index 73786b5ccd..68b7fbff90 100644
--- a/src/backends/reference/test/RefEndToEndTests.cpp
+++ b/src/backends/reference/test/RefEndToEndTests.cpp
@@ -626,6 +626,13 @@ TEST_CASE("RefConvolution2dFloat16Test")
Convolution2dEndToEnd<armnn::DataType::Float16>(defaultBackends, armnn::DataLayout::NHWC);
}
+TEST_CASE("RefConvolution2dInt8Test")
+{
+ Convolution2dEndToEnd<armnn::DataType::QSymmS8,
+ armnn::DataType::QSymmS8,
+ armnn::DataType::Signed32>(defaultBackends, armnn::DataLayout::NHWC);
+}
+
TEST_CASE("RefConvolution3dFloat32Test")
{
Convolution3dEndToEnd<armnn::DataType::Float32, armnn::DataType::Float32>(defaultBackends,
@@ -818,6 +825,12 @@ TEST_CASE("DepthToSpaceEndToEndNhwcInt16")
DepthToSpaceEndToEnd<armnn::DataType::QSymmS16>(defaultBackends, armnn::DataLayout::NHWC);
}
+// Quantization
+TEST_CASE("RefQuantizeInt8")
+{
+ QuantizationEndToEndInt8(defaultBackends);
+}
+
// Dequantize
TEST_CASE("DequantizeEndToEndSimpleTest")
{
diff --git a/src/backends/tosaCommon/operatorMappings/Conv2dOperator.cpp b/src/backends/tosaCommon/operatorMappings/Conv2dOperator.cpp
index c65f1891da..1c8682b1ab 100644
--- a/src/backends/tosaCommon/operatorMappings/Conv2dOperator.cpp
+++ b/src/backends/tosaCommon/operatorMappings/Conv2dOperator.cpp
@@ -4,6 +4,8 @@
//
#include "Conv2dOperator.hpp"
+#include "TosaRescaleOperatorUtils.hpp"
+#include <ResolveType.hpp>
TosaSerializationBasicBlock* ConvertConv2dToTosaOperator(const Layer* layer,
const std::vector<const TensorInfo*>& inputs,
@@ -14,6 +16,9 @@ TosaSerializationBasicBlock* ConvertConv2dToTosaOperator(const Layer* layer,
std::string outputName = std::string("output0_");
std::string blockName = std::string("Op_CONV2D_block_") + GetUniqueTosaMappingID();
+ DType inputDType0 = ArmNNToDType(inputs[0]->GetDataType());
+ DType outputDType0 = ArmNNToDType(outputs[0]->GetDataType());
+
// Set input names for validation purposes only.
if(layer == nullptr)
{
@@ -52,7 +57,6 @@ TosaSerializationBasicBlock* ConvertConv2dToTosaOperator(const Layer* layer,
if(inputNames[0].find("input0_") != std::string::npos)
{
std::vector<int32_t> inputShape0 = GetTosaTensorShape(inputs[0]->GetShape());
- DType inputDType0 = ArmNNToDType(inputs[0]->GetDataType());
tensors.push_back(new TosaSerializationTensor(inputNames[0], inputShape0, inputDType0, {}));
}
@@ -87,23 +91,32 @@ TosaSerializationBasicBlock* ConvertConv2dToTosaOperator(const Layer* layer,
// The size of the bias must match the channels dimension, so get the correct index.
unsigned int index = (conv2dDescriptor->m_DataLayout == DataLayout::NHWC) ? 3 : 1;
- std::vector<uint8_t> uint8Data;
- std::vector<float> data(outputs[0]->GetShape()[index], 0.0f);
+ const DType dType = (inputDType0 == DType_INT8) ? DType_INT32 : outputDType0;
+ std::vector<float> data(outputs[0]->GetShape()[index], 0);
+ std::vector<uint8_t> uint8Data;
TosaSerializationHandler::ConvertF32toU8(data, uint8Data);
tensors.push_back(new TosaSerializationTensor(constantName,
{static_cast<int32_t>(outputs[0]->GetShape()[index])},
- DType_FP32,
+ dType,
uint8Data));
inputNames.emplace_back(constantName);
}
// Setup Output Tensor
- std::vector<int32_t> outputShape0 = GetTosaTensorShape(outputs[0]->GetShape());
- DType outputDType0 = ArmNNToDType(outputs[0]->GetDataType());
-
- tensors.push_back(new TosaSerializationTensor(outputName, outputShape0, outputDType0, {}));
+ std::vector<int32_t> outputShape0 = {GetTosaTensorShape(outputs[0]->GetShape())};
+ std::string outputConv2dName;
+ bool isInputInt8 = (inputDType0 == DType_INT8);
+ if (isInputInt8)
+ {
+ outputConv2dName = std::string("intermediate0_") + GetUniqueTosaMappingID();
+ tensors.push_back(new TosaSerializationTensor(outputConv2dName, outputShape0, DType_INT32, {}));
+ }
+ else
+ {
+ tensors.push_back(new TosaSerializationTensor(outputName, outputShape0, outputDType0, {}));
+ }
// Set up CONV2D operator
std::vector<int> pad = {static_cast<int>(conv2dDescriptor->m_PadTop),
@@ -114,15 +127,45 @@ TosaSerializationBasicBlock* ConvertConv2dToTosaOperator(const Layer* layer,
static_cast<int>(conv2dDescriptor->m_StrideX)};
std::vector<int> dilation = {static_cast<int>(conv2dDescriptor->m_DilationY),
static_cast<int>(conv2dDescriptor->m_DilationX)};
- TosaConvAttribute attribute(pad, stride, dilation, 0, 0, false); // input_zp, weight_zp, local_bound
+ TosaConvAttribute attribute(pad, stride, dilation,
+ inputs[0]->GetQuantizationOffset(), // input_zp
+ inputs[1]->GetQuantizationOffset(), // weight_zp
+ false); // local_bound
- auto* op = new TosaSerializationOperator(Op_CONV2D,
- Attribute_ConvAttribute,
- &attribute,
- inputNames,
- {outputName});
- operators.push_back(op);
+ std::string& convOutStr = isInputInt8 ? outputConv2dName : outputName;
+ auto* conv2d_op = new TosaSerializationOperator(Op_CONV2D,
+ Attribute_ConvAttribute,
+ &attribute,
+ inputNames,
+ {convOutStr});
+ operators.push_back(conv2d_op);
+
+ if (isInputInt8)
+ {
+ int32_t output_zp = outputs[0]->GetQuantizationOffset();
+ double output_scale = outputs[0]->GetQuantizationScales()[0];
+ double input_scale = inputs[0]->GetQuantizationScales()[0];
+ const std::vector<float>& weight_scales = inputs[1]->GetQuantizationScales();
+
+ TosaSerializationOperator* rescaleOp = nullptr;
+ TosaSerializationTensor* rescaleTensor = nullptr;
+ CreateRescaleTosaOperatorPerChannel(outputConv2dName,
+ outputName,
+ DType_INT8,
+ outputShape0,
+ 0,
+ output_zp,
+ true,
+ true,
+ input_scale,
+ output_scale,
+ weight_scales,
+ &rescaleOp,
+ &rescaleTensor);
+ operators.push_back(rescaleOp);
+ tensors.push_back(rescaleTensor);
+ }
// operatorInputNames/operatorOutputNames ends up being the same as
// blockInputNames/blockOutputNames for one-to-one ArmNN to TOSA mappings
return new TosaSerializationBasicBlock(blockName, // name
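
The rescale wired in above follows the standard requantization identity. Roughly, assuming a per-tensor input scale and per-channel weight scales (a sketch, not the exact code):

    // CONV2D output is a raw INT32 accumulator, so for each output channel c:
    //   real = acc * input_scale * weight_scales[c]
    //   q    = real / output_scale + output_zp
    // hence RESCALE applies scale[c] = (input_scale * weight_scales[c]) / output_scale,
    // encoded per channel as an integer multiplier/shift pair
    // (see CreateRescaleTosaOperatorPerChannel in TosaRescaleOperatorUtils.hpp).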
diff --git a/src/backends/tosaCommon/operatorMappings/QuantizeOperator.cpp b/src/backends/tosaCommon/operatorMappings/QuantizeOperator.cpp
index 1107add6e9..1a104d8423 100644
--- a/src/backends/tosaCommon/operatorMappings/QuantizeOperator.cpp
+++ b/src/backends/tosaCommon/operatorMappings/QuantizeOperator.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2023 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2023-2024 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
// Copyright © 2020 The TensorFlow Authors. All Rights Reserved.
@@ -8,6 +8,8 @@
#include "QuantizeOperator.hpp"
+#include "TosaRescaleOperatorUtils.hpp"
+
// This function is paraphrased from:
// tensorflow/compiler/mlir/tosa/transforms/legalize_common.cc from function convertQuantizeOp
TosaSerializationBasicBlock* ConvertQuantizeToTosaOperator(const Layer* layer,
@@ -20,10 +22,6 @@ TosaSerializationBasicBlock* ConvertQuantizeToTosaOperator(const Layer* layer,
"ConvertQuantizeToTosaOperator: Quantize must have only one output" );
std::string inputName = std::string("input0_");
- std::string outputNameZeroPoint = std::string("intermediate0_") + GetUniqueTosaMappingID();
- std::string outputNameScale = std::string("intermediate1_") + GetUniqueTosaMappingID();
- std::string outputNameMul = std::string("intermediate2_") + GetUniqueTosaMappingID();
- std::string outputNameAdd = std::string("intermediate3_") + GetUniqueTosaMappingID();
std::string outputName = std::string("output0_");
std::string blockName = std::string("Op_QUANTIZE_block_") + GetUniqueTosaMappingID();
@@ -55,85 +53,121 @@ TosaSerializationBasicBlock* ConvertQuantizeToTosaOperator(const Layer* layer,
std::vector<TosaSerializationTensor*> tensors;
+ std::vector<int32_t> inputShape0 = GetTosaTensorShape(inputInfo.GetShape());
+ DType inputDType0 = ArmNNToDType(inputInfo.GetDataType());
+ bool isFloatInput = inputDType0 == DType::DType_FP16 || inputDType0 == DType::DType_FP32;
+
// Only add input tensors if connected layer is an input layer.
// As intermediate or constant tensors will be created separately.
// There also can't be duplicate tensor.
- std::vector<int32_t> inputShape0;
- DType inputDType0 = DType::DType_UNKNOWN;
if(inputName.find("input0_") != std::string::npos)
{
- inputShape0 = GetTosaTensorShape(inputInfo.GetShape());
- inputDType0 = ArmNNToDType(inputInfo.GetDataType());
- ARMNN_THROW_INVALIDARG_MSG_IF_FALSE( inputDType0 == DType::DType_FP16 || inputDType0 == DType::DType_FP32,
- "ConvertQuantizeToTosaOperator: Quantize input must be of type Float" );
tensors.push_back(new TosaSerializationTensor(inputName, inputShape0, inputDType0, {}));
}
std::vector<int32_t> outputShape0 = GetTosaTensorShape(outputInfo.GetShape());
DType outputDType0 = ArmNNToDType(outputInfo.GetDataType());
- // quantize:
- // const_zeroPoint = constant(zeroPoint)
- // const_scale = constant(scale)
- // out_mul = mul(input, const_scale)
- // out_add = add(out_mul, const_zeroPoint)
- // output = cast<output_type>(out_add)
-
- // const_zeroPoint
- TosaSerializationOperator* zeroPointOp = nullptr;
- TosaSerializationTensor* zeroPointTensor = nullptr;
- CreateConstTosaOperator<float>(outputNameZeroPoint,
- zeroPoint,
- inputDType0,
- inputShape0,
- zeroPointOp,
- zeroPointTensor);
- tensors.push_back(zeroPointTensor);
-
- // const_scale
- TosaSerializationOperator *scaleOp = nullptr;
- TosaSerializationTensor* scaleTensor = nullptr;
- CreateConstTosaOperator<float>(outputNameScale,
- scale,
- inputDType0,
- inputShape0,
- scaleOp,
- scaleTensor);
- tensors.push_back(scaleTensor);
-
- // mul
- int32_t shift = 0;
- TosaMulAttribute mulAttribute(shift);
- TosaSerializationOperator* mulOp = new TosaSerializationOperator(Op_MUL,
- Attribute_MulAttribute,
- &mulAttribute,
- {inputName, outputNameScale},
- {outputNameMul});
- tensors.push_back(new TosaSerializationTensor(outputNameMul, inputShape0, inputDType0, {}));
-
- // add
- TosaSerializationOperator* addOp = new TosaSerializationOperator(Op_ADD,
- Attribute_NONE,
- nullptr,
- {outputNameMul, outputNameZeroPoint},
- {outputNameAdd});
- tensors.push_back(new TosaSerializationTensor(outputNameAdd, inputShape0, inputDType0, {}));
-
- // cast
- TosaSerializationOperator* castOp = new TosaSerializationOperator(Op_CAST,
- Attribute_NONE,
- nullptr,
- {outputNameAdd},
- {outputName});
-
- tensors.push_back(new TosaSerializationTensor(outputName, outputShape0, outputDType0, {}));
-
- // operatorInputNames/operatorOutputNames ends up being the same as
- // blockInputNames/blockOutputNames for one-to-one ArmNN to TOSA mappings
- return new TosaSerializationBasicBlock(blockName, // name
- mainName, // region name
- {zeroPointOp, scaleOp, mulOp, addOp, castOp}, // operators
- tensors, // tensors
- {inputName}, // inputs
- {outputName}); // outputs
-}
+ if (isFloatInput)
+ {
+ // quantize:
+ // const_zeroPoint = constant(zeroPoint)
+ // const_scale = constant(scale)
+ // out_mul = mul(input, const_scale)
+ // out_add = add(out_mul, const_zeroPoint)
+ // output = cast<output_type>(out_add)
+
+ std::string outputNameScale = std::string("input1_") + GetUniqueTosaMappingID();
+ std::string outputNameZeroPoint = std::string("input2_") + GetUniqueTosaMappingID();
+ std::string outputNameMul = std::string("intermediate0_") + GetUniqueTosaMappingID();
+ std::string outputNameAdd = std::string("intermediate1_") + GetUniqueTosaMappingID();
+
+ // const_zeroPoint
+ TosaSerializationOperator* zeroPointOp = nullptr;
+ TosaSerializationTensor* zeroPointTensor = nullptr;
+ CreateConstTosaOperator<float>(outputNameZeroPoint,
+ zeroPoint,
+ inputDType0,
+ inputShape0,
+ zeroPointOp,
+ zeroPointTensor);
+ tensors.push_back(zeroPointTensor);
+
+ // const_scale
+ TosaSerializationOperator *scaleOp = nullptr;
+ TosaSerializationTensor* scaleTensor = nullptr;
+ CreateConstTosaOperator<float>(outputNameScale,
+ scale,
+ inputDType0,
+ inputShape0,
+ scaleOp,
+ scaleTensor);
+ tensors.push_back(scaleTensor);
+
+ // mul
+ int32_t shift = 0;
+ TosaMulAttribute mulAttribute(shift);
+ TosaSerializationOperator* mulOp = new TosaSerializationOperator(Op_MUL,
+ Attribute_MulAttribute,
+ &mulAttribute,
+ {inputName, outputNameScale},
+ {outputNameMul});
+ tensors.push_back(new TosaSerializationTensor(outputNameMul, inputShape0, inputDType0, {}));
+
+ // add
+ TosaSerializationOperator* addOp = new TosaSerializationOperator(Op_ADD,
+ Attribute_NONE,
+ nullptr,
+ {outputNameMul, outputNameZeroPoint},
+ {outputNameAdd});
+ tensors.push_back(new TosaSerializationTensor(outputNameAdd, inputShape0, inputDType0, {}));
+
+ // cast
+ TosaSerializationOperator* castOp = new TosaSerializationOperator(Op_CAST,
+ Attribute_NONE,
+ nullptr,
+ {outputNameAdd},
+ {outputName});
+
+ tensors.push_back(new TosaSerializationTensor(outputName, outputShape0, outputDType0, {}));
+
+ // operatorInputNames/operatorOutputNames ends up being the same as
+ // blockInputNames/blockOutputNames for one-to-one ArmNN to TOSA mappings
+ return new TosaSerializationBasicBlock(blockName, // name
+ mainName, // region name
+ {zeroPointOp, scaleOp, mulOp, addOp, castOp}, // operators
+ tensors, // tensors
+ {inputName}, // inputs
+ {outputName}); // outputs
+ }
+ else
+ {
+ double scale_alpha = inputs[0]->GetQuantizationScale() / outputs[0]->GetQuantizationScale();
+ int32_t input_zp = inputs[0]->GetQuantizationOffset();
+ int32_t output_zp = outputs[0]->GetQuantizationOffset();
+
+ TosaSerializationOperator* rescaleOp = nullptr;
+ TosaSerializationTensor* rescaleTensor = nullptr;
+ CreateRescaleTosaOperator(inputName,
+ outputName,
+ outputDType0,
+ inputShape0,
+ scale_alpha,
+ input_zp,
+ output_zp,
+ true,
+ true,
+ &rescaleOp,
+ &rescaleTensor);
+ tensors.push_back(rescaleTensor);
+
+ // operatorInputNames/operatorOutputNames ends up being the same as
+ // blockInputNames/blockOutputNames for one-to-one ArmNN to TOSA mappings
+ return new TosaSerializationBasicBlock(blockName, // name
+ mainName, // region name
+ {rescaleOp}, // operators
+ tensors, // tensors
+ {inputName}, // inputs
+ {outputName}); // outputs
+ }
+}
\ No newline at end of file
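
For the non-float path, INT8 -> INT8 requantization collapses to a single RESCALE, since

    // q_out = round((q_in - input_zp) * input_scale / output_scale) + output_zp

which is why scale_alpha above is input_scale / output_scale; CreateRescaleTosaOperator converts it to a 32-bit multiplier/shift pair.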
diff --git a/src/backends/tosaCommon/operatorMappings/TosaOperatorUtils.hpp b/src/backends/tosaCommon/operatorMappings/TosaOperatorUtils.hpp
index 047e0a1f42..b7f14bf5b7 100644
--- a/src/backends/tosaCommon/operatorMappings/TosaOperatorUtils.hpp
+++ b/src/backends/tosaCommon/operatorMappings/TosaOperatorUtils.hpp
@@ -48,6 +48,33 @@ inline DType ArmNNToDType(const DataType& type)
}
}
+// Function to return ArmNN datatype from input Tosa datatype.
+inline DataType DtypeToArmNN(const DType type)
+{
+ switch (type)
+ {
+ case DType_FP16:
+ return DataType::Float16;
+ case DType_BF16:
+ return DataType::BFloat16;
+ case DType_FP32:
+ return DataType::Float32;
+ case DType_UINT8:
+ return DataType::QAsymmU8;
+ case DType_INT8:
+ return DataType::QSymmS8;
+ case DType_INT16:
+ return DataType::QSymmS16;
+ case DType_INT32:
+ return DataType::Signed32;
+ case DType_BOOL:
+ return DataType::Boolean;
+ default:
+ throw armnn::Exception("DtypeToArmNN: Unsupported tosa::DType in ArmNN.");
+ return DataType::Boolean;
+ }
+}
+
// Function to return Tosa tensor shape from input ArmNN tensor shape.
inline std::vector<int32_t> GetTosaTensorShape(const TensorShape& shape)
{
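
Note that DtypeToArmNN is not a strict inverse of ArmNNToDType: several ArmNN data types share one TOSA DType, so the mapping picks a canonical representative. For instance (assuming ArmNNToDType maps both signed 8-bit quantized types to DType_INT8):

    // DtypeToArmNN(ArmNNToDType(DataType::QSymmS8))  == DataType::QSymmS8  // round-trips
    // DtypeToArmNN(ArmNNToDType(DataType::QAsymmS8)) == DataType::QSymmS8  // representative, not identity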
diff --git a/src/backends/tosaCommon/operatorMappings/TosaRescaleOperatorUtils.hpp b/src/backends/tosaCommon/operatorMappings/TosaRescaleOperatorUtils.hpp
index 1a4dd7aac3..56337cfdf4 100644
--- a/src/backends/tosaCommon/operatorMappings/TosaRescaleOperatorUtils.hpp
+++ b/src/backends/tosaCommon/operatorMappings/TosaRescaleOperatorUtils.hpp
@@ -11,12 +11,13 @@ inline void CreateRescaleTosaOperator(const std::string& inputName,
const std::string& outputName,
DType output_type,
const std::vector<int32_t>& shape,
- int32_t scale_multiplier,
- int32_t scale_shift,
+ const std::vector<int32_t>& multipliers,
+ const std::vector<int32_t>& shifts,
int32_t input_zp,
int32_t output_zp,
bool double_round,
bool scale32,
+ bool per_channel,
TosaSerializationOperator** op,
TosaSerializationTensor** tensor)
{
@@ -25,15 +26,13 @@ inline void CreateRescaleTosaOperator(const std::string& inputName,
throw armnn::Exception("CreateRescaleTosaOperator: nullptr op");
}
- std::vector<int32_t> multipliers{scale_multiplier};
- std::vector<int32_t> shifts{scale_shift};
TosaRescaleAttribute attribute(input_zp,
output_zp,
multipliers,
shifts,
scale32,
double_round,
- false, // per_channel
+ per_channel,
false, // input_unsigned
false); // output_unsigned
@@ -58,75 +57,191 @@ inline void CreateRescaleTosaOperator(const std::string& inputName,
const std::string& outputName,
DType output_type,
const std::vector<int32_t>& shape,
- double scale,
+ int32_t scale_multiplier,
+ int32_t scale_shift,
int32_t input_zp,
int32_t output_zp,
bool double_round,
bool scale32,
+ bool per_channel,
TosaSerializationOperator** op,
TosaSerializationTensor** tensor)
{
- // The code that follows is based on the behaviour specified in
- // https://www.mlplatform.org/tosa/tosa_spec.html#_precision_scaling
+ const std::vector<int32_t> multipliers{scale_multiplier};
+ const std::vector<int32_t> shifts{scale_shift};
+ CreateRescaleTosaOperator(inputName, outputName, output_type, shape, multipliers, shifts,
+ input_zp, output_zp, double_round, scale32, per_channel, op, tensor);
+}
+
+/// The following is taken from mlir/lib/Dialect/Tosa/Utils/QuantUtils.cpp in the LLVM project
+/// From a scale value, generates multiplier and shift values where
+/// mantissa is in [-1.0,-0.5] or [0.5, 1.0] such that
+/// multiplier = mantissa*2^shift for 32-bit scaling.
+static void ComputeMultiplierAndShiftTosaScale32(double scale,
+ int32_t &multiplier,
+ int32_t &shift)
+{
+ const double mantissa = std::frexp(scale, &shift);
+ auto shiftedM = std::round(mantissa * (int64_t(1) << 31));
+
+ // Can't be greater than 1.0.
+ if (!(shiftedM <= (int64_t(1) << 31)))
+ {
+ throw armnn::Exception("Shifted mantissa exceeds 32 signed bits");
+ }
- auto GetScaleParams = [](double scale, double& m, int32_t& n)
+ if (shiftedM == (int64_t(1) << 31))
{
- m = 0;
- n = 0;
+ shiftedM /= 2;
+ shift++;
+ }
- double lastErr = 1e06;
+ // TOSA expects right shift to be positive, and embed (1 << 31) into right
+ // shift bits.
+ shift = (-shift) + 31;
- const int32_t numExponents = 62;
- const double start = 1.0;
- const double end = 2.0;
+ if (!(shiftedM <= std::numeric_limits<int32_t>::max()))
+ {
+ throw armnn::Exception("Shifted mantissa exceeds 32-bit signed output type");
+ }
- // Slow iterative approach but running in Reference only
- for (int32_t i = 0; i < numExponents; ++i)
- {
- double exp = 1.0 / (1 << i);
- double currentM = scale / exp; // Find current m given value = currentM * exp
- if ((currentM >= start) && (currentM < end))
- {
- double value = currentM * exp;
- double err = std::abs(scale - value);
- if (err < lastErr)
- {
- // Take the m, n that minimize the error
- n = i;
- m = currentM;
- lastErr = err;
- }
- }
- }
- };
+ multiplier = static_cast<int32_t>(shiftedM);
- auto GetMultiplierShiftByScale = [GetScaleParams](bool scale32, double scale, int32_t& multiplier, int32_t& shift)
+ // Shifting tops out at 62 bits. Right shift to make 62 bits the max.
+ // The limit of 62 on shift allows the shift to be decomposed as
+ // two right shifts of 31.
+ if (shift > 62)
{
- double m = 0;
- int32_t n = 0;
+ // Shifting the multiplier by more than 32-bits is unnecessary.
+ multiplier = multiplier >> std::min<int32_t>(31, shift - 62);
+ shift = 62;
+ }
+}
- GetScaleParams(scale, m, n);
+/// The following is taken from mlir/lib/Dialect/Tosa/Utils/QuantUtils.cpp in the LLVM project
+/// From a scale value, generates multiplier and shift values where
+/// mantissa is in [-1.0,-0.5] or [0.5, 1.0] such that
+/// multiplier = mantissa*2^shift for 16-bit scaling.
+static void ComputeMultiplierAndShiftTosaScale16(double scale,
+ int32_t &multiplier,
+ int32_t &shift)
+{
+ const double mantissa = std::frexp(scale, &shift);
+ auto shiftedM = std::round(mantissa * (int64_t(1) << 15));
- multiplier = (scale32) ? (1 << 30) * static_cast<int32_t>(m) : (1 << 14) * static_cast<int32_t>(m);
- shift = (scale32) ? (30 + n) : (14 + n);
- };
+ // Can't be greater than 1.0.
+ if (!(shiftedM <= (int64_t(1) << 15)))
+ {
+ throw armnn::Exception("Shifted mantissa exceeds 16 signed bits");
+ }
+ if (shiftedM == (int64_t(1) << 15))
+ {
+ shiftedM /= 2;
+ shift++;
+ }
+
+ // TOSA expects right shift to be positive and embed (1 << 15) into right
+ // shift bits.
+ shift = (-shift) + 15;
+
+ if (!(shiftedM <= std::numeric_limits<int32_t>::max()))
+ {
+ throw armnn::Exception("Shifted mantissa exceeds 32-bit signed output type");
+ }
+
+ multiplier = static_cast<int32_t>(shiftedM);
+
+ // Shifting tops out at 62 bits. Right shift to make 62 bits the max.
+ // The limit of 62 on shift allows the shift to be decomposed as
+ // two right shifts of 31.
+ if (shift > 62)
+ {
+ // Shifting the multiplier by more than 31-bits is unnecessary.
+ multiplier = multiplier >> std::min<int32_t>(31, shift - 62);
+ shift = 62;
+ }
+}
+
+inline void CreateRescaleTosaOperator(const std::string& inputName,
+ const std::string& outputName,
+ DType output_type,
+ const std::vector<int32_t>& shape,
+ double scale,
+ int32_t input_zp,
+ int32_t output_zp,
+ bool double_round,
+ bool scale32,
+ TosaSerializationOperator** op,
+ TosaSerializationTensor** tensor)
+{
int32_t multiplier;
int32_t shift;
- GetMultiplierShiftByScale(scale32, scale, multiplier, shift);
+
+ if (scale32)
+ {
+ ComputeMultiplierAndShiftTosaScale32(scale, multiplier, shift);
+ }
+ else
+ {
+ ComputeMultiplierAndShiftTosaScale16(scale, multiplier, shift);
+ }
+
CreateRescaleTosaOperator(inputName, outputName, output_type, shape, multiplier, shift,
- input_zp, output_zp, double_round, scale32, op, tensor);
+ input_zp, output_zp, double_round, scale32, false, op, tensor);
}
-inline void CreateFromInt32RescaleTosaOperator(const std::string& inputName,
- const std::string& outputName,
+inline void CreateRescaleTosaOperatorPerChannel(const std::string& inputName,
+ const std::string& outputName,
DType output_type,
const std::vector<int32_t>& shape,
- double output_scale,
+ int32_t input_zp,
int32_t output_zp,
+ bool double_round,
+ bool scale32,
+ double input_scale,
+ double output_scale,
+ const std::vector<float>& weight_scales,
TosaSerializationOperator** op,
TosaSerializationTensor** tensor)
{
- CreateRescaleTosaOperator(inputName, outputName, output_type, shape,
- output_scale, 0, output_zp, true, true, op, tensor);
+ std::vector<int32_t> op_tensor_multipliers;
+ std::vector<int32_t> op_tensor_shifts;
+ op_tensor_multipliers.reserve(weight_scales.size());
+ op_tensor_shifts.reserve(weight_scales.size());
+
+ for (const float& weight_scale : weight_scales)
+ {
+ double op_tensor_scale = (input_scale * weight_scale) / output_scale;
+ int32_t multiplier;
+ int32_t shift;
+
+ if (scale32)
+ {
+ ComputeMultiplierAndShiftTosaScale32(op_tensor_scale, multiplier, shift);
+ }
+ else
+ {
+ ComputeMultiplierAndShiftTosaScale16(op_tensor_scale, multiplier, shift);
+ }
+
+ op_tensor_multipliers.push_back(multiplier);
+ op_tensor_shifts.push_back(shift);
+ }
+
+ CreateRescaleTosaOperator(inputName, outputName, output_type, shape, op_tensor_multipliers, op_tensor_shifts,
+ input_zp, output_zp, double_round, scale32, true, op, tensor);
+}
+
+inline void CreateFromInt32RescaleTosaOperator(const std::string& inputName,
+ const std::string& outputName,
+ DType output_type,
+ const std::vector<int32_t>& shape,
+ double output_scale,
+ int32_t output_zp,
+ TosaSerializationOperator** op,
+ TosaSerializationTensor** tensor)
+{
+ CreateRescaleTosaOperator(inputName, outputName, output_type, shape, output_scale,
+ 0, output_zp, true, true, op, tensor);
}
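
As a worked example of the 32-bit scaling path, take scale = 0.75 (values computed by hand):

    // std::frexp(0.75, &shift) -> mantissa = 0.75, shift = 0
    // shiftedM   = round(0.75 * 2^31) = 1610612736
    // shift      = (-0) + 31 = 31
    // multiplier = 1610612736, and multiplier * 2^-31 == 0.75 exactly

The per-channel helper then simply runs this computation once per weight scale, with op_tensor_scale = (input_scale * weight_scale) / output_scale.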
diff --git a/src/backends/tosaReference/test/TosaRefEndToEndTests.cpp b/src/backends/tosaReference/test/TosaRefEndToEndTests.cpp
index 68531f89ff..f5edfb0e0b 100644
--- a/src/backends/tosaReference/test/TosaRefEndToEndTests.cpp
+++ b/src/backends/tosaReference/test/TosaRefEndToEndTests.cpp
@@ -95,6 +95,20 @@ TEST_CASE("TosaRefConv2dWithoutBiasEndtoEndTestFloat32")
Convolution2dEndToEnd<armnn::DataType::Float32>(tosaDefaultBackends, armnn::DataLayout::NHWC, false);
}
+TEST_CASE("TosaRefConv2dEndtoEndTestInt8")
+{
+ Convolution2dEndToEnd<armnn::DataType::QSymmS8,
+ armnn::DataType::QSymmS8,
+ armnn::DataType::Signed32>(tosaDefaultBackends, armnn::DataLayout::NHWC);
+}
+
+TEST_CASE("TosaRefConv2dWithoutBiasEndtoEndTestInt8")
+{
+ Convolution2dEndToEnd<armnn::DataType::QSymmS8,
+ armnn::DataType::QSymmS8,
+ armnn::DataType::Signed32>(tosaDefaultBackends, armnn::DataLayout::NHWC, false);
+}
+
// Maximum
TEST_CASE("TosaRefMaximumEndtoEndTestInt8")
{