| author    | Kevin May <kevin.may@arm.com>                          | 2024-06-19 12:36:22 +0100 |
|-----------|--------------------------------------------------------|---------------------------|
| committer | Kevin May <kevin.may@arm.com>                          | 2024-06-19 11:53:41 +0000 |
| commit    | ae67c47365dcd6d629a7d04726535edd21388795 (patch)       |                           |
| tree      | 5e829db95620fbb06b4351fec9bfddc3e2c2eed7 /src/backends |                           |
| parent    | 398f18ee3e8822d3143f0bca7fb2b5bbc066e232 (diff)        |                           |
| download  | armnn-ae67c47365dcd6d629a7d04726535edd21388795.tar.gz  |                           |
IVGCVSW-8407 Add quantized Mul and Sub in TosaCommon and TosaRef
* Add rescales before and/or after ops when using int8
Signed-off-by: Kevin May <kevin.may@arm.com>
Change-Id: Ic9704f91147a1a439411511c7d57d0aa52b93e26
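
The rescale trick in the bullet above is worth spelling out: TOSA's ADD, SUB and MAXIMUM accept int32 operands only, so an int8 quantized tensor is first RESCALE'd into an int32 intermediate, the op runs in int32, and the result is RESCALE'd back to int8. Below is a minimal, self-contained sketch of the arithmetic involved (plain C++; the `Rescale` helper and the scales and zero points are illustrative stand-ins, not the patch's serialization code):

```cpp
// Sketch of the int8 -> int32 -> int8 rescale pattern around an ADD.
// Assumed/illustrative: Rescale(), and all scale/zero-point values below.
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <iostream>

// q_out = round((q_in - in_zp) * (in_scale / out_scale)) + out_zp.
// TOSA encodes in_scale / out_scale as a fixed-point multiplier and shift;
// a double keeps the sketch short.
int32_t Rescale(int32_t q, double scaleRatio, int32_t inZp, int32_t outZp)
{
    return static_cast<int32_t>(std::lround((q - inZp) * scaleRatio)) + outZp;
}

int main()
{
    const double  inScale = 0.25, outScale = 0.5;  // illustrative values
    const int32_t inZp = 0, outZp = 0;

    const int8_t a = 40, b = 36;                   // real values 10.0 and 9.0

    // RESCALE each int8 input into the output's int32 domain...
    int32_t a32 = Rescale(a, inScale / outScale, inZp, outZp);
    int32_t b32 = Rescale(b, inScale / outScale, inZp, outZp);

    // ...run the op in int32...
    int32_t sum32 = a32 + b32;

    // ...and clamp the result back into int8 range.
    int8_t out = static_cast<int8_t>(std::clamp(sum32, -128, 127));

    std::cout << int(out) * outScale << '\n';      // prints 19 (10.0 + 9.0)
}
```

The input_scale / output_scale ratio is what the patch computes as scale_alpha in AddRescaleOp and hands to CreateRescaleTosaOperator.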
Diffstat (limited to 'src/backends')
3 files changed, 191 insertions, 27 deletions
```diff
diff --git a/src/backends/backendsCommon/test/ElementwiseBinaryEndToEndTestImpl.hpp b/src/backends/backendsCommon/test/ElementwiseBinaryEndToEndTestImpl.hpp
index dbc270e0c9..dd9dfef7cf 100644
--- a/src/backends/backendsCommon/test/ElementwiseBinaryEndToEndTestImpl.hpp
+++ b/src/backends/backendsCommon/test/ElementwiseBinaryEndToEndTestImpl.hpp
@@ -183,8 +183,8 @@ void ElementwiseBinarySimpleNoReshapeEndToEnd(const std::vector<BackendId>& back
 {
     using namespace armnn;
 
-    const float qScale = IsQuantizedType<TInput>() ? 0.25f : 1.0f;
-    const int32_t qOffset = IsQuantizedType<TInput>() ? 50 : 0;
+    const float qScale = 1.0f;
+    const int32_t qOffset = 0;
 
     const TensorShape& input1Shape = { 2, 2, 2, 2 };
     const TensorShape& input2Shape = { 2, 2, 2, 2 };
diff --git a/src/backends/tosaCommon/operatorMappings/ElementwiseBinaryOperator.cpp b/src/backends/tosaCommon/operatorMappings/ElementwiseBinaryOperator.cpp
index 55b4f15e49..aa4201467d 100644
--- a/src/backends/tosaCommon/operatorMappings/ElementwiseBinaryOperator.cpp
+++ b/src/backends/tosaCommon/operatorMappings/ElementwiseBinaryOperator.cpp
@@ -4,6 +4,34 @@
 //
 
 #include "ElementwiseBinaryOperator.hpp"
+#include "TosaRescaleOperatorUtils.hpp"
+
+TosaSerializationOperator* AddRescaleOp(const string &inputName,
+                                        const string &outputName,
+                                        std::vector<TosaSerializationTensor *> &tensors,
+                                        const std::vector<const TensorInfo *> &inputs,
+                                        const std::vector<const TensorInfo *> &outputs)
+{
+    double scale_alpha = inputs[1]->GetQuantizationScale() / outputs[0]->GetQuantizationScale();
+    int32_t input_zp = inputs[1]->GetQuantizationOffset();
+    int32_t output_zp = outputs[0]->GetQuantizationOffset();
+
+    TosaSerializationOperator* rescaleOp = nullptr;
+    CreateRescaleTosaOperator(inputName,
+                              outputName,
+                              scale_alpha,
+                              input_zp,
+                              output_zp,
+                              true,
+                              true,
+                              &rescaleOp);
+
+    std::vector<int32_t> shape = GetTosaTensorShape(inputs[1]->GetShape());
+    tensors.push_back(new TosaSerializationTensor(outputName,
+                                                  shape,
+                                                  DType_INT32, {}));
+    return rescaleOp;
+}
 
 TosaSerializationBasicBlock* ConvertElementwiseBinaryToTosaOperator(const Layer* layer,
                                                                     const LayerType type,
@@ -14,6 +42,8 @@ TosaSerializationBasicBlock* ConvertElementwiseBinaryToTosaOperator(const Layer*
     std::string input0Name = std::string("input_0");
     std::string input1Name = std::string("input_1");
     std::string outputName = std::string("output0_");
+    std::string input0ElemenwiseBinaryName = std::string("intermediate0_") + GetUniqueTosaMappingID();
+    std::string input1ElemenwiseBinaryName = std::string("intermediate0_") + GetUniqueTosaMappingID();
     std::string blockName;
 
     // If a layer is present then the block will be used for execution, so input and output names need to be determined
@@ -26,6 +56,46 @@ TosaSerializationBasicBlock* ConvertElementwiseBinaryToTosaOperator(const Layer*
     }
 
     TosaSerializationOperator* op = nullptr;
+
+    std::vector<TosaSerializationTensor*> tensors;
+    std::vector<TosaSerializationOperator*> operators;
+    DType inputDType0 = ArmNNToDType(inputs[0]->GetDataType());
+    DType inputDType1 = ArmNNToDType(inputs[1]->GetDataType());
+    DType outputDType0 = ArmNNToDType(outputs[0]->GetDataType());
+    bool isInputInt8 = (inputDType0 == DType_INT8);
+
+    // Only add input tensors if the connected layer is an input layer,
+    // as intermediate or constant tensors will be created separately.
+    // There also can't be duplicate tensors.
+    if(input0Name.find("input_") != std::string::npos)
+    {
+        std::vector<int32_t> inputShape0 = GetTosaTensorShape(inputs[0]->GetShape());
+        tensors.push_back(new TosaSerializationTensor(input0Name, inputShape0, inputDType0, {}));
+    }
+    if(input1Name.find("input_") != std::string::npos)
+    {
+        std::vector<int32_t> inputShape1 = GetTosaTensorShape(inputs[1]->GetShape());
+        tensors.push_back(new TosaSerializationTensor(input1Name, inputShape1, inputDType1, {}));
+    }
+
+    std::vector<int32_t> outputShape0 = GetTosaTensorShape(outputs[0]->GetShape());
+
+    // Assign an output name and add it to the tensors based on the input type.
+    // An int8 input requires the output to be rescaled from int32 back to int8.
+    std::string outputElemenwiseBinaryName;
+    if (isInputInt8)
+    {
+        outputElemenwiseBinaryName = std::string("intermediate0_") + GetUniqueTosaMappingID();
+        tensors.push_back(new TosaSerializationTensor(outputElemenwiseBinaryName, outputShape0, DType_INT32, {}));
+    }
+    else
+    {
+        tensors.push_back(new TosaSerializationTensor(outputName, outputShape0, outputDType0, {}));
+    }
+
+    std::string& elementwiseInput0Str = isInputInt8 ? input0ElemenwiseBinaryName : input0Name;
+    std::string& elementwiseInput1Str = isInputInt8 ? input1ElemenwiseBinaryName : input1Name;
+    std::string& elementwiseOutputStr = isInputInt8 ? outputElemenwiseBinaryName : outputName;
     switch(type)
     {
         case LayerType::Addition:
@@ -42,16 +112,79 @@ TosaSerializationBasicBlock* ConvertElementwiseBinaryToTosaOperator(const Layer*
     {
         switch (descriptor->m_Operation)
         {
+            case armnn::BinaryOperation::Add:
+            {
+                // Add supports DType_INT32 input only, so a rescale is required when the input is DType_INT8
+                if (inputDType0 == DType_INT8)
+                {
+                    operators.push_back(
+                        AddRescaleOp(input0Name, input0ElemenwiseBinaryName, tensors, inputs, outputs));
+
+                    operators.push_back(
+                        AddRescaleOp(input1Name, input1ElemenwiseBinaryName, tensors, inputs, outputs));
+                }
+                op = new TosaSerializationOperator(Op_ADD,
+                                                   Attribute_NONE,
+                                                   nullptr,
+                                                   {elementwiseInput0Str, elementwiseInput1Str},
+                                                   {elementwiseOutputStr});
+                blockName = std::string("Op_ADD_block_") + GetUniqueTosaMappingID();
+                break;
+            }
             case armnn::BinaryOperation::Maximum:
             {
+                // Maximum supports DType_INT32 input only, so a rescale is required when the input is DType_INT8
+                if (inputDType0 == DType_INT8)
+                {
+                    operators.push_back(
+                        AddRescaleOp(input0Name, input0ElemenwiseBinaryName, tensors, inputs, outputs));
+
+                    operators.push_back(
+                        AddRescaleOp(input1Name, input1ElemenwiseBinaryName, tensors, inputs, outputs));
+                }
                 op = new TosaSerializationOperator(Op_MAXIMUM,
                                                    Attribute_NONE,
                                                    nullptr,
-                                                   {input0Name, input1Name},
-                                                   {outputName});
+                                                   {elementwiseInput0Str, elementwiseInput1Str},
+                                                   {elementwiseOutputStr});
                 blockName = std::string("Op_MAXIMUM_block_") + GetUniqueTosaMappingID();
                 break;
             }
+            case armnn::BinaryOperation::Mul:
+            {
+                int8_t shift = 0;
+                TosaMulAttribute mulAttribute(shift);
+
+                // Mul supports DType_INT8 input, so it does not require a rescale before the op,
+                // i.e. "input0Name" is used for the input and not the intermediate "elementwiseInput0Str"
+                op = new TosaSerializationOperator(Op_MUL,
+                                                   Attribute_MulAttribute,
+                                                   &mulAttribute,
+                                                   {input0Name, input1Name},
+                                                   {elementwiseOutputStr});
+                blockName = std::string("Op_MUL_block_") + GetUniqueTosaMappingID();
+                break;
+            }
+            case armnn::BinaryOperation::Sub:
+            {
+                // Sub supports DType_INT32 input only, so a rescale is required when the input is DType_INT8
+                if (inputDType0 == DType_INT8)
+                {
+                    operators.push_back(
+                        AddRescaleOp(input0Name, input0ElemenwiseBinaryName, tensors, inputs, outputs));
+
+                    operators.push_back(
+                        AddRescaleOp(input1Name, input1ElemenwiseBinaryName, tensors, inputs, outputs));
+                }
+
+                op = new TosaSerializationOperator(Op_SUB,
+                                                   Attribute_NONE,
+                                                   nullptr,
+                                                   {elementwiseInput0Str, elementwiseInput1Str},
+                                                   {elementwiseOutputStr});
+                blockName = std::string("Op_SUB_block_") + GetUniqueTosaMappingID();
+                break;
+            }
             default:
                 throw armnn::Exception("ConvertElementwiseBinaryToTosaOperator: Unsupported layer type.");
         }
@@ -83,33 +216,18 @@ TosaSerializationBasicBlock* ConvertElementwiseBinaryToTosaOperator(const Layer*
         throw armnn::Exception("ConvertElementwiseBinaryToTosaOperator: Unsupported layer type.");
     }
 
-    std::vector<TosaSerializationTensor*> tensors;
-
-    // Only add input tensors if connected layer is an input layer.
-    // As intermediate or constant tensors will be created separately.
-    // There also can't be duplicate tensor.
-    if(input0Name.find("input_") != std::string::npos)
-    {
-        std::vector<int32_t> inputShape0 = GetTosaTensorShape(inputs[0]->GetShape());
-        DType inputDType0 = ArmNNToDType(inputs[0]->GetDataType());
-        tensors.push_back(new TosaSerializationTensor(input0Name, inputShape0, inputDType0, {}));
-    }
-    if(input1Name.find("input_") != std::string::npos)
+    operators.push_back(op);
+
+    // All operators require a rescale of the output from DType_INT32 to DType_INT8 when the input is DType_INT8
+    if (inputDType0 == DType_INT8)
     {
-        std::vector<int32_t> inputShape1 = GetTosaTensorShape(inputs[1]->GetShape());
-        DType inputDType1 = ArmNNToDType(inputs[1]->GetDataType());
-        tensors.push_back(new TosaSerializationTensor(input1Name, inputShape1, inputDType1, {}));
+        operators.push_back(
+            AddRescaleOp(outputElemenwiseBinaryName, outputName, tensors, inputs, outputs));
     }
 
-    std::vector<int32_t> outputShape0 = GetTosaTensorShape(outputs[0]->GetShape());
-    DType outputDType0 = ArmNNToDType(outputs[0]->GetDataType());
-
-    tensors.push_back(new TosaSerializationTensor(outputName, outputShape0, outputDType0, {}));
-
-    // operatorInputNames/operatorOutputNames ends up being the same as
-    // blockInputNames/blockOutputNames for one-to-one ArmNN to Tosa mappings
     return new TosaSerializationBasicBlock(blockName,                // name
                                            mainName,                 // region name
-                                           {op},                     // operators
+                                           {operators},              // operators
                                            tensors,                  // tensors
                                            {input0Name, input1Name}, // inputs
                                            {outputName});            // outputs
diff --git a/src/backends/tosaReference/test/TosaRefEndToEndTests.cpp b/src/backends/tosaReference/test/TosaRefEndToEndTests.cpp
index a5f7ad88a8..f86edd52f4 100644
--- a/src/backends/tosaReference/test/TosaRefEndToEndTests.cpp
+++ b/src/backends/tosaReference/test/TosaRefEndToEndTests.cpp
@@ -129,13 +129,59 @@ TEST_CASE("TosaRefConv2dWithoutBiasEndtoEndTestInt8")
                    armnn::DataType::Signed32>(tosaDefaultBackends, armnn::DataLayout::NHWC, false);
 }
 
+// Elementwise Binary
+// Add
+TEST_CASE("TosaRefAddEndtoEndTestInt32")
+{
+    ElementwiseBinarySimpleNoReshapeEndToEnd<DataType::Signed32>(tosaDefaultBackends,
+                                                                 armnn::BinaryOperation::Add);
+}
+
+TEST_CASE("TosaRefAddEndtoEndTestInt8")
+{
+    ElementwiseBinarySimpleNoReshapeEndToEnd<DataType::QSymmS8>(tosaDefaultBackends,
+                                                                armnn::BinaryOperation::Add);
+}
+
 // Maximum
-TEST_CASE("TosaRefMaximumEndtoEndTestInt8")
+TEST_CASE("TosaRefMaximumEndtoEndTestInt32")
 {
     ElementwiseBinarySimpleNoReshapeEndToEnd<DataType::Signed32>(tosaDefaultBackends,
                                                                  armnn::BinaryOperation::Maximum);
 }
 
+TEST_CASE("TosaRefMaximumEndtoEndTestInt8")
+{
+    ElementwiseBinarySimpleNoReshapeEndToEnd<DataType::QSymmS8>(tosaDefaultBackends,
+                                                                armnn::BinaryOperation::Maximum);
+}
+
+// Mul
+TEST_CASE("TosaRefMulEndtoEndTestInt32")
+{
+    ElementwiseBinarySimpleNoReshapeEndToEnd<DataType::Signed32>(tosaDefaultBackends,
+                                                                 armnn::BinaryOperation::Mul);
+}
+
+TEST_CASE("TosaRefMulEndtoEndTestInt8")
+{
+    ElementwiseBinarySimpleNoReshapeEndToEnd<DataType::QSymmS8>(tosaDefaultBackends,
+                                                                armnn::BinaryOperation::Mul);
+}
+
+// Sub
+TEST_CASE("TosaRefSubEndtoEndTestInt32")
+{
+    ElementwiseBinarySimpleNoReshapeEndToEnd<DataType::Signed32>(tosaDefaultBackends,
+                                                                 armnn::BinaryOperation::Sub);
+}
+
+TEST_CASE("TosaRefSubEndtoEndTestInt8")
+{
+    ElementwiseBinarySimpleNoReshapeEndToEnd<DataType::QSymmS8>(tosaDefaultBackends,
+                                                                armnn::BinaryOperation::Sub);
+}
+
 // Pooling
 // Average Pool 2D
 TEST_CASE("TosaRefAvgPool2DEndtoEndTestFloat32")
```
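
Putting the pieces together: for an int8 Sub, the basic block serialized by the mapping above contains four operators rather than one. A rough sketch of the resulting operator chain follows (not output generated by the patch; the tensor names are illustrative, since the real intermediate names come from GetUniqueTosaMappingID()):

```cpp
// Prints the operator chain the int8 Sub mapping produces:
// two input RESCALEs, the int32 SUB, and one output RESCALE.
#include <iostream>
#include <string>
#include <vector>

struct OpDesc { std::string op, in0, in1, out; };

int main()
{
    const std::vector<OpDesc> block = {
        {"RESCALE", "input_0",         "",                "intermediate0_a"}, // int8 -> int32
        {"RESCALE", "input_1",         "",                "intermediate0_b"}, // int8 -> int32
        {"SUB",     "intermediate0_a", "intermediate0_b", "intermediate0_c"}, // int32 subtract
        {"RESCALE", "intermediate0_c", "",                "output0_"},        // int32 -> int8
    };
    for (const auto& o : block)
    {
        std::cout << o.op << ": " << o.in0
                  << (o.in1.empty() ? "" : ", " + o.in1)
                  << " -> " << o.out << '\n';
    }
}
```

Mul is the exception: it consumes the int8 inputs directly, so only the trailing output RESCALE remains.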