diff options
Diffstat (limited to 'src/backends/tosaCommon/operatorMappings')
23 files changed, 1381 insertions, 60 deletions
diff --git a/src/backends/tosaCommon/operatorMappings/BatchMatMulOperator.cpp b/src/backends/tosaCommon/operatorMappings/BatchMatMulOperator.cpp new file mode 100644 index 0000000000..35a00302a0 --- /dev/null +++ b/src/backends/tosaCommon/operatorMappings/BatchMatMulOperator.cpp @@ -0,0 +1,262 @@ +// +// Copyright © 2024 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// +// Copyright © 2020 The TensorFlow Authors. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +// + +#include "BatchMatMulOperator.hpp" +#include "TosaRescaleOperatorUtils.hpp" + +// This function is paraphrased from: +// tensorflow/compiler/mlir/tosa/transforms/legalize_tfl.cc from function ConvertTFLBatchMatMulOp +TosaSerializationBasicBlock* ConvertBatchMatMulToTosaOperator(const Layer* layer, + const std::vector<const TensorInfo*>& inputs, + const std::vector<const TensorInfo*>& outputs, + const BatchMatMulDescriptor* descriptor) +{ + if (descriptor->m_AdjointX || descriptor->m_AdjointY ) + { + throw Exception("Support for adjoint not implemented."); + } + if (descriptor->m_DataLayoutX != armnn::DataLayout::NCHW || descriptor->m_DataLayoutY != armnn::DataLayout::NCHW ) + { + throw Exception("MatMul only supported in the last 2 dimensions"); + } + + std::string input0Name = std::string("input_0"); + std::string input1Name = std::string("input_1"); + std::string outputName = std::string("output_0"); + std::string outputReshape0Name = std::string("intermediate0_") + GetUniqueTosaMappingID(); + std::string outputReshape1Name = std::string("intermediate0_") + GetUniqueTosaMappingID(); + std::string outputTranspose0Name = std::string("intermediate1_") + GetUniqueTosaMappingID(); + std::string outputTranspose1Name = std::string("intermediate1_") + GetUniqueTosaMappingID(); + + std::string blockName = std::string("Op_BATCHMATMUL_block_") + GetUniqueTosaMappingID(); + + // If a layer is present then the block will be used for execution, so input and output 
names need to be determined + // using the previous and following layers so the graph is connected correctly. For validation this doesn't matter. + if(layer != nullptr) + { + // Get the layer connected to the input slot and determine unique tensor names. + input0Name = GenerateUniqueInputName(layer->GetInputSlot(0)); + input1Name = GenerateUniqueInputName(layer->GetInputSlot(1)); + outputName = GenerateUniqueOutputName(*layer); + } + + // Assumes both input types are same data type + DType inputDType = ArmNNToDType(inputs[0]->GetDataType()); + bool isInputInt8 = (inputDType == DType_INT8); + bool isInputInt16 = (inputDType == DType_INT16); + + std::vector<TosaSerializationTensor*> tensors; + std::vector<TosaSerializationOperator*> operators; + + // Only add input tensors if connected layer is an input layer. + // As intermediate or constant tensors will be created separately. + // There also can't be duplicate tensor. + if(input0Name.find("input_") != std::string::npos) + { + std::vector<int32_t> inputShape0 = GetTosaTensorShape(inputs[0]->GetShape()); + tensors.push_back(new TosaSerializationTensor(input0Name, inputShape0, inputDType, {})); + } + if(input1Name.find("input_") != std::string::npos) + { + std::vector<int32_t> inputShape1 = GetTosaTensorShape(inputs[1]->GetShape()); + tensors.push_back(new TosaSerializationTensor(input1Name, inputShape1, inputDType, {})); + } + + std::string input0TransposeName = input0Name; + std::string input1TransposeName = input1Name; + std::vector<int32_t> outputShape0 = GetTosaTensorShape(outputs[0]->GetShape()); + + std::string input0MatMulName = input0Name; + std::string input1MatMulName = input1Name; + + // *** ADD OP STEPS *** + + // ADD a RESHAPE OPs if BATCH DIMS > 1 + // RESHAPE input 1 + std::vector<int32_t> targetShape0 = GetTosaTensorShape(outputs[0]->GetShape()); + std::vector<int32_t> transpose0Shape = GetTosaTensorShape(inputs[0]->GetShape()); + uint32_t input0Dimensions = inputs[0]->GetNumDimensions(); + if 
(input0Dimensions > 3) + { + uint32_t x = 1; + for (uint32_t i = 0; i < (input0Dimensions - 2); ++i) + { + x *=(inputs[0]->GetShape()[i]); + } + + targetShape0 = {static_cast<int32_t>(x), + static_cast<int32_t>(inputs[0]->GetShape()[input0Dimensions - 2]), + static_cast<int32_t>(inputs[0]->GetShape()[input0Dimensions - 1])}; + + TosaReshapeAttribute attribute(targetShape0); + + auto* input0ReshapeOp = new TosaSerializationOperator(Op_RESHAPE, + Attribute_ReshapeAttribute, + &attribute, + {input0Name}, + {outputReshape0Name}); + + operators.push_back(input0ReshapeOp); + transpose0Shape = targetShape0; + tensors.push_back(new TosaSerializationTensor(outputReshape0Name, targetShape0, inputDType, {})); + input0TransposeName = outputReshape0Name; + input0MatMulName = outputReshape0Name; + } + + // RESHAPE input 2 + std::vector<int32_t> targetShape1 = GetTosaTensorShape(outputs[0]->GetShape()); + std::vector<int32_t> transpose1Shape = GetTosaTensorShape(inputs[1]->GetShape()); + uint32_t input1Dimensions = inputs[1]->GetNumDimensions(); + if (input1Dimensions > 3) + { + uint32_t x = 1; + for (uint32_t i = 0; i < (input1Dimensions - 2); i++) + { + x *= (inputs[1]->GetShape()[i]); + } + + targetShape1 = {static_cast<int32_t>(x), + static_cast<int32_t>(inputs[1]->GetShape()[input1Dimensions - 2]), + static_cast<int32_t>(inputs[1]->GetShape()[input1Dimensions - 1])}; + + TosaReshapeAttribute attribute(targetShape1); + + auto* input1ReshapeOp = new TosaSerializationOperator(Op_RESHAPE, + Attribute_ReshapeAttribute, + &attribute, + {input1Name}, + {outputReshape1Name}); + + operators.push_back(input1ReshapeOp); + transpose1Shape = targetShape1; + tensors.push_back(new TosaSerializationTensor(outputReshape1Name, targetShape1, inputDType, {})); + input1TransposeName = outputReshape1Name; + input1MatMulName = outputReshape1Name; + } + bool needsReshape = input0Dimensions > 3 || input1Dimensions > 3; + + // ADD a TRANSPOSE OP for one/both inputs if transpose set to true + if 
(descriptor->m_TransposeX) + { + auto permuteVec = BatchMatMulDescriptor::GetPermuteVec(descriptor->m_DataLayoutX, + inputs[0]->GetShape()); + + std::vector<int32_t> mappings(permuteVec.begin(), + permuteVec.end()); + TosaTransposeAttribute transposeAttribute(mappings); + + TosaSerializationOperator *transposeOp = new TosaSerializationOperator(Op_TRANSPOSE, + Attribute_TransposeAttribute, + &transposeAttribute, + {input0TransposeName}, + {outputTranspose0Name}); + operators.push_back(transposeOp); + tensors.push_back(new TosaSerializationTensor(outputTranspose0Name, transpose0Shape, inputDType, {})); + input0MatMulName = outputTranspose0Name; + } + + if (descriptor->m_TransposeY) + { + auto permuteVec = BatchMatMulDescriptor::GetPermuteVec(descriptor->m_DataLayoutY, + inputs[1]->GetShape()); + + + std::vector<int32_t> mappings(permuteVec.begin(), + permuteVec.end()); + TosaTransposeAttribute transposeAttribute(mappings); + + TosaSerializationOperator *transposeOp = new TosaSerializationOperator(Op_TRANSPOSE, + Attribute_TransposeAttribute, + &transposeAttribute, + {input1TransposeName}, + {outputTranspose1Name}); + operators.push_back(transposeOp); + tensors.push_back(new TosaSerializationTensor(outputTranspose1Name, transpose1Shape, inputDType, {})); + input1MatMulName = outputTranspose1Name; + } + + // ADD MAT MUL layer + std::string matMulOutputStr = needsReshape || isInputInt8 || isInputInt16 ? 
+ std::string("intermediate2_") + GetUniqueTosaMappingID() : outputName; + + TosaMatMulAttribute matMulAttribute(0,0); // input0_zp, input1_zp + DType matMulOutDType = ArmNNToDType(inputs[1]->GetDataType()); + if (isInputInt8) + { + matMulAttribute = TosaMatMulAttribute(inputs[0]->GetQuantizationOffset(), inputs[1]->GetQuantizationOffset()); + matMulOutDType = DType_INT32; + } + if (isInputInt16) + { + matMulAttribute = TosaMatMulAttribute(inputs[0]->GetQuantizationOffset(), inputs[1]->GetQuantizationOffset()); + matMulOutDType = DType_INT48; + } + TosaSerializationOperator* matMulOp = new TosaSerializationOperator(Op_MATMUL, + Attribute_MatMulAttribute, + &matMulAttribute, + {input0MatMulName, input1MatMulName}, + {matMulOutputStr}); + + operators.push_back(matMulOp); + tensors.push_back(new TosaSerializationTensor(matMulOutputStr, targetShape0, matMulOutDType, {})); + + std::string outputRescale = needsReshape ? + std::string("intermediate3_") + GetUniqueTosaMappingID() : outputName; + std::string inputReshape2Name = isInputInt8 || isInputInt16 ? outputRescale : matMulOutputStr; + + // ADD Rescale layer if it is int8 + if (isInputInt8 || isInputInt16) + { + bool scale32 = isInputInt16 ? false : true; + bool doubleRound = isInputInt16 ? 
false : true; + + double scale_alpha = inputs[0]->GetQuantizationScale() / outputs[0]->GetQuantizationScale(); + int32_t input_zp = inputs[0]->GetQuantizationOffset(); + int32_t output_zp = outputs[0]->GetQuantizationOffset(); + + TosaSerializationOperator* rescaleOp = nullptr; + CreateRescaleTosaOperator(matMulOutputStr, + outputRescale, + scale_alpha, + input_zp, + output_zp, + doubleRound, + scale32, + &rescaleOp); + + tensors.push_back(new TosaSerializationTensor(outputRescale, + targetShape0, + inputDType, {})); + + operators.push_back(rescaleOp); + } + + // ADD a RESHAPE back to expected rank + if (needsReshape) + { + const std::vector<int32_t>& targetShape = GetTosaTensorShape(TensorShape(outputs[0]->GetShape())); + TosaReshapeAttribute attribute(targetShape); + + auto* outputReshapeOp = new TosaSerializationOperator(Op_RESHAPE, + Attribute_ReshapeAttribute, + &attribute, + {inputReshape2Name}, + {outputName}); + + operators.push_back(outputReshapeOp); + tensors.push_back(new TosaSerializationTensor(outputName, targetShape, inputDType, {})); + } + + return new TosaSerializationBasicBlock(blockName, // name + mainName, // region name + {operators}, // operators + tensors, // tensors + {input0Name, input1Name}, // inputs + {outputName}); // outputs +} + diff --git a/src/backends/tosaCommon/operatorMappings/BatchMatMulOperator.hpp b/src/backends/tosaCommon/operatorMappings/BatchMatMulOperator.hpp new file mode 100644 index 0000000000..0efd76f6f3 --- /dev/null +++ b/src/backends/tosaCommon/operatorMappings/BatchMatMulOperator.hpp @@ -0,0 +1,17 @@ +// +// Copyright © 2024 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "TosaOperatorUtils.hpp" + +using namespace armnn; +using namespace tosa; + +TosaSerializationBasicBlock* ConvertBatchMatMulToTosaOperator(const Layer* layer, + const std::vector<const TensorInfo*>& inputs, + const std::vector<const TensorInfo*>& outputs, + const BatchMatMulDescriptor* + descriptor = nullptr);
\ No newline at end of file diff --git a/src/backends/tosaCommon/operatorMappings/CMakeLists.txt b/src/backends/tosaCommon/operatorMappings/CMakeLists.txt index bd86958de1..40091a7896 100644 --- a/src/backends/tosaCommon/operatorMappings/CMakeLists.txt +++ b/src/backends/tosaCommon/operatorMappings/CMakeLists.txt @@ -8,8 +8,12 @@ list(APPEND armnnTosaBackendOperators_sources LeakyReluOperator.cpp ReluOperator.hpp ReluOperator.cpp + GeluOperator.hpp + GeluOperator.cpp AvgPool2DIgnoreValueOperator.hpp AvgPool2DIgnoreValueOperator.cpp + BatchMatMulOperator.hpp + BatchMatMulOperator.cpp ConcatOperator.hpp ConcatOperator.cpp ConstantOperator.hpp @@ -20,12 +24,22 @@ list(APPEND armnnTosaBackendOperators_sources DepthwiseConv2dOperator.cpp ElementwiseBinaryOperator.hpp ElementwiseBinaryOperator.cpp - ElementwiseUnaryOperator.cpp - ElementwiseUnaryOperator.hpp + FullyConnectedOperator.cpp + FullyConnectedOperator.hpp + RsqrtOperator.cpp + RsqrtOperator.hpp + ExpOperator.cpp + ExpOperator.hpp + LogOperator.cpp + LogOperator.hpp + PadOperator.cpp + PadOperator.hpp Pooling2DOperator.hpp Pooling2DOperator.cpp QuantizeOperator.hpp QuantizeOperator.cpp + ReduceOperator.hpp + ReduceOperator.cpp ReshapeOperator.hpp ReshapeOperator.cpp ResizeOperator.hpp diff --git a/src/backends/tosaCommon/operatorMappings/ElementwiseUnaryOperator.hpp b/src/backends/tosaCommon/operatorMappings/ElementwiseUnaryOperator.hpp deleted file mode 100644 index 635abd6f3c..0000000000 --- a/src/backends/tosaCommon/operatorMappings/ElementwiseUnaryOperator.hpp +++ /dev/null @@ -1,16 +0,0 @@ -// -// Copyright © 2023 Arm Ltd and Contributors. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "TosaOperatorUtils.hpp" - -using namespace armnn; -using namespace tosa; - -TosaSerializationBasicBlock* ConvertElementwiseUnaryOperator(const Layer* layer, - const std::vector<const TensorInfo*>& inputs, - const std::vector<const TensorInfo*>& outputs, - const ElementwiseUnaryDescriptor* unaryDescriptor); diff --git a/src/backends/tosaCommon/operatorMappings/ExpOperator.cpp b/src/backends/tosaCommon/operatorMappings/ExpOperator.cpp new file mode 100644 index 0000000000..72b309789a --- /dev/null +++ b/src/backends/tosaCommon/operatorMappings/ExpOperator.cpp @@ -0,0 +1,118 @@ +// +// Copyright © 2024 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// +// +// Copyright © 2020 The TensorFlow Authors. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +// + +#include "ExpOperator.hpp" +#include "TosaTableUtils.hpp" + +TosaSerializationBasicBlock* ConvertExpOperator(const Layer* layer, + const std::vector<const TensorInfo*>& inputs, + const std::vector<const TensorInfo*>& outputs, + const ElementwiseUnaryDescriptor* unaryDescriptor) +{ + if (unaryDescriptor->m_Operation != UnaryOperation::Exp) + { + throw armnn::Exception("ConvertExpOperator: Unsupported elementwise unary operation in descriptor."); + } + + std::string inputName = std::string("input_"); + std::string outputName = std::string("output0_"); + std::string blockName = std::string("Op_EXP_block_") + GetUniqueTosaMappingID(); + + // If a layer is present then the block will be used for execution, so input and output names need to be determined + // using the previous and following layers so the graph is connected correctly. For validation this doesn't matter. 
+ if(layer != nullptr) + { + inputName = GenerateUniqueInputName(layer->GetInputSlot(0)); + outputName = GenerateUniqueOutputName(*layer); + } + + std::vector<TosaSerializationTensor*> tensors; + std::vector<TosaSerializationOperator*> operators; + + float input_scale = inputs[0]->GetQuantizationScale(); + float output_scale = outputs[0]->GetQuantizationScale(); + int32_t input_zp = inputs[0]->GetQuantizationOffset(); + int32_t output_zp = outputs[0]->GetQuantizationOffset(); + DataType inputDType = inputs[0]->GetDataType(); + if (inputDType == DataType::QAsymmS8 || + inputDType == DataType::QSymmS8) + { + auto exp_func = [](float x) -> float { return std::exp(x); }; + TosaTableAttribute attribute( + getTosaConst8bitTable(input_scale, input_zp, output_scale, output_zp, exp_func)); + operators.push_back(new TosaSerializationOperator(tosa::Op_TABLE, + Attribute_TableAttribute, + &attribute, + {inputName}, + {outputName})); + } + else if (inputDType == DataType::QSymmS16) + { + throw Exception("ConvertExpOperator() unsupported int 16 not implemented yet."); + // The following generates the table, tosa attribute and operator for int16 exponential. + // However, running the int16 EXP EndToEnd test causes incorrect output values. + // At the time of writing the EXP operator there is no requirement for int16 support. + // Points to enable int16 in the future: + // - TOSA specifies EXP int16 input must have int32 output + // - We potentially need a rescale after the int32 EXP output to convert back to int16. 
+ /* + auto exp_func = [](float x) -> float { return std::exp(x); }; + TosaTableAttribute attribute( + getTosaConst16bitTable<float>(input_scale, input_zp, output_scale, output_zp, exp_func)); + operators.push_back(new TosaSerializationOperator(tosa::Op_TABLE, + Attribute_TableAttribute, + &attribute, + {inputName}, + {outputName})); + */ + } + else if (inputDType == DataType::Signed32 || + inputDType == DataType::Signed64) + { + throw Exception( + "ConvertExpOperator() unsupported int 32. Only int 8 and int 16 quantized types are supported."); + } + // Floating point EXP operator + else + { + operators.push_back(new TosaSerializationOperator(tosa::Op_EXP, + Attribute_NONE, + nullptr, + {inputName}, + {outputName})); + } + + // Only add input tensor if connected layer is an input layer. + // As intermediate or constant tensors will be created separately. + // There also can't be duplicate tensor. + if(inputName.find("input_") != std::string::npos) + { + std::vector<int32_t> inputShape0 = GetTosaTensorShape(inputs[0]->GetShape()); + DType inputDType0 = ArmNNToDType(inputDType); + tensors.push_back(new TosaSerializationTensor(inputName, inputShape0, inputDType0, {})); + } + + std::vector<int32_t> outputShape0 = GetTosaTensorShape(outputs[0]->GetShape()); + + // Re-enable below line for int16 EXP support which requires int32 output in TOSA and remove second line. + // DType outputDType0 = + // (inputDType == DataType::QSymmS16) ? 
DType::DType_INT32 : ArmNNToDType(outputs[0]->GetDataType()); + DType outputDType0 = ArmNNToDType(outputs[0]->GetDataType()); + + tensors.push_back(new TosaSerializationTensor(outputName, outputShape0, outputDType0, {})); + + // operatorInputNames/operatorOutputNames ends up being the same as + // blockInputNames/blockOutputNames for one-to-one ArmNN to Tosa mappings + return new TosaSerializationBasicBlock(blockName, // name + mainName, // region name + operators, // operators + tensors, // tensors + {inputName}, // inputs + {outputName}); // outputs +}
\ No newline at end of file diff --git a/src/backends/tosaCommon/operatorMappings/ExpOperator.hpp b/src/backends/tosaCommon/operatorMappings/ExpOperator.hpp new file mode 100644 index 0000000000..5bdd411149 --- /dev/null +++ b/src/backends/tosaCommon/operatorMappings/ExpOperator.hpp @@ -0,0 +1,16 @@ +// +// Copyright © 2024 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "TosaOperatorUtils.hpp" + +using namespace armnn; +using namespace tosa; + +TosaSerializationBasicBlock* ConvertExpOperator(const Layer* layer, + const std::vector<const TensorInfo*>& inputs, + const std::vector<const TensorInfo*>& outputs, + const ElementwiseUnaryDescriptor* unaryDescriptor); diff --git a/src/backends/tosaCommon/operatorMappings/FullyConnectedOperator.cpp b/src/backends/tosaCommon/operatorMappings/FullyConnectedOperator.cpp new file mode 100644 index 0000000000..331a6ec54a --- /dev/null +++ b/src/backends/tosaCommon/operatorMappings/FullyConnectedOperator.cpp @@ -0,0 +1,189 @@ +// +// Copyright © 2024 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// +// Copyright © 2020 The TensorFlow Authors. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 +// + +#include <numeric> +#include "FullyConnectedOperator.hpp" +#include "TosaRescaleOperatorUtils.hpp" + + +// This function is paraphrased from: +// tensorflow/compiler/mlir/tosa/transforms/legalize_tfl.cc from function ConvertTFLFullyConnectedOp +TosaSerializationBasicBlock* ConvertFullyConnectedToTosaOperator(const Layer* layer, + const std::vector<const TensorInfo*>& inputs, + const std::vector<const TensorInfo*>& outputs, + const FullyConnectedDescriptor* fcDescriptor) +{ + std::vector<std::string> inputNames; + std::vector<std::string> fcInputNames; + std::string outputName = std::string("output0_"); + std::string blockName = std::string("Op_FULLY_CONNECTED_block_") + GetUniqueTosaMappingID(); + + DType inputDType0 = ArmNNToDType(inputs[0]->GetDataType()); + DType outputDType0 = ArmNNToDType(outputs[0]->GetDataType()); + + // Set input names for validation purposes only. + if(layer == nullptr) + { + inputNames.emplace_back("input_0"); + inputNames.emplace_back("constant_1"); + if(fcDescriptor->m_BiasEnabled) + { + inputNames.emplace_back("constant_2"); + } + } + // If a layer is present then the block will be used for execution, so input and output names need to be + // determined using the previous and following layers so the graph is connected correctly. + // For validation this doesn't matter. + else + { + // Get the layer connected to the input slot and determine unique tensor names. + for (uint32_t i = 0; i < inputs.size(); ++i) + { + std::string inputName = GenerateUniqueInputName(layer->GetInputSlot(i)); + inputNames.push_back(inputName); + } + + // Determine unique output tensor name. + outputName = GenerateUniqueOutputName(*layer); + } + + std::vector<TosaSerializationTensor*> tensors; + std::vector<TosaSerializationOperator*> operators; + + // Setup input Tensor + // Only add tensor if connected layer is an input layer. + // As intermediate or constant tensors will be created separately. 
+ // There also can't be duplicate tensors. + std::vector<int32_t> inputShape0 = GetTosaTensorShape(inputs[0]->GetShape()); + if(inputNames[0].find("input_") != std::string::npos) + { + tensors.push_back(new TosaSerializationTensor(inputNames[0], inputShape0, inputDType0, {})); + } + + // Only add input tensors if weights and bias are not constant or if running validation. + // Constant tensors will be created in the ConvertConstantToTosaOperator function. + if(!inputs[1]->IsConstant() || layer == nullptr) + { + std::vector<int32_t> inputShape1 = GetTosaTensorShape(inputs[1]->GetShape()); + DType inputDType1 = ArmNNToDType(inputs[1]->GetDataType()); + tensors.push_back(new TosaSerializationTensor(inputNames[1], inputShape1, inputDType1, {})); + } + + if(fcDescriptor->m_BiasEnabled) + { + if(!inputs[2]->IsConstant() || layer == nullptr) + { + std::vector<int32_t> inputShape2 = GetTosaTensorShape(inputs[2]->GetShape()); + DType inputDType2 = ArmNNToDType(inputs[2]->GetDataType()); + tensors.push_back(new TosaSerializationTensor(inputNames[2], inputShape2, inputDType2, {})); + } + } + else + { + // If bias is disabled, create a constant bias of 0 as three inputs are required. + std::string constantName = std::string("constant_") + GetUniqueTosaMappingID(); + + operators.push_back(new TosaSerializationOperator(Op_CONST, Attribute_NONE, nullptr, {}, {constantName})); + + const DType dType = (inputDType0 == DType_INT8) ? DType_INT32 : outputDType0; + std::vector<float> data(outputs[0]->GetShape()[1], 0); + + std::vector<uint8_t> uint8Data; + TosaSerializationHandler::ConvertF32toU8(data, uint8Data); + + tensors.push_back(new TosaSerializationTensor(constantName, + {static_cast<int32_t>(outputs[0]->GetShape()[1])}, + dType, + uint8Data)); + inputNames.emplace_back(constantName); + } + + fcInputNames = inputNames; + + // Set up Reshape operator. TOSA Fully Connected only accepts 2D rank tensors. 
+ if (inputs[0]->GetShape().GetNumDimensions() != 2) + { + uint32_t num_elems = inputs[1]->GetShape()[1]; + uint32_t num_batch = inputs[0]->GetShape().GetNumElements() / num_elems; + + std::string outputReshapeName = std::string("intermediate0_") + GetUniqueTosaMappingID(); + const std::vector<int32_t>& targetShape = {static_cast<int32_t>(num_batch), static_cast<int32_t>(num_elems)}; + TosaReshapeAttribute attribute(GetTosaTensorShape(TensorShape({num_batch, num_elems}))); + + auto* reshapeOp = new TosaSerializationOperator(Op_RESHAPE, + Attribute_ReshapeAttribute, + &attribute, + {inputNames[0]}, + {outputReshapeName}); + operators.push_back(reshapeOp); + + tensors.push_back(new TosaSerializationTensor(outputReshapeName, targetShape, inputDType0, {})); + + fcInputNames[0] = outputReshapeName; + } + + + // Setup Output Tensor + std::vector<int32_t> outputShape0 = {GetTosaTensorShape(outputs[0]->GetShape())}; + std::string fcOutputName; + bool isInputInt8 = (inputDType0 == DType_INT8); + if (isInputInt8) + { + fcOutputName = std::string("intermediate0_") + GetUniqueTosaMappingID(); + tensors.push_back(new TosaSerializationTensor(fcOutputName, outputShape0, DType_INT32, {})); + } + else + { + tensors.push_back(new TosaSerializationTensor(outputName, outputShape0, outputDType0, {})); + } + + // Set up Fully Connected operator + TosaFullyConnectedAttribute attribute(inputs[0]->GetQuantizationOffset(), // input_zp + inputs[1]->GetQuantizationOffset()); // weight_zp + + std::string& fcOutStr = isInputInt8 ? 
fcOutputName : outputName; + auto* fullyConnected_op = new TosaSerializationOperator(Op_FULLY_CONNECTED, + Attribute_FullyConnectedAttribute, + &attribute, + fcInputNames, + {fcOutStr}); + operators.push_back(fullyConnected_op); + + if (isInputInt8) + { + int32_t output_zp = outputs[0]->GetQuantizationOffset(); + double output_scale = outputs[0]->GetQuantizationScales()[0]; + double input_scale = inputs[0]->GetQuantizationScales()[0]; + const std::vector<float>& weight_scales = inputs[1]->GetQuantizationScales(); + + TosaSerializationOperator* rescaleOp = nullptr; + CreateRescaleTosaOperatorPerChannel(fcOutputName, + outputName, + 0, + output_zp, + true, + true, + input_scale, + output_scale, + weight_scales, + &rescaleOp); + operators.push_back(rescaleOp); + tensors.push_back(new TosaSerializationTensor(outputName, + outputShape0, + DType_INT8, {})); + } + + // operatorInputNames/operatorOutputNames ends up being the same as + // blockInputNames/blockOutputNames for one-to-one ArmNN to TOSA mappings + return new TosaSerializationBasicBlock(blockName, // name + mainName, // region name + operators, // operators + tensors, // tensors + inputNames, // inputs + {outputName}); // outputs +}
\ No newline at end of file diff --git a/src/backends/tosaCommon/operatorMappings/FullyConnectedOperator.hpp b/src/backends/tosaCommon/operatorMappings/FullyConnectedOperator.hpp new file mode 100644 index 0000000000..12f888d01c --- /dev/null +++ b/src/backends/tosaCommon/operatorMappings/FullyConnectedOperator.hpp @@ -0,0 +1,16 @@ +// +// Copyright © 2024 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "TosaOperatorUtils.hpp" + +using namespace armnn; +using namespace tosa; + +TosaSerializationBasicBlock* ConvertFullyConnectedToTosaOperator(const Layer* layer, + const std::vector<const TensorInfo*>& inputs, + const std::vector<const TensorInfo*>& outputs, + const FullyConnectedDescriptor* fcDescriptor); diff --git a/src/backends/tosaCommon/operatorMappings/GeluOperator.cpp b/src/backends/tosaCommon/operatorMappings/GeluOperator.cpp new file mode 100644 index 0000000000..9dd4f2ebc7 --- /dev/null +++ b/src/backends/tosaCommon/operatorMappings/GeluOperator.cpp @@ -0,0 +1,108 @@ +// +// Copyright © 2024 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// +// +// Copyright © 2020 The TensorFlow Authors. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 +// + +#include "GeluOperator.hpp" +#include "TosaTableUtils.hpp" + +#include <layers/ActivationLayer.hpp> + +// This function is paraphrased from: +// tensorflow/compiler/mlir/tosa/transforms/legalize_tfl.cc from function ConvertTFLGeluOp +TosaSerializationBasicBlock* ConvertGeluToTosaOperator(const Layer* layer, + const std::vector<const TensorInfo*>& inputs, + const std::vector<const TensorInfo*>& outputs, + const ActivationDescriptor* desc) +{ + if (inputs.size() != 1) + { + throw armnn::Exception("ConvertGeluToTosaOperator: 1 input tensors required."); + } + + if (outputs.size() != 1) + { + throw armnn::Exception("ConvertGeluToTosaOperator: 1 output tensor required."); + } + + if (desc->m_Function != ActivationFunction::Gelu) + { + throw armnn::Exception("ConvertGeluToTosaOperator ActivationDescriptor only supports function Gelu."); + } + + std::string inputName = std::string("input_"); + std::string outputName = std::string("output0_"); + std::string blockName = std::string("Op_GELU_block_") + GetUniqueTosaMappingID(); + + // If a layer is present then the block will be used for execution, so input and output names need to be determined + // using the previous and following layers so the graph is connected correctly. For validation this doesn't matter. 
+ if (layer != nullptr) + { + inputName = GenerateUniqueInputName(layer->GetInputSlot(0)); + outputName = GenerateUniqueOutputName(*layer); + } + + std::vector<TosaSerializationTensor*> tensors; + std::vector<TosaSerializationOperator*> operators; + + float input_scale = inputs[0]->GetQuantizationScale(); + float output_scale = outputs[0]->GetQuantizationScale(); + int32_t input_zp = inputs[0]->GetQuantizationOffset(); + int32_t output_zp = outputs[0]->GetQuantizationOffset(); + DataType inputDType = inputs[0]->GetDataType(); + + bool isInt8 = inputDType == DataType::QAsymmS8 || inputDType == DataType::QSymmS8; + if (isInt8) + { + auto gelu_transform = [](float in) -> float { + return 0.5f * in * std::erfc(in * static_cast<float>(-0.70710678118654752440)); + }; + + TosaTableAttribute attribute( + getTosaConst8bitTable(input_scale, input_zp, output_scale, output_zp, gelu_transform)); + operators.push_back(new TosaSerializationOperator(tosa::Op_TABLE, + Attribute_TableAttribute, + &attribute, + {inputName}, + {outputName})); + } + else if (inputDType == DataType::QSymmS16 || + inputDType == DataType::Signed32 || + inputDType == DataType::Signed64) + { + throw Exception("ConvertGeluOperator() only supports int8 quantized types."); + } + else + { + throw Exception("ConvertGeluOperator() floating point types currently unimplemented."); + } + + // Only add input tensors if connected layer is an input layer. + // As intermediate or constant tensors will be created separately. + // There also can't be duplicate tensor. 
+ std::vector<int32_t> inputShape0; + DType inputDType0 = DType::DType_UNKNOWN; + if(inputName.find("input_") != std::string::npos) + { + inputShape0 = GetTosaTensorShape(inputs[0]->GetShape()); + inputDType0 = ArmNNToDType(inputs[0]->GetDataType()); + tensors.push_back(new TosaSerializationTensor(inputName, inputShape0, inputDType0, {})); + } + + std::vector<int32_t> outputShape0 = GetTosaTensorShape(outputs[0]->GetShape()); + DType outputDType0 = ArmNNToDType(outputs[0]->GetDataType()); + tensors.push_back(new TosaSerializationTensor(outputName, outputShape0, outputDType0, {})); + + // operatorInputNames/operatorOutputNames ends up being the same as + // blockInputNames/blockOutputNames for one-to-one ArmNN to Tosa mappings + return new TosaSerializationBasicBlock(blockName, // name + mainName, // region name + operators, // operators + tensors, // tensors + {inputName}, // inputs + {outputName}); // outputs +} diff --git a/src/backends/tosaCommon/operatorMappings/GeluOperator.hpp b/src/backends/tosaCommon/operatorMappings/GeluOperator.hpp new file mode 100644 index 0000000000..30db68f114 --- /dev/null +++ b/src/backends/tosaCommon/operatorMappings/GeluOperator.hpp @@ -0,0 +1,20 @@ +// +// Copyright © 2024 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <Layer.hpp> + +#include <tosa_serialization_handler.h> + +#include "TosaOperatorUtils.hpp" + +using namespace armnn; +using namespace tosa; + +TosaSerializationBasicBlock* ConvertGeluToTosaOperator(const Layer* layer, + const std::vector<const TensorInfo*>& inputs, + const std::vector<const TensorInfo*>& outputs, + const ActivationDescriptor* activationDescriptor); diff --git a/src/backends/tosaCommon/operatorMappings/LogOperator.cpp b/src/backends/tosaCommon/operatorMappings/LogOperator.cpp new file mode 100644 index 0000000000..846950a000 --- /dev/null +++ b/src/backends/tosaCommon/operatorMappings/LogOperator.cpp @@ -0,0 +1,137 @@ +// +// Copyright © 2024 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// +// +// Copyright © 2020 The TensorFlow Authors. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +// + +#include "LogOperator.hpp" +#include "TosaTableUtils.hpp" + +TosaSerializationBasicBlock* ConvertLogOperator(const Layer* layer, + const std::vector<const TensorInfo*>& inputs, + const std::vector<const TensorInfo*>& outputs, + const ElementwiseUnaryDescriptor* unaryDescriptor) +{ + if (unaryDescriptor->m_Operation != UnaryOperation::Log) + { + throw armnn::Exception("ConvertLogOperator: Unsupported elementwise unary operation in descriptor."); + } + + std::string inputName = std::string("input_"); + std::string outputName = std::string("output0_"); + std::string blockName = std::string("Op_LOG_block_") + GetUniqueTosaMappingID(); + + // If a layer is present then the block will be used for execution, so input and output names need to be determined + // using the previous and following layers so the graph is connected correctly. For validation this doesn't matter. 
+ if(layer != nullptr) + { + inputName = GenerateUniqueInputName(layer->GetInputSlot(0)); + outputName = GenerateUniqueOutputName(*layer); + } + + std::vector<TosaSerializationTensor*> tensors; + std::vector<TosaSerializationOperator*> operators; + + float input_scale = inputs[0]->GetQuantizationScale(); + float output_scale = outputs[0]->GetQuantizationScale(); + int32_t input_zp = inputs[0]->GetQuantizationOffset(); + int32_t output_zp = outputs[0]->GetQuantizationOffset(); + DataType inputDType = inputs[0]->GetDataType(); + + if (inputDType == DataType::QAsymmS8 || + inputDType == DataType::QSymmS8) + { + const float output_min = static_cast<float>(-128 - output_zp) * output_scale; + + auto log_func = [&](float x) -> float + { + if (x <= 0.0f) + { + return output_min; + } + return std::log(x); + }; + + TosaTableAttribute attribute( + getTosaConst8bitTable(input_scale, input_zp, output_scale, output_zp, log_func)); + operators.push_back(new TosaSerializationOperator(tosa::Op_TABLE, + Attribute_TableAttribute, + &attribute, + {inputName}, + {outputName})); + } + else if (inputDType == DataType::QSymmS16) + { + throw Exception("ConvertLogOperator() unsupported int 16 not implemented yet."); + // The following generates the table, tosa attribute and operator for int16 log. + // However, running the int16 LOG EndToEnd test causes incorrect output values. + // At the time of writing the LOG operator there is no requirement for int16 support. + // Points to enable int16 in the future: + // - TOSA specifies LOG int16 input must have int32 output + // - We potentially need a rescale after the int32 LOG output to convert back to int16.
+ /* + const float output_min = (-32768 - output_zp) * static_cast<float>(output_scale); + + auto log_func = [&](float x) -> float { + if (x <= 0.0f) { + return output_min; + } + return std::log(x); + }; + + TosaTableAttribute attribute( + getTosaConst16bitTable<float>(input_scale, input_zp, output_scale, output_zp, log_func)); + operators.push_back(new TosaSerializationOperator(tosa::Op_TABLE, + Attribute_TableAttribute, + &attribute, + {inputName}, + {outputName})); + */ + } + else if (inputDType == DataType::Signed32 || + inputDType == DataType::Signed64) + { + throw Exception( + "ConvertLogOperator() unsupported int 32. Only int 8 and int 16 quantized types are supported."); + } + // Floating point LOG operator + else + { + operators.push_back(new TosaSerializationOperator(tosa::Op_LOG, + Attribute_NONE, + nullptr, + {inputName}, + {outputName})); + } + + // Only add input tensor if connected layer is an input layer. + // As intermediate or constant tensors will be created separately. + // There also can't be duplicate tensor. + if(inputName.find("input_") != std::string::npos) + { + std::vector<int32_t> inputShape0 = GetTosaTensorShape(inputs[0]->GetShape()); + DType inputDType0 = ArmNNToDType(inputDType); + tensors.push_back(new TosaSerializationTensor(inputName, inputShape0, inputDType0, {})); + } + + std::vector<int32_t> outputShape0 = GetTosaTensorShape(outputs[0]->GetShape()); + + // Re-enable below line for int16 LOG support which requires int32 output in TOSA and remove second line. + // DType outputDType0 = + // (inputDType == DataType::QSymmS16) ? 
DType::DType_INT32 : ArmNNToDType(outputs[0]->GetDataType()); + DType outputDType0 = ArmNNToDType(outputs[0]->GetDataType()); + + tensors.push_back(new TosaSerializationTensor(outputName, outputShape0, outputDType0, {})); + + // operatorInputNames/operatorOutputNames ends up being the same as + // blockInputNames/blockOutputNames for one-to-one ArmNN to Tosa mappings + return new TosaSerializationBasicBlock(blockName, // name + mainName, // region name + operators, // operators + tensors, // tensors + {inputName}, // inputs + {outputName}); // outputs +}
\ No newline at end of file diff --git a/src/backends/tosaCommon/operatorMappings/LogOperator.hpp b/src/backends/tosaCommon/operatorMappings/LogOperator.hpp new file mode 100644 index 0000000000..5946d8d621 --- /dev/null +++ b/src/backends/tosaCommon/operatorMappings/LogOperator.hpp @@ -0,0 +1,16 @@ +// +// Copyright © 2024 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "TosaOperatorUtils.hpp" + +using namespace armnn; +using namespace tosa; + +TosaSerializationBasicBlock* ConvertLogOperator(const Layer* layer, + const std::vector<const TensorInfo*>& inputs, + const std::vector<const TensorInfo*>& outputs, + const ElementwiseUnaryDescriptor* unaryDescriptor); diff --git a/src/backends/tosaCommon/operatorMappings/PadOperator.cpp b/src/backends/tosaCommon/operatorMappings/PadOperator.cpp new file mode 100644 index 0000000000..c82dcb05a5 --- /dev/null +++ b/src/backends/tosaCommon/operatorMappings/PadOperator.cpp @@ -0,0 +1,70 @@ +// +// Copyright © 2024 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "PadOperator.hpp" +#include <armnnUtils/QuantizeHelper.hpp> + +TosaSerializationBasicBlock* ConvertPadToTosaOperator(const Layer* layer, + const std::vector<const TensorInfo*>& inputs, + const std::vector<const TensorInfo*>& outputs, + const PadDescriptor* padDescriptor) +{ + std::string inputName = std::string("input_"); + std::string outputName = std::string("output0_"); + std::string blockName = std::string("Op_PAD_block_") + GetUniqueTosaMappingID(); + + // If a layer is present then the block will be used for execution, so input and output names need to be determined + // using the previous and following layers so the graph is connected correctly. For validation this doesn't matter. 
+ if(layer != nullptr) + { + inputName = GenerateUniqueInputName(layer->GetInputSlot(0)); + outputName = GenerateUniqueOutputName(*layer); + } + + std::vector<int32_t> padding; + padding.reserve(padDescriptor->m_PadList.size()); + for (size_t it = 0; it < padDescriptor->m_PadList.size(); ++it) { + padding.push_back(static_cast<int32_t>(padDescriptor->m_PadList[it].first)); + padding.push_back(static_cast<int32_t>(padDescriptor->m_PadList[it].second)); + } + + auto intPadValue = armnnUtils::SelectiveQuantize<int32_t>(padDescriptor->m_PadValue, + inputs[0]->GetQuantizationScale(), + inputs[0]->GetQuantizationOffset()); + TosaPadAttribute padAttribute(padding, intPadValue ,padDescriptor->m_PadValue); + + auto* op = new TosaSerializationOperator(Op_PAD, + Attribute_PadAttribute, + &padAttribute, + {inputName}, + {outputName}); + + std::vector<TosaSerializationTensor*> tensors; + + // Only add input tensors if connected layer is an input layer. + // As intermediate or constant tensors will be created separately. + // There also can't be duplicate tensor. + if(inputName.find("input_") != std::string::npos) + { + std::vector<int32_t> inputShape0 = GetTosaTensorShape(inputs[0]->GetShape()); + DType inputDType0 = ArmNNToDType(inputs[0]->GetDataType()); + + tensors.push_back(new TosaSerializationTensor(inputName, inputShape0, inputDType0, {})); + } + + std::vector<int32_t> outputShape0 = GetTosaTensorShape(outputs[0]->GetShape()); + DType outputDType0 = ArmNNToDType(outputs[0]->GetDataType()); + + tensors.push_back(new TosaSerializationTensor(outputName, outputShape0, outputDType0, {})); + + // operatorInputNames/operatorOutputNames ends up being the same as + // blockInputNames/blockOutputNames for one-to-one ArmNN to TOSA mappings + return new TosaSerializationBasicBlock(blockName, // name + mainName, // region name + {op}, // operators + tensors, // tensors + {inputName}, // inputs + {outputName}); // outputs +}
\ No newline at end of file diff --git a/src/backends/tosaCommon/operatorMappings/PadOperator.hpp b/src/backends/tosaCommon/operatorMappings/PadOperator.hpp new file mode 100644 index 0000000000..7844669b9c --- /dev/null +++ b/src/backends/tosaCommon/operatorMappings/PadOperator.hpp @@ -0,0 +1,16 @@ +// +// Copyright © 2024 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "TosaOperatorUtils.hpp" + +using namespace armnn; +using namespace tosa; + +TosaSerializationBasicBlock* ConvertPadToTosaOperator(const Layer* layer, + const std::vector<const TensorInfo*>& inputs, + const std::vector<const TensorInfo*>& outputs, + const PadDescriptor* padDescriptor); diff --git a/src/backends/tosaCommon/operatorMappings/QuantizeOperator.cpp b/src/backends/tosaCommon/operatorMappings/QuantizeOperator.cpp index a4d7d0ed28..6c9b565416 100644 --- a/src/backends/tosaCommon/operatorMappings/QuantizeOperator.cpp +++ b/src/backends/tosaCommon/operatorMappings/QuantizeOperator.cpp @@ -90,7 +90,7 @@ TosaSerializationBasicBlock* ConvertQuantizeToTosaOperator(const Layer* layer, tensors.push_back(zeroPointTensor); // const_scale - TosaSerializationOperator *scaleOp = nullptr; + TosaSerializationOperator* scaleOp = nullptr; TosaSerializationTensor* scaleTensor = nullptr; CreateConstTosaOperator<float>(outputNameScale, scale, @@ -138,9 +138,9 @@ TosaSerializationBasicBlock* ConvertQuantizeToTosaOperator(const Layer* layer, } else { - double scale_alpha = inputs[0]->GetQuantizationScale() / outputs[0]->GetQuantizationScale(); - int32_t input_zp = inputs[0]->GetQuantizationOffset(); - int32_t output_zp = outputs[0]->GetQuantizationOffset(); + double scale_alpha = inputs[0]->GetQuantizationScale() / outputs[0]->GetQuantizationScale(); + int32_t input_zp = inputs[0]->GetQuantizationOffset(); + int32_t output_zp = outputs[0]->GetQuantizationOffset(); TosaSerializationOperator* rescaleOp = nullptr; CreateRescaleTosaOperator(inputName, 
diff --git a/src/backends/tosaCommon/operatorMappings/ReduceOperator.cpp b/src/backends/tosaCommon/operatorMappings/ReduceOperator.cpp new file mode 100644 index 0000000000..7ce51297b0 --- /dev/null +++ b/src/backends/tosaCommon/operatorMappings/ReduceOperator.cpp @@ -0,0 +1,178 @@ +// +// Copyright © 2024 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// +// Copyright © 2020 The TensorFlow Authors. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +#include "ReduceOperator.hpp" + +#include <armnn/TypesUtils.hpp> +#include "TosaRescaleOperatorUtils.hpp" + +TosaSerializationBasicBlock* ConvertReduceToTosaOperator(const Layer* layer, + const std::vector<const TensorInfo*>& inputs, + const std::vector<const TensorInfo*>& outputs, + const ReduceDescriptor* reduceDescriptor) +{ + // Early exits + if (!inputs[0]) + { + throw armnn::Exception("Must provide a valid input tensor."); + } + + if (inputs[0]->IsQuantized() ^ outputs[0]->IsQuantized()) + { + throw armnn::Exception("Both input and output tensors must be either quantised or non-quantised data types."); + } + + if (reduceDescriptor->m_vAxis.size() > 1) + { + throw armnn::Exception("ConvertReduceOperator: Reduce Operation with multiple axes not implemented."); + } + + if (reduceDescriptor->m_vAxis.empty()) + { + throw armnn::Exception("ConvertReduceOperator: Reduce Operation with empty axis not implemented."); + } + + auto axis = static_cast<int32_t>(reduceDescriptor->m_vAxis[0]); + auto rank = static_cast<int32_t>(inputs[0]->GetNumDimensions()); + + if (axis < 0 || axis >= rank) + { + throw armnn::Exception("Axis value not within range of input shape."); + } + + // Tensor names + std::string inputName = "input_"; + + std::string outputNameRescale1 = "intermediate0_" + GetUniqueTosaMappingID(); + std::string outputNameReduce = "intermediate1_" + GetUniqueTosaMappingID(); + std::string outputNameRescale2 = "intermediate2_" + GetUniqueTosaMappingID(); + + std::string 
outputName = "output0_"; + + std::string reduceOpName = GetReduceOperationAsCString(reduceDescriptor->m_ReduceOperation); + std::string blockName = "Op_REDUCE_" + reduceOpName + "_block_" + GetUniqueTosaMappingID(); + + std::vector<int32_t> inputShape = GetTosaTensorShape(inputs[0]->GetShape()); + std::vector<int32_t> outputShape = GetTosaTensorShape(outputs[0]->GetShape()); + + if (layer) + { + inputName = GenerateUniqueInputName(layer->GetInputSlot(0)); + outputName = GenerateUniqueOutputName(*layer); + } + + std::vector<TosaSerializationTensor*> tensors; + + DType inputType = ArmNNToDType(inputs[0]->GetDataType()); + + if (inputName.substr(0, 6) == "input_") + { + tensors.emplace_back(new TosaSerializationTensor(inputName, + inputShape, + inputType, + {})); + } + + int32_t input_shift = 20; + + double input_scale = static_cast<double>(1 << input_shift) * inputs[0]->GetQuantizationScale(); + double output_scale = 1.0 / (outputs[0]->GetQuantizationScale() * static_cast<double>(1 << input_shift)); + + int32_t input_zp = 0; + int32_t output_zp = 0; + + std::vector<TosaSerializationOperator*> operators; + + // Conditional RESCALE + if (inputs[0]->IsQuantized()) + { + TosaSerializationOperator* rescaleOp1 = nullptr; + + CreateRescaleTosaOperator(inputName, + outputNameRescale1, + input_scale, + input_zp, + output_zp, + true, + true, + &rescaleOp1); + + operators.emplace_back(rescaleOp1); + + tensors.emplace_back(new TosaSerializationTensor(outputNameRescale1, + inputShape, + DType_INT32, + {})); + } + + // REDUCE + TosaAxisAttribute reduceAttribute(axis); + + switch(reduceDescriptor->m_ReduceOperation) + { + case ReduceOperation::Sum: + operators.emplace_back(new TosaSerializationOperator(Op_REDUCE_SUM, + Attribute_AxisAttribute, + &reduceAttribute, + { tensors.back()->GetName() }, + { outputNameReduce })); + break; + default: + throw armnn::Exception("ConvertReduceOperator: Reduce Operation not implemented."); + } + + std::vector<int32_t>& outputShapeReduce = 
inputShape; + outputShapeReduce[reduceDescriptor->m_vAxis[0]] = 1; + + tensors.emplace_back(new TosaSerializationTensor(outputNameReduce, + outputShapeReduce, + tensors.back()->GetDtype(), + {})); + + // Conditional RESCALE + if (inputs[0]->IsQuantized()) + { + TosaSerializationOperator* rescaleOp2 = nullptr; + + CreateRescaleTosaOperator(outputNameReduce, + outputNameRescale2, + output_scale, + output_zp, + input_zp, + true, + true, + &rescaleOp2); + + operators.push_back(rescaleOp2); + + tensors.emplace_back(new TosaSerializationTensor(outputNameRescale2, + outputShapeReduce, + inputType, + {})); + } + + // RESHAPE + TosaReshapeAttribute reshapeAttribute(GetTosaTensorShape(outputs[0]->GetShape())); + + operators.emplace_back(new TosaSerializationOperator(Op_RESHAPE, + Attribute_ReshapeAttribute, + &reshapeAttribute, + { tensors.back()->GetName() }, + { outputName })); + + tensors.emplace_back(new TosaSerializationTensor(outputName, + outputShape, + inputType, + {})); + + return new TosaSerializationBasicBlock(blockName, // name + mainName, // region name + operators, // operators + tensors, // tensors + { inputName }, // inputs + { outputName }); // outputs +} diff --git a/src/backends/tosaCommon/operatorMappings/ReduceOperator.hpp b/src/backends/tosaCommon/operatorMappings/ReduceOperator.hpp new file mode 100644 index 0000000000..cbbe297d40 --- /dev/null +++ b/src/backends/tosaCommon/operatorMappings/ReduceOperator.hpp @@ -0,0 +1,13 @@ +// +// Copyright © 2024 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "TosaOperatorUtils.hpp" + +TosaSerializationBasicBlock* ConvertReduceToTosaOperator(const Layer* layer, + const std::vector<const TensorInfo*>& inputs, + const std::vector<const TensorInfo*>& outputs, + const ReduceDescriptor* reduceDescriptor); diff --git a/src/backends/tosaCommon/operatorMappings/ReluOperator.cpp b/src/backends/tosaCommon/operatorMappings/ReluOperator.cpp index bd1a59670e..541b39cd8d 100644 --- a/src/backends/tosaCommon/operatorMappings/ReluOperator.cpp +++ b/src/backends/tosaCommon/operatorMappings/ReluOperator.cpp @@ -17,7 +17,7 @@ TosaSerializationBasicBlock* ConvertReluToTosaOperator(const Layer* layer, const std::vector<const TensorInfo*>& inputs, const std::vector<const TensorInfo*>& outputs, - const ActivationDescriptor*) + const ActivationDescriptor* desc) { if (inputs.size() != 1) { @@ -31,7 +31,36 @@ TosaSerializationBasicBlock* ConvertReluToTosaOperator(const Layer* layer, std::string inputName = std::string("input_"); std::string outputName = std::string("output0_"); - std::string blockName = std::string("Op_RELU_block_") + GetUniqueTosaMappingID(); + std::string blockName = ""; + + int32_t clamp_min = 0; + int32_t clamp_max = 0; + float float_max = 0.0f; + switch (desc->m_Function) + { + case ActivationFunction::ReLu: + { + clamp_max = std::numeric_limits<int32_t>::max(); + float_max = std::numeric_limits<float>::max(); + blockName = std::string("Op_RELU_block_") + GetUniqueTosaMappingID(); + break; + } + case ActivationFunction::BoundedReLu: + { + clamp_max = static_cast<int32_t>(desc->m_A); + float_max = desc->m_A; + blockName = std::string("Op_BOUNDED_RELU_block_") + GetUniqueTosaMappingID(); + break; + } + case ActivationFunction::LeakyReLu: + { + throw Exception("LeakyRelu TOSA mappings are performed in ConvertLeakyReluToTosaOperator()."); + } + default: + { + throw Exception("Activation function is not supported in ConvertReluToTosaOperator()."); + } + } // If a 
layer is present then the block will be used for execution, so input and output names need to be determined // using the previous and following layers so the graph is connected correctly. For validation this doesn't matter. @@ -60,8 +89,6 @@ TosaSerializationBasicBlock* ConvertReluToTosaOperator(const Layer* layer, DType outputDType0 = ArmNNToDType(outputs[0]->GetDataType()); tensors.push_back(new TosaSerializationTensor(outputName, outputShape0, outputDType0, {})); - int32_t clamp_min = 0; - int32_t clamp_max = std::numeric_limits<int32_t>::max(); std::string clampInputNameStr = inputName; if (inputDType0 == tosa::DType::DType_INT8 || inputDType0 == tosa::DType::DType_INT16) { @@ -72,18 +99,26 @@ TosaSerializationBasicBlock* ConvertReluToTosaOperator(const Layer* layer, int32_t input_zp = inputs[0]->GetQuantizationOffset(); int32_t output_zp = outputs[0]->GetQuantizationOffset(); - clamp_min = outputs[0]->GetQuantizationOffset(); + clamp_min = output_zp; + + if (desc->m_Function == ActivationFunction::BoundedReLu) + { + clamp_max = static_cast<int32_t>(std::round(desc->m_A / outputs[0]->GetQuantizationScale())) + output_zp; + } + if (inputDType0 == tosa::DType::DType_INT8) { clamp_min = clamp_min < std::numeric_limits<int8_t>::min() ? std::numeric_limits<int8_t>::min() : clamp_min; - clamp_max = std::numeric_limits<int8_t>::max(); + clamp_max = + clamp_max > std::numeric_limits<int8_t>::max() ? std::numeric_limits<int8_t>::max() : clamp_max; } else { clamp_min = clamp_min < std::numeric_limits<int16_t>::min() ? std::numeric_limits<int16_t>::min() : clamp_min; - clamp_max = std::numeric_limits<int16_t>::max(); + clamp_max = + clamp_max > std::numeric_limits<int16_t>::max() ? 
std::numeric_limits<int16_t>::max() : clamp_max; } TosaSerializationOperator* rescaleOp = nullptr; @@ -101,8 +136,8 @@ TosaSerializationBasicBlock* ConvertReluToTosaOperator(const Layer* layer, inputDType0, {})); } - - TosaClampAttribute attribute(clamp_min, clamp_max, 0, std::numeric_limits<float>::max()); + + TosaClampAttribute attribute(clamp_min, clamp_max, 0, float_max); auto* clamp_op = new TosaSerializationOperator(Op_CLAMP, Attribute_ClampAttribute, &attribute, diff --git a/src/backends/tosaCommon/operatorMappings/ResizeOperator.hpp b/src/backends/tosaCommon/operatorMappings/ResizeOperator.hpp index 881e7c79ad..502fa7af09 100644 --- a/src/backends/tosaCommon/operatorMappings/ResizeOperator.hpp +++ b/src/backends/tosaCommon/operatorMappings/ResizeOperator.hpp @@ -1,5 +1,5 @@ // -// Copyright © 2023 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2023-2024 Arm Ltd and Contributors. All rights reserved. // SPDX-License-Identifier: MIT // @@ -10,7 +10,7 @@ using namespace armnn; using namespace tosa; -TosaSerializationBasicBlock* ConvertResizeToTosaOperator(const Layer* inputSize, - const std::vector<const TensorInfo*>& outputSize, - const std::vector<const TensorInfo*>& scale_n, - const ResizeDescriptor* scale_d); +TosaSerializationBasicBlock* ConvertResizeToTosaOperator(const Layer* layer, + const std::vector<const TensorInfo*>& inputs, + const std::vector<const TensorInfo*>& outputs, + const ResizeDescriptor* resizeDescriptor); diff --git a/src/backends/tosaCommon/operatorMappings/ElementwiseUnaryOperator.cpp b/src/backends/tosaCommon/operatorMappings/RsqrtOperator.cpp index d0eac0b4f4..cc1c70a663 100644 --- a/src/backends/tosaCommon/operatorMappings/ElementwiseUnaryOperator.cpp +++ b/src/backends/tosaCommon/operatorMappings/RsqrtOperator.cpp @@ -3,17 +3,21 @@ // SPDX-License-Identifier: MIT // -#include "ElementwiseUnaryOperator.hpp" +#include "RsqrtOperator.hpp" -TosaSerializationBasicBlock* ConvertElementwiseUnaryOperator(const Layer* 
layer, - const std::vector<const TensorInfo*>& inputs, - const std::vector<const TensorInfo*>& outputs, - const ElementwiseUnaryDescriptor* unaryDescriptor) +TosaSerializationBasicBlock* ConvertRsqrtOperator(const Layer* layer, + const std::vector<const TensorInfo*>& inputs, + const std::vector<const TensorInfo*>& outputs, + const ElementwiseUnaryDescriptor* unaryDescriptor) { + if (unaryDescriptor->m_Operation != UnaryOperation::Rsqrt) + { + throw armnn::Exception("ConvertRsqrtOperator: Unsupported elementwise unary operation in descriptor."); + } + std::string input0Name = std::string("input_"); std::string outputName = std::string("output0_"); - std::string blockName = std::string("Op_ELEMENTWISEUNARY_block_") + GetUniqueTosaMappingID(); - + std::string blockName = std::string("Op_RSQRT_block_") + GetUniqueTosaMappingID(); // If a layer is present then the block will be used for execution, so input and output names need to be determined // using the previous and following layers so the graph is connected correctly. For validation this doesn't matter. @@ -23,22 +27,11 @@ TosaSerializationBasicBlock* ConvertElementwiseUnaryOperator(const Layer* layer, outputName = GenerateUniqueOutputName(*layer); } - TosaSerializationOperator* op = nullptr; - switch(unaryDescriptor->m_Operation) - { - case UnaryOperation::Rsqrt: - { - op = new TosaSerializationOperator(tosa::Op_RSQRT, - Attribute_NONE, - nullptr, - {input0Name}, - {outputName}); - blockName = std::string("Op_RSQRT_block_") + GetUniqueTosaMappingID(); - break; - } - default: - throw armnn::Exception("ConvertElementwiseUnaryToTosaOperator: Unsupported layer type."); - } + auto* op = new TosaSerializationOperator(tosa::Op_RSQRT, + Attribute_NONE, + nullptr, + {input0Name}, + {outputName}); std::vector<TosaSerializationTensor*> tensors; // Only add input tensor if connected layer is an input layer. 
diff --git a/src/backends/tosaCommon/operatorMappings/RsqrtOperator.hpp b/src/backends/tosaCommon/operatorMappings/RsqrtOperator.hpp new file mode 100644 index 0000000000..7804e91598 --- /dev/null +++ b/src/backends/tosaCommon/operatorMappings/RsqrtOperator.hpp @@ -0,0 +1,16 @@ +// +// Copyright © 2023 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "TosaOperatorUtils.hpp" + +using namespace armnn; +using namespace tosa; + +TosaSerializationBasicBlock* ConvertRsqrtOperator(const Layer* layer, + const std::vector<const TensorInfo*>& inputs, + const std::vector<const TensorInfo*>& outputs, + const ElementwiseUnaryDescriptor* unaryDescriptor); diff --git a/src/backends/tosaCommon/operatorMappings/TosaCommonOperators.hpp b/src/backends/tosaCommon/operatorMappings/TosaCommonOperators.hpp index 9d7ff1e4c9..5e1d3ff4ff 100644 --- a/src/backends/tosaCommon/operatorMappings/TosaCommonOperators.hpp +++ b/src/backends/tosaCommon/operatorMappings/TosaCommonOperators.hpp @@ -6,19 +6,26 @@ #pragma once #include "ReluOperator.hpp" +#include "GeluOperator.hpp" #include "LeakyReluOperator.hpp" #include "AvgPool2DIgnoreValueOperator.hpp" +#include "BatchMatMulOperator.hpp" #include "ConcatOperator.hpp" #include "ConstantOperator.hpp" #include "Conv2dOperator.hpp" #include "DepthwiseConv2dOperator.hpp" #include "ElementwiseBinaryOperator.hpp" -#include "ElementwiseUnaryOperator.hpp" +#include "FullyConnectedOperator.hpp" +#include "PadOperator.hpp" +#include "RsqrtOperator.hpp" #include "Pooling2DOperator.hpp" #include "QuantizeOperator.hpp" +#include "ReduceOperator.hpp" #include "ReshapeOperator.hpp" #include "ResizeOperator.hpp" #include "SliceOperator.hpp" #include "SplitOperator.hpp" #include "TransposeConv2dOperator.hpp" #include "TransposeOperator.hpp" +#include "ExpOperator.hpp" +#include "LogOperator.hpp"
\ No newline at end of file diff --git a/src/backends/tosaCommon/operatorMappings/TosaTableUtils.hpp b/src/backends/tosaCommon/operatorMappings/TosaTableUtils.hpp new file mode 100644 index 0000000000..d12f0d0986 --- /dev/null +++ b/src/backends/tosaCommon/operatorMappings/TosaTableUtils.hpp @@ -0,0 +1,96 @@ +// +// Copyright © 2024 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// +// +// Copyright © 2020 The TensorFlow Authors. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +// + +#include <cfloat> +#include <vector> +#include <functional> +#include <cstdint> +#include <cmath> + + +// Abstract of getTosaConst8bitTable() function from: +// tensorflow/compiler/mlir/tosa/transforms/legalize_utils.cc +inline std::vector<int16_t> getTosaConst8bitTable(float input_scale, + int32_t input_zp, + float output_scale, + int32_t output_zp, + std::function<float(float)> func) +{ + // TosaTableAttribute requires int16 vector input. However, TOSA TABLE legalizations are performed using int8. + std::vector<int16_t> table; + table.reserve(256); + float inverse_scale = 1.0f / output_scale; + for (int32_t i = -128; i < 128; i++) + { + float dequantized = input_scale * static_cast<float>(i - input_zp); + float transformed = func(dequantized); + + float max = (output_scale > 1.0) ? 
FLT_MAX : (FLT_MAX * output_scale); + if (transformed >= max) + { + table.push_back(INT8_MAX); + continue; + } + + int32_t rescaled = static_cast<int32_t>(std::round(transformed * inverse_scale)); + int32_t quantized = static_cast<int32_t>(rescaled + output_zp); + table.push_back( + static_cast<int8_t>(std::min(std::max(quantized, -128), 127))); + } + return table; +} + +// Abstract of getTosaConst16bitTable() function from: +// tensorflow/compiler/mlir/tosa/transforms/legalize_utils.cc +template <typename FloatT> +inline std::vector<int16_t> getTosaConst16bitTable(float input_scale, + int32_t input_zp, + float output_scale, + int32_t output_zp, + std::function<FloatT(FloatT)> func) +{ + std::vector<int16_t> table; + table.reserve(513); + + FloatT input_min = + input_scale * static_cast<FloatT>(std::numeric_limits<int16_t>::min() - input_zp); + FloatT input_max = + input_scale * static_cast<FloatT>(std::numeric_limits<int16_t>::max() - input_zp); + FloatT output_min = + output_scale * static_cast<FloatT>(std::numeric_limits<int16_t>::min() - output_zp); + FloatT output_max = + output_scale * static_cast<FloatT>(std::numeric_limits<int16_t>::max() - output_zp); + + FloatT step = (input_max - input_min) / 512; + FloatT half_step = step / 2; + FloatT output_scaling_inv = 65536 / (output_max - output_min); + + for (int32_t i = 0; i < 512; i++) + { + FloatT iFloat = static_cast<FloatT>(i); + FloatT sample_val = + std::round(func(input_min + (iFloat * step)) * output_scaling_inv); + FloatT midpoint_interp_val = std::round( + ((func(input_min + (iFloat + 1) * step) * output_scaling_inv) + + std::round(func(input_min + (iFloat * step)) * output_scaling_inv)) / + 2); + FloatT midpoint_val = std::round(func(input_min + (iFloat * step) + half_step) * + output_scaling_inv); + FloatT midpoint_err = midpoint_interp_val - midpoint_val; + FloatT bias = std::round(midpoint_err / 2); + + table.push_back(static_cast<int16_t>( + std::min<FloatT>(std::max<FloatT>(sample_val - bias, 
-32768), 32767))); + } + + FloatT max_val = std::round(func(input_max) * output_scaling_inv); + table.push_back(static_cast<int16_t>( + std::min<FloatT>(std::max<FloatT>(max_val, -32768), 32767))); + return table; +}
\ No newline at end of file |