//
// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#pragma once

#include "TestUtils.hpp"

#include <armnn_delegate.hpp>

#include <flatbuffers/flatbuffers.h>
#include <tensorflow/lite/c/common.h>
#include <tensorflow/lite/interpreter.h>
#include <tensorflow/lite/kernels/register.h>
#include <tensorflow/lite/model.h>
#include <tensorflow/lite/schema/schema_generated.h>
#include <tensorflow/lite/version.h>

#include <doctest/doctest.h>

namespace
{

template <typename T>
std::vector<char> CreateLstmTfLiteModel(tflite::TensorType tensorType,
                                        int32_t batchSize,
                                        int32_t inputSize,
                                        int32_t outputSize,
                                        int32_t numUnits,
                                        bool hasInputToInputWeights,
                                        const std::vector<T>& inputToInputWeights,
                                        const std::vector<T>& inputToForgetWeights,
                                        const std::vector<T>& inputToCellWeights,
                                        const std::vector<T>& inputToOutputWeights,
                                        bool hasRecurrentToInputWeights,
                                        const std::vector<T>& recurrentToInputWeights,
                                        const std::vector<T>& recurrentToForgetWeights,
                                        const std::vector<T>& recurrentToCellWeights,
                                        const std::vector<T>& recurrentToOutputWeights,
                                        bool hasCellToInputWeights,
                                        const std::vector<T>& cellToInputWeights,
                                        bool hasCellToForgetWeights,
                                        const std::vector<T>& cellToForgetWeights,
                                        bool hasCellToOutputWeights,
                                        const std::vector<T>& cellToOutputWeights,
                                        bool hasInputGateBias,
                                        const std::vector<T>& inputGateBias,
                                        const std::vector<T>& forgetGateBias,
                                        const std::vector<T>& cellBias,
                                        const std::vector<T>& outputGateBias,
                                        bool hasProjectionWeights,
                                        const std::vector<T>& projectionWeights,
                                        bool hasProjectionBias,
                                        const std::vector<T>& projectionBias,
                                        bool hasInputLayerNormWeights,
                                        const std::vector<T>& inputLayerNormWeights,
                                        bool hasForgetLayerNormWeights,
                                        const std::vector<T>& forgetLayerNormWeights,
                                        bool hasCellLayerNormWeights,
                                        const std::vector<T>& cellLayerNormWeights,
                                        bool hasOutputLayerNormWeights,
                                        const std::vector<T>& outputLayerNormWeights,
                                        tflite::ActivationFunctionType activationFunction,
                                        float clippingThresCell,
                                        float clippingThresProj,
                                        float quantScale = 1.0f,
                                        int quantOffset = 0,
                                        float outputQuantScale = 2.0f,
                                        int outputQuantOffset = 0)
{
    std::vector<int32_t> tensorInfo0 {};
    std::vector<int32_t> tensorInfo4 {numUnits};
    std::vector<int32_t> tensorInfo8 {numUnits, static_cast<int32_t>(2)};
    std::vector<int32_t> tensorInfo16 {numUnits, static_cast<int32_t>(4)};

    std::vector<int32_t> inputShape {batchSize, inputSize};
    std::vector<int32_t> outputShape {batchSize, outputSize};

    std::vector<int32_t> outputStateInDimensions{batchSize, outputSize};
    std::vector<int32_t> cellStateInDimensions{batchSize, numUnits};

    std::vector<int32_t> operatorInputs;

    using namespace tflite;
    flatbuffers::FlatBufferBuilder flatBufferBuilder;
    std::vector<flatbuffers::Offset<tflite::Buffer>> buffers;
    std::vector<flatbuffers::Offset<Tensor>> tensors;

    auto quantizationParameters =
        CreateQuantizationParameters(flatBufferBuilder,
                                     0,
                                     0,
                                     flatBufferBuilder.CreateVector<float>({ quantScale }),
                                     flatBufferBuilder.CreateVector<int64_t>({ quantOffset }));

    auto outputQuantizationParameters =
        CreateQuantizationParameters(flatBufferBuilder,
                                     0,
                                     0,
                                     flatBufferBuilder.CreateVector<float>({ outputQuantScale }),
                                     flatBufferBuilder.CreateVector<int64_t>({ outputQuantOffset }));

    buffers.push_back(CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({})));
    tensors.push_back(CreateTensor(flatBufferBuilder,
                                   flatBufferBuilder.CreateVector<int32_t>(inputShape.data(),
                                                                           inputShape.size()),
                                   tensorType,
                                   buffers.size() - 1,
                                   flatBufferBuilder.CreateString("input_0"),
                                   quantizationParameters));
    operatorInputs.push_back(buffers.size() - 1);

    if (hasInputToInputWeights)
    {
        buffers.push_back(
            CreateBuffer(flatBufferBuilder,
                         flatBufferBuilder.CreateVector(
                             reinterpret_cast<const uint8_t*>(inputToInputWeights.data()),
                             sizeof(T) * inputToInputWeights.size())));
        tensors.push_back(CreateTensor(flatBufferBuilder,
                                       flatBufferBuilder.CreateVector<int32_t>(tensorInfo8.data(),
                                                                               tensorInfo8.size()),
                                       tensorType,
                                       buffers.size() - 1,
                                       flatBufferBuilder.CreateString("inputToInputWeights"),
                                       outputQuantizationParameters));
        operatorInputs.push_back(buffers.size() - 1);
    }
    else
    {
        operatorInputs.push_back(kTfLiteOptionalTensor);
    }
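    // Optional inputs that are not present are encoded as kTfLiteOptionalTensor (-1)
    // in the operator's input index list; the TfLite runtime treats such entries as
    // omitted optional tensors.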
    buffers.push_back(
        CreateBuffer(flatBufferBuilder,
                     flatBufferBuilder.CreateVector(
                         reinterpret_cast<const uint8_t*>(inputToForgetWeights.data()),
                         sizeof(T) * inputToForgetWeights.size())));
    tensors.push_back(CreateTensor(flatBufferBuilder,
                                   flatBufferBuilder.CreateVector<int32_t>(tensorInfo8.data(),
                                                                           tensorInfo8.size()),
                                   tensorType,
                                   buffers.size() - 1,
                                   flatBufferBuilder.CreateString("inputToForgetWeights"),
                                   outputQuantizationParameters));
    operatorInputs.push_back(buffers.size() - 1);

    buffers.push_back(
        CreateBuffer(flatBufferBuilder,
                     flatBufferBuilder.CreateVector(
                         reinterpret_cast<const uint8_t*>(inputToCellWeights.data()),
                         sizeof(T) * inputToCellWeights.size())));
    tensors.push_back(CreateTensor(flatBufferBuilder,
                                   flatBufferBuilder.CreateVector<int32_t>(tensorInfo8.data(),
                                                                           tensorInfo8.size()),
                                   tensorType,
                                   buffers.size() - 1,
                                   flatBufferBuilder.CreateString("inputToCellWeights"),
                                   outputQuantizationParameters));
    operatorInputs.push_back(buffers.size() - 1);

    buffers.push_back(
        CreateBuffer(flatBufferBuilder,
                     flatBufferBuilder.CreateVector(
                         reinterpret_cast<const uint8_t*>(inputToOutputWeights.data()),
                         sizeof(T) * inputToOutputWeights.size())));
    tensors.push_back(CreateTensor(flatBufferBuilder,
                                   flatBufferBuilder.CreateVector<int32_t>(tensorInfo8.data(),
                                                                           tensorInfo8.size()),
                                   tensorType,
                                   buffers.size() - 1,
                                   flatBufferBuilder.CreateString("inputToOutputWeights"),
                                   outputQuantizationParameters));
    operatorInputs.push_back(buffers.size() - 1);

    if (hasRecurrentToInputWeights)
    {
        buffers.push_back(
            CreateBuffer(flatBufferBuilder,
                         flatBufferBuilder.CreateVector(
                             reinterpret_cast<const uint8_t*>(recurrentToInputWeights.data()),
                             sizeof(T) * recurrentToInputWeights.size())));
        tensors.push_back(CreateTensor(flatBufferBuilder,
                                       flatBufferBuilder.CreateVector<int32_t>(tensorInfo16.data(),
                                                                               tensorInfo16.size()),
                                       tensorType,
                                       buffers.size() - 1,
                                       flatBufferBuilder.CreateString("recurrentToInputWeights"),
                                       outputQuantizationParameters));
        operatorInputs.push_back(buffers.size() - 1);
    }
    else
    {
        operatorInputs.push_back(kTfLiteOptionalTensor);
    }

    buffers.push_back(
        CreateBuffer(flatBufferBuilder,
                     flatBufferBuilder.CreateVector(
                         reinterpret_cast<const uint8_t*>(recurrentToForgetWeights.data()),
                         sizeof(T) * recurrentToForgetWeights.size())));
    tensors.push_back(CreateTensor(flatBufferBuilder,
                                   flatBufferBuilder.CreateVector<int32_t>(tensorInfo16.data(),
                                                                           tensorInfo16.size()),
                                   tensorType,
                                   buffers.size() - 1,
                                   flatBufferBuilder.CreateString("recurrentToForgetWeights"),
                                   outputQuantizationParameters));
    operatorInputs.push_back(buffers.size() - 1);

    buffers.push_back(
        CreateBuffer(flatBufferBuilder,
                     flatBufferBuilder.CreateVector(
                         reinterpret_cast<const uint8_t*>(recurrentToCellWeights.data()),
                         sizeof(T) * recurrentToCellWeights.size())));
    tensors.push_back(CreateTensor(flatBufferBuilder,
                                   flatBufferBuilder.CreateVector<int32_t>(tensorInfo16.data(),
                                                                           tensorInfo16.size()),
                                   tensorType,
                                   buffers.size() - 1,
                                   flatBufferBuilder.CreateString("recurrentToCellWeights"),
                                   outputQuantizationParameters));
    operatorInputs.push_back(buffers.size() - 1);

    buffers.push_back(
        CreateBuffer(flatBufferBuilder,
                     flatBufferBuilder.CreateVector(
                         reinterpret_cast<const uint8_t*>(recurrentToOutputWeights.data()),
                         sizeof(T) * recurrentToOutputWeights.size())));
    tensors.push_back(CreateTensor(flatBufferBuilder,
                                   flatBufferBuilder.CreateVector<int32_t>(tensorInfo16.data(),
                                                                           tensorInfo16.size()),
                                   tensorType,
                                   buffers.size() - 1,
                                   flatBufferBuilder.CreateString("recurrentToOutputWeights"),
                                   outputQuantizationParameters));
    operatorInputs.push_back(buffers.size() - 1);
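    // The cellTo* weights below are the optional peephole connections; each is a
    // vector of shape {numUnits}.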
    if (hasCellToInputWeights)
    {
        buffers.push_back(
            CreateBuffer(flatBufferBuilder,
                         flatBufferBuilder.CreateVector(
                             reinterpret_cast<const uint8_t*>(cellToInputWeights.data()),
                             sizeof(T) * cellToInputWeights.size())));
        tensors.push_back(CreateTensor(flatBufferBuilder,
                                       flatBufferBuilder.CreateVector<int32_t>(tensorInfo4.data(),
                                                                               tensorInfo4.size()),
                                       tensorType,
                                       buffers.size() - 1,
                                       flatBufferBuilder.CreateString("cellToInputWeights"),
                                       outputQuantizationParameters));
        operatorInputs.push_back(buffers.size() - 1);
    }
    else
    {
        operatorInputs.push_back(kTfLiteOptionalTensor);
    }

    if (hasCellToForgetWeights)
    {
        buffers.push_back(
            CreateBuffer(flatBufferBuilder,
                         flatBufferBuilder.CreateVector(
                             reinterpret_cast<const uint8_t*>(cellToForgetWeights.data()),
                             sizeof(T) * cellToForgetWeights.size())));
        tensors.push_back(CreateTensor(flatBufferBuilder,
                                       flatBufferBuilder.CreateVector<int32_t>(tensorInfo4.data(),
                                                                               tensorInfo4.size()),
                                       tensorType,
                                       buffers.size() - 1,
                                       flatBufferBuilder.CreateString("cellToForgetWeights"),
                                       outputQuantizationParameters));
        operatorInputs.push_back(buffers.size() - 1);
    }
    else
    {
        operatorInputs.push_back(kTfLiteOptionalTensor);
    }

    if (hasCellToOutputWeights)
    {
        buffers.push_back(
            CreateBuffer(flatBufferBuilder,
                         flatBufferBuilder.CreateVector(
                             reinterpret_cast<const uint8_t*>(cellToOutputWeights.data()),
                             sizeof(T) * cellToOutputWeights.size())));
        tensors.push_back(CreateTensor(flatBufferBuilder,
                                       flatBufferBuilder.CreateVector<int32_t>(tensorInfo4.data(),
                                                                               tensorInfo4.size()),
                                       tensorType,
                                       buffers.size() - 1,
                                       flatBufferBuilder.CreateString("cellToOutputWeights"),
                                       outputQuantizationParameters));
        operatorInputs.push_back(buffers.size() - 1);
    }
    else
    {
        operatorInputs.push_back(kTfLiteOptionalTensor);
    }

    if (hasInputGateBias)
    {
        buffers.push_back(
            CreateBuffer(flatBufferBuilder,
                         flatBufferBuilder.CreateVector(
                             reinterpret_cast<const uint8_t*>(inputGateBias.data()),
                             sizeof(T) * inputGateBias.size())));
        tensors.push_back(CreateTensor(flatBufferBuilder,
                                       flatBufferBuilder.CreateVector<int32_t>(tensorInfo4.data(),
                                                                               tensorInfo4.size()),
                                       tensorType,
                                       buffers.size() - 1,
                                       flatBufferBuilder.CreateString("inputGateBias"),
                                       outputQuantizationParameters));
        operatorInputs.push_back(buffers.size() - 1);
    }
    else
    {
        operatorInputs.push_back(kTfLiteOptionalTensor);
    }

    buffers.push_back(
        CreateBuffer(flatBufferBuilder,
                     flatBufferBuilder.CreateVector(
                         reinterpret_cast<const uint8_t*>(forgetGateBias.data()),
                         sizeof(T) * forgetGateBias.size())));
    tensors.push_back(CreateTensor(flatBufferBuilder,
                                   flatBufferBuilder.CreateVector<int32_t>(tensorInfo4.data(),
                                                                           tensorInfo4.size()),
                                   tensorType,
                                   buffers.size() - 1,
                                   flatBufferBuilder.CreateString("forgetGateBias"),
                                   outputQuantizationParameters));
    operatorInputs.push_back(buffers.size() - 1);

    buffers.push_back(
        CreateBuffer(flatBufferBuilder,
                     flatBufferBuilder.CreateVector(
                         reinterpret_cast<const uint8_t*>(cellBias.data()),
                         sizeof(T) * cellBias.size())));
    tensors.push_back(CreateTensor(flatBufferBuilder,
                                   flatBufferBuilder.CreateVector<int32_t>(tensorInfo4.data(),
                                                                           tensorInfo4.size()),
                                   tensorType,
                                   buffers.size() - 1,
                                   flatBufferBuilder.CreateString("cellBias"),
                                   outputQuantizationParameters));
    operatorInputs.push_back(buffers.size() - 1);

    buffers.push_back(
        CreateBuffer(flatBufferBuilder,
                     flatBufferBuilder.CreateVector(
                         reinterpret_cast<const uint8_t*>(outputGateBias.data()),
                         sizeof(T) * outputGateBias.size())));
    tensors.push_back(CreateTensor(flatBufferBuilder,
                                   flatBufferBuilder.CreateVector<int32_t>(tensorInfo4.data(),
                                                                           tensorInfo4.size()),
                                   tensorType,
                                   buffers.size() - 1,
                                   flatBufferBuilder.CreateString("outputGateBias"),
                                   outputQuantizationParameters));
    operatorInputs.push_back(buffers.size() - 1);
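    // The projection layer (weights plus optional bias) is only added when the
    // test exercises an LSTM variant with projection.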
    if (hasProjectionWeights)
    {
        buffers.push_back(
            CreateBuffer(flatBufferBuilder,
                         flatBufferBuilder.CreateVector(
                             reinterpret_cast<const uint8_t*>(projectionWeights.data()),
                             sizeof(T) * projectionWeights.size())));
        tensors.push_back(CreateTensor(flatBufferBuilder,
                                       flatBufferBuilder.CreateVector<int32_t>(tensorInfo4.data(),
                                                                               tensorInfo4.size()),
                                       tensorType,
                                       buffers.size() - 1,
                                       flatBufferBuilder.CreateString("projectionWeights"),
                                       outputQuantizationParameters));
        operatorInputs.push_back(buffers.size() - 1);
    }
    else
    {
        operatorInputs.push_back(kTfLiteOptionalTensor);
    }

    if (hasProjectionBias)
    {
        buffers.push_back(
            CreateBuffer(flatBufferBuilder,
                         flatBufferBuilder.CreateVector(
                             reinterpret_cast<const uint8_t*>(projectionBias.data()),
                             sizeof(T) * projectionBias.size())));
        tensors.push_back(CreateTensor(flatBufferBuilder,
                                       flatBufferBuilder.CreateVector<int32_t>(tensorInfo4.data(),
                                                                               tensorInfo4.size()),
                                       tensorType,
                                       buffers.size() - 1,
                                       flatBufferBuilder.CreateString("projectionBias"),
                                       outputQuantizationParameters));
        operatorInputs.push_back(buffers.size() - 1);
    }
    else
    {
        operatorInputs.push_back(kTfLiteOptionalTensor);
    }

    buffers.push_back(CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({})));
    tensors.push_back(CreateTensor(flatBufferBuilder,
                                   flatBufferBuilder.CreateVector<int32_t>(outputStateInDimensions.data(),
                                                                           outputStateInDimensions.size()),
                                   tensorType,
                                   buffers.size() - 1,
                                   flatBufferBuilder.CreateString("outputStateInInfo"),
                                   outputQuantizationParameters,
                                   true)); // variable tensor
    operatorInputs.push_back(buffers.size() - 1);

    buffers.push_back(CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({})));
    tensors.push_back(CreateTensor(flatBufferBuilder,
                                   flatBufferBuilder.CreateVector<int32_t>(cellStateInDimensions.data(),
                                                                           cellStateInDimensions.size()),
                                   tensorType,
                                   buffers.size() - 1,
                                   flatBufferBuilder.CreateString("cellStateInInfo"),
                                   outputQuantizationParameters,
                                   true)); // variable tensor
    operatorInputs.push_back(buffers.size() - 1);

    // Layer normalization weights (shape {numUnits}) are only present for
    // layer-norm LSTM variants.
    if (hasInputLayerNormWeights)
    {
        buffers.push_back(
            CreateBuffer(flatBufferBuilder,
                         flatBufferBuilder.CreateVector(
                             reinterpret_cast<const uint8_t*>(inputLayerNormWeights.data()),
                             sizeof(T) * inputLayerNormWeights.size())));
        tensors.push_back(CreateTensor(flatBufferBuilder,
                                       flatBufferBuilder.CreateVector<int32_t>(tensorInfo4.data(),
                                                                               tensorInfo4.size()),
                                       tensorType,
                                       buffers.size() - 1,
                                       flatBufferBuilder.CreateString("inputLayerNormWeights"),
                                       outputQuantizationParameters));
        operatorInputs.push_back(buffers.size() - 1);
    }
    else
    {
        operatorInputs.push_back(kTfLiteOptionalTensor);
    }

    if (hasForgetLayerNormWeights)
    {
        buffers.push_back(
            CreateBuffer(flatBufferBuilder,
                         flatBufferBuilder.CreateVector(
                             reinterpret_cast<const uint8_t*>(forgetLayerNormWeights.data()),
                             sizeof(T) * forgetLayerNormWeights.size())));
        tensors.push_back(CreateTensor(flatBufferBuilder,
                                       flatBufferBuilder.CreateVector<int32_t>(tensorInfo4.data(),
                                                                               tensorInfo4.size()),
                                       tensorType,
                                       buffers.size() - 1,
                                       flatBufferBuilder.CreateString("forgetLayerNormWeights"),
                                       outputQuantizationParameters));
        operatorInputs.push_back(buffers.size() - 1);
    }
    else
    {
        operatorInputs.push_back(kTfLiteOptionalTensor);
    }

    if (hasCellLayerNormWeights)
    {
        buffers.push_back(
            CreateBuffer(flatBufferBuilder,
                         flatBufferBuilder.CreateVector(
                             reinterpret_cast<const uint8_t*>(cellLayerNormWeights.data()),
                             sizeof(T) * cellLayerNormWeights.size())));
        tensors.push_back(CreateTensor(flatBufferBuilder,
                                       flatBufferBuilder.CreateVector<int32_t>(tensorInfo4.data(),
                                                                               tensorInfo4.size()),
                                       tensorType,
                                       buffers.size() - 1,
                                       flatBufferBuilder.CreateString("cellLayerNormWeights"),
                                       outputQuantizationParameters));
        operatorInputs.push_back(buffers.size() - 1);
    }
    else
    {
        operatorInputs.push_back(kTfLiteOptionalTensor);
    }
    if (hasOutputLayerNormWeights)
    {
        buffers.push_back(
            CreateBuffer(flatBufferBuilder,
                         flatBufferBuilder.CreateVector(
                             reinterpret_cast<const uint8_t*>(outputLayerNormWeights.data()),
                             sizeof(T) * outputLayerNormWeights.size())));
        tensors.push_back(CreateTensor(flatBufferBuilder,
                                       flatBufferBuilder.CreateVector<int32_t>(tensorInfo4.data(),
                                                                               tensorInfo4.size()),
                                       tensorType,
                                       buffers.size() - 1,
                                       flatBufferBuilder.CreateString("outputLayerNormWeights"),
                                       outputQuantizationParameters));
        operatorInputs.push_back(buffers.size() - 1);
    }
    else
    {
        operatorInputs.push_back(kTfLiteOptionalTensor);
    }

    int outputBufferId = buffers.size();
    buffers.push_back(CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({})));
    tensors.push_back(CreateTensor(flatBufferBuilder,
                                   flatBufferBuilder.CreateVector<int32_t>(outputShape.data(),
                                                                           outputShape.size()),
                                   tensorType,
                                   outputBufferId,
                                   flatBufferBuilder.CreateString("output"),
                                   outputQuantizationParameters));
    std::vector<int32_t> operatorOutputs;
    operatorOutputs.push_back(buffers.size() - 1);

    // create operator
    tflite::BuiltinOptions operatorBuiltinOptionsType = BuiltinOptions_LSTMOptions;
    flatbuffers::Offset<void> operatorBuiltinOptions =
        CreateLSTMOptions(flatBufferBuilder,
                          activationFunction,
                          clippingThresCell,
                          clippingThresProj).Union();

    flatbuffers::Offset<Operator> lstmOperator =
        CreateOperator(flatBufferBuilder,
                       0,
                       flatBufferBuilder.CreateVector<int32_t>(operatorInputs.data(),
                                                               operatorInputs.size()),
                       flatBufferBuilder.CreateVector<int32_t>(operatorOutputs.data(),
                                                               operatorOutputs.size()),
                       operatorBuiltinOptionsType,
                       operatorBuiltinOptions);

    flatbuffers::Offset<SubGraph> subgraph =
        CreateSubGraph(flatBufferBuilder,
                       flatBufferBuilder.CreateVector(tensors.data(), tensors.size()),
                       flatBufferBuilder.CreateVector<int32_t>(operatorInputs.data(),
                                                               operatorInputs.size()),
                       flatBufferBuilder.CreateVector<int32_t>(operatorOutputs.data(),
                                                               operatorOutputs.size()),
                       flatBufferBuilder.CreateVector(&lstmOperator, 1));

    flatbuffers::Offset<flatbuffers::String> modelDescription =
        flatBufferBuilder.CreateString("ArmnnDelegate: LSTM Operator Model");
    flatbuffers::Offset<OperatorCode> operatorCode =
        CreateOperatorCode(flatBufferBuilder, tflite::BuiltinOperator_LSTM);

    flatbuffers::Offset<Model> flatbufferModel =
        CreateModel(flatBufferBuilder,
                    TFLITE_SCHEMA_VERSION,
                    flatBufferBuilder.CreateVector(&operatorCode, 1),
                    flatBufferBuilder.CreateVector(&subgraph, 1),
                    modelDescription,
                    flatBufferBuilder.CreateVector(buffers.data(), buffers.size()));

    flatBufferBuilder.Finish(flatbufferModel);

    return std::vector<char>(flatBufferBuilder.GetBufferPointer(),
                             flatBufferBuilder.GetBufferPointer() + flatBufferBuilder.GetSize());
}
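// A minimal sanity-check sketch (not part of the original helper): the buffer
// returned by CreateLstmTfLiteModel can be validated with the verifier that
// flatbuffers generates from the TfLite schema before it is handed to an
// interpreter. For example:
//
//     std::vector<char> modelBuffer = CreateLstmTfLiteModel<float>(/* ... */);
//     flatbuffers::Verifier verifier(
//         reinterpret_cast<const uint8_t*>(modelBuffer.data()), modelBuffer.size());
//     CHECK(tflite::VerifyModelBuffer(verifier));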
template <typename T>
void LstmTestImpl(std::vector<armnn::BackendId>& backends,
                  tflite::TensorType tensorType,
                  int32_t batchSize,
                  int32_t inputSize,
                  int32_t outputSize,
                  int32_t numUnits,
                  bool hasInputToInputWeights,
                  const std::vector<T>& inputToInputWeights,
                  const std::vector<T>& inputToForgetWeights,
                  const std::vector<T>& inputToCellWeights,
                  const std::vector<T>& inputToOutputWeights,
                  bool hasRecurrentToInputWeights,
                  const std::vector<T>& recurrentToInputWeights,
                  const std::vector<T>& recurrentToForgetWeights,
                  const std::vector<T>& recurrentToCellWeights,
                  const std::vector<T>& recurrentToOutputWeights,
                  bool hasCellToInputWeights,
                  const std::vector<T>& cellToInputWeights,
                  bool hasCellToForgetWeights,
                  const std::vector<T>& cellToForgetWeights,
                  bool hasCellToOutputWeights,
                  const std::vector<T>& cellToOutputWeights,
                  bool hasInputGateBias,
                  const std::vector<T>& inputGateBias,
                  const std::vector<T>& forgetGateBias,
                  const std::vector<T>& cellBias,
                  const std::vector<T>& outputGateBias,
                  bool hasProjectionWeights,
                  const std::vector<T>& projectionWeights,
                  bool hasProjectionBias,
                  const std::vector<T>& projectionBias,
                  bool hasInputLayerNormWeights,
                  const std::vector<T>& inputLayerNormWeights,
                  bool hasForgetLayerNormWeights,
                  const std::vector<T>& forgetLayerNormWeights,
                  bool hasCellLayerNormWeights,
                  const std::vector<T>& cellLayerNormWeights,
                  bool hasOutputLayerNormWeights,
                  const std::vector<T>& outputLayerNormWeights,
                  std::vector<T>& inputValues,
                  std::vector<T>& expectedOutputValues,
                  tflite::ActivationFunctionType activationFunction,
                  float clippingThresCell,
                  float clippingThresProj)
{
    using namespace tflite;
    std::vector<char> modelBuffer = CreateLstmTfLiteModel<T>(tensorType,
                                                             batchSize,
                                                             inputSize,
                                                             outputSize,
                                                             numUnits,
                                                             hasInputToInputWeights,
                                                             inputToInputWeights,
                                                             inputToForgetWeights,
                                                             inputToCellWeights,
                                                             inputToOutputWeights,
                                                             hasRecurrentToInputWeights,
                                                             recurrentToInputWeights,
                                                             recurrentToForgetWeights,
                                                             recurrentToCellWeights,
                                                             recurrentToOutputWeights,
                                                             hasCellToInputWeights,
                                                             cellToInputWeights,
                                                             hasCellToForgetWeights,
                                                             cellToForgetWeights,
                                                             hasCellToOutputWeights,
                                                             cellToOutputWeights,
                                                             hasInputGateBias,
                                                             inputGateBias,
                                                             forgetGateBias,
                                                             cellBias,
                                                             outputGateBias,
                                                             hasProjectionWeights,
                                                             projectionWeights,
                                                             hasProjectionBias,
                                                             projectionBias,
                                                             hasInputLayerNormWeights,
                                                             inputLayerNormWeights,
                                                             hasForgetLayerNormWeights,
                                                             forgetLayerNormWeights,
                                                             hasCellLayerNormWeights,
                                                             cellLayerNormWeights,
                                                             hasOutputLayerNormWeights,
                                                             outputLayerNormWeights,
                                                             activationFunction,
                                                             clippingThresCell,
                                                             clippingThresProj);

    const Model* tfLiteModel = GetModel(modelBuffer.data());
    // Create TfLite Interpreters
    std::unique_ptr<Interpreter> armnnDelegateInterpreter;
    CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver())
              (&armnnDelegateInterpreter) == kTfLiteOk);
    CHECK(armnnDelegateInterpreter != nullptr);
    CHECK(armnnDelegateInterpreter->AllocateTensors() == kTfLiteOk);

    std::unique_ptr<Interpreter> tfLiteInterpreter;
    CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver())
              (&tfLiteInterpreter) == kTfLiteOk);
    CHECK(tfLiteInterpreter != nullptr);
    CHECK(tfLiteInterpreter->AllocateTensors() == kTfLiteOk);

    // Create the ArmNN Delegate
    armnnDelegate::DelegateOptions delegateOptions(backends);
    std::unique_ptr<TfLiteDelegate, decltype(&armnnDelegate::TfLiteArmnnDelegateDelete)>
        theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions),
                         armnnDelegate::TfLiteArmnnDelegateDelete);
    CHECK(theArmnnDelegate != nullptr);
    // Modify armnnDelegateInterpreter to use armnnDelegate
    CHECK(armnnDelegateInterpreter->ModifyGraphWithDelegate(theArmnnDelegate.get()) == kTfLiteOk);

    // Set input data
    auto tfLiteDelegateInputId = tfLiteInterpreter->inputs()[0];
    auto tfLiteDelegateInputData = tfLiteInterpreter->typed_tensor<T>(tfLiteDelegateInputId);
    for (unsigned int i = 0; i < inputValues.size(); ++i)
    {
        tfLiteDelegateInputData[i] = inputValues[i];
    }

    auto armnnDelegateInputId = armnnDelegateInterpreter->inputs()[0];
    auto armnnDelegateInputData = armnnDelegateInterpreter->typed_tensor<T>(armnnDelegateInputId);
    for (unsigned int i = 0; i < inputValues.size(); ++i)
    {
        armnnDelegateInputData[i] = inputValues[i];
    }

    // Run EnqueueWorkload
    CHECK(tfLiteInterpreter->Invoke() == kTfLiteOk);
    CHECK(armnnDelegateInterpreter->Invoke() == kTfLiteOk);

    // Compare output data
    auto tfLiteDelegateOutputId = tfLiteInterpreter->outputs()[0];
    auto tfLiteDelegateOutputData = tfLiteInterpreter->typed_tensor<T>(tfLiteDelegateOutputId);
    auto armnnDelegateOutputId = armnnDelegateInterpreter->outputs()[0];
    auto armnnDelegateOutputData = armnnDelegateInterpreter->typed_tensor<T>(armnnDelegateOutputId);

    armnnDelegate::CompareData(expectedOutputValues.data(), armnnDelegateOutputData, expectedOutputValues.size());
    armnnDelegate::CompareData(expectedOutputValues.data(), tfLiteDelegateOutputData, expectedOutputValues.size());
    armnnDelegate::CompareData(tfLiteDelegateOutputData, armnnDelegateOutputData, expectedOutputValues.size());
}

} // anonymous namespace
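// Usage sketch (hypothetical, not from the original file): test cases are
// expected to drive LstmTestImpl<T> along these lines, supplying per-gate
// weights, biases, presence flags, input values and golden outputs. All names
// and values below are illustrative placeholders only.
//
//     TEST_CASE("LstmTest_CpuRef")
//     {
//         std::vector<armnn::BackendId> backends = { armnn::Compute::CpuRef };
//         LstmTestImpl<float>(backends,
//                             tflite::TensorType_FLOAT32,
//                             /*batchSize=*/2, /*inputSize=*/2,
//                             /*outputSize=*/4, /*numUnits=*/4,
//                             /* ...weights, biases, flags, inputs, goldens... */
//                             tflite::ActivationFunctionType_TANH,
//                             /*clippingThresCell=*/0.0f,
//                             /*clippingThresProj=*/0.0f);
//     }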