// // Copyright © 2020 Arm Ltd and Contributors. All rights reserved. // SPDX-License-Identifier: MIT // #pragma once #include #include #include #include #include #include #include #include namespace { template std::vector CreateFullyConnectedTfLiteModel(tflite::TensorType tensorType, tflite::ActivationFunctionType activationType, const std::vector & inputTensorShape, const std::vector & weightsTensorShape, const std::vector & biasTensorShape, const std::vector & outputTensorShape, const std::vector & weightsData, float quantScale = 1.0f, int quantOffset = 0, float outputQuantScale = 2.0f, int outputQuantOffset = 0) { using namespace tflite; flatbuffers::FlatBufferBuilder flatBufferBuilder; std::array, 3> buffers; buffers[0] = CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({})); buffers[1] = CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector(reinterpret_cast(weightsData.data()), sizeof(T) * weightsData.size())); auto biasTensorType = ::tflite::TensorType_FLOAT32; if (tensorType == ::tflite::TensorType_UINT8) { biasTensorType = ::tflite::TensorType_INT32; std::vector biasData = { 10 }; buffers[2] = CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector(reinterpret_cast(biasData.data()), sizeof(int32_t) * biasData.size())); } else { std::vector biasData = { 10 }; buffers[2] = CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector(reinterpret_cast(biasData.data()), sizeof(float) * biasData.size())); } auto quantizationParameters = CreateQuantizationParameters(flatBufferBuilder, 0, 0, flatBufferBuilder.CreateVector({ quantScale }), flatBufferBuilder.CreateVector({ quantOffset })); auto outputQuantizationParameters = CreateQuantizationParameters(flatBufferBuilder, 0, 0, flatBufferBuilder.CreateVector({ outputQuantScale }), flatBufferBuilder.CreateVector({ outputQuantOffset })); std::array, 4> tensors; tensors[0] = CreateTensor(flatBufferBuilder, flatBufferBuilder.CreateVector(inputTensorShape.data(), inputTensorShape.size()), tensorType, 0, flatBufferBuilder.CreateString("input_0"), quantizationParameters); tensors[1] = CreateTensor(flatBufferBuilder, flatBufferBuilder.CreateVector(weightsTensorShape.data(), weightsTensorShape.size()), tensorType, 1, flatBufferBuilder.CreateString("weights"), quantizationParameters); tensors[2] = CreateTensor(flatBufferBuilder, flatBufferBuilder.CreateVector(biasTensorShape.data(), biasTensorShape.size()), biasTensorType, 2, flatBufferBuilder.CreateString("bias"), quantizationParameters); tensors[3] = CreateTensor(flatBufferBuilder, flatBufferBuilder.CreateVector(outputTensorShape.data(), outputTensorShape.size()), tensorType, 0, flatBufferBuilder.CreateString("output"), outputQuantizationParameters); // create operator tflite::BuiltinOptions operatorBuiltinOptionsType = BuiltinOptions_FullyConnectedOptions; flatbuffers::Offset operatorBuiltinOptions = CreateFullyConnectedOptions(flatBufferBuilder, activationType, FullyConnectedOptionsWeightsFormat_DEFAULT, false).Union(); const std::vector operatorInputs{ {0, 1, 2} }; const std::vector operatorOutputs{ {3} }; flatbuffers::Offset fullyConnectedOperator = CreateOperator(flatBufferBuilder, 0, flatBufferBuilder.CreateVector(operatorInputs.data(), operatorInputs.size()), flatBufferBuilder.CreateVector(operatorOutputs.data(), operatorOutputs.size()), operatorBuiltinOptionsType, operatorBuiltinOptions); const std::vector subgraphInputs{ {0, 1, 2} }; const std::vector subgraphOutputs{ {3} }; flatbuffers::Offset subgraph = CreateSubGraph(flatBufferBuilder, flatBufferBuilder.CreateVector(tensors.data(), tensors.size()), flatBufferBuilder.CreateVector(subgraphInputs.data(), subgraphInputs.size()), flatBufferBuilder.CreateVector(subgraphOutputs.data(), subgraphOutputs.size()), flatBufferBuilder.CreateVector(&fullyConnectedOperator, 1)); flatbuffers::Offset modelDescription = flatBufferBuilder.CreateString("ArmnnDelegate: FullyConnected Operator Model"); flatbuffers::Offset operatorCode = CreateOperatorCode(flatBufferBuilder, tflite::BuiltinOperator_FULLY_CONNECTED); flatbuffers::Offset flatbufferModel = CreateModel(flatBufferBuilder, TFLITE_SCHEMA_VERSION, flatBufferBuilder.CreateVector(&operatorCode, 1), flatBufferBuilder.CreateVector(&subgraph, 1), modelDescription, flatBufferBuilder.CreateVector(buffers.data(), buffers.size())); flatBufferBuilder.Finish(flatbufferModel); return std::vector(flatBufferBuilder.GetBufferPointer(), flatBufferBuilder.GetBufferPointer() + flatBufferBuilder.GetSize()); } template void FullyConnectedTest(std::vector& backends, tflite::TensorType tensorType, tflite::ActivationFunctionType activationType, const std::vector & inputTensorShape, const std::vector & weightsTensorShape, const std::vector & biasTensorShape, const std::vector & outputTensorShape, const std::vector & inputValues, const std::vector & expectedOutputValues, const std::vector & weightsData, float quantScale = 1.0f, int quantOffset = 0) { using namespace tflite; std::vector modelBuffer = CreateFullyConnectedTfLiteModel(tensorType, activationType, inputTensorShape, weightsTensorShape, biasTensorShape, outputTensorShape, weightsData, quantScale, quantOffset); const Model* tfLiteModel = GetModel(modelBuffer.data()); // Create TfLite Interpreters std::unique_ptr armnnDelegateInterpreter; CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) (&armnnDelegateInterpreter) == kTfLiteOk); CHECK(armnnDelegateInterpreter != nullptr); CHECK(armnnDelegateInterpreter->AllocateTensors() == kTfLiteOk); std::unique_ptr tfLiteInterpreter; CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) (&tfLiteInterpreter) == kTfLiteOk); CHECK(tfLiteInterpreter != nullptr); CHECK(tfLiteInterpreter->AllocateTensors() == kTfLiteOk); // Create the ArmNN Delegate armnnDelegate::DelegateOptions delegateOptions(backends); std::unique_ptr theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions), armnnDelegate::TfLiteArmnnDelegateDelete); CHECK(theArmnnDelegate != nullptr); // Modify armnnDelegateInterpreter to use armnnDelegate CHECK(armnnDelegateInterpreter->ModifyGraphWithDelegate(theArmnnDelegate.get()) == kTfLiteOk); // Set input data auto tfLiteDelegateInputId = tfLiteInterpreter->inputs()[0]; auto tfLiteDelageInputData = tfLiteInterpreter->typed_tensor(tfLiteDelegateInputId); for (unsigned int i = 0; i < inputValues.size(); ++i) { tfLiteDelageInputData[i] = inputValues[i]; } auto armnnDelegateInputId = armnnDelegateInterpreter->inputs()[0]; auto armnnDelegateInputData = armnnDelegateInterpreter->typed_tensor(armnnDelegateInputId); for (unsigned int i = 0; i < inputValues.size(); ++i) { armnnDelegateInputData[i] = inputValues[i]; } // Run EnqueWorkload CHECK(tfLiteInterpreter->Invoke() == kTfLiteOk); CHECK(armnnDelegateInterpreter->Invoke() == kTfLiteOk); // Compare output data auto tfLiteDelegateOutputId = tfLiteInterpreter->outputs()[0]; auto tfLiteDelageOutputData = tfLiteInterpreter->typed_tensor(tfLiteDelegateOutputId); auto armnnDelegateOutputId = armnnDelegateInterpreter->outputs()[0]; auto armnnDelegateOutputData = armnnDelegateInterpreter->typed_tensor(armnnDelegateOutputId); for (size_t i = 0; i < expectedOutputValues.size(); i++) { CHECK(expectedOutputValues[i] == tfLiteDelageOutputData[i]); CHECK(expectedOutputValues[i] == armnnDelegateOutputData[i]); CHECK(tfLiteDelageOutputData[i] == armnnDelegateOutputData[i]); } } } // anonymous namespace