From 27a9bd93bd0f02c8ad866ae0fb70f1c4bb67ef0c Mon Sep 17 00:00:00 2001
From: Jim Flynn
Date: Thu, 12 Nov 2020 15:48:34 +0000
Subject: This application allows benchmarking of tflite models by providing
 the average inference time.

Usage: armnn_tfl_benchmark -m <model .tflite>
    -m --model_file <.tflite file path>: .tflite model to be executed
    -b --backend <device>:               preferred backend device to run
                                         layers on by default.
                                         Possible choices: CpuAcc, CpuRef
    -l --loops <int>:                    provide the number of times the
                                         inference will be executed
                                         (by default nb_loops=1)

Signed-off-by: Vincent ABRIOU
Signed-off-by: Jim Flynn
Change-Id: Ia26fafd4f382f0ad03856436dcae6e71b5abbd26
---
 tests/CMakeLists.txt                      |   4 +
 .../TfLiteBenchmark-Armnn.cpp             | 232 +++++++++++++++++++++
 2 files changed, 236 insertions(+)
 create mode 100644 tests/TfLiteBenchmark-Armnn/TfLiteBenchmark-Armnn.cpp

diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index edea34dfee..135f6497be 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -137,6 +137,10 @@ if (BUILD_TF_LITE_PARSER)
         addDllCopyCommands(${testName})
     endmacro()
 
+    set(TfLiteBenchmark-Armnn_sources
+        TfLiteBenchmark-Armnn/TfLiteBenchmark-Armnn.cpp)
+    TfLiteParserTest(TfLiteBenchmark-Armnn "${TfLiteBenchmark-Armnn_sources}")
+
     set(TfLiteMobilenetQuantized-Armnn_sources
         TfLiteMobilenetQuantized-Armnn/TfLiteMobilenetQuantized-Armnn.cpp
         ImagePreprocessor.hpp
diff --git a/tests/TfLiteBenchmark-Armnn/TfLiteBenchmark-Armnn.cpp b/tests/TfLiteBenchmark-Armnn/TfLiteBenchmark-Armnn.cpp
new file mode 100644
index 0000000000..a010717412
--- /dev/null
+++ b/tests/TfLiteBenchmark-Armnn/TfLiteBenchmark-Armnn.cpp
@@ -0,0 +1,232 @@
+//
+// Copyright © 2020 STMicroelectronics and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include <getopt.h>
+#include <sys/time.h>
+#include <algorithm>
+#include <iostream>
+#include <numeric>
+#include <string>
+#include <vector>
+
+#include <armnn/BackendRegistry.hpp>
+#include <armnn/Exceptions.hpp>
+#include <armnn/IRuntime.hpp>
+#include <armnn/utility/NumericCast.hpp>
+#include <armnnTfLiteParser/ITfLiteParser.hpp>
+
+// Application parameters
+std::vector<armnn::BackendId> preferred_backends_order = {armnn::Compute::CpuAcc, armnn::Compute::CpuRef};
+std::string model_file_str;
+std::string preferred_backend_str;
+int nb_loops = 1;
+
+double get_us(struct timeval t)
+{
+    return (armnn::numeric_cast<double>(t.tv_sec) *
+            armnn::numeric_cast<double>(1000000) +
+            armnn::numeric_cast<double>(t.tv_usec));
+}
+
+double get_ms(struct timeval t)
+{
+    return (armnn::numeric_cast<double>(t.tv_sec) *
+            armnn::numeric_cast<double>(1000) +
+            armnn::numeric_cast<double>(t.tv_usec) / 1000);
+}
+
+static void print_help(char** argv)
+{
+    std::cout <<
+        "Usage: " << argv[0] << " -m <model .tflite>\n"
+        "\n"
+        "-m --model_file <.tflite file path>: .tflite model to be executed\n"
+        "-b --backend <device>: preferred backend device to run layers on by default. Possible choices: "
+                               << armnn::BackendRegistryInstance().GetBackendIdsAsString() << "\n"
+        "-l --loops <int>: provide the number of times the inference will be executed\n"
+        "                  (by default nb_loops=1)\n"
+        "--help: show this help\n";
+    exit(1);
+}
+
+void process_args(int argc, char** argv)
+{
+    const char* const short_opts = "m:b:l:h";
+    const option long_opts[] = {
+        {"model_file", required_argument, nullptr, 'm'},
+        {"backend", required_argument, nullptr, 'b'},
+        {"loops", required_argument, nullptr, 'l'},
+        {"help", no_argument, nullptr, 'h'},
+        {nullptr, no_argument, nullptr, 0}
+    };
+
+    while (true)
+    {
+        const auto opt = getopt_long(argc, argv, short_opts, long_opts, nullptr);
+
+        if (-1 == opt)
+        {
+            break;
+        }
+
+        switch (opt)
+        {
+            case 'm':
+                model_file_str = std::string(optarg);
+                std::cout << "model file set to: " << model_file_str << std::endl;
+                break;
+            case 'b':
+                preferred_backend_str = std::string(optarg);
+                // Overwrite the preferred backend order
+                if (preferred_backend_str == "CpuAcc")
+                {
+                    preferred_backends_order = {armnn::Compute::CpuAcc, armnn::Compute::CpuRef};
+                }
+                else if (preferred_backend_str == "CpuRef")
+                {
+                    preferred_backends_order = {armnn::Compute::CpuRef, armnn::Compute::CpuAcc};
+                }
+
+                std::cout << "preferred backend device set to:";
+                for (unsigned int i = 0; i < preferred_backends_order.size(); i++)
+                {
+                    std::cout << " " << preferred_backends_order.at(i);
+                }
+                std::cout << std::endl;
+                break;
+            case 'l':
+                nb_loops = std::stoi(optarg);
+                std::cout << "benchmark will execute " << nb_loops << " inference(s)" << std::endl;
+                break;
+            case 'h': // -h or --help
+            case '?': // Unrecognized option
+            default:
+                print_help(argv);
+                break;
+        }
+    }
+
+    if (model_file_str.empty())
+    {
+        print_help(argv);
+    }
+}
+
+int main(int argc, char* argv[])
+{
+    std::vector<double> inferenceTimes;
+
+    // Get options
+    process_args(argc, argv);
+
+    // Create the runtime
+    armnn::IRuntime::CreationOptions options;
+    armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
+
+    // Create Parser
+    armnnTfLiteParser::ITfLiteParserPtr armnnparser(armnnTfLiteParser::ITfLiteParser::Create());
+
+    // Create a network
+    armnn::INetworkPtr network = armnnparser->CreateNetworkFromBinaryFile(model_file_str.c_str());
+    if (!network)
+    {
+        throw armnn::Exception("Failed to create an ArmNN network");
+    }
+
+    // Optimize the network
+    armnn::IOptimizedNetworkPtr optimizedNet = armnn::Optimize(*network,
+                                                               preferred_backends_order,
+                                                               runtime->GetDeviceSpec());
+    armnn::NetworkId networkId;
+
+    // Load the network into the runtime
+    runtime->LoadNetwork(networkId, std::move(optimizedNet));
+
+    // Check the number of subgraphs
+    if (armnnparser->GetSubgraphCount() != 1)
+    {
+        std::cout << "Model with more than 1 subgraph is not supported by this benchmark application.\n";
+        exit(0);
+    }
+    size_t subgraphId = 0;
+
+    // Set up the input network
+    std::cout << "\nModel information:" << std::endl;
+    std::vector<armnnTfLiteParser::BindingPointInfo> inputBindings;
+    std::vector<armnn::TensorInfo> inputTensorInfos;
+    std::vector<std::string> inputTensorNames = armnnparser->GetSubgraphInputTensorNames(subgraphId);
+    for (unsigned int i = 0; i < inputTensorNames.size() ; i++)
+    {
+        std::cout << "inputTensorNames[" << i << "] = " << inputTensorNames[i] << std::endl;
+        armnnTfLiteParser::BindingPointInfo inputBinding = armnnparser->GetNetworkInputBindingInfo(
+                                                               subgraphId,
+                                                               inputTensorNames[i]);
+        armnn::TensorInfo inputTensorInfo = runtime->GetInputTensorInfo(networkId, inputBinding.first);
+        inputBindings.push_back(inputBinding);
+        inputTensorInfos.push_back(inputTensorInfo);
+    }
+
+    // Set up the output network
+    std::vector<armnnTfLiteParser::BindingPointInfo> outputBindings;
+    std::vector<armnn::TensorInfo> outputTensorInfos;
+    std::vector<std::string> outputTensorNames = armnnparser->GetSubgraphOutputTensorNames(subgraphId);
+    for (unsigned int i = 0; i < outputTensorNames.size() ; i++)
+    {
+        std::cout << "outputTensorNames[" << i << "] = " << outputTensorNames[i] << std::endl;
+        armnnTfLiteParser::BindingPointInfo outputBinding = armnnparser->GetNetworkOutputBindingInfo(
+                                                                subgraphId,
+                                                                outputTensorNames[i]);
+        armnn::TensorInfo outputTensorInfo = runtime->GetOutputTensorInfo(networkId, outputBinding.first);
+        outputBindings.push_back(outputBinding);
+        outputTensorInfos.push_back(outputTensorInfo);
+    }
+
+    // Allocate input tensors
+    unsigned int nb_inputs = armnn::numeric_cast<unsigned int>(inputTensorInfos.size());
+    armnn::InputTensors inputTensors;
+    std::vector<std::vector<float>> in;
+    for (unsigned int i = 0 ; i < nb_inputs ; i++)
+    {
+        std::vector<float> in_data(inputTensorInfos.at(i).GetNumElements());
+        in.push_back(in_data);
+        inputTensors.push_back({ inputBindings[i].first, armnn::ConstTensor(inputBindings[i].second, in[i].data()) });
+    }
+
+    // Allocate output tensors
+    unsigned int nb_outputs = armnn::numeric_cast<unsigned int>(outputTensorInfos.size());
+    armnn::OutputTensors outputTensors;
+    std::vector<std::vector<float>> out;
+    for (unsigned int i = 0; i < nb_outputs ; i++)
+    {
+        std::vector<float> out_data(outputTensorInfos.at(i).GetNumElements());
+        out.push_back(out_data);
+        outputTensors.push_back({ outputBindings[i].first, armnn::Tensor(outputBindings[i].second, out[i].data()) });
+    }
+
+    // Run the inferences
+    std::cout << "\ninferences are running: " << std::flush;
+    for (int i = 0 ; i < nb_loops ; i++)
+    {
+        struct timeval start_time, stop_time;
+        gettimeofday(&start_time, nullptr);
+
+        runtime->EnqueueWorkload(networkId, inputTensors, outputTensors);
+
+        gettimeofday(&stop_time, nullptr);
+        inferenceTimes.push_back((get_us(stop_time) - get_us(start_time)));
+        std::cout << "# " << std::flush;
+    }
+
+    auto maxInfTime = *std::max_element(inferenceTimes.begin(), inferenceTimes.end());
+    auto minInfTime = *std::min_element(inferenceTimes.begin(), inferenceTimes.end());
+    auto avgInfTime = std::accumulate(inferenceTimes.begin(), inferenceTimes.end(), 0.0) /
+                      armnn::numeric_cast<double>(inferenceTimes.size());
+    std::cout << "\n\ninference time: ";
+    std::cout << "min=" << minInfTime << "us ";
+    std::cout << "max=" << maxInfTime << "us ";
+    std::cout << "avg=" << avgInfTime << "us" << std::endl;
+
+    return 0;
+}
-- 
cgit v1.2.1
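
For reference, following the usage text in the commit message, an illustrative invocation of the benchmark (the model path and loop count below are placeholders, not part of the patch) would be: armnn_tfl_benchmark -m mobilenet_v1.tflite -b CpuAcc -l 10. It prints the model's input and output tensor names, runs ten inferences on zero-initialized input buffers, and reports the min/max/avg inference time in microseconds.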
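
The benchmark times each EnqueueWorkload call with gettimeofday() and converts the result to microseconds via get_us(). Below is a minimal, self-contained sketch (not part of the patch) of the same measurement and min/max/avg reporting written with std::chrono::steady_clock, a monotonic clock that is not affected by wall-clock adjustments; runInference() is a hypothetical stand-in for runtime->EnqueueWorkload(networkId, inputTensors, outputTensors).

// Sketch only: times a workload with std::chrono and prints min/max/avg in us.
#include <algorithm>
#include <chrono>
#include <iostream>
#include <numeric>
#include <vector>

// Hypothetical stand-in for the ArmNN inference call being timed.
void runInference() {}

int main()
{
    const int nb_loops = 10;
    std::vector<double> inferenceTimes; // per-inference durations in microseconds

    for (int i = 0; i < nb_loops; i++)
    {
        auto start = std::chrono::steady_clock::now();
        runInference();
        auto stop = std::chrono::steady_clock::now();
        // duration<double, std::micro> mirrors the microsecond unit of get_us().
        inferenceTimes.push_back(
            std::chrono::duration<double, std::micro>(stop - start).count());
    }

    double minInfTime = *std::min_element(inferenceTimes.begin(), inferenceTimes.end());
    double maxInfTime = *std::max_element(inferenceTimes.begin(), inferenceTimes.end());
    double avgInfTime = std::accumulate(inferenceTimes.begin(), inferenceTimes.end(), 0.0) /
                        static_cast<double>(inferenceTimes.size());

    std::cout << "min=" << minInfTime << "us "
              << "max=" << maxInfTime << "us "
              << "avg=" << avgInfTime << "us" << std::endl;
    return 0;
}

For short benchmark loops both approaches typically report comparable numbers; the gettimeofday() version in the patch simply follows the original application's style.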