From 615e06f54a4c4139e81e289991ba4084aa2f69d3 Mon Sep 17 00:00:00 2001
From: Finn Williams
Date: Mon, 20 Jun 2022 13:48:20 +0100
Subject: IVGCVSW-6650 Refactor ExecuteNetwork

* Remove InferenceModel
* Add automatic IO type, shape and name configuration
* Deprecate various redundant options
* Add internal output comparison

Signed-off-by: Finn Williams
Change-Id: I2eca248bc91e1655a99ed94990efb8059f541fa9
---
 tests/ExecuteNetwork/ExecuteNetwork.cpp | 1074 ++-----------------------------
 1 file changed, 44 insertions(+), 1030 deletions(-)

(limited to 'tests/ExecuteNetwork/ExecuteNetwork.cpp')

diff --git a/tests/ExecuteNetwork/ExecuteNetwork.cpp b/tests/ExecuteNetwork/ExecuteNetwork.cpp
index 153fe5bcc7..73cbbb8162 100644
--- a/tests/ExecuteNetwork/ExecuteNetwork.cpp
+++ b/tests/ExecuteNetwork/ExecuteNetwork.cpp
@@ -1,993 +1,32 @@
 //
-// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
-#include "NetworkExecutionUtils/NetworkExecutionUtils.hpp"
 #include "ExecuteNetworkProgramOptions.hpp"
-#include
-#include
-
-#include
-#include
-#include
-#include
-#include
-
-#if defined(ARMNN_SERIALIZER)
-#include "armnnDeserializer/IDeserializer.hpp"
-#endif
-#if defined(ARMNN_TF_LITE_PARSER)
-#include "armnnTfLiteParser/ITfLiteParser.hpp"
-#endif
-#if defined(ARMNN_ONNX_PARSER)
-#include "armnnOnnxParser/IOnnxParser.hpp"
-#endif
-#if defined(ARMNN_TFLITE_DELEGATE)
-#include
-#include
-
-#include
-#include
-#include
-#include
-#include
-#include
-#include
+#include "ArmNNExecutor.hpp"
+#if defined(ARMNN_TF_LITE_DELEGATE)
+#include "TfliteExecutor.hpp"
 #endif
+#include
-#include
-
-/**
- * Given a measured duration and a threshold time tell the user whether we succeeded or not.
- *
- * @param duration the measured inference duration.
- * @param thresholdTime the threshold time in milliseconds.
- * @return false if the measured time exceeded the threshold.
- */ -bool CheckInferenceTimeThreshold(const std::chrono::duration& duration, - const double& thresholdTime) -{ - ARMNN_LOG(info) << "Inference time: " << std::setprecision(2) - << std::fixed << duration.count() << " ms\n"; - // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line - if (thresholdTime != 0.0) - { - ARMNN_LOG(info) << "Threshold time: " << std::setprecision(2) - << std::fixed << thresholdTime << " ms"; - auto thresholdMinusInference = thresholdTime - duration.count(); - ARMNN_LOG(info) << "Threshold time - Inference time: " << std::setprecision(2) - << std::fixed << thresholdMinusInference << " ms" << "\n"; - if (thresholdMinusInference < 0) - { - std::string errorMessage = "Elapsed inference time is greater than provided threshold time."; - ARMNN_LOG(fatal) << errorMessage; - return false; - } - } - return true; -} -#if defined(ARMNN_TFLITE_DELEGATE) -int TfLiteDelegateMainImpl(const ExecuteNetworkParams& params, const armnn::IRuntime::CreationOptions runtimeOptions) +std::unique_ptr BuildExecutor(ProgramOptions& programOptions) { - // Build model and corresponding interpreter - using namespace tflite; - - std::unique_ptr model = tflite::FlatBufferModel::BuildFromFile(params.m_ModelPath.c_str()); - - auto tfLiteInterpreter = std::make_unique(); - tflite::ops::builtin::BuiltinOpResolver resolver; - - tflite::InterpreterBuilder builder(*model, resolver); - builder(&tfLiteInterpreter); - tfLiteInterpreter->AllocateTensors(); - - int status = 0; - - // Create & populate Armnn Delegate, then register it to TfLiteInterpreter - if (params.m_TfLiteExecutor == ExecuteNetworkParams::TfLiteExecutor::ArmNNTfLiteDelegate) - { - // Create the Armnn Delegate - // Populate a DelegateOptions from the ExecuteNetworkParams. - armnnDelegate::DelegateOptions delegateOptions = params.ToDelegateOptions(); - delegateOptions.SetExternalProfilingParams( - arm::pipe::ConvertExternalProfilingOptions(runtimeOptions.m_ProfilingOptions)); - - std::unique_ptr - theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions), - armnnDelegate::TfLiteArmnnDelegateDelete); - // Register armnn_delegate to TfLiteInterpreter - status = tfLiteInterpreter->ModifyGraphWithDelegate(std::move(theArmnnDelegate)); - if (status != kTfLiteOk) - { - ARMNN_LOG(fatal) << "Could not register ArmNN TfLite Delegate to TfLiteInterpreter!"; - return EXIT_FAILURE; - } - } - else - { - std::cout << "Running on TfLite without ArmNN delegate\n"; - } - - const size_t numInputs = params.m_InputNames.size(); - // Populate input tensor of interpreter - for(unsigned int inputIndex = 0; inputIndex < numInputs; ++inputIndex) + if (programOptions.m_ExNetParams.m_TfLiteExecutor == ExecuteNetworkParams::TfLiteExecutor::ArmNNTfLiteDelegate || + programOptions.m_ExNetParams.m_TfLiteExecutor == ExecuteNetworkParams::TfLiteExecutor::TfliteInterpreter) { - // Load (or generate) input data for inference - armnn::Optional dataFile = params.m_GenerateTensorData ? 
armnn::EmptyOptional() : - armnn::MakeOptional(params.m_InputTensorDataFilePaths[inputIndex]); - - int input = tfLiteInterpreter->inputs()[inputIndex]; - TfLiteIntArray* inputDims = tfLiteInterpreter->tensor(input)->dims; - - unsigned int inputSize = 1; - if (params.m_InputTensorShapes.size() > 0) - { - inputSize = params.m_InputTensorShapes[inputIndex]->GetNumElements(); - } - else - { - for (unsigned int dim = 0; dim < static_cast(inputDims->size); ++dim) - { - inputSize *= inputDims->data[dim]; - } - } - - if (params.m_InputTypes[inputIndex].compare("float") == 0) - { - auto inputData = tfLiteInterpreter->typed_tensor(input); - - if(inputData == NULL) - { - ARMNN_LOG(fatal) << "Input tensor is null, input type: " - "\"" << params.m_InputTypes[inputIndex] << "\" may be incorrect."; - return EXIT_FAILURE; - } - - std::vector tensorData; - PopulateTensorWithDataGeneric(tensorData, - inputSize, - dataFile, - [](const std::string& s) - { return std::stof(s); }); - - std::copy(tensorData.begin(), tensorData.end(), inputData); - } - else if (params.m_InputTypes[inputIndex].compare("qsymms8") == 0 || - params.m_InputTypes[inputIndex].compare("qasymms8") == 0) - { - auto inputData = tfLiteInterpreter->typed_tensor(input); - - if(inputData == NULL) - { - ARMNN_LOG(fatal) << "Input tensor is null, input type: " - "\"" << params.m_InputTypes[inputIndex] << "\" may be incorrect."; - return EXIT_FAILURE; - } - - std::vector tensorData; - PopulateTensorWithDataGeneric(tensorData, - inputSize, - dataFile, - [](const std::string& s) - { return armnn::numeric_cast(std::stoi(s)); }); - - std::copy(tensorData.begin(), tensorData.end(), inputData); - } - else if (params.m_InputTypes[inputIndex].compare("int") == 0) - { - auto inputData = tfLiteInterpreter->typed_tensor(input); - - if(inputData == NULL) - { - ARMNN_LOG(fatal) << "Input tensor is null, input type: " - "\"" << params.m_InputTypes[inputIndex] << "\" may be incorrect."; - return EXIT_FAILURE; - } - - std::vector tensorData; - PopulateTensorWithDataGeneric(tensorData, - inputSize, - dataFile, - [](const std::string& s) - { return std::stoi(s); }); - - std::copy(tensorData.begin(), tensorData.end(), inputData); - } - else if (params.m_InputTypes[inputIndex].compare("qasymm8") == 0 || - params.m_InputTypes[inputIndex].compare("qasymmu8") == 0) - { - auto inputData = tfLiteInterpreter->typed_tensor(input); - - if(inputData == NULL) - { - ARMNN_LOG(fatal) << "Input tensor is null, input type: " - "\"" << params.m_InputTypes[inputIndex] << "\" may be incorrect."; - return EXIT_FAILURE; - } - - std::vector tensorData; - PopulateTensorWithDataGeneric(tensorData, - inputSize, - dataFile, - [](const std::string& s) - { return armnn::numeric_cast(std::stoi(s)); }); - - std::copy(tensorData.begin(), tensorData.end(), inputData); - } - else - { - ARMNN_LOG(fatal) << "Unsupported input tensor data type \"" << params.m_InputTypes[inputIndex] << "\". "; - return EXIT_FAILURE; - } - } - - // Run inference, print the output of the inference - for (size_t x = 0; x < params.m_Iterations; x++) - { - // Start timer to record inference time in milliseconds. - const auto start_time = armnn::GetTimeNow(); - // Run the inference - status = tfLiteInterpreter->Invoke(); - const auto duration = armnn::GetTimeDuration(start_time); - - // The TFLite interpreter's outputs might be in a different order than the user inputted output names. 
- std::map paramToTfliteOutputIndex; - for (unsigned int paramIndex = 0; paramIndex < params.m_OutputNames.size(); ++paramIndex) - { - paramToTfliteOutputIndex[paramIndex] = -1; - for (unsigned int tfLiteIndex = 0; tfLiteIndex < tfLiteInterpreter->outputs().size(); ++tfLiteIndex) - { - if (params.m_OutputNames[paramIndex] == tfLiteInterpreter->GetOutputName(tfLiteIndex)) - { - paramToTfliteOutputIndex[paramIndex] = tfLiteIndex; - } - } - } - - // Print out the output - for (unsigned int paramOutputIndex = 0; paramOutputIndex < params.m_OutputNames.size(); ++paramOutputIndex) - { - int outputIndex = paramToTfliteOutputIndex[paramOutputIndex]; - if (outputIndex == -1) - { - std::cout << fmt::format("Output name: {} doesn't exist.", params.m_OutputNames[paramOutputIndex]) << - std::endl; - continue; - } - auto tfLiteDelegateOutputId = tfLiteInterpreter->outputs()[outputIndex]; - TfLiteIntArray* outputDims = tfLiteInterpreter->tensor(tfLiteDelegateOutputId)->dims; - // If we've been asked to write to a file then set a file output stream. Otherwise use stdout. - FILE* outputTensorFile = stdout; - if (!params.m_OutputTensorFiles.empty()) - { - outputTensorFile = fopen(params.m_OutputTensorFiles[outputIndex].c_str(), "w"); - if (outputTensorFile == NULL) - { - ARMNN_LOG(fatal) << "Specified output tensor file, \"" << - params.m_OutputTensorFiles[outputIndex] << - "\", cannot be created. Defaulting to stdout. " << - "Error was: " << std::strerror(errno); - outputTensorFile = stdout; - } - else - { - ARMNN_LOG(info) << "Writing output " << outputIndex << "' of iteration: " << x+1 << " to file: '" - << params.m_OutputTensorFiles[outputIndex] << "'"; - } - } - long outputSize = 1; - for (unsigned int dim = 0; dim < static_cast(outputDims->size); ++dim) - { - outputSize *= outputDims->data[dim]; - } - - std::cout << tfLiteInterpreter->GetOutputName(outputIndex) << ": "; - if (params.m_OutputTypes[paramOutputIndex].compare("float") == 0) - { - auto tfLiteDelageOutputData = tfLiteInterpreter->typed_tensor(tfLiteDelegateOutputId); - if(tfLiteDelageOutputData == NULL) - { - ARMNN_LOG(fatal) << "Output tensor is null, output type: " - "\"" << params.m_OutputTypes[paramOutputIndex] << "\" may be incorrect."; - return EXIT_FAILURE; - } - - if (!params.m_DontPrintOutputs) - { - for (int i = 0; i < outputSize; ++i) - { - fprintf(outputTensorFile, "%f ", tfLiteDelageOutputData[i]); - } - } - } - else if (params.m_OutputTypes[paramOutputIndex].compare("int") == 0) - { - auto tfLiteDelageOutputData = tfLiteInterpreter->typed_tensor(tfLiteDelegateOutputId); - if(tfLiteDelageOutputData == NULL) - { - ARMNN_LOG(fatal) << "Output tensor is null, output type: " - "\"" << params.m_OutputTypes[paramOutputIndex] << "\" may be incorrect."; - return EXIT_FAILURE; - } - - if (!params.m_DontPrintOutputs) - { - for (int i = 0; i < outputSize; ++i) - { - fprintf(outputTensorFile, "%d ", tfLiteDelageOutputData[i]); - } - } - } - else if (params.m_OutputTypes[paramOutputIndex].compare("qsymms8") == 0 || - params.m_OutputTypes[paramOutputIndex].compare("qasymms8") == 0) - { - auto tfLiteDelageOutputData = tfLiteInterpreter->typed_tensor(tfLiteDelegateOutputId); - if(tfLiteDelageOutputData == NULL) - { - ARMNN_LOG(fatal) << "Output tensor is null, output type: " - "\"" << params.m_OutputTypes[paramOutputIndex] << "\" may be incorrect."; - return EXIT_FAILURE; - } - - if (!params.m_DontPrintOutputs) - { - for (int i = 0; i < outputSize; ++i) - { - fprintf(outputTensorFile, "%d ", tfLiteDelageOutputData[i]); - } - } - } - else if 
(params.m_OutputTypes[paramOutputIndex].compare("qasymm8") == 0 || - params.m_OutputTypes[paramOutputIndex].compare("qasymmu8") == 0) - { - auto tfLiteDelageOutputData = tfLiteInterpreter->typed_tensor(tfLiteDelegateOutputId); - if(tfLiteDelageOutputData == NULL) - { - ARMNN_LOG(fatal) << "Output tensor is null, output type: " - "\"" << params.m_OutputTypes[paramOutputIndex] << "\" may be incorrect."; - return EXIT_FAILURE; - } - - if (!params.m_DontPrintOutputs) - { - for (int i = 0; i < outputSize; ++i) - { - fprintf(outputTensorFile, "%u ", tfLiteDelageOutputData[i]); - } - } - } - else - { - ARMNN_LOG(fatal) << "Output tensor is null, output type: " - "\"" << params.m_OutputTypes[paramOutputIndex] << - "\" may be incorrect. Output type can be specified with -z argument"; - return EXIT_FAILURE; - } - std::cout << std::endl; - } - CheckInferenceTimeThreshold(duration, params.m_ThresholdTime); - } - - return status; -} +#if defined(ARMNN_TF_LITE_DELEGATE) + return std::make_unique(programOptions.m_ExNetParams); +#else + ARMNN_LOG(fatal) << "Not built with Arm NN Tensorflow-Lite delegate support."; + return nullptr; #endif -template -int MainImpl(const ExecuteNetworkParams& params, - const std::shared_ptr& runtime = nullptr) -{ - using namespace std::chrono; - - std::vector> inputs; - std::vector> outputs; - - try - { - // Creates an InferenceModel, which will parse the model and load it into an IRuntime. - typename InferenceModel::Params inferenceModelParams; - inferenceModelParams.m_ModelPath = params.m_ModelPath; - inferenceModelParams.m_AllowExpandedDims = params.m_AllowExpandedDims; - inferenceModelParams.m_IsModelBinary = params.m_IsModelBinary; - inferenceModelParams.m_ComputeDevices = params.m_ComputeDevices; - inferenceModelParams.m_DynamicBackendsPath = params.m_DynamicBackendsPath; - inferenceModelParams.m_PrintIntermediateLayers = params.m_PrintIntermediate; - inferenceModelParams.m_VisualizePostOptimizationModel = params.m_EnableLayerDetails; - inferenceModelParams.m_ParseUnsupported = params.m_ParseUnsupported; - inferenceModelParams.m_InferOutputShape = params.m_InferOutputShape; - inferenceModelParams.m_EnableFastMath = params.m_EnableFastMath; - inferenceModelParams.m_SaveCachedNetwork = params.m_SaveCachedNetwork; - inferenceModelParams.m_CachedNetworkFilePath = params.m_CachedNetworkFilePath; - inferenceModelParams.m_NumberOfThreads = params.m_NumberOfThreads; - inferenceModelParams.m_MLGOTuningFilePath = params.m_MLGOTuningFilePath; - inferenceModelParams.m_AsyncEnabled = params.m_Concurrent; - inferenceModelParams.m_ThreadPoolSize = params.m_ThreadPoolSize; - inferenceModelParams.m_OutputDetailsToStdOut = params.m_OutputDetailsToStdOut; - inferenceModelParams.m_OutputDetailsOnlyToStdOut = params.m_OutputDetailsOnlyToStdOut; - inferenceModelParams.m_ImportInputsIfAligned = params.m_ImportInputsIfAligned; - - for(const std::string& inputName: params.m_InputNames) - { - inferenceModelParams.m_InputBindings.push_back(inputName); - } - - for(unsigned int i = 0; i < params.m_InputTensorShapes.size(); ++i) - { - inferenceModelParams.m_InputShapes.push_back(*params.m_InputTensorShapes[i]); - } - - for(const std::string& outputName: params.m_OutputNames) - { - inferenceModelParams.m_OutputBindings.push_back(outputName); - } - - inferenceModelParams.m_SubgraphId = params.m_SubgraphId; - inferenceModelParams.m_EnableFp16TurboMode = params.m_EnableFp16TurboMode; - inferenceModelParams.m_EnableBf16TurboMode = params.m_EnableBf16TurboMode; - - InferenceModel 
model(inferenceModelParams, - params.m_EnableProfiling, - params.m_DynamicBackendsPath, - runtime); - - const size_t numInputs = inferenceModelParams.m_InputBindings.size(); - - armnn::Optional qParams = params.m_QuantizeInput ? - armnn::MakeOptional( - model.GetInputQuantizationParams()) : - armnn::EmptyOptional(); - - if (params.m_InputTensorDataFilePaths.size() > numInputs) - { - ARMNN_LOG(info) << "Given network has " << numInputs << " input/s. One input-tensor-data file is required " - << "for each input. The user provided " - << params.m_InputTensorDataFilePaths.size() - << " input-tensor-data file/s which will be used to fill the input/s.\n"; - } - - const size_t numOutputs = inferenceModelParams.m_OutputBindings.size(); - - // The user is allowed to specify the data type of each output tensor. It is used here to construct the - // result tensors for each iteration. It is possible for the user to specify a type that does not match - // the data type of the corresponding model output. It may not make sense, but it is historically allowed. - // The potential problem here is a buffer overrun when a larger data type is written into the space for a - // smaller one. Issue a warning to highlight the potential problem. - for (unsigned int outputIdx = 0; outputIdx < model.GetOutputBindingInfos().size(); ++outputIdx) - { - armnn::DataType type = model.GetOutputBindingInfo(outputIdx).second.GetDataType(); - switch (type) - { - // --output-type only supports float, int, qasymms8 or qasymmu8. - case armnn::DataType::Float32: - if (params.m_OutputTypes[outputIdx].compare("float") != 0) - { - ARMNN_LOG(warning) << "Model output index: " << outputIdx << " has data type Float32. The " - << "corresponding --output-type is " << params.m_OutputTypes[outputIdx] << - ". This may cause unexpected problems or random failures."; - } - break; - case armnn::DataType::QAsymmU8: - if (params.m_OutputTypes[outputIdx].compare("qasymmu8") != 0) - { - ARMNN_LOG(warning) << "Model output index: " << outputIdx << " has data type QAsymmU8. The " - << "corresponding --output-type is " << params.m_OutputTypes[outputIdx] << - ". This may cause unexpected problems or random failures."; - } - break; - case armnn::DataType::Signed32: - if (params.m_OutputTypes[outputIdx].compare("int") != 0) - { - ARMNN_LOG(warning) << "Model output index: " << outputIdx << " has data type Signed32. The " - << "corresponding --output-type is " << params.m_OutputTypes[outputIdx] << - ". This may cause unexpected problems or random failures."; - } - break; - case armnn::DataType::QAsymmS8: - if (params.m_OutputTypes[outputIdx].compare("qasymms8") != 0) - { - ARMNN_LOG(warning) << "Model output index: " << outputIdx << " has data type QAsymmS8. The " - << "corresponding --output-type is " << params.m_OutputTypes[outputIdx] << - ". This may cause unexpected problems or random failures."; - } - break; - default: - break; - } - } - - if (!params.m_ReuseBuffers) - { - for (unsigned int j = 0; j < params.m_Iterations; ++j) - { - std::vector inputDataContainers; - for (unsigned int i = 0; i < numInputs; ++i) - { - // If there are fewer input files given than required for the execution of - // params.m_Iterations we simply start with the first input file again - size_t inputFileIndex = j * numInputs + i; - if (!params.m_InputTensorDataFilePaths.empty()) - { - inputFileIndex = inputFileIndex % params.m_InputTensorDataFilePaths.size(); - } - - armnn::Optional dataFile = params.m_GenerateTensorData ? 
- armnn::EmptyOptional() : - armnn::MakeOptional( - params.m_InputTensorDataFilePaths.at( - inputFileIndex)); - - unsigned int numElements = model.GetInputSize(i); - if (params.m_InputTensorShapes.size() > i && params.m_InputTensorShapes[i]) - { - // If the user has provided a tensor shape for the current input, - // override numElements - numElements = params.m_InputTensorShapes[i]->GetNumElements(); - } - - armnnUtils::TContainer tensorData; - PopulateTensorWithData(tensorData, - numElements, - params.m_InputTypes[i], - qParams, - dataFile); - - inputDataContainers.push_back(tensorData); - } - inputs.push_back(inputDataContainers); - } - - for (unsigned int j = 0; j < params.m_Iterations; ++j) - { - std::vector outputDataContainers; - for (unsigned int i = 0; i < numOutputs; ++i) - { - if (params.m_OutputTypes[i].compare("float") == 0) - { - outputDataContainers.push_back(std::vector(model.GetOutputSize(i))); - } - else if (params.m_OutputTypes[i].compare("int") == 0) - { - outputDataContainers.push_back(std::vector(model.GetOutputSize(i))); - } - else if (params.m_OutputTypes[i].compare("qasymm8") == 0 || - params.m_OutputTypes[i].compare("qasymmu8") == 0) - { - outputDataContainers.push_back(std::vector(model.GetOutputSize(i))); - } - else if (params.m_OutputTypes[i].compare("qasymms8") == 0) - { - outputDataContainers.push_back(std::vector(model.GetOutputSize(i))); - } - else - { - ARMNN_LOG(fatal) << "Unsupported tensor data type \"" << params.m_OutputTypes[i] << "\". "; - return EXIT_FAILURE; - } - } - outputs.push_back(outputDataContainers); - } - } - if (params.m_Iterations > 1) - { - std::stringstream msg; - msg << "Network will be executed " << params.m_Iterations; - if (params.m_Concurrent) - { - msg << " times in an asynchronous manner. "; - } - else - { - msg << " times successively. "; - } - msg << "The input-tensor-data files will be reused recursively if the user didn't provide enough to " - "cover each execution."; - ARMNN_LOG(info) << msg.str(); - } - - // Synchronous execution - if (!params.m_Concurrent && !params.m_ReuseBuffers) - { - for (size_t x = 0; x < params.m_Iterations; x++) - { - // model.Run returns the inference time elapsed in EnqueueWorkload (in milliseconds) - auto inference_duration = model.Run(inputs[x], outputs[x]); - - if (params.m_GenerateTensorData) - { - ARMNN_LOG(warning) << "The input data was generated, note that the output will not be useful"; - } - if (params.m_DontPrintOutputs) - { - ARMNN_LOG(info) << "Printing outputs to console is disabled."; - } - - // Print output tensors - const auto& infosOut = model.GetOutputBindingInfos(); - for (size_t i = 0; i < numOutputs; i++) - { - const armnn::TensorInfo& infoOut = infosOut[i].second; - - // We've made sure before that the number of output files either equals numOutputs, in which - // case we override those files when processing the results of each iteration (only the result - // of the last iteration will be stored), or there are enough - // output files for each output of each iteration. - size_t outputFileIndex = x * numOutputs + i; - if (!params.m_OutputTensorFiles.empty()) - { - outputFileIndex = outputFileIndex % params.m_OutputTensorFiles.size(); - ARMNN_LOG(info) << "Writing output " << i << " named: '" - << inferenceModelParams.m_OutputBindings[i] - << "' of iteration: " << x+1 << " to file: '" - << params.m_OutputTensorFiles[outputFileIndex] << "'"; - } - auto outputTensorFile = params.m_OutputTensorFiles.empty() - ? 
"" - : params.m_OutputTensorFiles[outputFileIndex]; - - TensorPrinter printer(inferenceModelParams.m_OutputBindings[i], - infoOut, - outputTensorFile, - params.m_DequantizeOutput, - !params.m_DontPrintOutputs); - mapbox::util::apply_visitor(printer, outputs[x][i]); - } - - ARMNN_LOG(info) << "\nInference time: " << std::setprecision(2) - << std::fixed << inference_duration.count() << " ms\n"; - - // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line - if (params.m_ThresholdTime != 0.0) - { - ARMNN_LOG(info) << "Threshold time: " << std::setprecision(2) - << std::fixed << params.m_ThresholdTime << " ms"; - auto thresholdMinusInference = params.m_ThresholdTime - inference_duration.count(); - ARMNN_LOG(info) << "Threshold time - Inference time: " << std::setprecision(2) - << std::fixed << thresholdMinusInference << " ms" << "\n"; - - if (thresholdMinusInference < 0) - { - std::string errorMessage = "Elapsed inference time is greater than provided threshold time."; - ARMNN_LOG(fatal) << errorMessage; - } - } - } - } - // Synchronous Execution using a single buffer for input and output data - else if(!params.m_Concurrent) - { - std::vector input; - std::vector output; - - for (unsigned int i = 0; i < numInputs; ++i) - { - // If there are fewer input files given than required for the execution of - // params.m_Iterations we simply start with the first input file again - size_t inputFileIndex = numInputs + i; - if (!params.m_InputTensorDataFilePaths.empty()) - { - inputFileIndex = inputFileIndex % params.m_InputTensorDataFilePaths.size(); - } - - armnn::Optional dataFile = params.m_GenerateTensorData ? - armnn::EmptyOptional() : - armnn::MakeOptional( - params.m_InputTensorDataFilePaths.at( - inputFileIndex)); - - unsigned int numElements = model.GetInputSize(i); - if (params.m_InputTensorShapes.size() > i && params.m_InputTensorShapes[i]) - { - // If the user has provided a tensor shape for the current input, - // override numElements - numElements = params.m_InputTensorShapes[i]->GetNumElements(); - } - - armnnUtils::TContainer tensorData; - PopulateTensorWithData(tensorData, - numElements, - params.m_InputTypes[i], - qParams, - dataFile); - - input.push_back(tensorData); - } - - for (unsigned int i = 0; i < numOutputs; ++i) - { - if (params.m_OutputTypes[i].compare("float") == 0) - { - output.push_back(std::vector(model.GetOutputSize(i))); - } else if (params.m_OutputTypes[i].compare("int") == 0) { - output.push_back(std::vector(model.GetOutputSize(i))); - } else if (params.m_OutputTypes[i].compare("qasymm8") == 0 || - params.m_OutputTypes[i].compare("qasymmu8") == 0) - { - output.push_back(std::vector(model.GetOutputSize(i))); - } else if (params.m_OutputTypes[i].compare("qasymms8") == 0) - { - output.push_back(std::vector(model.GetOutputSize(i))); - } else { - ARMNN_LOG(fatal) << "Unsupported tensor data type \"" << params.m_OutputTypes[i] << "\". "; - return EXIT_FAILURE; - } - } - - std::vector> timings; - timings.reserve(params.m_Iterations); - for (size_t x = 0; x < params.m_Iterations; x++) - { - // model.Run returns the inference time elapsed in EnqueueWorkload (in milliseconds) - auto inference_duration = model.Run(input, output); - timings.push_back(inference_duration); - } - - if (params.m_GenerateTensorData) - { - ARMNN_LOG(warning) << "The input data was generated, note that the output will not be useful"; - } - if (params.m_DontPrintOutputs) - { - ARMNN_LOG(info) << "Printing outputs to console is disabled."; - } - - // Print output. 
This only needs to happen once as input is the same for each iteration. - const auto &infosOut = model.GetOutputBindingInfos(); - for (size_t i = 0; i < numOutputs; i++) - { - const armnn::TensorInfo &infoOut = infosOut[i].second; - - // We've made sure before that the number of output files either equals numOutputs, in which - // case we override those files when processing the results of each iteration (only the result - // of the last iteration will be stored), or there are enough - // output files for each output of each iteration. - size_t outputFileIndex = numOutputs + i; - if (!params.m_OutputTensorFiles.empty()) - { - outputFileIndex = outputFileIndex % params.m_OutputTensorFiles.size(); - ARMNN_LOG(info) << "Writing output " << i << " named: '" - << inferenceModelParams.m_OutputBindings[i] <<" to file: '" - << params.m_OutputTensorFiles[outputFileIndex] << "'"; - } - auto outputTensorFile = params.m_OutputTensorFiles.empty() - ? "" - : params.m_OutputTensorFiles[outputFileIndex]; - - TensorPrinter printer(inferenceModelParams.m_OutputBindings[i], - infoOut, - outputTensorFile, - params.m_DequantizeOutput, - !params.m_DontPrintOutputs); - mapbox::util::apply_visitor(printer, output[i]); - } - - for(auto inference: timings) - { - - ARMNN_LOG(info) << "\nInference time: " << std::setprecision(2) - << std::fixed << inference.count() << " ms\n"; - - // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line - if (params.m_ThresholdTime != 0.0) - { - ARMNN_LOG(info) << "Threshold time: " << std::setprecision(2) - << std::fixed << params.m_ThresholdTime << " ms"; - auto thresholdMinusInference = params.m_ThresholdTime - inference.count(); - ARMNN_LOG(info) << "Threshold time - Inference time: " << std::setprecision(2) - << std::fixed << thresholdMinusInference << " ms" << "\n"; - - if (thresholdMinusInference < 0) - { - std::string errorMessage = "Elapsed inference time is greater than provided threshold time."; - ARMNN_LOG(fatal) << errorMessage; - } - } - } - } - - // Asynchronous execution using the Arm NN thread pool - else if (params.m_ThreadPoolSize >= 1) - { - try - { - ARMNN_LOG(info) << "Asynchronous execution with Arm NN thread pool... 
\n"; - armnn::AsyncCallbackManager callbackManager; - std::unordered_map&> inferenceOutputMap; - - // Declare the latest and earliest inference times here to be used when calculating overall time - std::chrono::high_resolution_clock::time_point earliestStartTime; - std::chrono::high_resolution_clock::time_point latestEndTime = - std::chrono::high_resolution_clock::now(); - - // For the asynchronous execution, we are adding a pool of working memory handles (1 per thread) in the - // LoadedNetwork with each scheduled inference having a specific priority - for (size_t i = 0; i < params.m_Iterations; ++i) - { - std::shared_ptr cb = callbackManager.GetNewCallback(); - inferenceOutputMap.insert({cb->GetInferenceId(), outputs[i]}); - model.RunAsync(inputs[i], outputs[i], cb); - } - - // Check the results - unsigned int j = 0; - for (size_t iteration = 0; iteration < params.m_Iterations; ++iteration) - { - auto cb = callbackManager.GetNotifiedCallback(); - - // Get the results - auto endTime = time_point_cast(cb->GetEndTime()); - auto startTime = time_point_cast(cb->GetStartTime()); - auto inferenceDuration = endTime - startTime; - - if (latestEndTime < cb->GetEndTime()) - { - latestEndTime = cb->GetEndTime(); - } - - if (earliestStartTime.time_since_epoch().count() == 0) - { - earliestStartTime = cb->GetStartTime(); - } - else if (earliestStartTime > cb->GetStartTime()) - { - earliestStartTime = cb->GetStartTime(); - } - - if (params.m_GenerateTensorData) - { - ARMNN_LOG(warning) << "The input data was generated, note that the output will not be useful"; - } - if (params.m_DontPrintOutputs) - { - ARMNN_LOG(info) << "Printing outputs to console is disabled."; - } - - // Print output tensors - const auto& infosOut = model.GetOutputBindingInfos(); - for (size_t i = 0; i < numOutputs; i++) - { - // We've made sure before that the number of output files either equals numOutputs, in which - // case we override those files when processing the results of each iteration (only the - // result of the last iteration will be stored), or there are enough - // output files for each output of each iteration. - size_t outputFileIndex = iteration * numOutputs + i; - if (!params.m_OutputTensorFiles.empty()) - { - outputFileIndex = outputFileIndex % params.m_OutputTensorFiles.size(); - ARMNN_LOG(info) << "Writing output " << i << " named: '" - << inferenceModelParams.m_OutputBindings[i] - << "' of iteration: " << iteration+1 << " to file: '" - << params.m_OutputTensorFiles[outputFileIndex] << "'"; - } - - const armnn::TensorInfo& infoOut = infosOut[i].second; - auto outputTensorFile = params.m_OutputTensorFiles.empty() - ? 
"" - : params.m_OutputTensorFiles[outputFileIndex]; - - TensorPrinter printer(inferenceModelParams.m_OutputBindings[i], - infoOut, - outputTensorFile, - params.m_DequantizeOutput, - !params.m_DontPrintOutputs); - mapbox::util::apply_visitor(printer, inferenceOutputMap.at(cb->GetInferenceId())[i]); - } - - CheckInferenceTimeThreshold(inferenceDuration, params.m_ThresholdTime); - ++j; - } - //print duration difference between overallStartTime and overallEndTime - auto overallEndTime = time_point_cast(latestEndTime); - auto overallStartTime = time_point_cast(earliestStartTime); - auto totalInferenceDuration = overallEndTime - overallStartTime; - ARMNN_LOG(info) << "\nOverall Inference time: " << std::setprecision(2) - << std::fixed << totalInferenceDuration.count() << " ms\n"; - } - catch (const armnn::Exception& e) - { - ARMNN_LOG(fatal) << "Armnn Error: " << e.what(); - return EXIT_FAILURE; - } - } - // Asynchronous execution using std::launch::async - else - { - try - { - ARMNN_LOG(info) << "Asynchronous Execution with std::launch:async... \n"; - std::vector>>> inferenceResults; - inferenceResults.reserve(params.m_Iterations); - - // Create WorkingMemHandles for each inference - std::vector> workingMemHandles; - workingMemHandles.reserve(params.m_Iterations); - for (unsigned int i = 0; i < params.m_Iterations; ++i) - { - workingMemHandles.push_back(model.CreateWorkingMemHandle()); - } - - // Run each inference in its own thread - // start a timer - const auto start_time = armnn::GetTimeNow(); - for (unsigned int i = 0; i < params.m_Iterations; ++i) - { - armnn::experimental::IWorkingMemHandle& workingMemHandleRef = *workingMemHandles[i].get(); - - inferenceResults.push_back(std::async( - std::launch::async, [&model, &workingMemHandleRef, &inputs, &outputs, i]() { - return model.RunAsync(workingMemHandleRef, inputs[i], outputs[i], i); - } - )); - } - - // Check the results - for (unsigned int j = 0; j < inferenceResults.size(); ++j) - { - // Get the results - auto inferenceResult = inferenceResults[j].get(); - auto inferenceDuration = std::get<1>(inferenceResult); - auto inferenceID = std::get<0>(inferenceResult); - - if (params.m_GenerateTensorData) - { - ARMNN_LOG(warning) << "The input data was generated, note that the output will not be useful"; - } - if (params.m_DontPrintOutputs) - { - ARMNN_LOG(info) << "Printing outputs to console is disabled."; - } - - // Print output tensors - const auto& infosOut = model.GetOutputBindingInfos(); - for (size_t i = 0; i < numOutputs; i++) - { - // We've made sure before that the number of output files either equals numOutputs, in which - // case we override those files when processing the results of each iteration (only the - // result of the last iteration will be stored), or there are enough - // output files for each output of each iteration. - size_t outputFileIndex = j * numOutputs + i; - if (!params.m_OutputTensorFiles.empty()) - { - outputFileIndex = outputFileIndex % params.m_OutputTensorFiles.size(); - ARMNN_LOG(info) << "Writing output " << i << " named: '" - << inferenceModelParams.m_OutputBindings[i] - << "' of iteration: " << j+1 << " to file: '" - << params.m_OutputTensorFiles[outputFileIndex] << "'"; - } - const armnn::TensorInfo& infoOut = infosOut[i].second; - auto outputTensorFile = params.m_OutputTensorFiles.empty() - ? 
"" - : params.m_OutputTensorFiles[outputFileIndex]; - - TensorPrinter printer(inferenceModelParams.m_OutputBindings[i], - infoOut, - outputTensorFile, - params.m_DequantizeOutput, - !params.m_DontPrintOutputs); - mapbox::util::apply_visitor(printer, outputs[j][i]); - } - CheckInferenceTimeThreshold(inferenceDuration, params.m_ThresholdTime); - ARMNN_LOG(info) << "Asynchronous Execution is finished for Inference ID: " << inferenceID << " \n"; - } - // finish timer - const auto duration = armnn::GetTimeDuration(start_time); - ARMNN_LOG(info) << "\nOverall Inference time: " << std::setprecision(2) - << std::fixed << duration.count() << " ms\n"; - } - catch (const armnn::Exception& e) - { - ARMNN_LOG(fatal) << "Armnn Error: " << e.what(); - return EXIT_FAILURE; - } - } } - catch (const armnn::Exception& e) + else { - ARMNN_LOG(fatal) << "Armnn Error: " << e.what(); - return EXIT_FAILURE; + return std::make_unique(programOptions.m_ExNetParams, programOptions.m_RuntimeOptions); } - - return EXIT_SUCCESS; } // MAIN @@ -1004,74 +43,49 @@ int main(int argc, const char* argv[]) // Get ExecuteNetwork parameters and runtime options from command line // This might throw an InvalidArgumentException if the user provided invalid inputs - ProgramOptions ProgramOptions; - try { - ProgramOptions.ParseOptions(argc, argv); - } catch (const std::exception &e){ - ARMNN_LOG(fatal) << e.what(); - return EXIT_FAILURE; + ProgramOptions programOptions; + try + { + programOptions.ParseOptions(argc, argv); } - - if ((ProgramOptions.m_ExNetParams.m_OutputDetailsToStdOut || - ProgramOptions.m_ExNetParams.m_OutputDetailsOnlyToStdOut) - && !ProgramOptions.m_ExNetParams.m_EnableProfiling) + catch (const std::exception &e) { - ARMNN_LOG(fatal) << "You must enable profiling if you would like to output layer details"; + ARMNN_LOG(fatal) << e.what(); return EXIT_FAILURE; } - std::string modelFormat = ProgramOptions.m_ExNetParams.m_ModelFormat; + std::vector outputResults; - // Forward to implementation based on the parser type - if (modelFormat.find("armnn") != std::string::npos) - { - #if defined(ARMNN_SERIALIZER) - std::shared_ptr runtime(armnn::IRuntime::Create(ProgramOptions.m_RuntimeOptions)); - return MainImpl(ProgramOptions.m_ExNetParams, runtime); - #else - ARMNN_LOG(fatal) << "Not built with serialization support."; - return EXIT_FAILURE; - #endif - } - else if (modelFormat.find("onnx") != std::string::npos) + auto executor = BuildExecutor(programOptions); + if (!executor) { - #if defined(ARMNN_ONNX_PARSER) - std::shared_ptr runtime(armnn::IRuntime::Create(ProgramOptions.m_RuntimeOptions)); - return MainImpl(ProgramOptions.m_ExNetParams, runtime); - #else - ARMNN_LOG(fatal) << "Not built with Onnx parser support."; return EXIT_FAILURE; - #endif } - else if(modelFormat.find("tflite") != std::string::npos) + + executor->PrintNetworkInfo(); + outputResults = executor->Execute(); + + if (!programOptions.m_ExNetParams.m_ComparisonComputeDevices.empty() || + programOptions.m_ExNetParams.m_CompareWithTflite) { - if (ProgramOptions.m_ExNetParams.m_TfLiteExecutor == ExecuteNetworkParams::TfLiteExecutor::ArmNNTfLiteParser) + ExecuteNetworkParams comparisonParams = programOptions.m_ExNetParams; + comparisonParams.m_ComputeDevices = programOptions.m_ExNetParams.m_ComparisonComputeDevices; + + if (programOptions.m_ExNetParams.m_CompareWithTflite) { - #if defined(ARMNN_TF_LITE_PARSER) - std::shared_ptr runtime(armnn::IRuntime::Create(ProgramOptions.m_RuntimeOptions)); - return MainImpl(ProgramOptions.m_ExNetParams, runtime); - #else 
- ARMNN_LOG(fatal) << "Not built with Tensorflow-Lite parser support."; - return EXIT_FAILURE; - #endif + comparisonParams.m_TfLiteExecutor = ExecuteNetworkParams::TfLiteExecutor::TfliteInterpreter; } - else if (ProgramOptions.m_ExNetParams.m_TfLiteExecutor == - ExecuteNetworkParams::TfLiteExecutor::ArmNNTfLiteDelegate || - ProgramOptions.m_ExNetParams.m_TfLiteExecutor == - ExecuteNetworkParams::TfLiteExecutor::TfliteInterpreter) + + auto comparisonExecutor = BuildExecutor(programOptions); + + if (!comparisonExecutor) { - #if defined(ARMNN_TF_LITE_DELEGATE) - return TfLiteDelegateMainImpl(ProgramOptions.m_ExNetParams, ProgramOptions.m_RuntimeOptions); - #else - ARMNN_LOG(fatal) << "Not built with Arm NN Tensorflow-Lite delegate support."; return EXIT_FAILURE; - #endif } - } - else - { - ARMNN_LOG(fatal) << "Unknown model format: '" << modelFormat - << "'. Please include 'tflite' or 'onnx'"; - return EXIT_FAILURE; + + comparisonExecutor->PrintNetworkInfo(); + comparisonExecutor->Execute(); + + comparisonExecutor->CompareAndPrintResult(outputResults); } } -- cgit v1.2.1