From 3cff15a5d8797d0afe5d5b1cb3ff0e5b7d1cd6c9 Mon Sep 17 00:00:00 2001 From: Colm Donelan Date: Tue, 12 Oct 2021 15:06:19 +0100 Subject: IVGCVSW-5879 Pass the execute network parameters to the TfLiteDelegate. * Introduce a mechanism to construct a DelegateOptions from an ExecuteNetworkParams. * Modify ExecuteNetwork to use this constructed DelegateOptions. Signed-off-by: Colm Donelan Change-Id: Ied663a1e00ac3eece42244ed313ddafd6d2ce078 --- tests/ExecuteNetwork/ExecuteNetwork.cpp | 121 ++++++++++++--------- tests/ExecuteNetwork/ExecuteNetworkParams.cpp | 58 +++++++++- tests/ExecuteNetwork/ExecuteNetworkParams.hpp | 10 ++ .../ExecuteNetworkProgramOptions.cpp | 10 +- 4 files changed, 140 insertions(+), 59 deletions(-) diff --git a/tests/ExecuteNetwork/ExecuteNetwork.cpp b/tests/ExecuteNetwork/ExecuteNetwork.cpp index 8acab71290..fa8c8c8761 100644 --- a/tests/ExecuteNetwork/ExecuteNetwork.cpp +++ b/tests/ExecuteNetwork/ExecuteNetwork.cpp @@ -35,8 +35,39 @@ #endif #include + +/** + * Given a measured duration and a threshold time tell the user whether we succeeded or not. + * + * @param duration the measured inference duration. + * @param thresholdTime the threshold time in milliseconds. + * @return false if the measured time exceeded the threshold. + */ +bool CheckInferenceTimeThreshold(const std::chrono::duration& duration, + const double& thresholdTime) +{ + ARMNN_LOG(info) << "\nInference time: " << std::setprecision(2) + << std::fixed << duration.count() << " ms\n"; + // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line + if (thresholdTime != 0.0) + { + ARMNN_LOG(info) << "Threshold time: " << std::setprecision(2) + << std::fixed << thresholdTime << " ms"; + auto thresholdMinusInference = thresholdTime - duration.count(); + ARMNN_LOG(info) << "Threshold time - Inference time: " << std::setprecision(2) + << std::fixed << thresholdMinusInference << " ms" << "\n"; + if (thresholdMinusInference < 0) + { + std::string errorMessage = "Elapsed inference time is greater than provided threshold time."; + ARMNN_LOG(fatal) << errorMessage; + return false; + } + } + return true; +} + #if defined(ARMNN_TFLITE_DELEGATE) -int TfLiteDelegateMainImpl(const ExecuteNetworkParams& params, +int TfLiteDelegateMainImpl(const ExecuteNetworkParams& params, const armnn::IRuntime::CreationOptions runtimeOptions, const std::shared_ptr& runtime = nullptr) { using namespace tflite; @@ -54,7 +85,10 @@ int TfLiteDelegateMainImpl(const ExecuteNetworkParams& params, if (params.m_TfLiteExecutor == ExecuteNetworkParams::TfLiteExecutor::ArmNNTfLiteDelegate) { // Create the Armnn Delegate - armnnDelegate::DelegateOptions delegateOptions(params.m_ComputeDevices); + // Populate a DelegateOptions from the ExecuteNetworkParams. + armnnDelegate::DelegateOptions delegateOptions = params.ToDelegateOptions(); + delegateOptions.SetExternalProfilingParams(runtimeOptions.m_ProfilingOptions); + std::unique_ptr theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions), armnnDelegate::TfLiteArmnnDelegateDelete); @@ -71,18 +105,11 @@ int TfLiteDelegateMainImpl(const ExecuteNetworkParams& params, std::cout << "Running on TfLite without ArmNN delegate\n"; } - - std::vector inputBindings; - for (const std::string& inputName: params.m_InputNames) - { - inputBindings.push_back(inputName); - } - armnn::Optional dataFile = params.m_GenerateTensorData ? 
armnn::EmptyOptional() : armnn::MakeOptional(params.m_InputTensorDataFilePaths[0]); - const size_t numInputs = inputBindings.size(); + const size_t numInputs = params.m_InputNames.size(); for(unsigned int inputIndex = 0; inputIndex < numInputs; ++inputIndex) { @@ -212,15 +239,36 @@ int TfLiteDelegateMainImpl(const ExecuteNetworkParams& params, for (size_t x = 0; x < params.m_Iterations; x++) { + // Start timer to record inference time in milliseconds. + const auto start_time = armnn::GetTimeNow(); // Run the inference status = tfLiteInterpreter->Invoke(); + const auto duration = armnn::GetTimeDuration(start_time); // Print out the output for (unsigned int outputIndex = 0; outputIndex < params.m_OutputNames.size(); ++outputIndex) { auto tfLiteDelegateOutputId = tfLiteInterpreter->outputs()[outputIndex]; TfLiteIntArray* outputDims = tfLiteInterpreter->tensor(tfLiteDelegateOutputId)->dims; - + // If we've been asked to write to a file then set a file output stream. Otherwise use stdout. + FILE* outputTensorFile = stdout; + if (!params.m_OutputTensorFiles.empty()) + { + outputTensorFile = fopen(params.m_OutputTensorFiles[outputIndex].c_str(), "w"); + if (outputTensorFile == NULL) + { + ARMNN_LOG(fatal) << "Specified output tensor file, \"" << + params.m_OutputTensorFiles[outputIndex] << + "\", cannot be created. Defaulting to stdout. " << + "Error was: " << std::strerror(errno); + outputTensorFile = stdout; + } + else + { + ARMNN_LOG(info) << "Writing output " << outputIndex << "' of iteration: " << x+1 << " to file: '" + << params.m_OutputTensorFiles[outputIndex] << "'"; + } + } long outputSize = 1; for (unsigned int dim = 0; dim < static_cast(outputDims->size); ++dim) { @@ -242,7 +290,7 @@ int TfLiteDelegateMainImpl(const ExecuteNetworkParams& params, { for (int i = 0; i < outputSize; ++i) { - printf("%f ", tfLiteDelageOutputData[i]); + fprintf(outputTensorFile, "%f ", tfLiteDelageOutputData[i]); } } } @@ -260,7 +308,7 @@ int TfLiteDelegateMainImpl(const ExecuteNetworkParams& params, { for (int i = 0; i < outputSize; ++i) { - printf("%d ", tfLiteDelageOutputData[i]); + fprintf(outputTensorFile, "%d ", tfLiteDelageOutputData[i]); } } } @@ -278,7 +326,7 @@ int TfLiteDelegateMainImpl(const ExecuteNetworkParams& params, { for (int i = 0; i < outputSize; ++i) { - printf("%d ", tfLiteDelageOutputData[i]); + fprintf(outputTensorFile, "%d ", tfLiteDelageOutputData[i]); } } } @@ -297,7 +345,7 @@ int TfLiteDelegateMainImpl(const ExecuteNetworkParams& params, { for (int i = 0; i < outputSize; ++i) { - printf("%u ", tfLiteDelageOutputData[i]); + fprintf(outputTensorFile, "%u ", tfLiteDelageOutputData[i]); } } } @@ -310,6 +358,7 @@ int TfLiteDelegateMainImpl(const ExecuteNetworkParams& params, } std::cout << std::endl; } + CheckInferenceTimeThreshold(duration, params.m_ThresholdTime); } return status; @@ -628,24 +677,7 @@ int MainImpl(const ExecuteNetworkParams& params, mapbox::util::apply_visitor(printer, inferenceOutputMap.at(cb->GetInferenceId())[i]); } - ARMNN_LOG(info) << "\nInference time: " << std::setprecision(2) - << std::fixed << inferenceDuration.count() << " ms\n"; - - // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line - if (params.m_ThresholdTime != 0.0) - { - ARMNN_LOG(info) << "Threshold time: " << std::setprecision(2) - << std::fixed << params.m_ThresholdTime << " ms"; - auto thresholdMinusInference = - params.m_ThresholdTime - duration(inferenceDuration).count(); - ARMNN_LOG(info) << "Threshold time - Inference time: " << std::setprecision(2) - << 
std::fixed << thresholdMinusInference << " ms" << "\n"; - - if (thresholdMinusInference < 0) - { - ARMNN_LOG(fatal) << "Elapsed inference time is greater than provided threshold time. \n"; - } - } + CheckInferenceTimeThreshold(inferenceDuration, params.m_ThresholdTime); ++j; } //print duration difference between overallStartTime and overallEndTime @@ -739,26 +771,8 @@ int MainImpl(const ExecuteNetworkParams& params, !params.m_DontPrintOutputs); mapbox::util::apply_visitor(printer, outputs[j][i]); } - - ARMNN_LOG(info) << "\nInference time: " << std::setprecision(2) - << std::fixed << inferenceDuration.count() << " ms\n"; - - // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line - if (params.m_ThresholdTime != 0.0) - { - ARMNN_LOG(info) << "Threshold time: " << std::setprecision(2) - << std::fixed << params.m_ThresholdTime << " ms"; - auto thresholdMinusInference = params.m_ThresholdTime - inferenceDuration.count(); - ARMNN_LOG(info) << "Threshold time - Inference time: " << std::setprecision(2) - << std::fixed << thresholdMinusInference << " ms" << "\n"; - - if (thresholdMinusInference < 0) - { - ARMNN_LOG(fatal) << "Elapsed inference time is greater than provided threshold time. \n"; - } - } + CheckInferenceTimeThreshold(inferenceDuration, params.m_ThresholdTime); ARMNN_LOG(info) << "Asynchronous Execution is finished for Inference ID: " << inferenceID << " \n"; - } // finish timer const auto duration = armnn::GetTimeDuration(start_time); @@ -781,7 +795,6 @@ int MainImpl(const ExecuteNetworkParams& params, return EXIT_SUCCESS; } - // MAIN int main(int argc, const char* argv[]) { @@ -853,7 +866,7 @@ int main(int argc, const char* argv[]) ExecuteNetworkParams::TfLiteExecutor::TfliteInterpreter) { #if defined(ARMNN_TF_LITE_DELEGATE) - return TfLiteDelegateMainImpl(ProgramOptions.m_ExNetParams, runtime); + return TfLiteDelegateMainImpl(ProgramOptions.m_ExNetParams, ProgramOptions.m_RuntimeOptions, runtime); #else ARMNN_LOG(fatal) << "Not built with Arm NN Tensorflow-Lite delegate support."; return EXIT_FAILURE; diff --git a/tests/ExecuteNetwork/ExecuteNetworkParams.cpp b/tests/ExecuteNetwork/ExecuteNetworkParams.cpp index dcdd423246..541430c421 100644 --- a/tests/ExecuteNetwork/ExecuteNetworkParams.cpp +++ b/tests/ExecuteNetwork/ExecuteNetworkParams.cpp @@ -232,4 +232,60 @@ void ExecuteNetworkParams::ValidateParams() { ARMNN_LOG(warning) << "No input files provided, input tensors will be filled with 0s."; } -} \ No newline at end of file +} + +#if defined(ARMNN_TFLITE_DELEGATE) +/** + * A utility method that populates a DelegateOptions object from this ExecuteNetworkParams. + * + * @return a populated armnnDelegate::DelegateOptions object. 
+ */ +armnnDelegate::DelegateOptions ExecuteNetworkParams::ToDelegateOptions() const +{ + armnnDelegate::DelegateOptions delegateOptions(m_ComputeDevices); + delegateOptions.SetDynamicBackendsPath(m_DynamicBackendsPath); + delegateOptions.SetGpuProfilingState(m_EnableProfiling); + + armnn::OptimizerOptions options; + options.m_ReduceFp32ToFp16 = m_EnableFp16TurboMode; + options.m_ReduceFp32ToBf16 = m_EnableBf16TurboMode; + options.m_Debug = m_PrintIntermediate; + + options.m_shapeInferenceMethod = armnn::ShapeInferenceMethod::ValidateOnly; + if (m_InferOutputShape) + { + options.m_shapeInferenceMethod = armnn::ShapeInferenceMethod::InferAndValidate; + } + + armnn::BackendOptions gpuAcc("GpuAcc", + { + { "FastMathEnabled", m_EnableFastMath }, + { "SaveCachedNetwork", m_SaveCachedNetwork }, + { "CachedNetworkFilePath", m_CachedNetworkFilePath }, + { "TuningLevel", m_TuningLevel}, + { "TuningFile", m_TuningPath.c_str()}, + { "KernelProfilingEnabled", m_EnableProfiling}, + { "MLGOTuningFilePath", m_MLGOTuningFilePath} + }); + + armnn::BackendOptions cpuAcc("CpuAcc", + { + { "FastMathEnabled", m_EnableFastMath }, + { "NumberOfThreads", m_NumberOfThreads } + }); + options.m_ModelOptions.push_back(gpuAcc); + options.m_ModelOptions.push_back(cpuAcc); + + delegateOptions.SetOptimizerOptions(options); + + // If v,visualize-optimized-model is enabled then construct a file name for the dot file. + if (m_EnableLayerDetails) + { + fs::path filename = m_ModelPath; + filename.replace_extension("dot"); + delegateOptions.SetSerializeToDot(filename); + } + + return delegateOptions; +} +#endif diff --git a/tests/ExecuteNetwork/ExecuteNetworkParams.hpp b/tests/ExecuteNetwork/ExecuteNetworkParams.hpp index db8194b3f9..cb8c2fb386 100644 --- a/tests/ExecuteNetwork/ExecuteNetworkParams.hpp +++ b/tests/ExecuteNetwork/ExecuteNetworkParams.hpp @@ -8,6 +8,10 @@ #include #include +#if defined(ARMNN_TFLITE_DELEGATE) +#include +#endif + /// Holds all parameters necessary to execute a network /// Check ExecuteNetworkProgramOptions.cpp for a description of each parameter struct ExecuteNetworkParams @@ -64,4 +68,10 @@ struct ExecuteNetworkParams // Ensures that the parameters for ExecuteNetwork fit together void ValidateParams(); + +#if defined(ARMNN_TFLITE_DELEGATE) + /// A utility method that populates a DelegateOptions object from this ExecuteNetworkParams. + armnnDelegate::DelegateOptions ToDelegateOptions() const; +#endif + }; diff --git a/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp b/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp index b1c87d088a..8ee66cf64b 100644 --- a/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp +++ b/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp @@ -242,7 +242,8 @@ ProgramOptions::ProgramOptions() : m_CxxOptions{"ExecuteNetwork", ("l,dequantize-output", "If this option is enabled, all quantized outputs will be dequantized to float. " "If unset, default to not get dequantized. " - "Accepted values (true or false)", + "Accepted values (true or false)" + " (Not available when executing ArmNNTfLiteDelegate or TfliteInterpreter)", cxxopts::value(m_ExNetParams.m_DequantizeOutput)->default_value("false")->implicit_value("true")) ("p,print-intermediate-layers", @@ -261,9 +262,9 @@ ProgramOptions::ProgramOptions() : m_CxxOptions{"ExecuteNetwork", ("q,quantize-input", "If this option is enabled, all float inputs will be quantized as appropriate for the model's inputs. " - "If unset, default to not quantized. 
Accepted values (true or false)", + "If unset, default to not quantized. Accepted values (true or false)" + " (Not available when executing ArmNNTfLiteDelegate or TfliteInterpreter)", cxxopts::value(m_ExNetParams.m_QuantizeInput)->default_value("false")->implicit_value("true")) - ("r,threshold-time", "Threshold time is the maximum allowed time for inference measured in milliseconds. If the actual " "inference time is greater than the threshold time, the test will fail. By default, no threshold " @@ -286,7 +287,8 @@ ProgramOptions::ProgramOptions() : m_CxxOptions{"ExecuteNetwork", cxxopts::value()) ("x,subgraph-number", - "Id of the subgraph to be executed. Defaults to 0.", + "Id of the subgraph to be executed. Defaults to 0." + " (Not available when executing ArmNNTfLiteDelegate or TfliteInterpreter)", cxxopts::value(m_ExNetParams.m_SubgraphId)->default_value("0")) ("y,input-type", -- cgit v1.2.1
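For context, a minimal sketch of how the new ExecuteNetworkParams::ToDelegateOptions() helper is consumed on the delegate path, mirroring the TfLiteDelegateMainImpl hunk above. This is not the committed code: the function name RunWithArmnnDelegate is hypothetical, the include paths for the Arm NN delegate headers are assumptions, error handling is reduced to the bare minimum, and it assumes a build with ARMNN_TFLITE_DELEGATE enabled.

```cpp
// Sketch only: assumes the public Arm NN delegate headers (armnn_delegate.hpp,
// DelegateOptions.hpp) and the standard TfLite interpreter API used by ExecuteNetwork.
#include <armnn_delegate.hpp>
#include <DelegateOptions.hpp>
#include <armnn/IRuntime.hpp>

#include <tensorflow/lite/interpreter.h>
#include <tensorflow/lite/kernels/register.h>
#include <tensorflow/lite/model.h>

#include <cstdlib>
#include <memory>

#include "ExecuteNetworkParams.hpp"

// Hypothetical helper, not part of the patch: builds a TfLite interpreter, attaches the
// Arm NN delegate configured from the ExecuteNetwork parameters, and runs one inference.
int RunWithArmnnDelegate(const ExecuteNetworkParams& params,
                         const armnn::IRuntime::CreationOptions& runtimeOptions)
{
    auto model = tflite::FlatBufferModel::BuildFromFile(params.m_ModelPath.c_str());
    if (model == nullptr)
    {
        return EXIT_FAILURE;
    }

    tflite::ops::builtin::BuiltinOpResolver resolver;
    std::unique_ptr<tflite::Interpreter> interpreter;
    if (tflite::InterpreterBuilder(*model, resolver)(&interpreter) != kTfLiteOk ||
        interpreter->AllocateTensors() != kTfLiteOk)
    {
        return EXIT_FAILURE;
    }

    // Populate the delegate options from the command-line parameters and forward the
    // external profiling settings from the runtime options, as the patch does.
    armnnDelegate::DelegateOptions delegateOptions = params.ToDelegateOptions();
    delegateOptions.SetExternalProfilingParams(runtimeOptions.m_ProfilingOptions);

    // Create the Arm NN delegate and hand the graph over to it.
    std::unique_ptr<TfLiteDelegate, decltype(&armnnDelegate::TfLiteArmnnDelegateDelete)>
        theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions),
                         armnnDelegate::TfLiteArmnnDelegateDelete);
    if (interpreter->ModifyGraphWithDelegate(theArmnnDelegate.get()) != kTfLiteOk)
    {
        return EXIT_FAILURE;
    }

    return interpreter->Invoke() == kTfLiteOk ? EXIT_SUCCESS : EXIT_FAILURE;
}
```

Keeping all of the option plumbing inside ToDelegateOptions() means the delegate path, and any future caller, picks up new ExecuteNetwork flags in one place instead of duplicating the OptimizerOptions and BackendOptions wiring.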
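Along the same lines, a short sketch of the per-iteration timing flow that feeds the new CheckInferenceTimeThreshold() helper. The wrapper name TimedInvoke is hypothetical; armnn::GetTimeNow()/GetTimeDuration() are the timing utilities the patch already uses, assumed here to live in armnn/utility/Timer.hpp, and the duration type is assumed to be std::chrono::duration<double, std::milli> (the template arguments are not visible in the hunk above), matching what armnn::GetTimeDuration returns.

```cpp
// Sketch only: times a single Invoke() in milliseconds and applies the --threshold-time
// check, mirroring the iteration loop in TfLiteDelegateMainImpl.
#include <armnn/utility/Timer.hpp>
#include <tensorflow/lite/interpreter.h>

#include <chrono>

#include "ExecuteNetworkParams.hpp"

// The helper added to ExecuteNetwork.cpp by this patch; signature assumed as above.
bool CheckInferenceTimeThreshold(const std::chrono::duration<double, std::milli>& duration,
                                 const double& thresholdTime);

// Hypothetical wrapper: returns false if the inference failed or exceeded the threshold.
bool TimedInvoke(tflite::Interpreter& interpreter, const ExecuteNetworkParams& params)
{
    // Start timer to record inference time in milliseconds.
    const auto start_time = armnn::GetTimeNow();
    const TfLiteStatus status = interpreter.Invoke();
    const auto duration = armnn::GetTimeDuration(start_time);

    if (status != kTfLiteOk)
    {
        return false;
    }

    // A threshold of 0.0 (the default) means no --threshold-time was supplied,
    // in which case the helper only logs the measured time and returns true.
    return CheckInferenceTimeThreshold(duration, params.m_ThresholdTime);
}
```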