From dfbec2d481ee60202ddf2f6400a4bcbbf6f23cc1 Mon Sep 17 00:00:00 2001 From: Ryan OShea Date: Mon, 28 Mar 2022 10:55:48 +0100 Subject: IVGCVSW-6828 Add a 'reuse I/O buffers' iteration mode to ExecuteNetwork * Add shorthand argument for no print * Add Execute network option to reuse buffers * Add new synchronous execute method to reuse buffers Signed-off-by: Ryan OShea Change-Id: Ia7ee99b2ba9a21043c9575d7546bf25208357141 --- tests/ExecuteNetwork/ExecuteNetwork.cpp | 277 +++++++++++++++------ tests/ExecuteNetwork/ExecuteNetworkParams.hpp | 1 + .../ExecuteNetworkProgramOptions.cpp | 8 +- 3 files changed, 214 insertions(+), 72 deletions(-) diff --git a/tests/ExecuteNetwork/ExecuteNetwork.cpp b/tests/ExecuteNetwork/ExecuteNetwork.cpp index b5b8d8561c..ddabf3c11f 100644 --- a/tests/ExecuteNetwork/ExecuteNetwork.cpp +++ b/tests/ExecuteNetwork/ExecuteNetwork.cpp @@ -446,44 +446,6 @@ int MainImpl(const ExecuteNetworkParams& params, << " input-tensor-data file/s which will be used to fill the input/s.\n"; } - for(unsigned int j = 0; j < params.m_Iterations ; ++j) - { - std::vector inputDataContainers; - for(unsigned int i = 0; i < numInputs; ++i) - { - // If there are fewer input files given than required for the execution of - // params.m_Iterations we simply start with the first input file again - size_t inputFileIndex = j * numInputs + i; - if (!params.m_InputTensorDataFilePaths.empty()) - { - inputFileIndex = inputFileIndex % params.m_InputTensorDataFilePaths.size(); - } - - armnn::Optional dataFile = params.m_GenerateTensorData ? - armnn::EmptyOptional() : - armnn::MakeOptional( - params.m_InputTensorDataFilePaths.at(inputFileIndex)); - - unsigned int numElements = model.GetInputSize(i); - if (params.m_InputTensorShapes.size() > i && params.m_InputTensorShapes[i]) - { - // If the user has provided a tensor shape for the current input, - // override numElements - numElements = params.m_InputTensorShapes[i]->GetNumElements(); - } - - armnnUtils::TContainer tensorData; - PopulateTensorWithData(tensorData, - numElements, - params.m_InputTypes[i], - qParams, - dataFile); - - inputDataContainers.push_back(tensorData); - } - inputs.push_back(inputDataContainers); - } - const size_t numOutputs = inferenceModelParams.m_OutputBindings.size(); // The user is allowed to specify the data type of each output tensor. It is used here to construct the @@ -500,32 +462,32 @@ int MainImpl(const ExecuteNetworkParams& params, case armnn::DataType::Float32: if (params.m_OutputTypes[outputIdx].compare("float") != 0) { - ARMNN_LOG(warning) << "Model output index: " << outputIdx << " has data type Float32. The " << - "corresponding --output-type is " << params.m_OutputTypes[outputIdx] << + ARMNN_LOG(warning) << "Model output index: " << outputIdx << " has data type Float32. The " + << "corresponding --output-type is " << params.m_OutputTypes[outputIdx] << ". This may cause unexpected problems or random failures."; } break; case armnn::DataType::QAsymmU8: if (params.m_OutputTypes[outputIdx].compare("qasymmu8") != 0) { - ARMNN_LOG(warning) << "Model output index: " << outputIdx << " has data type QAsymmU8. The " << - "corresponding --output-type is " << params.m_OutputTypes[outputIdx] << - ". This may cause unexpected problemsor random failures."; + ARMNN_LOG(warning) << "Model output index: " << outputIdx << " has data type QAsymmU8. The " + << "corresponding --output-type is " << params.m_OutputTypes[outputIdx] << + ". 
This may cause unexpected problems or random failures."; } break; case armnn::DataType::Signed32: if (params.m_OutputTypes[outputIdx].compare("int") != 0) { - ARMNN_LOG(warning) << "Model output index: " << outputIdx << " has data type Signed32. The " << - "corresponding --output-type is " << params.m_OutputTypes[outputIdx] << + ARMNN_LOG(warning) << "Model output index: " << outputIdx << " has data type Signed32. The " + << "corresponding --output-type is " << params.m_OutputTypes[outputIdx] << ". This may cause unexpected problems or random failures."; } break; case armnn::DataType::QAsymmS8: if (params.m_OutputTypes[outputIdx].compare("qasymms8") != 0) { - ARMNN_LOG(warning) << "Model output index: " << outputIdx << " has data type QAsymmS8. The " << - "corresponding --output-type is " << params.m_OutputTypes[outputIdx] << + ARMNN_LOG(warning) << "Model output index: " << outputIdx << " has data type QAsymmS8. The " + << "corresponding --output-type is " << params.m_OutputTypes[outputIdx] << ". This may cause unexpected problems or random failures."; } break; @@ -533,36 +495,79 @@ int MainImpl(const ExecuteNetworkParams& params, break; } } - for (unsigned int j = 0; j < params.m_Iterations; ++j) + + if (!params.m_ReuseBuffers) { - std::vector outputDataContainers; - for (unsigned int i = 0; i < numOutputs; ++i) + for (unsigned int j = 0; j < params.m_Iterations; ++j) { - if (params.m_OutputTypes[i].compare("float") == 0) + std::vector inputDataContainers; + for (unsigned int i = 0; i < numInputs; ++i) { - outputDataContainers.push_back(std::vector(model.GetOutputSize(i))); - } - else if (params.m_OutputTypes[i].compare("int") == 0) - { - outputDataContainers.push_back(std::vector(model.GetOutputSize(i))); - } - else if (params.m_OutputTypes[i].compare("qasymm8") == 0 || - params.m_OutputTypes[i].compare("qasymmu8") == 0) - { - outputDataContainers.push_back(std::vector(model.GetOutputSize(i))); + // If there are fewer input files given than required for the execution of + // params.m_Iterations we simply start with the first input file again + size_t inputFileIndex = j * numInputs + i; + if (!params.m_InputTensorDataFilePaths.empty()) + { + inputFileIndex = inputFileIndex % params.m_InputTensorDataFilePaths.size(); + } + + armnn::Optional dataFile = params.m_GenerateTensorData ? + armnn::EmptyOptional() : + armnn::MakeOptional( + params.m_InputTensorDataFilePaths.at( + inputFileIndex)); + + unsigned int numElements = model.GetInputSize(i); + if (params.m_InputTensorShapes.size() > i && params.m_InputTensorShapes[i]) + { + // If the user has provided a tensor shape for the current input, + // override numElements + numElements = params.m_InputTensorShapes[i]->GetNumElements(); + } + + armnnUtils::TContainer tensorData; + PopulateTensorWithData(tensorData, + numElements, + params.m_InputTypes[i], + qParams, + dataFile); + + inputDataContainers.push_back(tensorData); } - else if (params.m_OutputTypes[i].compare("qasymms8") == 0) - { - outputDataContainers.push_back(std::vector(model.GetOutputSize(i))); - } else + inputs.push_back(inputDataContainers); + } + + for (unsigned int j = 0; j < params.m_Iterations; ++j) + { + std::vector outputDataContainers; + for (unsigned int i = 0; i < numOutputs; ++i) { - ARMNN_LOG(fatal) << "Unsupported tensor data type \"" << params.m_OutputTypes[i] << "\". 
"; - return EXIT_FAILURE; + if (params.m_OutputTypes[i].compare("float") == 0) + { + outputDataContainers.push_back(std::vector(model.GetOutputSize(i))); + } + else if (params.m_OutputTypes[i].compare("int") == 0) + { + outputDataContainers.push_back(std::vector(model.GetOutputSize(i))); + } + else if (params.m_OutputTypes[i].compare("qasymm8") == 0 || + params.m_OutputTypes[i].compare("qasymmu8") == 0) + { + outputDataContainers.push_back(std::vector(model.GetOutputSize(i))); + } + else if (params.m_OutputTypes[i].compare("qasymms8") == 0) + { + outputDataContainers.push_back(std::vector(model.GetOutputSize(i))); + } + else + { + ARMNN_LOG(fatal) << "Unsupported tensor data type \"" << params.m_OutputTypes[i] << "\". "; + return EXIT_FAILURE; + } } + outputs.push_back(outputDataContainers); } - outputs.push_back(outputDataContainers); } - if (params.m_Iterations > 1) { std::stringstream msg; @@ -581,7 +586,7 @@ int MainImpl(const ExecuteNetworkParams& params, } // Synchronous execution - if (!params.m_Concurrent) + if (!params.m_Concurrent && !params.m_ReuseBuffers) { for (size_t x = 0; x < params.m_Iterations; x++) { @@ -648,6 +653,138 @@ int MainImpl(const ExecuteNetworkParams& params, } } } + // Synchronous Execution using a single buffer for input and output data + else if(!params.m_Concurrent) + { + std::vector input; + std::vector output; + + for (unsigned int i = 0; i < numInputs; ++i) + { + // If there are fewer input files given than required for the execution of + // params.m_Iterations we simply start with the first input file again + size_t inputFileIndex = numInputs + i; + if (!params.m_InputTensorDataFilePaths.empty()) + { + inputFileIndex = inputFileIndex % params.m_InputTensorDataFilePaths.size(); + } + + armnn::Optional dataFile = params.m_GenerateTensorData ? + armnn::EmptyOptional() : + armnn::MakeOptional( + params.m_InputTensorDataFilePaths.at( + inputFileIndex)); + + unsigned int numElements = model.GetInputSize(i); + if (params.m_InputTensorShapes.size() > i && params.m_InputTensorShapes[i]) + { + // If the user has provided a tensor shape for the current input, + // override numElements + numElements = params.m_InputTensorShapes[i]->GetNumElements(); + } + + armnnUtils::TContainer tensorData; + PopulateTensorWithData(tensorData, + numElements, + params.m_InputTypes[i], + qParams, + dataFile); + + input.push_back(tensorData); + } + + for (unsigned int i = 0; i < numOutputs; ++i) + { + if (params.m_OutputTypes[i].compare("float") == 0) + { + output.push_back(std::vector(model.GetOutputSize(i))); + } else if (params.m_OutputTypes[i].compare("int") == 0) { + output.push_back(std::vector(model.GetOutputSize(i))); + } else if (params.m_OutputTypes[i].compare("qasymm8") == 0 || + params.m_OutputTypes[i].compare("qasymmu8") == 0) + { + output.push_back(std::vector(model.GetOutputSize(i))); + } else if (params.m_OutputTypes[i].compare("qasymms8") == 0) + { + output.push_back(std::vector(model.GetOutputSize(i))); + } else { + ARMNN_LOG(fatal) << "Unsupported tensor data type \"" << params.m_OutputTypes[i] << "\". 
"; + return EXIT_FAILURE; + } + } + + std::vector> timings; + timings.reserve(params.m_Iterations); + for (size_t x = 0; x < params.m_Iterations; x++) + { + // model.Run returns the inference time elapsed in EnqueueWorkload (in milliseconds) + auto inference_duration = model.Run(input, output); + timings.push_back(inference_duration); + } + + if (params.m_GenerateTensorData) + { + ARMNN_LOG(warning) << "The input data was generated, note that the output will not be useful"; + } + if (params.m_DontPrintOutputs) + { + ARMNN_LOG(info) << "Printing outputs to console is disabled."; + } + + // Print output. This only needs to happen once as input is the same for each iteration. + const auto &infosOut = model.GetOutputBindingInfos(); + for (size_t i = 0; i < numOutputs; i++) + { + const armnn::TensorInfo &infoOut = infosOut[i].second; + + // We've made sure before that the number of output files either equals numOutputs, in which + // case we override those files when processing the results of each iteration (only the result + // of the last iteration will be stored), or there are enough + // output files for each output of each iteration. + size_t outputFileIndex = numOutputs + i; + if (!params.m_OutputTensorFiles.empty()) + { + outputFileIndex = outputFileIndex % params.m_OutputTensorFiles.size(); + ARMNN_LOG(info) << "Writing output " << i << " named: '" + << inferenceModelParams.m_OutputBindings[i] <<" to file: '" + << params.m_OutputTensorFiles[outputFileIndex] << "'"; + } + auto outputTensorFile = params.m_OutputTensorFiles.empty() + ? "" + : params.m_OutputTensorFiles[outputFileIndex]; + + TensorPrinter printer(inferenceModelParams.m_OutputBindings[i], + infoOut, + outputTensorFile, + params.m_DequantizeOutput, + !params.m_DontPrintOutputs); + mapbox::util::apply_visitor(printer, output[i]); + } + + for(auto inference: timings) + { + + ARMNN_LOG(info) << "\nInference time: " << std::setprecision(2) + << std::fixed << inference.count() << " ms\n"; + + // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line + if (params.m_ThresholdTime != 0.0) + { + ARMNN_LOG(info) << "Threshold time: " << std::setprecision(2) + << std::fixed << params.m_ThresholdTime << " ms"; + auto thresholdMinusInference = params.m_ThresholdTime - inference.count(); + ARMNN_LOG(info) << "Threshold time - Inference time: " << std::setprecision(2) + << std::fixed << thresholdMinusInference << " ms" << "\n"; + + if (thresholdMinusInference < 0) + { + std::string errorMessage = "Elapsed inference time is greater than provided threshold time."; + ARMNN_LOG(fatal) << errorMessage; + } + } + } + } + // Asynchronous execution using the Arm NN thread pool else if (params.m_ThreadPoolSize >= 1) { diff --git a/tests/ExecuteNetwork/ExecuteNetworkParams.hpp b/tests/ExecuteNetwork/ExecuteNetworkParams.hpp index deaf55f6e5..04a073311d 100644 --- a/tests/ExecuteNetwork/ExecuteNetworkParams.hpp +++ b/tests/ExecuteNetwork/ExecuteNetworkParams.hpp @@ -66,6 +66,7 @@ struct ExecuteNetworkParams TfLiteExecutor m_TfLiteExecutor; size_t m_ThreadPoolSize; bool m_ImportInputsIfAligned; + bool m_ReuseBuffers; // Ensures that the parameters for ExecuteNetwork fit together void ValidateParams(); diff --git a/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp b/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp index 681dc8a611..c84c79ea78 100644 --- a/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp +++ b/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp @@ -255,7 +255,7 @@ ProgramOptions::ProgramOptions() 
: m_CxxOptions{"ExecuteNetwork", "Add unsupported operators as stand-in layers (where supported by parser)", cxxopts::value(m_ExNetParams.m_ParseUnsupported)->default_value("false")->implicit_value("true")) - ("do-not-print-output", + ("N,do-not-print-output", "The default behaviour of ExecuteNetwork is to print the resulting outputs on the console. " "This behaviour can be changed by adding this flag to your command.", cxxopts::value(m_ExNetParams.m_DontPrintOutputs)->default_value("false")->implicit_value("true")) @@ -378,7 +378,11 @@ ProgramOptions::ProgramOptions() : m_CxxOptions{"ExecuteNetwork", ("MLGOTuningFilePath", "Path to tuning file. Enables use of CL MLGO tuning", - cxxopts::value(m_ExNetParams.m_MLGOTuningFilePath)); + cxxopts::value(m_ExNetParams.m_MLGOTuningFilePath)) + + ("R, reuse-buffers", + "If enabled then the IO buffers will be reused for each inference", + cxxopts::value(m_ExNetParams.m_ReuseBuffers)->default_value("false")->implicit_value("true")); m_CxxOptions.add_options("d) Profiling") ("a,enable-external-profiling", -- cgit v1.2.1