From a04a9d7c11f28c7e932435535e80223782f369f2 Mon Sep 17 00:00:00 2001
From: Sadik Armagan
Date: Tue, 27 Apr 2021 10:02:10 +0100
Subject: IVGCVSW-5775 'Add Async Support to ExecuteNetwork'

* Enabled async mode with the '-n, concurrent' and 'simultaneous-iterations' options in ExecuteNetwork
* The number of input files provided should equal the number of network inputs multiplied by the number
  of simultaneous iterations, with the file paths separated by commas

!armnn:5443

Signed-off-by: Sadik Armagan
Change-Id: Ibeb318010430bf4ae61a02b18b1bf88f3657774c
---
 tests/ExecuteNetwork/ExecuteNetwork.cpp       | 253 ++++++++++++++-------
 tests/ExecuteNetwork/ExecuteNetworkParams.cpp |  23 +-
 tests/ExecuteNetwork/ExecuteNetworkParams.hpp |   2 +
 .../ExecuteNetworkProgramOptions.cpp          |  10 +-
 4 files changed, 208 insertions(+), 80 deletions(-)

diff --git a/tests/ExecuteNetwork/ExecuteNetwork.cpp b/tests/ExecuteNetwork/ExecuteNetwork.cpp
index 60e4ec3401..2bbb51783c 100644
--- a/tests/ExecuteNetwork/ExecuteNetwork.cpp
+++ b/tests/ExecuteNetwork/ExecuteNetwork.cpp
@@ -279,7 +279,8 @@ int MainImpl(const ExecuteNetworkParams& params,
     using TContainer =
            mapbox::util::variant<std::vector<float>, std::vector<int>, std::vector<unsigned char>, std::vector<int8_t>>;
 
-    std::vector<TContainer> inputDataContainers;
+    std::vector<std::vector<TContainer>> inputs;
+    std::vector<std::vector<TContainer>> outputs;
 
     try
     {
@@ -298,6 +299,7 @@ int MainImpl(const ExecuteNetworkParams& params,
         inferenceModelParams.m_CachedNetworkFilePath  = params.m_CachedNetworkFilePath;
         inferenceModelParams.m_NumberOfThreads        = params.m_NumberOfThreads;
         inferenceModelParams.m_MLGOTuningFilePath     = params.m_MLGOTuningFilePath;
+        inferenceModelParams.m_AsyncEnabled           = params.m_Concurrent;
 
         for(const std::string& inputName: params.m_InputNames)
         {
@@ -324,106 +326,201 @@ int MainImpl(const ExecuteNetworkParams& params,
                                      runtime);
 
         const size_t numInputs = inferenceModelParams.m_InputBindings.size();
-        for(unsigned int i = 0; i < numInputs; ++i)
+
+        armnn::Optional<QuantizationParams> qParams = params.m_QuantizeInput ?
+                                                      armnn::MakeOptional<QuantizationParams>(
+                                                          model.GetInputQuantizationParams()) :
+                                                      armnn::EmptyOptional();
+
+        for(unsigned int j = 0; j < params.m_SimultaneousIterations ; ++j)
         {
-            armnn::Optional<QuantizationParams> qParams = params.m_QuantizeInput ?
-                                                          armnn::MakeOptional<QuantizationParams>(
-                                                              model.GetInputQuantizationParams()) :
-                                                          armnn::EmptyOptional();
-
-            armnn::Optional<std::string> dataFile = params.m_GenerateTensorData ?
-                                                    armnn::EmptyOptional() :
-                                                    armnn::MakeOptional<std::string>(
-                                                        params.m_InputTensorDataFilePaths[i]);
-
-            unsigned int numElements = model.GetInputSize(i);
-            if (params.m_InputTensorShapes.size() > i && params.m_InputTensorShapes[i])
+            std::vector<TContainer> inputDataContainers;
+            for(unsigned int i = 0; i < numInputs; ++i)
             {
-                // If the user has provided a tensor shape for the current input,
-                // override numElements
-                numElements = params.m_InputTensorShapes[i]->GetNumElements();
-            }
+                armnn::Optional<std::string> dataFile = params.m_GenerateTensorData ?
+                                                        armnn::EmptyOptional() :
+                                                        armnn::MakeOptional<std::string>(
+                                                            params.m_InputTensorDataFilePaths[(j * numInputs) + i]);
 
-            TContainer tensorData;
-            PopulateTensorWithData(tensorData,
-                                   numElements,
-                                   params.m_InputTypes[i],
-                                   qParams,
-                                   dataFile);
+                unsigned int numElements = model.GetInputSize(i);
+                if (params.m_InputTensorShapes.size() > i && params.m_InputTensorShapes[i])
+                {
+                    // If the user has provided a tensor shape for the current input,
+                    // override numElements
+                    numElements = params.m_InputTensorShapes[i]->GetNumElements();
+                }
+
+                TContainer tensorData;
+                PopulateTensorWithData(tensorData,
+                                       numElements,
+                                       params.m_InputTypes[i],
+                                       qParams,
+                                       dataFile);
 
-            inputDataContainers.push_back(tensorData);
+                inputDataContainers.push_back(tensorData);
+            }
+            inputs.push_back(inputDataContainers);
         }
 
         const size_t numOutputs = inferenceModelParams.m_OutputBindings.size();
-        std::vector<TContainer> outputDataContainers;
 
-        for (unsigned int i = 0; i < numOutputs; ++i)
+        for (unsigned int j = 0; j < params.m_SimultaneousIterations; ++j)
         {
-            if (params.m_OutputTypes[i].compare("float") == 0)
-            {
-                outputDataContainers.push_back(std::vector<float>(model.GetOutputSize(i)));
-            }
-            else if (params.m_OutputTypes[i].compare("int") == 0)
-            {
-                outputDataContainers.push_back(std::vector<int>(model.GetOutputSize(i)));
-            }
-            else if (params.m_OutputTypes[i].compare("qasymm8") == 0)
-            {
-                outputDataContainers.push_back(std::vector<uint8_t>(model.GetOutputSize(i)));
-            }
-            else if (params.m_OutputTypes[i].compare("qsymms8") == 0)
+            std::vector<TContainer> outputDataContainers;
+            for (unsigned int i = 0; i < numOutputs; ++i)
             {
-                outputDataContainers.push_back(std::vector<int8_t>(model.GetOutputSize(i)));
-            }
-            else
-            {
-                ARMNN_LOG(fatal) << "Unsupported tensor data type \"" << params.m_OutputTypes[i] << "\". ";
-                return EXIT_FAILURE;
+                if (params.m_OutputTypes[i].compare("float") == 0)
+                {
+                    outputDataContainers.push_back(std::vector<float>(model.GetOutputSize(i)));
+                } else if (params.m_OutputTypes[i].compare("int") == 0)
+                {
+                    outputDataContainers.push_back(std::vector<int>(model.GetOutputSize(i)));
+                } else if (params.m_OutputTypes[i].compare("qasymm8") == 0)
+                {
+                    outputDataContainers.push_back(std::vector<uint8_t>(model.GetOutputSize(i)));
+                } else if (params.m_OutputTypes[i].compare("qsymms8") == 0)
+                {
+                    outputDataContainers.push_back(std::vector<int8_t>(model.GetOutputSize(i)));
+                } else
+                {
+                    ARMNN_LOG(fatal) << "Unsupported tensor data type \"" << params.m_OutputTypes[i] << "\". ";
+                    return EXIT_FAILURE;
+                }
             }
+            outputs.push_back(outputDataContainers);
         }
 
-        for (size_t x = 0; x < params.m_Iterations; x++)
-        {
-            // model.Run returns the inference time elapsed in EnqueueWorkload (in milliseconds)
-            auto inference_duration = model.Run(inputDataContainers, outputDataContainers);
-
-            if (params.m_GenerateTensorData)
-            {
-                ARMNN_LOG(warning) << "The input data was generated, note that the output will not be useful";
-            }
-
-            // Print output tensors
-            const auto& infosOut = model.GetOutputBindingInfos();
-            for (size_t i = 0; i < numOutputs; i++)
-            {
-                const armnn::TensorInfo& infoOut = infosOut[i].second;
-                auto outputTensorFile = params.m_OutputTensorFiles.empty() ? "" : params.m_OutputTensorFiles[i];
-
-                TensorPrinter printer(inferenceModelParams.m_OutputBindings[i],
-                                      infoOut,
-                                      outputTensorFile,
-                                      params.m_DequantizeOutput);
-                mapbox::util::apply_visitor(printer, outputDataContainers[i]);
-            }
-
-            ARMNN_LOG(info) << "\nInference time: " << std::setprecision(2)
-                            << std::fixed << inference_duration.count() << " ms\n";
-
-            // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line
-            if (params.m_ThresholdTime != 0.0)
-            {
-                ARMNN_LOG(info) << "Threshold time: " << std::setprecision(2)
-                                << std::fixed << params.m_ThresholdTime << " ms";
-                auto thresholdMinusInference = params.m_ThresholdTime - inference_duration.count();
-                ARMNN_LOG(info) << "Threshold time - Inference time: " << std::setprecision(2)
-                                << std::fixed << thresholdMinusInference << " ms" << "\n";
-
-                if (thresholdMinusInference < 0)
-                {
-                    std::string errorMessage = "Elapsed inference time is greater than provided threshold time.";
-                    ARMNN_LOG(fatal) << errorMessage;
-                }
-            }
-        }
"" : params.m_OutputTensorFiles[i]; - - TensorPrinter printer(inferenceModelParams.m_OutputBindings[i], - infoOut, - outputTensorFile, - params.m_DequantizeOutput); - mapbox::util::apply_visitor(printer, outputDataContainers[i]); - } + if (params.m_GenerateTensorData) + { + ARMNN_LOG(warning) << "The input data was generated, note that the output will not be useful"; + } + + // Print output tensors + const auto& infosOut = model.GetOutputBindingInfos(); + for (size_t i = 0; i < numOutputs; i++) + { + const armnn::TensorInfo& infoOut = infosOut[i].second; + auto outputTensorFile = params.m_OutputTensorFiles.empty() ? "" : params.m_OutputTensorFiles[i]; + + TensorPrinter printer(inferenceModelParams.m_OutputBindings[i], + infoOut, + outputTensorFile, + params.m_DequantizeOutput); + mapbox::util::apply_visitor(printer, outputs[0][i]); + } - ARMNN_LOG(info) << "\nInference time: " << std::setprecision(2) - << std::fixed << inference_duration.count() << " ms\n"; + ARMNN_LOG(info) << "\nInference time: " << std::setprecision(2) + << std::fixed << inference_duration.count() << " ms\n"; - // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line - if (params.m_ThresholdTime != 0.0) + // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line + if (params.m_ThresholdTime != 0.0) + { + ARMNN_LOG(info) << "Threshold time: " << std::setprecision(2) + << std::fixed << params.m_ThresholdTime << " ms"; + auto thresholdMinusInference = params.m_ThresholdTime - inference_duration.count(); + ARMNN_LOG(info) << "Threshold time - Inference time: " << std::setprecision(2) + << std::fixed << thresholdMinusInference << " ms" << "\n"; + + if (thresholdMinusInference < 0) + { + std::string errorMessage = "Elapsed inference time is greater than provided threshold time."; + ARMNN_LOG(fatal) << errorMessage; + } + } + } + } + else + { + try { - ARMNN_LOG(info) << "Threshold time: " << std::setprecision(2) - << std::fixed << params.m_ThresholdTime << " ms"; - auto thresholdMinusInference = params.m_ThresholdTime - inference_duration.count(); - ARMNN_LOG(info) << "Threshold time - Inference time: " << std::setprecision(2) - << std::fixed << thresholdMinusInference << " ms" << "\n"; + ARMNN_LOG(info) << "Asynchronous Execution... 
\n"; + std::vector>>> inferenceResults; + inferenceResults.reserve(params.m_SimultaneousIterations); + + // Create WorkingMemHandles for each inference + std::vector> workingMemHandles; + workingMemHandles.reserve(params.m_SimultaneousIterations); + for (unsigned int i = 0; i < params.m_SimultaneousIterations; ++i) + { + workingMemHandles.push_back(model.CreateWorkingMemHandle()); + } + + // Run each inference in its own thread + for (unsigned int i = 0; i < params.m_SimultaneousIterations; ++i) + { + armnn::experimental::IWorkingMemHandle& workingMemHandleRef = *workingMemHandles[i].get(); + inferenceResults.push_back(std::async( + std::launch::async, [&model, &workingMemHandleRef, &inputs, &outputs, i]() { + return model.RunAsync(workingMemHandleRef, inputs[i], outputs[i]); + } + )); + } - if (thresholdMinusInference < 0) + // Check the results + for (unsigned int j = 0; j < inferenceResults.size(); ++j) { - std::string errorMessage = "Elapsed inference time is greater than provided threshold time."; - ARMNN_LOG(fatal) << errorMessage; + // Get the results + auto inferenceResult = inferenceResults[j].get(); + auto inference_duration = std::get<1>(inferenceResult); + auto inferenceID = std::get<0>(inferenceResult); + + if (params.m_GenerateTensorData) + { + ARMNN_LOG(warning) << "The input data was generated, note that the output will not be useful"; + } + + // Print output tensors + const auto& infosOut = model.GetOutputBindingInfos(); + for (size_t i = 0; i < numOutputs; i++) + { + const armnn::TensorInfo& infoOut = infosOut[i].second; + auto outputTensorFile = params.m_OutputTensorFiles.empty() + ? "" + : params.m_OutputTensorFiles[(j * numOutputs) + i]; + + TensorPrinter printer(inferenceModelParams.m_OutputBindings[i], + infoOut, + outputTensorFile, + params.m_DequantizeOutput); + mapbox::util::apply_visitor(printer, outputs[j][i]); + } + + ARMNN_LOG(info) << "\nInference time: " << std::setprecision(2) + << std::fixed << inference_duration.count() << " ms\n"; + + // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line + if (params.m_ThresholdTime != 0.0) + { + ARMNN_LOG(info) << "Threshold time: " << std::setprecision(2) + << std::fixed << params.m_ThresholdTime << " ms"; + auto thresholdMinusInference = params.m_ThresholdTime - inference_duration.count(); + ARMNN_LOG(info) << "Threshold time - Inference time: " << std::setprecision(2) + << std::fixed << thresholdMinusInference << " ms" << "\n"; + + if (thresholdMinusInference < 0) + { + ARMNN_LOG(fatal) << "Elapsed inference time is greater than provided threshold time. \n"; + } + } + ARMNN_LOG(info) << "Asynchronous Execution is finished for Inference ID: " << inferenceID << " \n"; + } } + catch (const armnn::Exception& e) + { + ARMNN_LOG(fatal) << "Armnn Error: " << e.what(); + return EXIT_FAILURE; + } + } } catch (const armnn::Exception& e) diff --git a/tests/ExecuteNetwork/ExecuteNetworkParams.cpp b/tests/ExecuteNetwork/ExecuteNetworkParams.cpp index 4e3b5e313d..8f1cb0b599 100644 --- a/tests/ExecuteNetwork/ExecuteNetworkParams.cpp +++ b/tests/ExecuteNetwork/ExecuteNetworkParams.cpp @@ -145,6 +145,12 @@ void ExecuteNetworkParams::ValidateParams() CheckModelFormat(m_ModelFormat); + // Check number of simultaneous iterations + if ((m_SimultaneousIterations < 1)) + { + ARMNN_LOG(fatal) << "simultaneous-iterations cannot be less than 1. 
"; + } + // Check input tensor shapes if ((m_InputTensorShapes.size() != 0) && (m_InputTensorShapes.size() != m_InputNames.size())) @@ -159,10 +165,19 @@ void ExecuteNetworkParams::ValidateParams() ARMNN_LOG(fatal) << "One or more input data file paths are not valid. "; } - if (m_InputTensorDataFilePaths.size() != m_InputNames.size()) + if (!m_Concurrent && m_InputTensorDataFilePaths.size() != m_InputNames.size()) { ARMNN_LOG(fatal) << "input-name and input-tensor-data must have the same amount of elements. "; } + + if (m_InputTensorDataFilePaths.size() < m_SimultaneousIterations * m_InputNames.size()) + { + ARMNN_LOG(fatal) << "There is not enough input data for " << m_SimultaneousIterations << " execution."; + } + if (m_InputTensorDataFilePaths.size() > m_SimultaneousIterations * m_InputNames.size()) + { + ARMNN_LOG(fatal) << "There is more input data for " << m_SimultaneousIterations << " execution."; + } } if ((m_OutputTensorFiles.size() != 0) && @@ -171,6 +186,12 @@ void ExecuteNetworkParams::ValidateParams() ARMNN_LOG(fatal) << "output-name and write-outputs-to-file must have the same amount of elements. "; } + if ((m_OutputTensorFiles.size() != 0) + && m_OutputTensorFiles.size() != m_SimultaneousIterations * m_OutputNames.size()) + { + ARMNN_LOG(fatal) << "There is not enough output data for " << m_SimultaneousIterations << " execution."; + } + if (m_InputTypes.size() == 0) { //Defaults the value of all inputs to "float" diff --git a/tests/ExecuteNetwork/ExecuteNetworkParams.hpp b/tests/ExecuteNetwork/ExecuteNetworkParams.hpp index a19eaa9346..c325df110f 100644 --- a/tests/ExecuteNetwork/ExecuteNetworkParams.hpp +++ b/tests/ExecuteNetwork/ExecuteNetworkParams.hpp @@ -23,6 +23,7 @@ struct ExecuteNetworkParams std::string m_CachedNetworkFilePath; std::vector m_ComputeDevices; + bool m_Concurrent; bool m_DequantizeOutput; std::string m_DynamicBackendsPath; bool m_EnableBf16TurboMode; @@ -49,6 +50,7 @@ struct ExecuteNetworkParams bool m_PrintIntermediate; bool m_QuantizeInput; bool m_SaveCachedNetwork; + size_t m_SimultaneousIterations; size_t m_SubgraphId; double m_ThresholdTime; int m_TuningLevel; diff --git a/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp b/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp index 286c970d72..042087e4f4 100644 --- a/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp +++ b/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp @@ -194,6 +194,10 @@ ProgramOptions::ProgramOptions() : m_CxxOptions{"ExecuteNetwork", "If left empty (the default), dynamic backends will not be used.", cxxopts::value(m_RuntimeOptions.m_DynamicBackendsPath)) + ("n,concurrent", + "If this option is enabled inferences will be executed in parallel asynchronously.", + cxxopts::value(m_ExNetParams.m_Concurrent)->default_value("false")->implicit_value("true")) + ("d,input-tensor-data", "Path to files containing the input data as a flat array separated by whitespace. " "Several paths can be passed by separating them with a comma. If not specified, the network will be " @@ -278,7 +282,11 @@ ProgramOptions::ProgramOptions() : m_CxxOptions{"ExecuteNetwork", ("D,armnn-tflite-delegate", "Enable Arm NN TfLite delegate. 
" "This option is depreciated please use tflite-executor instead", - cxxopts::value(m_ExNetParams.m_EnableDelegate)->default_value("false")->implicit_value("true")); + cxxopts::value(m_ExNetParams.m_EnableDelegate)->default_value("false")->implicit_value("true")) + + ("simultaneous-iterations", + "Number of simultaneous iterations to async-run the network for, default is set to 1", + cxxopts::value(m_ExNetParams.m_SimultaneousIterations)->default_value("1")); m_CxxOptions.add_options("c) Optimization") ("bf16-turbo-mode", -- cgit v1.2.1