From b4b3ac91990eb5deaffca2300319f2ddf7aa0886 Mon Sep 17 00:00:00 2001
From: Kevin May
Date: Fri, 21 May 2021 16:42:21 +0100
Subject: IVGCVSW-6009 Integrate threadpool into ExNet

* Remove concurrent flag from ExecuteNetwork as it is possible to deduce if
  SimultaneousIterations > 1
* Add void RunAsync()
* Refactor some unit tests

Change-Id: I7021d4821b0e460470908294cbd9462850e8b361
Signed-off-by: Keith Davis
Signed-off-by: Kevin May
---
 tests/ExecuteNetwork/ExecuteNetwork.cpp | 130 +++++++++++++++++++++++++++++---
 1 file changed, 121 insertions(+), 9 deletions(-)

(limited to 'tests/ExecuteNetwork/ExecuteNetwork.cpp')

diff --git a/tests/ExecuteNetwork/ExecuteNetwork.cpp b/tests/ExecuteNetwork/ExecuteNetwork.cpp
index 2bbb51783c..cd760a8199 100644
--- a/tests/ExecuteNetwork/ExecuteNetwork.cpp
+++ b/tests/ExecuteNetwork/ExecuteNetwork.cpp
@@ -5,6 +5,8 @@
 #include "NetworkExecutionUtils/NetworkExecutionUtils.hpp"
 #include "ExecuteNetworkProgramOptions.hpp"
 
+#include
+#include
 #include
 #include
 
@@ -276,8 +278,7 @@ template
 int MainImpl(const ExecuteNetworkParams& params,
              const std::shared_ptr<armnn::IRuntime>& runtime = nullptr)
 {
-    using TContainer =
-        mapbox::util::variant<std::vector<float>, std::vector<int>, std::vector<unsigned char>, std::vector<int8_t>>;
+    using namespace std::chrono;
 
     std::vector<std::vector<TContainer>> inputs;
     std::vector<std::vector<TContainer>> outputs;
@@ -300,6 +301,7 @@ int MainImpl(const ExecuteNetworkParams& params,
     inferenceModelParams.m_NumberOfThreads    = params.m_NumberOfThreads;
     inferenceModelParams.m_MLGOTuningFilePath = params.m_MLGOTuningFilePath;
     inferenceModelParams.m_AsyncEnabled       = params.m_Concurrent;
+    inferenceModelParams.m_ThreadPoolSize     = params.m_ThreadPoolSize;
 
     for(const std::string& inputName: params.m_InputNames)
     {
@@ -390,9 +392,9 @@ int MainImpl(const ExecuteNetworkParams& params,
         outputs.push_back(outputDataContainers);
     }
 
+    // Synchronous execution
     if (!params.m_Concurrent)
     {
-        // Synchronous Execution
         for (size_t x = 0; x < params.m_Iterations; x++)
         {
             // model.Run returns the inference time elapsed in EnqueueWorkload (in milliseconds)
@@ -437,13 +439,118 @@ int MainImpl(const ExecuteNetworkParams& params,
             }
         }
     }
+    // Asynchronous execution using the Arm NN thread pool
+    else if (params.m_ThreadPoolSize >= 2)
+    {
+        try
+        {
+            ARMNN_LOG(info) << "Asynchronous execution with Arm NN thread pool...  \n";
\n"; + std::vector callbacks; + + // Create callbacks that will be checked post scheduling + for (size_t i = 0; i < params.m_SimultaneousIterations; ++i) + { + // Point to ArmNN example implementation of AsyncExecutionCallback + callbacks.emplace_back(std::make_shared()); + } + + // Declare the latest and earliest inference times here to be used when calculating overall time + std::chrono::high_resolution_clock::time_point earliestStartTime; + std::chrono::high_resolution_clock::time_point latestEndTime = + std::chrono::high_resolution_clock::now(); + + // For the asynchronous execution, we are adding a pool of working memory handles (1 per thread) in the + // LoadedNetwork with each scheduled inference having a specific priority + for (size_t i = 0; i < callbacks.size(); ++i) + { + model.RunAsync(inputs[i], outputs[i], callbacks[i]); + } + + // Check the results + unsigned int j = 0; + for (armnn::experimental::IAsyncExecutionCallbackPtr cb : callbacks) + { + // Get the results + auto endTime = time_point_cast(cb->GetEndTime()); + auto startTime = time_point_cast(cb->GetStartTime()); + auto inferenceDuration = endTime - startTime; + + if (latestEndTime < cb->GetEndTime()) + { + latestEndTime = cb->GetEndTime(); + } + + if (earliestStartTime.time_since_epoch().count() == 0) + { + earliestStartTime = cb->GetStartTime(); + } + else if (earliestStartTime > cb->GetStartTime()) + { + earliestStartTime = cb->GetStartTime(); + } + + if (params.m_GenerateTensorData) + { + ARMNN_LOG(warning) << "The input data was generated, note that the output will not be useful"; + } + + // Print output tensors + const auto& infosOut = model.GetOutputBindingInfos(); + for (size_t i = 0; i < numOutputs; i++) + { + const armnn::TensorInfo& infoOut = infosOut[i].second; + auto outputTensorFile = params.m_OutputTensorFiles.empty() + ? "" + : params.m_OutputTensorFiles[(j * numOutputs) + i]; + + TensorPrinter printer(inferenceModelParams.m_OutputBindings[i], + infoOut, + outputTensorFile, + params.m_DequantizeOutput); + mapbox::util::apply_visitor(printer, outputs[j][i]); + } + + ARMNN_LOG(info) << "\nInference time: " << std::setprecision(2) + << std::fixed << inferenceDuration.count() << " ms\n"; + + // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line + if (params.m_ThresholdTime != 0.0) + { + ARMNN_LOG(info) << "Threshold time: " << std::setprecision(2) + << std::fixed << params.m_ThresholdTime << " ms"; + auto thresholdMinusInference = + params.m_ThresholdTime - duration(inferenceDuration).count(); + ARMNN_LOG(info) << "Threshold time - Inference time: " << std::setprecision(2) + << std::fixed << thresholdMinusInference << " ms" << "\n"; + + if (thresholdMinusInference < 0) + { + ARMNN_LOG(fatal) << "Elapsed inference time is greater than provided threshold time. \n"; + } + } + ++j; + } + //print duration difference between overallStartTime and overallEndTime + auto overallEndTime = time_point_cast(latestEndTime); + auto overallStartTime = time_point_cast(earliestStartTime); + auto totalInferenceDuration = overallEndTime - overallStartTime; + ARMNN_LOG(info) << "\nOverall Inference time: " << std::setprecision(2) + << std::fixed << totalInferenceDuration.count() << " ms\n"; + } + catch (const armnn::Exception& e) + { + ARMNN_LOG(fatal) << "Armnn Error: " << e.what(); + return EXIT_FAILURE; + } + } + // Asynchronous execution using std::launch::async else { try { - ARMNN_LOG(info) << "Asynchronous Execution... 
\n"; + ARMNN_LOG(info) << "Asynchronous Execution with std::launch:async... \n"; std::vector>>> inferenceResults; + std::chrono::duration>>> inferenceResults; inferenceResults.reserve(params.m_SimultaneousIterations); // Create WorkingMemHandles for each inference @@ -455,6 +562,8 @@ int MainImpl(const ExecuteNetworkParams& params, } // Run each inference in its own thread + // start a timer + const auto start_time = armnn::GetTimeNow(); for (unsigned int i = 0; i < params.m_SimultaneousIterations; ++i) { armnn::experimental::IWorkingMemHandle& workingMemHandleRef = *workingMemHandles[i].get(); @@ -470,7 +579,7 @@ int MainImpl(const ExecuteNetworkParams& params, { // Get the results auto inferenceResult = inferenceResults[j].get(); - auto inference_duration = std::get<1>(inferenceResult); + auto inferenceDuration = std::get<1>(inferenceResult); auto inferenceID = std::get<0>(inferenceResult); if (params.m_GenerateTensorData) @@ -495,14 +604,14 @@ int MainImpl(const ExecuteNetworkParams& params, } ARMNN_LOG(info) << "\nInference time: " << std::setprecision(2) - << std::fixed << inference_duration.count() << " ms\n"; + << std::fixed << inferenceDuration.count() << " ms\n"; // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line if (params.m_ThresholdTime != 0.0) { ARMNN_LOG(info) << "Threshold time: " << std::setprecision(2) << std::fixed << params.m_ThresholdTime << " ms"; - auto thresholdMinusInference = params.m_ThresholdTime - inference_duration.count(); + auto thresholdMinusInference = params.m_ThresholdTime - inferenceDuration.count(); ARMNN_LOG(info) << "Threshold time - Inference time: " << std::setprecision(2) << std::fixed << thresholdMinusInference << " ms" << "\n"; @@ -514,13 +623,16 @@ int MainImpl(const ExecuteNetworkParams& params, ARMNN_LOG(info) << "Asynchronous Execution is finished for Inference ID: " << inferenceID << " \n"; } + // finish timer + const auto duration = armnn::GetTimeDuration(start_time); + ARMNN_LOG(info) << "\nOverall Inference time: " << std::setprecision(2) + << std::fixed << duration.count() << " ms\n"; } catch (const armnn::Exception& e) { ARMNN_LOG(fatal) << "Armnn Error: " << e.what(); return EXIT_FAILURE; } - } } catch (const armnn::Exception& e) -- cgit v1.2.1