From b4b3ac91990eb5deaffca2300319f2ddf7aa0886 Mon Sep 17 00:00:00 2001
From: Kevin May
Date: Fri, 21 May 2021 16:42:21 +0100
Subject: IVGCVSW-6009 Integrate threadpool into ExNet

* Remove concurrent flag from ExecuteNetwork as it is possible to deduce if
  SimultaneousIterations > 1
* Add void RunAsync()
* Refactor some unit tests

Change-Id: I7021d4821b0e460470908294cbd9462850e8b361
Signed-off-by: Keith Davis
Signed-off-by: Kevin May
---
 tests/ExecuteNetwork/ExecuteNetwork.cpp | 130 +++++++++++++++++++++++++++++---
 1 file changed, 121 insertions(+), 9 deletions(-)

(limited to 'tests/ExecuteNetwork/ExecuteNetwork.cpp')

diff --git a/tests/ExecuteNetwork/ExecuteNetwork.cpp b/tests/ExecuteNetwork/ExecuteNetwork.cpp
index 2bbb51783c..cd760a8199 100644
--- a/tests/ExecuteNetwork/ExecuteNetwork.cpp
+++ b/tests/ExecuteNetwork/ExecuteNetwork.cpp
@@ -5,6 +5,8 @@
 #include "NetworkExecutionUtils/NetworkExecutionUtils.hpp"
 #include "ExecuteNetworkProgramOptions.hpp"
 
+#include
+#include
 #include
 #include
 
@@ -276,8 +278,7 @@ template
 int MainImpl(const ExecuteNetworkParams& params,
              const std::shared_ptr<armnn::IRuntime>& runtime = nullptr)
 {
-    using TContainer =
-        mapbox::util::variant<std::vector<float>, std::vector<int>, std::vector<unsigned char>, std::vector<int8_t>>;
+    using namespace std::chrono;
 
     std::vector<std::vector<TContainer>> inputs;
     std::vector<std::vector<TContainer>> outputs;
@@ -300,6 +301,7 @@ int MainImpl(const ExecuteNetworkParams& params,
     inferenceModelParams.m_NumberOfThreads    = params.m_NumberOfThreads;
     inferenceModelParams.m_MLGOTuningFilePath = params.m_MLGOTuningFilePath;
     inferenceModelParams.m_AsyncEnabled       = params.m_Concurrent;
+    inferenceModelParams.m_ThreadPoolSize     = params.m_ThreadPoolSize;
 
     for(const std::string& inputName: params.m_InputNames)
     {
@@ -390,9 +392,9 @@ int MainImpl(const ExecuteNetworkParams& params,
         outputs.push_back(outputDataContainers);
     }
 
+    // Synchronous execution
     if (!params.m_Concurrent)
     {
-        // Synchronous Execution
         for (size_t x = 0; x < params.m_Iterations; x++)
         {
             // model.Run returns the inference time elapsed in EnqueueWorkload (in milliseconds)
@@ -437,13 +439,118 @@ int MainImpl(const ExecuteNetworkParams& params,
             }
         }
     }
+    // Asynchronous execution using the Arm NN thread pool
+    else if (params.m_ThreadPoolSize >= 2)
+    {
+        try
+        {
+            ARMNN_LOG(info) << "Asynchronous execution with Arm NN thread pool...  \n";
\n"; + std::vector callbacks; + + // Create callbacks that will be checked post scheduling + for (size_t i = 0; i < params.m_SimultaneousIterations; ++i) + { + // Point to ArmNN example implementation of AsyncExecutionCallback + callbacks.emplace_back(std::make_shared()); + } + + // Declare the latest and earliest inference times here to be used when calculating overall time + std::chrono::high_resolution_clock::time_point earliestStartTime; + std::chrono::high_resolution_clock::time_point latestEndTime = + std::chrono::high_resolution_clock::now(); + + // For the asynchronous execution, we are adding a pool of working memory handles (1 per thread) in the + // LoadedNetwork with each scheduled inference having a specific priority + for (size_t i = 0; i < callbacks.size(); ++i) + { + model.RunAsync(inputs[i], outputs[i], callbacks[i]); + } + + // Check the results + unsigned int j = 0; + for (armnn::experimental::IAsyncExecutionCallbackPtr cb : callbacks) + { + // Get the results + auto endTime = time_point_cast(cb->GetEndTime()); + auto startTime = time_point_cast(cb->GetStartTime()); + auto inferenceDuration = endTime - startTime; + + if (latestEndTime < cb->GetEndTime()) + { + latestEndTime = cb->GetEndTime(); + } + + if (earliestStartTime.time_since_epoch().count() == 0) + { + earliestStartTime = cb->GetStartTime(); + } + else if (earliestStartTime > cb->GetStartTime()) + { + earliestStartTime = cb->GetStartTime(); + } + + if (params.m_GenerateTensorData) + { + ARMNN_LOG(warning) << "The input data was generated, note that the output will not be useful"; + } + + // Print output tensors + const auto& infosOut = model.GetOutputBindingInfos(); + for (size_t i = 0; i < numOutputs; i++) + { + const armnn::TensorInfo& infoOut = infosOut[i].second; + auto outputTensorFile = params.m_OutputTensorFiles.empty() + ? "" + : params.m_OutputTensorFiles[(j * numOutputs) + i]; + + TensorPrinter printer(inferenceModelParams.m_OutputBindings[i], + infoOut, + outputTensorFile, + params.m_DequantizeOutput); + mapbox::util::apply_visitor(printer, outputs[j][i]); + } + + ARMNN_LOG(info) << "\nInference time: " << std::setprecision(2) + << std::fixed << inferenceDuration.count() << " ms\n"; + + // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line + if (params.m_ThresholdTime != 0.0) + { + ARMNN_LOG(info) << "Threshold time: " << std::setprecision(2) + << std::fixed << params.m_ThresholdTime << " ms"; + auto thresholdMinusInference = + params.m_ThresholdTime - duration(inferenceDuration).count(); + ARMNN_LOG(info) << "Threshold time - Inference time: " << std::setprecision(2) + << std::fixed << thresholdMinusInference << " ms" << "\n"; + + if (thresholdMinusInference < 0) + { + ARMNN_LOG(fatal) << "Elapsed inference time is greater than provided threshold time. \n"; + } + } + ++j; + } + //print duration difference between overallStartTime and overallEndTime + auto overallEndTime = time_point_cast(latestEndTime); + auto overallStartTime = time_point_cast(earliestStartTime); + auto totalInferenceDuration = overallEndTime - overallStartTime; + ARMNN_LOG(info) << "\nOverall Inference time: " << std::setprecision(2) + << std::fixed << totalInferenceDuration.count() << " ms\n"; + } + catch (const armnn::Exception& e) + { + ARMNN_LOG(fatal) << "Armnn Error: " << e.what(); + return EXIT_FAILURE; + } + } + // Asynchronous execution using std::launch::async else { try { - ARMNN_LOG(info) << "Asynchronous Execution... 
\n"; + ARMNN_LOG(info) << "Asynchronous Execution with std::launch:async... \n"; std::vector>>> inferenceResults; + std::chrono::duration>>> inferenceResults; inferenceResults.reserve(params.m_SimultaneousIterations); // Create WorkingMemHandles for each inference @@ -455,6 +562,8 @@ int MainImpl(const ExecuteNetworkParams& params, } // Run each inference in its own thread + // start a timer + const auto start_time = armnn::GetTimeNow(); for (unsigned int i = 0; i < params.m_SimultaneousIterations; ++i) { armnn::experimental::IWorkingMemHandle& workingMemHandleRef = *workingMemHandles[i].get(); @@ -470,7 +579,7 @@ int MainImpl(const ExecuteNetworkParams& params, { // Get the results auto inferenceResult = inferenceResults[j].get(); - auto inference_duration = std::get<1>(inferenceResult); + auto inferenceDuration = std::get<1>(inferenceResult); auto inferenceID = std::get<0>(inferenceResult); if (params.m_GenerateTensorData) @@ -495,14 +604,14 @@ int MainImpl(const ExecuteNetworkParams& params, } ARMNN_LOG(info) << "\nInference time: " << std::setprecision(2) - << std::fixed << inference_duration.count() << " ms\n"; + << std::fixed << inferenceDuration.count() << " ms\n"; // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line if (params.m_ThresholdTime != 0.0) { ARMNN_LOG(info) << "Threshold time: " << std::setprecision(2) << std::fixed << params.m_ThresholdTime << " ms"; - auto thresholdMinusInference = params.m_ThresholdTime - inference_duration.count(); + auto thresholdMinusInference = params.m_ThresholdTime - inferenceDuration.count(); ARMNN_LOG(info) << "Threshold time - Inference time: " << std::setprecision(2) << std::fixed << thresholdMinusInference << " ms" << "\n"; @@ -514,13 +623,16 @@ int MainImpl(const ExecuteNetworkParams& params, ARMNN_LOG(info) << "Asynchronous Execution is finished for Inference ID: " << inferenceID << " \n"; } + // finish timer + const auto duration = armnn::GetTimeDuration(start_time); + ARMNN_LOG(info) << "\nOverall Inference time: " << std::setprecision(2) + << std::fixed << duration.count() << " ms\n"; } catch (const armnn::Exception& e) { ARMNN_LOG(fatal) << "Armnn Error: " << e.what(); return EXIT_FAILURE; } - } } catch (const armnn::Exception& e) -- cgit v1.2.1