From 7b4886faccb52af9afe7fdeffcbae87e7fbc1484 Mon Sep 17 00:00:00 2001
From: James Conroy
Date: Thu, 11 Apr 2019 10:23:58 +0100
Subject: IVGCVSW-2543 Add timing for ExecuteNetwork inference

* Adds a new command line option 'threshold-time' to ExecuteNetwork,
  the maximum allowed time for inference in EnqueueWorkload.
* ExecuteNetwork now outputs inference time elapsed and (if supplied)
  threshold time.
* If actual elapsed inference time is greater than supplied threshold
  time, fail the test.

Change-Id: If441b49a29cf5450687c07500c9046a80ece56fc
Signed-off-by: James Conroy
---
 tests/ExecuteNetwork/ExecuteNetwork.cpp | 72 ++++++++++++++++++++++++++-------
 tests/InferenceModel.hpp                | 27 ++++++++++++-
 2 files changed, 83 insertions(+), 16 deletions(-)

(limited to 'tests')

diff --git a/tests/ExecuteNetwork/ExecuteNetwork.cpp b/tests/ExecuteNetwork/ExecuteNetwork.cpp
index fac334e75e..9ac66d1cd3 100644
--- a/tests/ExecuteNetwork/ExecuteNetwork.cpp
+++ b/tests/ExecuteNetwork/ExecuteNetwork.cpp
@@ -205,6 +205,7 @@ int MainImpl(const char* modelPath,
              const std::vector<std::string>& outputNames,
              bool enableProfiling,
              bool enableFp16TurboMode,
+             const double& thresholdTime,
              const size_t subgraphId,
              const std::shared_ptr<armnn::IRuntime>& runtime = nullptr)
 {
@@ -295,7 +296,8 @@ int MainImpl(const char* modelPath,
             }
         }
 
-        model.Run(inputDataContainers, outputDataContainers);
+        // model.Run returns the inference time elapsed in EnqueueWorkload (in milliseconds)
+        auto inference_duration = model.Run(inputDataContainers, outputDataContainers);
 
         // Print output tensors
         for (size_t i = 0; i < numOutputs; i++)
@@ -311,6 +313,27 @@ int MainImpl(const char* modelPath,
             },
             outputDataContainers[i]);
         }
+
+        BOOST_LOG_TRIVIAL(info) << "\nInference time: " << std::setprecision(2)
+                                << std::fixed << inference_duration.count() << " ms";
+
+        // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line
+        if (thresholdTime != 0.0)
+        {
+            BOOST_LOG_TRIVIAL(info) << "Threshold time: " << std::setprecision(2)
+                                    << std::fixed << thresholdTime << " ms";
+            auto thresholdMinusInference = thresholdTime - inference_duration.count();
+            BOOST_LOG_TRIVIAL(info) << "Threshold time - Inference time: " << std::setprecision(2)
+                                    << std::fixed << thresholdMinusInference << " ms" << "\n";
+
+            if (thresholdMinusInference < 0)
+            {
+                BOOST_LOG_TRIVIAL(fatal) << "Elapsed inference time is greater than provided threshold time.\n";
+                return EXIT_FAILURE;
+            }
+        }
+
+
     }
     catch (armnn::Exception const& e)
     {
@@ -333,6 +356,7 @@ int RunTest(const std::string& format,
             const std::string& outputNames,
             bool enableProfiling,
             bool enableFp16TurboMode,
+            const double& thresholdTime,
             const size_t subgraphId,
             const std::shared_ptr<armnn::IRuntime>& runtime = nullptr)
 {
@@ -416,6 +440,13 @@ int RunTest(const std::string& format,
         }
     }
 
+    // Check that threshold time is not less than zero
+    if (thresholdTime < 0)
+    {
+        BOOST_LOG_TRIVIAL(fatal) << "Threshold time supplied as a command line argument is less than zero.";
+        return EXIT_FAILURE;
+    }
+
     // Forward to implementation based on the parser type
     if (modelFormat.find("armnn") != std::string::npos)
     {
@@ -423,8 +454,9 @@ int RunTest(const std::string& format,
         return MainImpl(
             modelPath.c_str(), isModelBinary, computeDevice,
             inputNamesVector, inputTensorShapes,
-            inputTensorDataFilePathsVector, inputTypesVector, outputTypesVector,
-            outputNamesVector, enableProfiling, enableFp16TurboMode, subgraphId, runtime);
+            inputTensorDataFilePathsVector, inputTypesVector,
+            outputTypesVector, outputNamesVector, enableProfiling,
+            enableFp16TurboMode, thresholdTime, subgraphId, runtime);
 #else
     BOOST_LOG_TRIVIAL(fatal) << "Not built with serialization support.";
     return EXIT_FAILURE;
@@ -437,7 +469,7 @@ int RunTest(const std::string& format,
             inputNamesVector, inputTensorShapes,
             inputTensorDataFilePathsVector, inputTypesVector,
             outputTypesVector, outputNamesVector, enableProfiling,
-            enableFp16TurboMode, subgraphId, runtime);
+            enableFp16TurboMode, thresholdTime, subgraphId, runtime);
 #else
     BOOST_LOG_TRIVIAL(fatal) << "Not built with Caffe parser support.";
     return EXIT_FAILURE;
@@ -450,7 +482,7 @@ int RunTest(const std::string& format,
             inputNamesVector, inputTensorShapes,
             inputTensorDataFilePathsVector, inputTypesVector,
             outputTypesVector, outputNamesVector, enableProfiling,
-            enableFp16TurboMode, subgraphId, runtime);
+            enableFp16TurboMode, thresholdTime, subgraphId, runtime);
 #else
     BOOST_LOG_TRIVIAL(fatal) << "Not built with Onnx parser support.";
     return EXIT_FAILURE;
@@ -463,7 +495,7 @@ int RunTest(const std::string& format,
             inputNamesVector, inputTensorShapes,
             inputTensorDataFilePathsVector, inputTypesVector,
             outputTypesVector, outputNamesVector, enableProfiling,
-            enableFp16TurboMode, subgraphId, runtime);
+            enableFp16TurboMode, thresholdTime, subgraphId, runtime);
 #else
     BOOST_LOG_TRIVIAL(fatal) << "Not built with Tensorflow parser support.";
     return EXIT_FAILURE;
@@ -482,7 +514,8 @@ int RunTest(const std::string& format,
             inputNamesVector, inputTensorShapes,
             inputTensorDataFilePathsVector, inputTypesVector,
             outputTypesVector, outputNamesVector, enableProfiling,
-            enableFp16TurboMode, subgraphId, runtime);
+            enableFp16TurboMode, thresholdTime, subgraphId,
+            runtime);
 #else
     BOOST_LOG_TRIVIAL(fatal) << "Unknown model format: '" << modelFormat <<
                                 "'. Please include 'caffe', 'tensorflow', 'tflite' or 'onnx'";
@@ -498,7 +531,7 @@ int RunTest(const std::string& format,
 }
 
 int RunCsvTest(const armnnUtils::CsvRow &csvRow, const std::shared_ptr<armnn::IRuntime>& runtime,
-               const bool enableProfiling, const bool enableFp16TurboMode)
+               const bool enableProfiling, const bool enableFp16TurboMode, const double& thresholdTime)
 {
     std::string modelFormat;
     std::string modelPath;
@@ -593,10 +626,12 @@ int RunCsvTest(const armnnUtils::CsvRow &csvRow, const std::shared_ptr<armnn::IRuntime>& runtime,
         ("event-based-profiling,e", po::bool_switch()->default_value(false),
          "Enables built in profiler. If unset, defaults to off.")
         ("fp16-turbo-mode,h", po::bool_switch()->default_value(false), "If this option is enabled, FP32 layers, "
-         "weights and biases will be converted to FP16 where the backend supports it");
+         "weights and biases will be converted to FP16 where the backend supports it")
+        ("threshold-time,r", po::value(&thresholdTime)->default_value(0.0),
+         "Threshold time is the maximum allowed time for inference measured in milliseconds. If the actual "
+         "inference time is greater than the threshold time, the test will fail. By default, no threshold "
+         "time is used.");
 }
 catch (const std::exception& e)
 {
@@ -745,7 +786,7 @@ int main(int argc, const char* argv[])
         {
             testCase.values.insert(testCase.values.begin(), executableName);
             results.push_back(std::async(std::launch::async, RunCsvTest, std::cref(testCase), std::cref(runtime),
-                                         enableProfiling, enableFp16TurboMode));
+                                         enableProfiling, enableFp16TurboMode, thresholdTime));
         }
 
         // Check results
@@ -763,7 +804,7 @@ int main(int argc, const char* argv[])
         for (auto& testCase : testCases)
         {
             testCase.values.insert(testCase.values.begin(), executableName);
-            if (RunCsvTest(testCase, runtime, enableProfiling, enableFp16TurboMode) != EXIT_SUCCESS)
+            if (RunCsvTest(testCase, runtime, enableProfiling, enableFp16TurboMode, thresholdTime) != EXIT_SUCCESS)
             {
                 return EXIT_FAILURE;
             }
@@ -804,7 +845,8 @@ int main(int argc, const char* argv[])
             return EXIT_FAILURE;
         }
 
-        return RunTest(modelFormat, inputTensorShapes, computeDevices, modelPath, inputNames, inputTensorDataFilePaths,
-                       inputTypes, outputTypes, outputNames, enableProfiling, enableFp16TurboMode, subgraphId);
+        return RunTest(modelFormat, inputTensorShapes, computeDevices, modelPath, inputNames,
+                       inputTensorDataFilePaths, inputTypes, outputTypes, outputNames,
+                       enableProfiling, enableFp16TurboMode, thresholdTime, subgraphId);
     }
 }

diff --git a/tests/InferenceModel.hpp b/tests/InferenceModel.hpp
index 25ccbee45a..e168923048 100644
--- a/tests/InferenceModel.hpp
+++ b/tests/InferenceModel.hpp
@@ -30,6 +30,7 @@
 
 #include
 #include
+#include <chrono>
 #include
 #include
 #include
@@ -506,7 +507,9 @@ public:
         return m_OutputBindings[outputIndex].second.GetNumElements();
     }
 
-    void Run(const std::vector<TContainer>& inputContainers, std::vector<TContainer>& outputContainers)
+    std::chrono::duration<double, std::milli> Run(
+            const std::vector<TContainer>& inputContainers,
+            std::vector<TContainer>& outputContainers)
     {
         for (unsigned int i = 0; i < outputContainers.size(); ++i)
         {
@@ -532,10 +535,15 @@ public:
             profiler->EnableProfiling(m_EnableProfiling);
         }
 
+        // Start timer to record inference time in EnqueueWorkload (in milliseconds)
+        const auto start_time = GetCurrentTime();
+
         armnn::Status ret = m_Runtime->EnqueueWorkload(m_NetworkIdentifier,
                                                        MakeInputTensors(inputContainers),
                                                        MakeOutputTensors(outputContainers));
 
+        const auto end_time = GetCurrentTime();
+
         // if profiling is enabled print out the results
         if (profiler && profiler->IsProfilingEnabled())
         {
@@ -546,6 +554,10 @@
         {
             throw armnn::Exception("IRuntime::EnqueueWorkload failed");
         }
+        else
+        {
+            return std::chrono::duration<double, std::milli>(end_time - start_time);
+        }
     }
 
     const BindingPointInfo& GetInputBindingInfo(unsigned int inputIndex = 0u) const
@@ -613,4 +625,17 @@ private:
     {
         return ::MakeOutputTensors(m_OutputBindings, outputDataContainers);
     }
+
+    std::chrono::high_resolution_clock::time_point GetCurrentTime()
+    {
+        return std::chrono::high_resolution_clock::now();
+    }
+
+    std::chrono::duration<double, std::milli> GetTimeDuration(
+            std::chrono::high_resolution_clock::time_point& start_time,
+            std::chrono::high_resolution_clock::time_point& end_time)
+    {
+        return std::chrono::duration<double, std::milli>(end_time - start_time);
+    }
+
 };
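
The core of the patch is the std::chrono timing wrapped around the single EnqueueWorkload call and the threshold comparison in MainImpl. The standalone sketch below reproduces that pattern in isolation so it can be compiled and run on its own; it is not part of the patch or of the Arm NN API. DummyInference and the 150.0 ms threshold are invented stand-ins for InferenceModel::Run and a value passed via --threshold-time.

#include <chrono>
#include <cstdlib>
#include <iostream>
#include <thread>

// Invented stand-in for the timed work (InferenceModel::Run / EnqueueWorkload in the patch).
void DummyInference()
{
    std::this_thread::sleep_for(std::chrono::milliseconds(20));
}

// Mirrors the patch: read the clock immediately before and after the call,
// and return the elapsed time as a double count of milliseconds.
std::chrono::duration<double, std::milli> TimedRun()
{
    const auto start_time = std::chrono::high_resolution_clock::now();
    DummyInference();
    const auto end_time = std::chrono::high_resolution_clock::now();
    return std::chrono::duration<double, std::milli>(end_time - start_time);
}

int main()
{
    const double thresholdTime = 150.0; // example only; 0.0 would mean "no threshold supplied"
    const auto inference_duration = TimedRun();

    std::cout << "Inference time: " << inference_duration.count() << " ms\n";

    // Same rule as the patch: fail only when a threshold was supplied and exceeded.
    if (thresholdTime != 0.0 && thresholdTime - inference_duration.count() < 0)
    {
        std::cerr << "Elapsed inference time is greater than provided threshold time.\n";
        return EXIT_FAILURE;
    }
    return EXIT_SUCCESS;
}

Because the clock is read immediately around the call being measured, work done earlier, such as parsing and loading the network before Run is invoked, does not count towards the threshold, which is why the check targets inference time specifically.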
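
The command line option itself is ordinary Boost.Program_options usage, with the 0.0 default doubling as the "not supplied" sentinel. The sketch below shows a flag of the same shape being declared and read in a minimal program; only the option name, short form and default are taken from the patch, while the surrounding program (the "Options" description, the help flag) is illustrative.

#include <boost/program_options.hpp>
#include <cstdlib>
#include <iostream>

namespace po = boost::program_options;

int main(int argc, const char* argv[])
{
    double thresholdTime = 0.0;

    po::options_description desc("Options");
    desc.add_options()
        ("help", "Display usage information")
        // 0.0 doubles as the "no threshold" sentinel, as in the patch.
        ("threshold-time,r", po::value(&thresholdTime)->default_value(0.0),
         "Maximum allowed inference time in milliseconds; 0 disables the check.");

    po::variables_map vm;
    po::store(po::parse_command_line(argc, argv, desc), vm);
    po::notify(vm);

    if (vm.count("help"))
    {
        std::cout << desc << std::endl;
        return EXIT_SUCCESS;
    }

    std::cout << "threshold-time = " << thresholdTime << " ms" << std::endl;
    return EXIT_SUCCESS;
}

Binding the parsed value straight to a double via po::value(&thresholdTime) means no separate variables_map lookup is needed after po::notify, which matches how the patch feeds thresholdTime from main through RunTest and RunCsvTest down to MainImpl.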