From 3cff15a5d8797d0afe5d5b1cb3ff0e5b7d1cd6c9 Mon Sep 17 00:00:00 2001 From: Colm Donelan Date: Tue, 12 Oct 2021 15:06:19 +0100 Subject: IVGCVSW-5879 Pass the execute network parameters to the TfLiteDelegate. * Introduce a mechanism to construct a DelegateOptions from an ExecuteNetworkParams. * Modify ExecuteNetwork to use this constructed DelegateOptions. Signed-off-by: Colm Donelan Change-Id: Ied663a1e00ac3eece42244ed313ddafd6d2ce078 --- tests/ExecuteNetwork/ExecuteNetwork.cpp | 121 ++++++++++++--------- tests/ExecuteNetwork/ExecuteNetworkParams.cpp | 58 +++++++++- tests/ExecuteNetwork/ExecuteNetworkParams.hpp | 10 ++ .../ExecuteNetworkProgramOptions.cpp | 10 +- 4 files changed, 140 insertions(+), 59 deletions(-) diff --git a/tests/ExecuteNetwork/ExecuteNetwork.cpp b/tests/ExecuteNetwork/ExecuteNetwork.cpp index 8acab71290..fa8c8c8761 100644 --- a/tests/ExecuteNetwork/ExecuteNetwork.cpp +++ b/tests/ExecuteNetwork/ExecuteNetwork.cpp @@ -35,8 +35,39 @@ #endif #include + +/** + * Given a measured duration and a threshold time tell the user whether we succeeded or not. + * + * @param duration the measured inference duration. + * @param thresholdTime the threshold time in milliseconds. + * @return false if the measured time exceeded the threshold. + */ +bool CheckInferenceTimeThreshold(const std::chrono::duration& duration, + const double& thresholdTime) +{ + ARMNN_LOG(info) << "\nInference time: " << std::setprecision(2) + << std::fixed << duration.count() << " ms\n"; + // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line + if (thresholdTime != 0.0) + { + ARMNN_LOG(info) << "Threshold time: " << std::setprecision(2) + << std::fixed << thresholdTime << " ms"; + auto thresholdMinusInference = thresholdTime - duration.count(); + ARMNN_LOG(info) << "Threshold time - Inference time: " << std::setprecision(2) + << std::fixed << thresholdMinusInference << " ms" << "\n"; + if (thresholdMinusInference < 0) + { + std::string errorMessage = "Elapsed inference time is greater than provided threshold time."; + ARMNN_LOG(fatal) << errorMessage; + return false; + } + } + return true; +} + #if defined(ARMNN_TFLITE_DELEGATE) -int TfLiteDelegateMainImpl(const ExecuteNetworkParams& params, +int TfLiteDelegateMainImpl(const ExecuteNetworkParams& params, const armnn::IRuntime::CreationOptions runtimeOptions, const std::shared_ptr& runtime = nullptr) { using namespace tflite; @@ -54,7 +85,10 @@ int TfLiteDelegateMainImpl(const ExecuteNetworkParams& params, if (params.m_TfLiteExecutor == ExecuteNetworkParams::TfLiteExecutor::ArmNNTfLiteDelegate) { // Create the Armnn Delegate - armnnDelegate::DelegateOptions delegateOptions(params.m_ComputeDevices); + // Populate a DelegateOptions from the ExecuteNetworkParams. + armnnDelegate::DelegateOptions delegateOptions = params.ToDelegateOptions(); + delegateOptions.SetExternalProfilingParams(runtimeOptions.m_ProfilingOptions); + std::unique_ptr theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions), armnnDelegate::TfLiteArmnnDelegateDelete); @@ -71,18 +105,11 @@ int TfLiteDelegateMainImpl(const ExecuteNetworkParams& params, std::cout << "Running on TfLite without ArmNN delegate\n"; } - - std::vector inputBindings; - for (const std::string& inputName: params.m_InputNames) - { - inputBindings.push_back(inputName); - } - armnn::Optional dataFile = params.m_GenerateTensorData ? 
armnn::EmptyOptional() : armnn::MakeOptional(params.m_InputTensorDataFilePaths[0]); - const size_t numInputs = inputBindings.size(); + const size_t numInputs = params.m_InputNames.size(); for(unsigned int inputIndex = 0; inputIndex < numInputs; ++inputIndex) { @@ -212,15 +239,36 @@ int TfLiteDelegateMainImpl(const ExecuteNetworkParams& params, for (size_t x = 0; x < params.m_Iterations; x++) { + // Start timer to record inference time in milliseconds. + const auto start_time = armnn::GetTimeNow(); // Run the inference status = tfLiteInterpreter->Invoke(); + const auto duration = armnn::GetTimeDuration(start_time); // Print out the output for (unsigned int outputIndex = 0; outputIndex < params.m_OutputNames.size(); ++outputIndex) { auto tfLiteDelegateOutputId = tfLiteInterpreter->outputs()[outputIndex]; TfLiteIntArray* outputDims = tfLiteInterpreter->tensor(tfLiteDelegateOutputId)->dims; - + // If we've been asked to write to a file then set a file output stream. Otherwise use stdout. + FILE* outputTensorFile = stdout; + if (!params.m_OutputTensorFiles.empty()) + { + outputTensorFile = fopen(params.m_OutputTensorFiles[outputIndex].c_str(), "w"); + if (outputTensorFile == NULL) + { + ARMNN_LOG(fatal) << "Specified output tensor file, \"" << + params.m_OutputTensorFiles[outputIndex] << + "\", cannot be created. Defaulting to stdout. " << + "Error was: " << std::strerror(errno); + outputTensorFile = stdout; + } + else + { + ARMNN_LOG(info) << "Writing output " << outputIndex << "' of iteration: " << x+1 << " to file: '" + << params.m_OutputTensorFiles[outputIndex] << "'"; + } + } long outputSize = 1; for (unsigned int dim = 0; dim < static_cast(outputDims->size); ++dim) { @@ -242,7 +290,7 @@ int TfLiteDelegateMainImpl(const ExecuteNetworkParams& params, { for (int i = 0; i < outputSize; ++i) { - printf("%f ", tfLiteDelageOutputData[i]); + fprintf(outputTensorFile, "%f ", tfLiteDelageOutputData[i]); } } } @@ -260,7 +308,7 @@ int TfLiteDelegateMainImpl(const ExecuteNetworkParams& params, { for (int i = 0; i < outputSize; ++i) { - printf("%d ", tfLiteDelageOutputData[i]); + fprintf(outputTensorFile, "%d ", tfLiteDelageOutputData[i]); } } } @@ -278,7 +326,7 @@ int TfLiteDelegateMainImpl(const ExecuteNetworkParams& params, { for (int i = 0; i < outputSize; ++i) { - printf("%d ", tfLiteDelageOutputData[i]); + fprintf(outputTensorFile, "%d ", tfLiteDelageOutputData[i]); } } } @@ -297,7 +345,7 @@ int TfLiteDelegateMainImpl(const ExecuteNetworkParams& params, { for (int i = 0; i < outputSize; ++i) { - printf("%u ", tfLiteDelageOutputData[i]); + fprintf(outputTensorFile, "%u ", tfLiteDelageOutputData[i]); } } } @@ -310,6 +358,7 @@ int TfLiteDelegateMainImpl(const ExecuteNetworkParams& params, } std::cout << std::endl; } + CheckInferenceTimeThreshold(duration, params.m_ThresholdTime); } return status; @@ -628,24 +677,7 @@ int MainImpl(const ExecuteNetworkParams& params, mapbox::util::apply_visitor(printer, inferenceOutputMap.at(cb->GetInferenceId())[i]); } - ARMNN_LOG(info) << "\nInference time: " << std::setprecision(2) - << std::fixed << inferenceDuration.count() << " ms\n"; - - // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line - if (params.m_ThresholdTime != 0.0) - { - ARMNN_LOG(info) << "Threshold time: " << std::setprecision(2) - << std::fixed << params.m_ThresholdTime << " ms"; - auto thresholdMinusInference = - params.m_ThresholdTime - duration(inferenceDuration).count(); - ARMNN_LOG(info) << "Threshold time - Inference time: " << std::setprecision(2) - << 
std::fixed << thresholdMinusInference << " ms" << "\n"; - - if (thresholdMinusInference < 0) - { - ARMNN_LOG(fatal) << "Elapsed inference time is greater than provided threshold time. \n"; - } - } + CheckInferenceTimeThreshold(inferenceDuration, params.m_ThresholdTime); ++j; } //print duration difference between overallStartTime and overallEndTime @@ -739,26 +771,8 @@ int MainImpl(const ExecuteNetworkParams& params, !params.m_DontPrintOutputs); mapbox::util::apply_visitor(printer, outputs[j][i]); } - - ARMNN_LOG(info) << "\nInference time: " << std::setprecision(2) - << std::fixed << inferenceDuration.count() << " ms\n"; - - // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line - if (params.m_ThresholdTime != 0.0) - { - ARMNN_LOG(info) << "Threshold time: " << std::setprecision(2) - << std::fixed << params.m_ThresholdTime << " ms"; - auto thresholdMinusInference = params.m_ThresholdTime - inferenceDuration.count(); - ARMNN_LOG(info) << "Threshold time - Inference time: " << std::setprecision(2) - << std::fixed << thresholdMinusInference << " ms" << "\n"; - - if (thresholdMinusInference < 0) - { - ARMNN_LOG(fatal) << "Elapsed inference time is greater than provided threshold time. \n"; - } - } + CheckInferenceTimeThreshold(inferenceDuration, params.m_ThresholdTime); ARMNN_LOG(info) << "Asynchronous Execution is finished for Inference ID: " << inferenceID << " \n"; - } // finish timer const auto duration = armnn::GetTimeDuration(start_time); @@ -781,7 +795,6 @@ int MainImpl(const ExecuteNetworkParams& params, return EXIT_SUCCESS; } - // MAIN int main(int argc, const char* argv[]) { @@ -853,7 +866,7 @@ int main(int argc, const char* argv[]) ExecuteNetworkParams::TfLiteExecutor::TfliteInterpreter) { #if defined(ARMNN_TF_LITE_DELEGATE) - return TfLiteDelegateMainImpl(ProgramOptions.m_ExNetParams, runtime); + return TfLiteDelegateMainImpl(ProgramOptions.m_ExNetParams, ProgramOptions.m_RuntimeOptions, runtime); #else ARMNN_LOG(fatal) << "Not built with Arm NN Tensorflow-Lite delegate support."; return EXIT_FAILURE; diff --git a/tests/ExecuteNetwork/ExecuteNetworkParams.cpp b/tests/ExecuteNetwork/ExecuteNetworkParams.cpp index dcdd423246..541430c421 100644 --- a/tests/ExecuteNetwork/ExecuteNetworkParams.cpp +++ b/tests/ExecuteNetwork/ExecuteNetworkParams.cpp @@ -232,4 +232,60 @@ void ExecuteNetworkParams::ValidateParams() { ARMNN_LOG(warning) << "No input files provided, input tensors will be filled with 0s."; } -} \ No newline at end of file +} + +#if defined(ARMNN_TFLITE_DELEGATE) +/** + * A utility method that populates a DelegateOptions object from this ExecuteNetworkParams. + * + * @return a populated armnnDelegate::DelegateOptions object. 
+ */ +armnnDelegate::DelegateOptions ExecuteNetworkParams::ToDelegateOptions() const +{ + armnnDelegate::DelegateOptions delegateOptions(m_ComputeDevices); + delegateOptions.SetDynamicBackendsPath(m_DynamicBackendsPath); + delegateOptions.SetGpuProfilingState(m_EnableProfiling); + + armnn::OptimizerOptions options; + options.m_ReduceFp32ToFp16 = m_EnableFp16TurboMode; + options.m_ReduceFp32ToBf16 = m_EnableBf16TurboMode; + options.m_Debug = m_PrintIntermediate; + + options.m_shapeInferenceMethod = armnn::ShapeInferenceMethod::ValidateOnly; + if (m_InferOutputShape) + { + options.m_shapeInferenceMethod = armnn::ShapeInferenceMethod::InferAndValidate; + } + + armnn::BackendOptions gpuAcc("GpuAcc", + { + { "FastMathEnabled", m_EnableFastMath }, + { "SaveCachedNetwork", m_SaveCachedNetwork }, + { "CachedNetworkFilePath", m_CachedNetworkFilePath }, + { "TuningLevel", m_TuningLevel}, + { "TuningFile", m_TuningPath.c_str()}, + { "KernelProfilingEnabled", m_EnableProfiling}, + { "MLGOTuningFilePath", m_MLGOTuningFilePath} + }); + + armnn::BackendOptions cpuAcc("CpuAcc", + { + { "FastMathEnabled", m_EnableFastMath }, + { "NumberOfThreads", m_NumberOfThreads } + }); + options.m_ModelOptions.push_back(gpuAcc); + options.m_ModelOptions.push_back(cpuAcc); + + delegateOptions.SetOptimizerOptions(options); + + // If v,visualize-optimized-model is enabled then construct a file name for the dot file. + if (m_EnableLayerDetails) + { + fs::path filename = m_ModelPath; + filename.replace_extension("dot"); + delegateOptions.SetSerializeToDot(filename); + } + + return delegateOptions; +} +#endif diff --git a/tests/ExecuteNetwork/ExecuteNetworkParams.hpp b/tests/ExecuteNetwork/ExecuteNetworkParams.hpp index db8194b3f9..cb8c2fb386 100644 --- a/tests/ExecuteNetwork/ExecuteNetworkParams.hpp +++ b/tests/ExecuteNetwork/ExecuteNetworkParams.hpp @@ -8,6 +8,10 @@ #include #include +#if defined(ARMNN_TFLITE_DELEGATE) +#include +#endif + /// Holds all parameters necessary to execute a network /// Check ExecuteNetworkProgramOptions.cpp for a description of each parameter struct ExecuteNetworkParams @@ -64,4 +68,10 @@ struct ExecuteNetworkParams // Ensures that the parameters for ExecuteNetwork fit together void ValidateParams(); + +#if defined(ARMNN_TFLITE_DELEGATE) + /// A utility method that populates a DelegateOptions object from this ExecuteNetworkParams. + armnnDelegate::DelegateOptions ToDelegateOptions() const; +#endif + }; diff --git a/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp b/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp index b1c87d088a..8ee66cf64b 100644 --- a/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp +++ b/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp @@ -242,7 +242,8 @@ ProgramOptions::ProgramOptions() : m_CxxOptions{"ExecuteNetwork", ("l,dequantize-output", "If this option is enabled, all quantized outputs will be dequantized to float. " "If unset, default to not get dequantized. " - "Accepted values (true or false)", + "Accepted values (true or false)" + " (Not available when executing ArmNNTfLiteDelegate or TfliteInterpreter)", cxxopts::value(m_ExNetParams.m_DequantizeOutput)->default_value("false")->implicit_value("true")) ("p,print-intermediate-layers", @@ -261,9 +262,9 @@ ProgramOptions::ProgramOptions() : m_CxxOptions{"ExecuteNetwork", ("q,quantize-input", "If this option is enabled, all float inputs will be quantized as appropriate for the model's inputs. " - "If unset, default to not quantized. 
Accepted values (true or false)", + "If unset, default to not quantized. Accepted values (true or false)" + " (Not available when executing ArmNNTfLiteDelegate or TfliteInterpreter)", cxxopts::value(m_ExNetParams.m_QuantizeInput)->default_value("false")->implicit_value("true")) - ("r,threshold-time", "Threshold time is the maximum allowed time for inference measured in milliseconds. If the actual " "inference time is greater than the threshold time, the test will fail. By default, no threshold " @@ -286,7 +287,8 @@ ProgramOptions::ProgramOptions() : m_CxxOptions{"ExecuteNetwork", cxxopts::value()) ("x,subgraph-number", - "Id of the subgraph to be executed. Defaults to 0.", + "Id of the subgraph to be executed. Defaults to 0." + " (Not available when executing ArmNNTfLiteDelegate or TfliteInterpreter)", cxxopts::value(m_ExNetParams.m_SubgraphId)->default_value("0")) ("y,input-type", -- cgit v1.2.1
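For context, a minimal sketch of how the new ExecuteNetworkParams::ToDelegateOptions() helper is consumed on the delegate path, mirroring the TfLiteDelegateMainImpl hunk above. This is not the committed code: the function name RunWithArmnnDelegate is hypothetical, the include paths for the Arm NN delegate headers are assumptions, error handling is reduced to the bare minimum, and it assumes a build with ARMNN_TFLITE_DELEGATE enabled.

```cpp
// Sketch only: assumes the public Arm NN delegate headers (armnn_delegate.hpp,
// DelegateOptions.hpp) and the standard TfLite interpreter API used by ExecuteNetwork.
#include <armnn_delegate.hpp>
#include <DelegateOptions.hpp>
#include <armnn/IRuntime.hpp>

#include <tensorflow/lite/interpreter.h>
#include <tensorflow/lite/kernels/register.h>
#include <tensorflow/lite/model.h>

#include <cstdlib>
#include <memory>

#include "ExecuteNetworkParams.hpp"

// Hypothetical helper, not part of the patch: builds a TfLite interpreter, attaches the
// Arm NN delegate configured from the ExecuteNetwork parameters, and runs one inference.
int RunWithArmnnDelegate(const ExecuteNetworkParams& params,
                         const armnn::IRuntime::CreationOptions& runtimeOptions)
{
    auto model = tflite::FlatBufferModel::BuildFromFile(params.m_ModelPath.c_str());
    if (model == nullptr)
    {
        return EXIT_FAILURE;
    }

    tflite::ops::builtin::BuiltinOpResolver resolver;
    std::unique_ptr<tflite::Interpreter> interpreter;
    if (tflite::InterpreterBuilder(*model, resolver)(&interpreter) != kTfLiteOk ||
        interpreter->AllocateTensors() != kTfLiteOk)
    {
        return EXIT_FAILURE;
    }

    // Populate the delegate options from the command-line parameters and forward the
    // external profiling settings from the runtime options, as the patch does.
    armnnDelegate::DelegateOptions delegateOptions = params.ToDelegateOptions();
    delegateOptions.SetExternalProfilingParams(runtimeOptions.m_ProfilingOptions);

    // Create the Arm NN delegate and hand the graph over to it.
    std::unique_ptr<TfLiteDelegate, decltype(&armnnDelegate::TfLiteArmnnDelegateDelete)>
        theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions),
                         armnnDelegate::TfLiteArmnnDelegateDelete);
    if (interpreter->ModifyGraphWithDelegate(theArmnnDelegate.get()) != kTfLiteOk)
    {
        return EXIT_FAILURE;
    }

    return interpreter->Invoke() == kTfLiteOk ? EXIT_SUCCESS : EXIT_FAILURE;
}
```

Keeping all of the option plumbing inside ToDelegateOptions() means the delegate path, and any future caller, picks up new ExecuteNetwork flags in one place instead of duplicating the OptimizerOptions and BackendOptions wiring.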
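Along the same lines, a short sketch of the per-iteration timing flow that feeds the new CheckInferenceTimeThreshold() helper. The wrapper name TimedInvoke is hypothetical; armnn::GetTimeNow()/GetTimeDuration() are the timing utilities the patch already uses, assumed here to live in armnn/utility/Timer.hpp, and the duration type is assumed to be std::chrono::duration<double, std::milli> (the template arguments are not visible in the hunk above), matching what armnn::GetTimeDuration returns.

```cpp
// Sketch only: times a single Invoke() in milliseconds and applies the --threshold-time
// check, mirroring the iteration loop in TfLiteDelegateMainImpl.
#include <armnn/utility/Timer.hpp>
#include <tensorflow/lite/interpreter.h>

#include <chrono>

#include "ExecuteNetworkParams.hpp"

// The helper added to ExecuteNetwork.cpp by this patch; signature assumed as above.
bool CheckInferenceTimeThreshold(const std::chrono::duration<double, std::milli>& duration,
                                 const double& thresholdTime);

// Hypothetical wrapper: returns false if the inference failed or exceeded the threshold.
bool TimedInvoke(tflite::Interpreter& interpreter, const ExecuteNetworkParams& params)
{
    // Start timer to record inference time in milliseconds.
    const auto start_time = armnn::GetTimeNow();
    const TfLiteStatus status = interpreter.Invoke();
    const auto duration = armnn::GetTimeDuration(start_time);

    if (status != kTfLiteOk)
    {
        return false;
    }

    // A threshold of 0.0 (the default) means no --threshold-time was supplied,
    // in which case the helper only logs the measured time and returns true.
    return CheckInferenceTimeThreshold(duration, params.m_ThresholdTime);
}
```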