From 17d34da9efab1e515b29755811b7ad7597aacb79 Mon Sep 17 00:00:00 2001
From: Jan Eilers
Date: Wed, 8 Dec 2021 16:15:12 +0000
Subject: Add additional timing logging output

* Builds ExecuteNetwork when building only the delegate
* Adds timings to delegate subgraph creation
* Adds execution time logging to EnqueueWorkload and Execute

Signed-off-by: Jan Eilers
Change-Id: Ieff2f67ea8dbb6c2a708f8810e84a20485b7a631
---
 delegate/src/armnn_delegate.cpp         | 19 +++++++++++++++++++
 include/armnn/INetwork.hpp              |  9 +++++----
 src/armnn/Network.cpp                   |  2 +-
 src/armnn/Runtime.cpp                   | 30 ++++++++++++++++++++++--------
 tests/CMakeLists.txt                    |  6 +++++-
 tests/ExecuteNetwork/ExecuteNetwork.cpp |  2 +-
 6 files changed, 53 insertions(+), 15 deletions(-)

diff --git a/delegate/src/armnn_delegate.cpp b/delegate/src/armnn_delegate.cpp
index 0069b4fe0e..b18d234d69 100644
--- a/delegate/src/armnn_delegate.cpp
+++ b/delegate/src/armnn_delegate.cpp
@@ -40,6 +40,7 @@
 #include "Unpack.hpp"

 #include <armnnUtils/Filesystem.hpp>
+#include <armnn/utility/Timer.hpp>
 #include <flatbuffers/flatbuffers.h>
 #include <tensorflow/lite/context_util.h>
 #include <tensorflow/lite/schema/schema_generated.h>
@@ -281,6 +282,9 @@ ArmnnSubgraph* ArmnnSubgraph::Create(TfLiteContext* tfLiteContext,
                                      const TfLiteDelegateParams* parameters,
                                      const Delegate* delegate)
 {
+    const auto startTime = armnn::GetTimeNow();
+    ARMNN_LOG(info) << "ArmnnSubgraph creation";
+
     TfLiteIntArray* executionPlan;
     if (tfLiteContext->GetExecutionPlan(tfLiteContext, &executionPlan) != kTfLiteOk)
     {
@@ -308,6 +312,7 @@ ArmnnSubgraph* ArmnnSubgraph::Create(TfLiteContext* tfLiteContext,
     }

     // Parse TfLite delegate nodes to ArmNN
+    const auto parseStartTime = armnn::GetTimeNow();
     for (int i = 0; i < parameters->nodes_to_replace->size; ++i)
     {
         const int nodeIndex = parameters->nodes_to_replace->data[i];
@@ -325,6 +330,8 @@ ArmnnSubgraph* ArmnnSubgraph::Create(TfLiteContext* tfLiteContext,
             throw armnn::Exception(&"TfLiteArmnnDelegate: Unable to parse node: " [ nodeIndex]);
         }
     }
+    ARMNN_LOG(info) << "Parse nodes to ArmNN time: " << std::setprecision(2)
+                    << std::fixed << armnn::GetTimeDuration(parseStartTime).count() << " ms";

     // Add Output layer
     status = AddOutputLayer(delegateData, tfLiteContext, parameters->output_tensors, outputBindings);
@@ -337,10 +344,13 @@ ArmnnSubgraph* ArmnnSubgraph::Create(TfLiteContext* tfLiteContext,
     armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr);
     try
     {
+        const auto optimizeStartTime = armnn::GetTimeNow();
         optNet = armnn::Optimize(*(delegateData.m_Network.get()),
                                  delegate->m_Options.GetBackends(),
                                  delegate->m_Runtime->GetDeviceSpec(),
                                  delegate->m_Options.GetOptimizerOptions());
+        ARMNN_LOG(info) << "Optimize ArmnnSubgraph time: " << std::setprecision(2)
+                        << std::fixed << armnn::GetTimeDuration(optimizeStartTime).count() << " ms";
     }
     catch (std::exception &ex)
     {
@@ -358,6 +368,7 @@ ArmnnSubgraph* ArmnnSubgraph::Create(TfLiteContext* tfLiteContext,
     const std::string serializeToDotFile = delegate->m_Options.GetSerializeToDot();
     if (!serializeToDotFile.empty())
     {
+        ARMNN_LOG(info) << "Writing graph to dot file: " << serializeToDotFile;
         fs::path filename = serializeToDotFile;
         std::fstream file(filename.c_str(), std::ios_base::out);
         optNet->SerializeToDot(file);
@@ -365,6 +376,8 @@ ArmnnSubgraph* ArmnnSubgraph::Create(TfLiteContext* tfLiteContext,

     try
     {
+        const auto loadStartTime = armnn::GetTimeNow();
+
         // Load graph into runtime
         std::string errorMessage;
         armnn::Status loadingStatus;
@@ -387,6 +400,9 @@ ArmnnSubgraph* ArmnnSubgraph::Create(TfLiteContext* tfLiteContext,
             // Network load failed.
throw armnn::Exception("TfLiteArmnnDelegate: Network could not be loaded:" + errorMessage); } + + ARMNN_LOG(info) << "Load ArmnnSubgraph time: " << std::setprecision(2) + << std::fixed << armnn::GetTimeDuration(loadStartTime).count() << " ms"; } catch (std::exception& ex) { @@ -401,6 +417,9 @@ ArmnnSubgraph* ArmnnSubgraph::Create(TfLiteContext* tfLiteContext, delegate->m_Runtime->RegisterDebugCallback(networkId, delegate->m_Options.GetDebugCallbackFunction().value()); } + ARMNN_LOG(info) << "Overall ArmnnSubgraph creation time: " << std::setprecision(2) + << std::fixed << armnn::GetTimeDuration(startTime).count() << " ms\n"; + // Create a new SubGraph with networkId and runtime return new ArmnnSubgraph(networkId, delegate->m_Runtime.get(), inputBindings, outputBindings); } diff --git a/include/armnn/INetwork.hpp b/include/armnn/INetwork.hpp index 6119f124e1..1d98821160 100644 --- a/include/armnn/INetwork.hpp +++ b/include/armnn/INetwork.hpp @@ -169,7 +169,8 @@ struct OptimizerOptions stream << "OptimizerOptions: \n"; stream << "\tReduceFp32ToFp16: " << m_ReduceFp32ToFp16 << "\n"; stream << "\tReduceFp32ToBf16: " << m_ReduceFp32ToBf16 << "\n"; - stream << "\tDebug: " << + stream << "\tDebug: " << m_Debug << "\n"; + stream << "\tShapeInferenceMethod: " << (m_shapeInferenceMethod == ShapeInferenceMethod::ValidateOnly ? "ValidateOnly" : "InferAndValidate") << "\n"; stream << "\tImportEnabled: " << m_ImportEnabled << "\n"; stream << "\tProfilingEnabled: " << m_ProfilingEnabled << "\n"; @@ -180,9 +181,9 @@ struct OptimizerOptions for (size_t i=0; i < optionsGroup.GetOptionCount(); i++) { const armnn::BackendOptions::BackendOption option = optionsGroup.GetOption(i); - stream << "\t\tBackend: " << optionsGroup.GetBackendId() - << "\t\t\tOption: " << option.GetName() - << "\t\t\tValue: " << std::string(option.GetValue().ToString()); + stream << "\t\tBackend: " << optionsGroup.GetBackendId() << "\n" + << "\t\t\tOption: " << option.GetName() << "\n" + << "\t\t\tValue: " << std::string(option.GetValue().ToString()) << "\n"; } } diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp index e5f098e903..54d9ae2028 100644 --- a/src/armnn/Network.cpp +++ b/src/armnn/Network.cpp @@ -1614,7 +1614,7 @@ IOptimizedNetworkPtr Optimize(const INetwork& inNetwork, const OptimizerOptions& options, Optional&> messages) { - ARMNN_LOG(info) << options.ToString(); + ARMNN_LOG(debug) << options.ToString(); // Enable profiling auto profiler = inNetwork.pNetworkImpl->GetGraph().GetProfiler(); diff --git a/src/armnn/Runtime.cpp b/src/armnn/Runtime.cpp index 76f39e0feb..1ccfb4687a 100644 --- a/src/armnn/Runtime.cpp +++ b/src/armnn/Runtime.cpp @@ -502,7 +502,7 @@ RuntimeImpl::RuntimeImpl(const IRuntime::CreationOptions& options) RuntimeImpl::~RuntimeImpl() { - const auto start_time = armnn::GetTimeNow(); + const auto startTime = armnn::GetTimeNow(); std::vector networkIDs; try { @@ -547,7 +547,7 @@ RuntimeImpl::~RuntimeImpl() BackendRegistryInstance().SetProfilingService(armnn::EmptyOptional()); ARMNN_LOG(info) << "Shutdown time: " << std::setprecision(2) - << std::fixed << armnn::GetTimeDuration(start_time).count() << " ms\n"; + << std::fixed << armnn::GetTimeDuration(startTime).count() << " ms\n"; } LoadedNetwork* RuntimeImpl::GetLoadedNetworkPtr(NetworkId networkId) const @@ -589,6 +589,8 @@ Status RuntimeImpl::EnqueueWorkload(NetworkId networkId, const InputTensors& inputTensors, const OutputTensors& outputTensors) { + const auto startTime = armnn::GetTimeNow(); + LoadedNetwork* loadedNetwork = 

     if (!loadedNetwork)
@@ -615,7 +617,12 @@ Status RuntimeImpl::EnqueueWorkload(NetworkId networkId,
     }
     lastId=networkId;

-    return loadedNetwork->EnqueueWorkload(inputTensors, outputTensors);
+    auto status = loadedNetwork->EnqueueWorkload(inputTensors, outputTensors);
+
+    ARMNN_LOG(info) << "Execution time: " << std::setprecision(2)
+                    << std::fixed << armnn::GetTimeDuration(startTime).count() << " ms\n";
+
+    return status;
 }

 Status RuntimeImpl::Execute(IWorkingMemHandle& iWorkingMemHandle,
@@ -624,6 +631,8 @@ Status RuntimeImpl::Execute(IWorkingMemHandle& iWorkingMemHandle,
                             std::vector<ImportedInputId> preImportedInputs,
                             std::vector<ImportedOutputId> preImportedOutputs)
 {
+    const auto startTime = armnn::GetTimeNow();
+
     NetworkId networkId = iWorkingMemHandle.GetNetworkId();
     LoadedNetwork* loadedNetwork = GetLoadedNetworkPtr(networkId);

@@ -641,11 +650,16 @@ Status RuntimeImpl::Execute(IWorkingMemHandle& iWorkingMemHandle,

     ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Execute");

-    return loadedNetwork->Execute(inputTensors,
-                                  outputTensors,
-                                  iWorkingMemHandle,
-                                  preImportedInputs,
-                                  preImportedOutputs);
+    auto status = loadedNetwork->Execute(inputTensors,
+                                         outputTensors,
+                                         iWorkingMemHandle,
+                                         preImportedInputs,
+                                         preImportedOutputs);
+
+    ARMNN_LOG(info) << "Execution time: " << std::setprecision(2)
+                    << std::fixed << armnn::GetTimeDuration(startTime).count() << " ms\n";
+
+    return status;
 }

 /// Create a new unique WorkingMemHandle object. Create multiple handles if you wish to have
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index cea55c5b8e..4cb324f2c7 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -133,7 +133,11 @@ if (BUILD_ONNX_PARSER)
     OnnxParserTest(OnnxMobileNet-Armnn "${OnnxMobileNet-Armnn_sources}")
 endif()

-if (BUILD_ARMNN_SERIALIZER OR BUILD_TF_PARSER OR BUILD_TF_LITE_PARSER OR BUILD_ONNX_PARSER)
+if (BUILD_ARMNN_SERIALIZER
+    OR BUILD_TF_PARSER
+    OR BUILD_TF_LITE_PARSER
+    OR BUILD_ONNX_PARSER
+    OR BUILD_ARMNN_TFLITE_DELEGATE)
     set(ExecuteNetwork_sources
         ExecuteNetwork/ExecuteNetwork.cpp
         ExecuteNetwork/ExecuteNetworkProgramOptions.cpp
diff --git a/tests/ExecuteNetwork/ExecuteNetwork.cpp b/tests/ExecuteNetwork/ExecuteNetwork.cpp
index b0f98b27b9..540bfd4271 100644
--- a/tests/ExecuteNetwork/ExecuteNetwork.cpp
+++ b/tests/ExecuteNetwork/ExecuteNetwork.cpp
@@ -47,7 +47,7 @@
 bool CheckInferenceTimeThreshold(const std::chrono::duration<double, std::milli>& duration,
                                  const double& thresholdTime)
 {
-    ARMNN_LOG(info) << "\nInference time: " << std::setprecision(2)
+    ARMNN_LOG(info) << "Inference time: " << std::setprecision(2)
                     << std::fixed << duration.count() << " ms\n";
     // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line
     if (thresholdTime != 0.0)
--
cgit v1.2.1
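
Every timing added by this patch follows the same pattern: capture a start point with armnn::GetTimeNow(), run the stage, then log armnn::GetTimeDuration(start).count() with two fixed decimal places. The standalone sketch below mirrors that pattern with plain std::chrono; it assumes GetTimeNow()/GetTimeDuration() (from armnn/utility/Timer.hpp) wrap high_resolution_clock and a double-millisecond duration as shown, and it is an illustration of the technique, not code from the patch.

// Standalone sketch of the timing pattern used throughout this patch.
// Assumption: armnn::GetTimeNow()/GetTimeDuration() behave like the
// std::chrono calls below (high_resolution_clock, double milliseconds).
#include <chrono>
#include <iomanip>
#include <iostream>

int main()
{
    // Equivalent of: const auto startTime = armnn::GetTimeNow();
    const auto startTime = std::chrono::high_resolution_clock::now();

    // ... stage being timed: parse, optimize, load or execute ...

    // Equivalent of: armnn::GetTimeDuration(startTime)
    const std::chrono::duration<double, std::milli> duration =
        std::chrono::high_resolution_clock::now() - startTime;

    // Same format as the ARMNN_LOG(info) lines added by the patch.
    std::cout << "Execution time: " << std::setprecision(2) << std::fixed
              << duration.count() << " ms\n";
    return 0;
}

Logging each duration explicitly, rather than through an RAII scope timer, keeps every message local to the stage it measures at the cost of repeating the stream boilerplate at each site.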