aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- ArmnnPreparedModel.cpp 10
-rw-r--r-- ArmnnPreparedModel_1_2.cpp 8
-rw-r--r-- ArmnnPreparedModel_1_3.cpp 8
3 files changed, 25 insertions, 1 deletions
diff --git a/ArmnnPreparedModel.cpp b/ArmnnPreparedModel.cpp
index 326351c0..41740435 100644
--- a/ArmnnPreparedModel.cpp
+++ b/ArmnnPreparedModel.cpp
@@ -14,6 +14,7 @@
#include <OperationsUtils.h>
#include <ValidateHal.h>
+#include <chrono>
#include <cinttypes>
#ifdef ARMNN_ANDROID_S
@@ -299,6 +300,8 @@ void ArmnnPreparedModel<HalVersion>::ExecuteGraph(
CallbackContext_1_0 cb)
{
ALOGV("ArmnnPreparedModel::ExecuteGraph(...)");
+ // Capture the graph execution start time.
+ std::chrono::time_point<std::chrono::system_clock> graphExecutionStart = std::chrono::system_clock::now();
DumpTensorsIfRequired("Input", inputTensors);
@@ -328,7 +331,6 @@ void ArmnnPreparedModel<HalVersion>::ExecuteGraph(
status = m_Runtime->EnqueueWorkload(m_NetworkId, inputTensors, outputTensors,
importedInputIds, importedOutputIds);
}
-
if (status != armnn::Status::Success)
{
ALOGW("EnqueueWorkload failed");
@@ -365,6 +367,12 @@ void ArmnnPreparedModel<HalVersion>::ExecuteGraph(
#endif
}
+ // Log the total time spent in this call. Compare this figure with the one printed by
+ // RuntimeImpl::EnqueueWorkload; the difference should be the execution overhead of the driver.
+ ALOGI("ArmnnPreparedModel::ExecuteGraph Execution time = %lld µs",
+ std::chrono::duration_cast<std::chrono::microseconds>
+ (std::chrono::system_clock::now() - graphExecutionStart).count());
+
cb.callback(V1_0::ErrorStatus::NONE, "ExecuteGraph");
}
diff --git a/ArmnnPreparedModel_1_2.cpp b/ArmnnPreparedModel_1_2.cpp
index 37bc3a49..c54ee354 100644
--- a/ArmnnPreparedModel_1_2.cpp
+++ b/ArmnnPreparedModel_1_2.cpp
@@ -16,6 +16,7 @@
#include <ExecutionBurstServer.h>
#include <ValidateHal.h>
+#include <chrono>
#include <cinttypes>
#ifdef ARMNN_ANDROID_S
@@ -511,6 +512,8 @@ bool ArmnnPreparedModel_1_2<HalVersion>::ExecuteGraph(
ALOGV("ArmnnPreparedModel_1_2::ExecuteGraph(...)");
TimePoint driverEnd, deviceStart, deviceEnd;
+ // Capture the graph execution start time.
+ std::chrono::time_point<std::chrono::system_clock> graphExecutionStart = std::chrono::system_clock::now();
DumpTensorsIfRequired("Input", inputTensors);
@@ -599,6 +602,11 @@ bool ArmnnPreparedModel_1_2<HalVersion>::ExecuteGraph(
cb.callback(V1_0::ErrorStatus::NONE, outputShapes, g_NoTiming, "ArmnnPreparedModel_1_2::ExecuteGraph");
}
+ // Log the total time spent in this call. Compare this figure with the one printed by
+ // RuntimeImpl::EnqueueWorkload; the difference should be the execution overhead of the driver.
+ ALOGI("ArmnnPreparedModel_1_2::ExecuteGraph Execution time = %lld µs",
+ std::chrono::duration_cast<std::chrono::microseconds>
+ (std::chrono::system_clock::now() - graphExecutionStart).count());
return true;
}
diff --git a/ArmnnPreparedModel_1_3.cpp b/ArmnnPreparedModel_1_3.cpp
index 34c42ecd..20b49f5d 100644
--- a/ArmnnPreparedModel_1_3.cpp
+++ b/ArmnnPreparedModel_1_3.cpp
@@ -21,6 +21,7 @@
#include <ExecutionBurstServer.h>
#include <ValidateHal.h>
+#include <chrono>
#include <cinttypes>
#ifdef ARMNN_ANDROID_S
@@ -805,6 +806,8 @@ Return <V1_3::ErrorStatus> ArmnnPreparedModel_1_3<HalVersion>::ExecuteGraph(
CallbackContext cb)
{
ALOGV("ArmnnPreparedModel_1_3::ExecuteGraph(...)");
+ // Capture the graph execution start time.
+ std::chrono::time_point<std::chrono::system_clock> graphExecutionStart = std::chrono::system_clock::now();
DumpTensorsIfRequired("Input", inputTensors);
@@ -890,6 +893,11 @@ Return <V1_3::ErrorStatus> ArmnnPreparedModel_1_3<HalVersion>::ExecuteGraph(
{
cb.callback(V1_3::ErrorStatus::NONE, outputShapes, g_NoTiming, "ArmnnPreparedModel_1_3::ExecuteGraph");
}
+ // Log the total time spent in this call. Compare this figure with the one printed by
+ // RuntimeImpl::EnqueueWorkload; the difference should be the execution overhead of the driver.
+ ALOGI("ArmnnPreparedModel_1_3::ExecuteGraph Execution time = %lld µs",
+ std::chrono::duration_cast<std::chrono::microseconds>
+ (std::chrono::system_clock::now() - graphExecutionStart).count());
return V1_3::ErrorStatus::NONE;
}