diff options
author | Colm Donelan <colm.donelan@arm.com> | 2022-03-16 11:54:13 +0000 |
---|---|---|
committer | Colm Donelan <colm.donelan@arm.com> | 2022-03-16 11:54:13 +0000 |
commit | 0fc16c6dcbee21b34758772c4a5872e68c602f3f (patch) | |
tree | 293a8b6ba0c02c1f64894d90b68679dfc0aefd10 | |
parent | c7756cfbd8995962aae3c3a737f1fe918d400940 (diff) | |
download | android-nn-driver-0fc16c6dcbee21b34758772c4a5872e68c602f3f.tar.gz |
IVGCVSW-6801 Add some timing information to ExecuteGraph.
* Modify all versions of ExecuteGraph to include timing information. This
can give a rough guide as to how much overhead the driver is adding.
Signed-off-by: Colm Donelan <colm.donelan@arm.com>
Change-Id: Ie168e060c22b8417efa2ede0c9bdc7ef2631b913
-rw-r--r-- | ArmnnPreparedModel.cpp | 10 | ||||
-rw-r--r-- | ArmnnPreparedModel_1_2.cpp | 8 | ||||
-rw-r--r-- | ArmnnPreparedModel_1_3.cpp | 8 |
3 files changed, 25 insertions(+), 1 deletion(-)
diff --git a/ArmnnPreparedModel.cpp b/ArmnnPreparedModel.cpp index 326351c0..41740435 100644 --- a/ArmnnPreparedModel.cpp +++ b/ArmnnPreparedModel.cpp @@ -14,6 +14,7 @@ #include <OperationsUtils.h> #include <ValidateHal.h> +#include <chrono> #include <cinttypes> #ifdef ARMNN_ANDROID_S @@ -299,6 +300,8 @@ void ArmnnPreparedModel<HalVersion>::ExecuteGraph( CallbackContext_1_0 cb) { ALOGV("ArmnnPreparedModel::ExecuteGraph(...)"); + // Capture the graph execution start time. + std::chrono::time_point<std::chrono::system_clock> graphExecutionStart = std::chrono::system_clock::now(); DumpTensorsIfRequired("Input", inputTensors); @@ -328,7 +331,6 @@ void ArmnnPreparedModel<HalVersion>::ExecuteGraph( status = m_Runtime->EnqueueWorkload(m_NetworkId, inputTensors, outputTensors, importedInputIds, importedOutputIds); } - if (status != armnn::Status::Success) { ALOGW("EnqueueWorkload failed"); @@ -365,6 +367,12 @@ void ArmnnPreparedModel<HalVersion>::ExecuteGraph( #endif } + // Log the total time in this call. This is a good number to compare to that printed out by + // RuntimeImpl::EnqueueWorkload. The difference should be the execution overhead of the driver. + ALOGI("ArmnnPreparedModel::ExecuteGraph Execution time = %lld µs", + std::chrono::duration_cast<std::chrono::microseconds> + (std::chrono::system_clock::now() - graphExecutionStart).count()); + cb.callback(V1_0::ErrorStatus::NONE, "ExecuteGraph"); } diff --git a/ArmnnPreparedModel_1_2.cpp b/ArmnnPreparedModel_1_2.cpp index 37bc3a49..c54ee354 100644 --- a/ArmnnPreparedModel_1_2.cpp +++ b/ArmnnPreparedModel_1_2.cpp @@ -16,6 +16,7 @@ #include <ExecutionBurstServer.h> #include <ValidateHal.h> +#include <chrono> #include <cinttypes> #ifdef ARMNN_ANDROID_S @@ -511,6 +512,8 @@ bool ArmnnPreparedModel_1_2<HalVersion>::ExecuteGraph( ALOGV("ArmnnPreparedModel_1_2::ExecuteGraph(...)"); TimePoint driverEnd, deviceStart, deviceEnd; + // Capture the graph execution start time. 
+ std::chrono::time_point<std::chrono::system_clock> graphExecutionStart = std::chrono::system_clock::now(); DumpTensorsIfRequired("Input", inputTensors); @@ -599,6 +602,11 @@ bool ArmnnPreparedModel_1_2<HalVersion>::ExecuteGraph( cb.callback(V1_0::ErrorStatus::NONE, outputShapes, g_NoTiming, "ArmnnPreparedModel_1_2::ExecuteGraph"); } + // Log the total time in this call. This is a good number to compare to that printed out by + // RuntimeImpl::EnqueueWorkload. The difference should be the execution overhead of the driver. + ALOGI("ArmnnPreparedModel_1_2::ExecuteGraph Execution time = %lld µs", + std::chrono::duration_cast<std::chrono::microseconds> + (std::chrono::system_clock::now() - graphExecutionStart).count()); return true; } diff --git a/ArmnnPreparedModel_1_3.cpp b/ArmnnPreparedModel_1_3.cpp index 34c42ecd..20b49f5d 100644 --- a/ArmnnPreparedModel_1_3.cpp +++ b/ArmnnPreparedModel_1_3.cpp @@ -21,6 +21,7 @@ #include <ExecutionBurstServer.h> #include <ValidateHal.h> +#include <chrono> #include <cinttypes> #ifdef ARMNN_ANDROID_S @@ -805,6 +806,8 @@ Return <V1_3::ErrorStatus> ArmnnPreparedModel_1_3<HalVersion>::ExecuteGraph( CallbackContext cb) { ALOGV("ArmnnPreparedModel_1_3::ExecuteGraph(...)"); + // Capture the graph execution start time. + std::chrono::time_point<std::chrono::system_clock> graphExecutionStart = std::chrono::system_clock::now(); DumpTensorsIfRequired("Input", inputTensors); @@ -890,6 +893,11 @@ Return <V1_3::ErrorStatus> ArmnnPreparedModel_1_3<HalVersion>::ExecuteGraph( { cb.callback(V1_3::ErrorStatus::NONE, outputShapes, g_NoTiming, "ArmnnPreparedModel_1_3::ExecuteGraph"); } + // Log the total time in this call. This is a good number to compare to that printed out by + // RuntimeImpl::EnqueueWorkload. The difference should be the execution overhead of the driver. 
+ ALOGI("ArmnnPreparedModel_1_3::ExecuteGraph Execution time = %lld µs", + std::chrono::duration_cast<std::chrono::microseconds> + (std::chrono::system_clock::now() - graphExecutionStart).count()); return V1_3::ErrorStatus::NONE; } |