diff options
author | Jonny Svärd <jonny.svaerd@arm.com> | 2022-05-10 17:29:30 +0200 |
---|---|---|
committer | Kristofer Jonsson <kristofer.jonsson@arm.com> | 2022-06-14 10:23:21 +0000 |
commit | 2ebaac7a007cbfae7fff818e4d6c4c33562eea0e (patch) | |
tree | 928435cf7c8312c8c03a921c33bc7e3b791980b3 | |
parent | a3b7c692625205bbff3e078c378f146dd2578efd (diff) | |
download | ethos-u-core-software-2ebaac7a007cbfae7fff818e4d6c4c33562eea0e.tar.gz |
Refactor performance measurements
Change 'Inference runtime' to measure CPU cycles for the
TensorFlow Lite Micro interpreter.Invoke() call.
Add 'Operator(s) runtime' print that prints a summary of the
cycles spent on all operators during an inference. (This is
equivalent to the previously reported 'Inference runtime'.)
Move prints out of the EndEvent() function in ArmProfiler as
it otherwise interferes with the inference cycle measurement.
Change-Id: Ie11b5abb5b12a3bcf5a67841f04834d05dfd796d
-rw-r--r-- | applications/inference_process/include/inference_process.hpp | 1 | ||||
-rw-r--r-- | applications/inference_process/src/inference_process.cpp | 18 | ||||
-rw-r--r-- | lib/arm_profiler/include/arm_profiler.hpp | 1 | ||||
-rw-r--r-- | lib/arm_profiler/src/arm_profiler.cpp | 10 |
4 files changed, 26 insertions, 4 deletions
diff --git a/applications/inference_process/include/inference_process.hpp b/applications/inference_process/include/inference_process.hpp
index 9635884..fc54ae0 100644
--- a/applications/inference_process/include/inference_process.hpp
+++ b/applications/inference_process/include/inference_process.hpp
@@ -52,6 +52,7 @@ struct InferenceJob {
     std::vector<DataPtr> input;
     std::vector<DataPtr> output;
     std::vector<DataPtr> expectedOutput;
+    uint64_t cpuCycles{0};
     size_t numBytesToPrint;
     void *externalContext;
 
diff --git a/applications/inference_process/src/inference_process.cpp b/applications/inference_process/src/inference_process.cpp
index 264c4ba..29254c7 100644
--- a/applications/inference_process/src/inference_process.cpp
+++ b/applications/inference_process/src/inference_process.cpp
@@ -21,6 +21,7 @@
 #include "tensorflow/lite/micro/micro_error_reporter.h"
 #include "tensorflow/lite/micro/micro_interpreter.h"
 #include "tensorflow/lite/micro/micro_profiler.h"
+#include "tensorflow/lite/micro/micro_time.h"
 #include "tensorflow/lite/schema/schema_generated.h"
 
 #include "arm_profiler.hpp"
@@ -150,15 +151,20 @@ bool InferenceProcess::runJob(InferenceJob &job) {
         return true;
     }
 
+    // Get the current cycle counter value
+    uint32_t cpuCyclesBegin = tflite::GetCurrentTimeTicks();
+
     // Run the inference
     status = interpreter.Invoke();
+
+    // Calculate nbr of CPU cycles for the Invoke call
+    job.cpuCycles = tflite::GetCurrentTimeTicks() - cpuCyclesBegin;
+
     if (status != kTfLiteOk) {
         LOG_ERR("Invoke failed for inference: job=%s", job.name.c_str());
         return true;
     }
 
-    LOG("Inference runtime: %" PRIu64 " cycles\n", profiler.GetTotalTicks());
-
     // Copy output data from TFLu arena to job descriptor
     if (copyOfm(job, interpreter)) {
         return true;
@@ -171,8 +177,16 @@ bool InferenceProcess::runJob(InferenceJob &job) {
         return true;
     }
 
+    LOG_INFO("\n");
     LOG_INFO("Finished running job: %s", job.name.c_str());
 
+    profiler.ReportResults();
+
+    LOG("\n");
+    LOG("Operator(s) total: %" PRIu64 " CPU cycles\n\n", profiler.GetTotalTicks());
+
+    LOG("Inference runtime: %" PRIu64 " CPU cycles total\n\n", job.cpuCycles);
+
     return false;
 }
 
diff --git a/lib/arm_profiler/include/arm_profiler.hpp b/lib/arm_profiler/include/arm_profiler.hpp
index 0b97e2e..0e6784d 100644
--- a/lib/arm_profiler/include/arm_profiler.hpp
+++ b/lib/arm_profiler/include/arm_profiler.hpp
@@ -31,6 +31,7 @@ public:
     uint32_t BeginEvent(const char *tag);
     void EndEvent(uint32_t event_handle);
     uint64_t GetTotalTicks() const;
+    void ReportResults() const;
 
 private:
     size_t max_events_;
diff --git a/lib/arm_profiler/src/arm_profiler.cpp b/lib/arm_profiler/src/arm_profiler.cpp
index c90eec2..7648584 100644
--- a/lib/arm_profiler/src/arm_profiler.cpp
+++ b/lib/arm_profiler/src/arm_profiler.cpp
@@ -50,8 +50,6 @@ uint32_t ArmProfiler::BeginEvent(const char *tag) {
 void ArmProfiler::EndEvent(uint32_t event_handle) {
     TFLITE_DCHECK(event_handle < max_events_);
     end_ticks_[event_handle] = GetCurrentTimeTicks();
-    tflite::GetMicroErrorReporter()->Report(
-        "%s : cycle_cnt : %u cycles", tags_[event_handle], end_ticks_[event_handle] - start_ticks_[event_handle]);
 }
 
 uint64_t ArmProfiler::GetTotalTicks() const {
@@ -64,4 +62,12 @@ uint64_t ArmProfiler::GetTotalTicks() const {
     return ticks;
 }
 
+void ArmProfiler::ReportResults() const {
+    tflite::GetMicroErrorReporter()->Report("Profiler report, CPU cycles per operator:");
+    for (size_t i = 0; i < num_events_; ++i) {
+        tflite::GetMicroErrorReporter()->Report(
+            "%s : cycle_cnt : %u cycles", tags_[i], end_ticks_[i] - start_ticks_[i]);
+    }
+}
+
 } // namespace tflite