diff options
author | Bhavik Patel <bhavik.patel@arm.com> | 2020-11-16 12:13:56 +0100 |
---|---|---|
committer | Bhavik Patel <bhavik.patel@arm.com> | 2020-12-08 10:18:55 +0100 |
commit | ffe845d4aad5a389e295bab5f78c8088ef187ea0 (patch) | |
tree | 64e8aeb90db243a04e12993e416763d7ec293b33 /applications/inference_process | |
parent | f1bda569bdc7812cef48a234bfe99321ef2e56fd (diff) | |
download | ethos-u-core-software-ffe845d4aad5a389e295bab5f78c8088ef187ea0.tar.gz |
MLBEDSW-3269 Add pmu counters for inference_process
Change-Id: I6594acc228fe8048f56cb96ac7846a87491eed38
Diffstat (limited to 'applications/inference_process')
-rw-r--r-- | applications/inference_process/include/inference_process.hpp | 9 | ||||
-rw-r--r-- | applications/inference_process/src/inference_process.cc | 48 |
2 files changed, 49 insertions, 8 deletions
diff --git a/applications/inference_process/include/inference_process.hpp b/applications/inference_process/include/inference_process.hpp index ec682d1..67b30c5 100644 --- a/applications/inference_process/include/inference_process.hpp +++ b/applications/inference_process/include/inference_process.hpp @@ -18,6 +18,7 @@ #pragma once +#include <array> #include <queue> #include <stdlib.h> #include <string> @@ -41,6 +42,10 @@ struct InferenceJob { std::vector<DataPtr> output; std::vector<DataPtr> expectedOutput; size_t numBytesToPrint; + std::vector<uint8_t> pmuEventConfig; + uint32_t pmuCycleCounterEnable; + std::vector<uint32_t> pmuEventCount; + uint64_t pmuCycleCounterCount; InferenceJob(); InferenceJob(const std::string &name, @@ -48,7 +53,9 @@ struct InferenceJob { const std::vector<DataPtr> &input, const std::vector<DataPtr> &output, const std::vector<DataPtr> &expectedOutput, - size_t numBytesToPrint); + size_t numBytesToPrint, + const std::vector<uint8_t> &pmuEventConfig, + const uint32_t pmuCycleCounterEnable); void invalidate(); void clean(); diff --git a/applications/inference_process/src/inference_process.cc b/applications/inference_process/src/inference_process.cc index ecf6144..f3d2da8 100644 --- a/applications/inference_process/src/inference_process.cc +++ b/applications/inference_process/src/inference_process.cc @@ -20,6 +20,7 @@ #include "tensorflow/lite/micro/cortex_m_generic/debug_log_callback.h" #include "tensorflow/lite/micro/micro_error_reporter.h" #include "tensorflow/lite/micro/micro_interpreter.h" +#include "tensorflow/lite/micro/micro_profiler.h" #include "tensorflow/lite/schema/schema_generated.h" #include "tensorflow/lite/version.h" @@ -106,10 +107,17 @@ InferenceJob::InferenceJob(const string &_name, const vector<DataPtr> &_input, const vector<DataPtr> &_output, const vector<DataPtr> &_expectedOutput, - size_t _numBytesToPrint) : + size_t _numBytesToPrint, + const vector<uint8_t> &_pmuEventConfig, + const uint32_t pmuCycleCounterEnable) : 
name(_name), networkModel(_networkModel), input(_input), output(_output), expectedOutput(_expectedOutput), - numBytesToPrint(_numBytesToPrint) {} + numBytesToPrint(_numBytesToPrint), pmuEventConfig(_pmuEventConfig), pmuCycleCounterEnable(pmuCycleCounterEnable), + pmuEventCount(), pmuCycleCounterCount(0) { +#if defined(INFERENCE_PROC_TFLU_PROFILER) && defined(ETHOSU) + pmuEventCount = vector<uint32_t>(ETHOSU_PMU_NCOUNTERS, 0); +#endif +} void InferenceJob::invalidate() { networkModel.invalidate(); @@ -183,6 +191,9 @@ bool InferenceProcess::push(const InferenceJob &job) { bool InferenceProcess::runJob(InferenceJob &job) { printf("Running inference job: %s\n", job.name.c_str()); + // Register debug log callback for profiling + RegisterDebugLogCallback(tflu_debug_log); + tflite::MicroErrorReporter microErrorReporter; tflite::ErrorReporter *reporter = &microErrorReporter; @@ -197,7 +208,17 @@ bool InferenceProcess::runJob(InferenceJob &job) { // Create the TFL micro interpreter tflite::AllOpsResolver resolver; - tflite::MicroInterpreter interpreter(model, resolver, inferenceProcessTensorArena, TENSOR_ARENA_SIZE, reporter); + tflite::MicroProfiler profiler(reporter); + +#if defined(INFERENCE_PROC_TFLU_PROFILER) && defined(ETHOSU) + profiler.MonitorEthosuPMUEvents(ethosu_pmu_event_type(job.pmuEventConfig[0]), + ethosu_pmu_event_type(job.pmuEventConfig[1]), + ethosu_pmu_event_type(job.pmuEventConfig[2]), + ethosu_pmu_event_type(job.pmuEventConfig[3])); +#endif + + tflite::MicroInterpreter interpreter( + model, resolver, inferenceProcessTensorArena, TENSOR_ARENA_SIZE, reporter, &profiler); // Allocate tensors TfLiteStatus allocate_status = interpreter.AllocateTensors(); @@ -240,9 +261,6 @@ bool InferenceProcess::runJob(InferenceJob &job) { copy(static_cast<char *>(input.data), static_cast<char *>(input.data) + input.size, tensor->data.uint8); } - // Register debug log callback for profiling - RegisterDebugLogCallback(tflu_debug_log); - // Run the inference TfLiteStatus 
invoke_status = interpreter.Invoke(); if (invoke_status != kTfLiteOk) { @@ -250,6 +268,22 @@ bool InferenceProcess::runJob(InferenceJob &job) { return true; } + printf("%s : %zu\r\n", "arena_used_bytes", interpreter.arena_used_bytes()); + +#ifdef INFERENCE_PROC_TFLU_PROFILER + printf("Inference runtime: %u cycles\r\n", (unsigned int)profiler.TotalInferenceTime()); + + if (job.pmuCycleCounterEnable != 0) { + job.pmuCycleCounterCount = profiler.TotalInferenceTime(); + } + +#ifdef ETHOSU + for (uint32_t i = 0; i < ETHOSU_PMU_NCOUNTERS; i++) { + job.pmuEventCount[i] = profiler.GetEthosuPMUCounter(i); + } +#endif +#endif + // Copy output data if (job.output.size() > 0) { if (interpreter.outputs_size() != job.output.size()) { @@ -285,7 +319,7 @@ bool InferenceProcess::runJob(InferenceJob &job) { if (job.expectedOutput.size() > 0) { if (job.expectedOutput.size() != interpreter.outputs_size()) { - printf("Expeded number of output tensors does not match network. job=%s, expected=%zu, network=%zu\n", + printf("Expected number of output tensors does not match network. job=%s, expected=%zu, network=%zu\n", job.name.c_str(), job.expectedOutput.size(), interpreter.outputs_size()); |