From 4c11a488d7f20c219ea6265480bc02f4b2cea1e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonny=20Sv=C3=A4rd?= Date: Fri, 17 Dec 2021 17:04:08 +0100 Subject: Use lbl-profiler to set up PMU and PMU events Adapt layer-by-layer profiler to handle PMU event configuration, PMU setup and retrieval of PMU counter values. Adapt the inference process application to support the lbl-profiler PMU setup and retrieve/save PMU counter values in the InferenceJob struct. Change-Id: I1667a5b11c43c54e7d28232b594dd118bf3f79a8 --- .../include/inference_process.hpp | 4 +-- .../inference_process/src/inference_process.cpp | 29 +++++++++++----------- 2 files changed, 17 insertions(+), 16 deletions(-) (limited to 'applications') diff --git a/applications/inference_process/include/inference_process.hpp b/applications/inference_process/include/inference_process.hpp index db57811..1679e19 100644 --- a/applications/inference_process/include/inference_process.hpp +++ b/applications/inference_process/include/inference_process.hpp @@ -43,7 +43,7 @@ struct InferenceJob { std::vector expectedOutput; size_t numBytesToPrint; std::vector pmuEventConfig; - uint32_t pmuCycleCounterEnable; + bool pmuCycleCounterEnable; std::vector pmuEventCount; uint64_t pmuCycleCounterCount; @@ -55,7 +55,7 @@ struct InferenceJob { const std::vector &expectedOutput, size_t numBytesToPrint, const std::vector &pmuEventConfig, - const uint32_t pmuCycleCounterEnable); + const bool pmuCycleCounterEnable); void invalidate(); void clean(); diff --git a/applications/inference_process/src/inference_process.cpp b/applications/inference_process/src/inference_process.cpp index 4990e62..94c62d3 100644 --- a/applications/inference_process/src/inference_process.cpp +++ b/applications/inference_process/src/inference_process.cpp @@ -80,10 +80,10 @@ private: }; void print_output_data(TfLiteTensor *output, size_t bytesToPrint) { - constexpr auto crc = Crc(); + constexpr auto crc = Crc(); const uint32_t output_crc32 = crc.crc32(output->data.data, 
output->bytes); - const int numBytesToPrint = min(output->bytes, bytesToPrint); - int dims_size = output->dims->size; + const int numBytesToPrint = min(output->bytes, bytesToPrint); + int dims_size = output->dims->size; LOG("{\n"); LOG("\"dims\": [%d,", dims_size); for (int i = 0; i < output->dims->size - 1; ++i) { @@ -91,15 +91,14 @@ void print_output_data(TfLiteTensor *output, size_t bytesToPrint) { } LOG("%d],\n", output->dims->data[dims_size - 1]); LOG("\"data_address\": \"%08" PRIx32 "\",\n", (uint32_t)output->data.data); - if (numBytesToPrint) - { + if (numBytesToPrint) { LOG("\"crc32\": \"%08" PRIx32 "\",\n", output_crc32); LOG("\"data\":\""); for (int i = 0; i < numBytesToPrint - 1; ++i) { /* - * Workaround an issue when compiling with GCC where by - * printing only a '\n' the produced global output is wrong. - */ + * Workaround an issue when compiling with GCC where by + * printing only a '\n' the produced global output is wrong. + */ if (i % 15 == 0 && i != 0) { LOG("0x%02x,\n", output->data.uint8[i]); } else { @@ -107,8 +106,7 @@ void print_output_data(TfLiteTensor *output, size_t bytesToPrint) { } } LOG("0x%02x\"\n", output->data.uint8[numBytesToPrint - 1]); - } - else { + } else { LOG("\"crc32\": \"%08" PRIx32 "\"\n", output_crc32); } LOG("}"); @@ -156,7 +154,7 @@ InferenceJob::InferenceJob(const string &_name, const vector &_expectedOutput, size_t _numBytesToPrint, const vector &_pmuEventConfig, - const uint32_t _pmuCycleCounterEnable) : + const bool _pmuCycleCounterEnable) : name(_name), networkModel(_networkModel), input(_input), output(_output), expectedOutput(_expectedOutput), numBytesToPrint(_numBytesToPrint), pmuEventConfig(_pmuEventConfig), pmuCycleCounterEnable(_pmuCycleCounterEnable), @@ -250,7 +248,7 @@ bool InferenceProcess::runJob(InferenceJob &job) { // Create the TFL micro interpreter tflite::AllOpsResolver resolver; #ifdef LAYER_BY_LAYER_PROFILER - tflite::LayerByLayerProfiler profiler; + tflite::LayerByLayerProfiler 
profiler(job.pmuEventConfig, job.pmuCycleCounterEnable); #else tflite::ArmProfiler profiler; #endif @@ -308,9 +306,12 @@ bool InferenceProcess::runJob(InferenceJob &job) { LOG("Inference runtime: %u cycles\n", (unsigned int)profiler.GetTotalTicks()); - if (job.pmuCycleCounterEnable != 0) { - job.pmuCycleCounterCount = profiler.GetTotalTicks(); +#ifdef LAYER_BY_LAYER_PROFILER + if (job.pmuCycleCounterEnable) { + job.pmuCycleCounterCount = profiler.GetPmuCycleCounterCount(); } + job.pmuEventCount.assign(profiler.GetPmuEventCount().begin(), profiler.GetPmuEventCount().end()); +#endif // Copy output data if (job.output.size() > 0) { -- cgit v1.2.1