aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Bhavik Patel <bhavik.patel@arm.com> 2020-11-16 12:13:56 +0100
committer: Bhavik Patel <bhavik.patel@arm.com> 2020-12-08 10:18:55 +0100
commit: ffe845d4aad5a389e295bab5f78c8088ef187ea0 (patch)
tree: 64e8aeb90db243a04e12993e416763d7ec293b33
parent: f1bda569bdc7812cef48a234bfe99321ef2e56fd (diff)
download: ethos-u-core-software-ffe845d4aad5a389e295bab5f78c8088ef187ea0.tar.gz
MLBEDSW-3269 Add pmu counters for inference_process
Change-Id: I6594acc228fe8048f56cb96ac7846a87491eed38
-rw-r--r-- applications/inference_process/include/inference_process.hpp | 9
-rw-r--r-- applications/inference_process/src/inference_process.cc | 48
-rw-r--r-- applications/message_process/include/message_process.hpp | 8
-rw-r--r-- applications/message_process/src/message_process.cc | 40
4 files changed, 92 insertions, 13 deletions
diff --git a/applications/inference_process/include/inference_process.hpp b/applications/inference_process/include/inference_process.hpp
index ec682d1..67b30c5 100644
--- a/applications/inference_process/include/inference_process.hpp
+++ b/applications/inference_process/include/inference_process.hpp
@@ -18,6 +18,7 @@
#pragma once
+#include <array>
#include <queue>
#include <stdlib.h>
#include <string>
@@ -41,6 +42,10 @@ struct InferenceJob {
std::vector<DataPtr> output;
std::vector<DataPtr> expectedOutput;
size_t numBytesToPrint;
+ std::vector<uint8_t> pmuEventConfig;
+ uint32_t pmuCycleCounterEnable;
+ std::vector<uint32_t> pmuEventCount;
+ uint64_t pmuCycleCounterCount;
InferenceJob();
InferenceJob(const std::string &name,
@@ -48,7 +53,9 @@ struct InferenceJob {
const std::vector<DataPtr> &input,
const std::vector<DataPtr> &output,
const std::vector<DataPtr> &expectedOutput,
- size_t numBytesToPrint);
+ size_t numBytesToPrint,
+ const std::vector<uint8_t> &pmuEventConfig,
+ const uint32_t pmuCycleCounterEnable);
void invalidate();
void clean();
diff --git a/applications/inference_process/src/inference_process.cc b/applications/inference_process/src/inference_process.cc
index ecf6144..f3d2da8 100644
--- a/applications/inference_process/src/inference_process.cc
+++ b/applications/inference_process/src/inference_process.cc
@@ -20,6 +20,7 @@
#include "tensorflow/lite/micro/cortex_m_generic/debug_log_callback.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/micro_interpreter.h"
+#include "tensorflow/lite/micro/micro_profiler.h"
#include "tensorflow/lite/schema/schema_generated.h"
#include "tensorflow/lite/version.h"
@@ -106,10 +107,17 @@ InferenceJob::InferenceJob(const string &_name,
const vector<DataPtr> &_input,
const vector<DataPtr> &_output,
const vector<DataPtr> &_expectedOutput,
- size_t _numBytesToPrint) :
+ size_t _numBytesToPrint,
+ const vector<uint8_t> &_pmuEventConfig,
+ const uint32_t pmuCycleCounterEnable) :
name(_name),
networkModel(_networkModel), input(_input), output(_output), expectedOutput(_expectedOutput),
- numBytesToPrint(_numBytesToPrint) {}
+ numBytesToPrint(_numBytesToPrint), pmuEventConfig(_pmuEventConfig), pmuCycleCounterEnable(pmuCycleCounterEnable),
+ pmuEventCount(), pmuCycleCounterCount(0) {
+#if defined(INFERENCE_PROC_TFLU_PROFILER) && defined(ETHOSU)
+ pmuEventCount = vector<uint32_t>(ETHOSU_PMU_NCOUNTERS, 0);
+#endif
+}
void InferenceJob::invalidate() {
networkModel.invalidate();
@@ -183,6 +191,9 @@ bool InferenceProcess::push(const InferenceJob &job) {
bool InferenceProcess::runJob(InferenceJob &job) {
printf("Running inference job: %s\n", job.name.c_str());
+ // Register debug log callback for profiling
+ RegisterDebugLogCallback(tflu_debug_log);
+
tflite::MicroErrorReporter microErrorReporter;
tflite::ErrorReporter *reporter = &microErrorReporter;
@@ -197,7 +208,17 @@ bool InferenceProcess::runJob(InferenceJob &job) {
// Create the TFL micro interpreter
tflite::AllOpsResolver resolver;
- tflite::MicroInterpreter interpreter(model, resolver, inferenceProcessTensorArena, TENSOR_ARENA_SIZE, reporter);
+ tflite::MicroProfiler profiler(reporter);
+
+#if defined(INFERENCE_PROC_TFLU_PROFILER) && defined(ETHOSU)
+ profiler.MonitorEthosuPMUEvents(ethosu_pmu_event_type(job.pmuEventConfig[0]),
+ ethosu_pmu_event_type(job.pmuEventConfig[1]),
+ ethosu_pmu_event_type(job.pmuEventConfig[2]),
+ ethosu_pmu_event_type(job.pmuEventConfig[3]));
+#endif
+
+ tflite::MicroInterpreter interpreter(
+ model, resolver, inferenceProcessTensorArena, TENSOR_ARENA_SIZE, reporter, &profiler);
// Allocate tensors
TfLiteStatus allocate_status = interpreter.AllocateTensors();
@@ -240,9 +261,6 @@ bool InferenceProcess::runJob(InferenceJob &job) {
copy(static_cast<char *>(input.data), static_cast<char *>(input.data) + input.size, tensor->data.uint8);
}
- // Register debug log callback for profiling
- RegisterDebugLogCallback(tflu_debug_log);
-
// Run the inference
TfLiteStatus invoke_status = interpreter.Invoke();
if (invoke_status != kTfLiteOk) {
@@ -250,6 +268,22 @@ bool InferenceProcess::runJob(InferenceJob &job) {
return true;
}
+ printf("%s : %zu\r\n", "arena_used_bytes", interpreter.arena_used_bytes());
+
+#ifdef INFERENCE_PROC_TFLU_PROFILER
+ printf("Inference runtime: %u cycles\r\n", (unsigned int)profiler.TotalInferenceTime());
+
+ if (job.pmuCycleCounterEnable != 0) {
+ job.pmuCycleCounterCount = profiler.TotalInferenceTime();
+ }
+
+#ifdef ETHOSU
+ for (uint32_t i = 0; i < ETHOSU_PMU_NCOUNTERS; i++) {
+ job.pmuEventCount[i] = profiler.GetEthosuPMUCounter(i);
+ }
+#endif
+#endif
+
// Copy output data
if (job.output.size() > 0) {
if (interpreter.outputs_size() != job.output.size()) {
@@ -285,7 +319,7 @@ bool InferenceProcess::runJob(InferenceJob &job) {
if (job.expectedOutput.size() > 0) {
if (job.expectedOutput.size() != interpreter.outputs_size()) {
- printf("Expeded number of output tensors does not match network. job=%s, expected=%zu, network=%zu\n",
+ printf("Expected number of output tensors does not match network. job=%s, expected=%zu, network=%zu\n",
job.name.c_str(),
job.expectedOutput.size(),
interpreter.outputs_size());
diff --git a/applications/message_process/include/message_process.hpp b/applications/message_process/include/message_process.hpp
index 602c2a4..96da30e 100644
--- a/applications/message_process/include/message_process.hpp
+++ b/applications/message_process/include/message_process.hpp
@@ -96,7 +96,13 @@ public:
void run();
bool handleMessage();
void sendPong();
- void sendInferenceRsp(uint64_t userArg, std::vector<InferenceProcess::DataPtr> &ofm, bool failed);
+ void sendInferenceRsp(uint64_t userArg,
+ std::vector<InferenceProcess::DataPtr> &ofm,
+ bool failed,
+ std::vector<uint8_t> &pmuEventConfig,
+ uint32_t pmuCycleCounterEnable,
+ std::vector<uint32_t> &pmuEventCount,
+ uint64_t pmuCycleCounterCount);
private:
QueueImpl queueIn;
diff --git a/applications/message_process/src/message_process.cc b/applications/message_process/src/message_process.cc
index c890399..8a8eec7 100644
--- a/applications/message_process/src/message_process.cc
+++ b/applications/message_process/src/message_process.cc
@@ -240,13 +240,25 @@ bool MessageProcess::handleMessage() {
vector<DataPtr> expectedOutput;
- InferenceJob job("job", networkModel, ifm, ofm, expectedOutput, -1);
+ vector<uint8_t> pmuEventConfig(ETHOSU_CORE_PMU_MAX); // pre-sized: operator[] below is unchecked and must not write past the end
+ for (uint32_t i = 0; i < ETHOSU_CORE_PMU_MAX; i++) {
+ pmuEventConfig[i] = req.pmu_event_config[i];
+ }
+
+ InferenceJob job(
+ "job", networkModel, ifm, ofm, expectedOutput, -1, pmuEventConfig, req.pmu_cycle_counter_enable);
job.invalidate();
bool failed = inferenceProcess.runJob(job);
job.clean();
- sendInferenceRsp(req.user_arg, job.output, failed);
+ sendInferenceRsp(req.user_arg,
+ job.output,
+ failed,
+ job.pmuEventConfig,
+ job.pmuCycleCounterEnable,
+ job.pmuEventCount,
+ job.pmuCycleCounterCount);
break;
}
default: {
@@ -266,8 +278,19 @@ void MessageProcess::sendPong() {
mailbox.sendMessage();
}
-void MessageProcess::sendInferenceRsp(uint64_t userArg, vector<DataPtr> &ofm, bool failed) {
- ethosu_core_inference_rsp rsp;
+void MessageProcess::sendInferenceRsp(uint64_t userArg,
+ vector<DataPtr> &ofm,
+ bool failed,
+ vector<uint8_t> &pmuEventConfig,
+ uint32_t pmuCycleCounterEnable,
+ vector<uint32_t> &pmuEventCount,
+ uint64_t pmuCycleCounterCount) {
+ ethosu_core_inference_rsp rsp = {
+ .pmu_event_count =
+ {
+ 0,
+ },
+ };
rsp.user_arg = userArg;
rsp.ofm_count = ofm.size();
@@ -277,6 +300,15 @@ void MessageProcess::sendInferenceRsp(uint64_t userArg, vector<DataPtr> &ofm, bo
rsp.ofm_size[i] = ofm[i].size;
}
+ for (size_t i = 0; i < pmuEventConfig.size(); i++) {
+ rsp.pmu_event_config[i] = pmuEventConfig[i];
+ }
+ rsp.pmu_cycle_counter_enable = pmuCycleCounterEnable;
+ for (size_t i = 0; i < pmuEventCount.size(); i++) {
+ rsp.pmu_event_count[i] = pmuEventCount[i];
+ }
+ rsp.pmu_cycle_counter_count = pmuCycleCounterCount;
+
printf("Sending inference response. userArg=0x%" PRIx64 ", ofm_count=%" PRIu32 ", status=%" PRIu32 "\n",
rsp.user_arg,
rsp.ofm_count,