about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Kristofer Jonsson <kristofer.jonsson@arm.com> 2021-12-21 16:25:19 +0100
committer Kristofer Jonsson <kristofer.jonsson@arm.com> 2022-01-27 12:37:31 +0100
commit dcc1ce0f9b64198f3bb82d9858ec79663808a35f (patch)
tree 17be381106669a64ad3c6e05b00691a3b5a30f37
parent 40d886e7d3facda35558448f408fa99ab61a5963 (diff)
download ethos-u-core-software-dcc1ce0f9b64198f3bb82d9858ec79663808a35f.tar.gz
Inference process update
Split runJob() into smaller functions to make the code easier to read and maintain.
Correct return type of Layer By Layer profiler GetTotalTicks().

Change-Id: Ie414522017b3b6c6da9a09182439dbe4b2efdb1f
-rw-r--r-- applications/inference_process/include/inference_process.hpp 18
-rw-r--r-- applications/inference_process/src/inference_process.cpp 279
-rw-r--r-- lib/layer_by_layer_profiler/include/layer_by_layer_profiler.hpp 2
-rw-r--r-- lib/layer_by_layer_profiler/src/layer_by_layer_profiler.cpp 20
4 files changed, 193 insertions, 126 deletions
diff --git a/applications/inference_process/include/inference_process.hpp b/applications/inference_process/include/inference_process.hpp
index b8b2775..6ab453c 100644
--- a/applications/inference_process/include/inference_process.hpp
+++ b/applications/inference_process/include/inference_process.hpp
@@ -24,6 +24,14 @@
#include <string>
#include <vector>
+struct TfLiteTensor;
+
+namespace tflite {
+// Forward declarations
+class MicroInterpreter;
+class MicroResourceVariables;
+} // namespace tflite
+
namespace InferenceProcess {
struct DataPtr {
void *data;
@@ -33,6 +41,9 @@ struct DataPtr {
void invalidate();
void clean();
+
+ char *begin() const;
+ char *end() const;
};
struct InferenceJob {
@@ -68,6 +79,13 @@ public:
bool runJob(InferenceJob &job);
private:
+ static bool copyIfm(InferenceJob &job, tflite::MicroInterpreter &interpreter);
+ static bool copyOfm(InferenceJob &job, tflite::MicroInterpreter &interpreter);
+ static bool compareOfm(InferenceJob &job, tflite::MicroInterpreter &interpreter);
+ static void printJob(InferenceJob &job, tflite::MicroInterpreter &interpreter);
+ static void printOutputTensor(TfLiteTensor *output, size_t bytesToPrint);
+ static void tfluDebugLog(const char *s);
+
uint8_t *tensorArena;
const size_t tensorArenaSize;
};
diff --git a/applications/inference_process/src/inference_process.cpp b/applications/inference_process/src/inference_process.cpp
index 499cdd3..ebd9d6c 100644
--- a/applications/inference_process/src/inference_process.cpp
+++ b/applications/inference_process/src/inference_process.cpp
@@ -39,10 +39,6 @@ using namespace std;
namespace {
-void tflu_debug_log(const char *s) {
- LOG("%s", s);
-}
-
class Crc {
public:
constexpr Crc() : table() {
@@ -79,55 +75,6 @@ private:
uint32_t table[256];
};
-void print_output_data(TfLiteTensor *output, size_t bytesToPrint) {
- constexpr auto crc = Crc();
- const uint32_t output_crc32 = crc.crc32(output->data.data, output->bytes);
- const int numBytesToPrint = min(output->bytes, bytesToPrint);
- int dims_size = output->dims->size;
- LOG("{\n");
- LOG("\"dims\": [%d,", dims_size);
- for (int i = 0; i < output->dims->size - 1; ++i) {
- LOG("%d,", output->dims->data[i]);
- }
- LOG("%d],\n", output->dims->data[dims_size - 1]);
- LOG("\"data_address\": \"%08" PRIx32 "\",\n", (uint32_t)output->data.data);
- if (numBytesToPrint) {
- LOG("\"crc32\": \"%08" PRIx32 "\",\n", output_crc32);
- LOG("\"data\":\"");
- for (int i = 0; i < numBytesToPrint - 1; ++i) {
- /*
- * Workaround an issue when compiling with GCC where by
- * printing only a '\n' the produced global output is wrong.
- */
- if (i % 15 == 0 && i != 0) {
- LOG("0x%02x,\n", output->data.uint8[i]);
- } else {
- LOG("0x%02x,", output->data.uint8[i]);
- }
- }
- LOG("0x%02x\"\n", output->data.uint8[numBytesToPrint - 1]);
- } else {
- LOG("\"crc32\": \"%08" PRIx32 "\"\n", output_crc32);
- }
- LOG("}");
-}
-
-bool copyOutput(const TfLiteTensor &src, InferenceProcess::DataPtr &dst) {
- if (dst.data == nullptr) {
- return false;
- }
-
- if (src.bytes > dst.size) {
- LOG_ERR("Tensor size mismatch (bytes): actual=%d, expected%d.", src.bytes, dst.size);
- return true;
- }
-
- copy(src.data.uint8, src.data.uint8 + src.bytes, static_cast<uint8_t *>(dst.data));
- dst.size = src.bytes;
-
- return false;
-}
-
} // namespace
namespace InferenceProcess {
@@ -145,6 +92,14 @@ void DataPtr::clean() {
#endif
}
+char *DataPtr::begin() const {
+ return static_cast<char *>(data);
+}
+
+char *DataPtr::end() const {
+ return static_cast<char *>(data) + size;
+}
+
InferenceJob::InferenceJob() : numBytesToPrint(0) {}
InferenceJob::InferenceJob(const string &_name,
@@ -199,10 +154,7 @@ bool InferenceProcess::runJob(InferenceJob &job) {
LOG_INFO("Running inference job: %s", job.name.c_str());
// Register debug log callback for profiling
- RegisterDebugLogCallback(tflu_debug_log);
-
- tflite::MicroErrorReporter microErrorReporter;
- tflite::ErrorReporter *reporter = &microErrorReporter;
+ RegisterDebugLogCallback(tfluDebugLog);
// Get model handle and verify that the version is correct
const tflite::Model *model = ::tflite::GetModel(job.networkModel.data);
@@ -221,15 +173,57 @@ bool InferenceProcess::runJob(InferenceJob &job) {
tflite::ArmProfiler profiler;
#endif
- tflite::MicroInterpreter interpreter(model, resolver, tensorArena, tensorArenaSize, reporter, nullptr, &profiler);
+ tflite::MicroErrorReporter errorReporter;
+ tflite::MicroInterpreter interpreter(
+ model, resolver, tensorArena, tensorArenaSize, &errorReporter, nullptr, &profiler);
// Allocate tensors
- TfLiteStatus allocate_status = interpreter.AllocateTensors();
- if (allocate_status != kTfLiteOk) {
+ TfLiteStatus status = interpreter.AllocateTensors();
+ if (status != kTfLiteOk) {
LOG_ERR("Failed to allocate tensors for inference: job=%s", job.name.c_str());
return true;
}
+ // Copy IFM data from job descriptor to TFLu arena
+ if (copyIfm(job, interpreter)) {
+ return true;
+ }
+
+ // Run the inference
+ status = interpreter.Invoke();
+ if (status != kTfLiteOk) {
+ LOG_ERR("Invoke failed for inference: job=%s", job.name.c_str());
+ return true;
+ }
+
+#ifdef LAYER_BY_LAYER_PROFILER
+ if (job.pmuCycleCounterEnable) {
+ job.pmuCycleCounterCount = profiler.GetPmuCycleCounterCount();
+ }
+
+ job.pmuEventCount.assign(profiler.GetPmuEventCount().begin(), profiler.GetPmuEventCount().end());
+#endif
+
+ LOG("Inference runtime: %" PRId32 " cycles\n", profiler.GetTotalTicks());
+
+ // Copy output data from TFLu arena to job descriptor
+ if (copyOfm(job, interpreter)) {
+ return true;
+ }
+
+ printJob(job, interpreter);
+
+ // Compare the OFM with the expected reference data
+ if (compareOfm(job, interpreter)) {
+ return true;
+ }
+
+ LOG_INFO("Finished running job: %s", job.name.c_str());
+
+ return false;
+}
+
+bool InferenceProcess::copyIfm(InferenceJob &job, tflite::MicroInterpreter &interpreter) {
// Create a filtered list of non empty input tensors
vector<TfLiteTensor *> inputTensors;
for (size_t i = 0; i < interpreter.inputs_size(); ++i) {
@@ -239,6 +233,7 @@ bool InferenceProcess::runJob(InferenceJob &job) {
inputTensors.push_back(tensor);
}
}
+
if (job.input.size() != inputTensors.size()) {
LOG_ERR("Number of input buffers does not match number of non empty network tensors: input=%zu, network=%zu",
job.input.size(),
@@ -246,10 +241,10 @@ bool InferenceProcess::runJob(InferenceJob &job) {
return true;
}
- // Copy input data
+ // Copy input data from job to TFLu arena
for (size_t i = 0; i < inputTensors.size(); ++i) {
- const DataPtr &input = job.input[i];
- const TfLiteTensor *tensor = inputTensors[i];
+ DataPtr &input = job.input[i];
+ TfLiteTensor *tensor = inputTensors[i];
if (input.size != tensor->bytes) {
LOG_ERR("Job input size does not match network input size: job=%s, index=%zu, input=%zu, network=%u",
@@ -260,96 +255,150 @@ bool InferenceProcess::runJob(InferenceJob &job) {
return true;
}
- copy(static_cast<char *>(input.data), static_cast<char *>(input.data) + input.size, tensor->data.uint8);
+ copy(input.begin(), input.end(), tensor->data.uint8);
}
- // Run the inference
- TfLiteStatus invoke_status = interpreter.Invoke();
- if (invoke_status != kTfLiteOk) {
- LOG_ERR("Invoke failed for inference: job=%s", job.name.c_str());
+ return false;
+}
+
+bool InferenceProcess::copyOfm(InferenceJob &job, tflite::MicroInterpreter &interpreter) {
+ // Skip copy if output is empty
+ if (job.output.empty()) {
+ return false;
+ }
+
+ if (interpreter.outputs_size() != job.output.size()) {
+ LOG_ERR("Output size mismatch: job=%zu, network=%u", job.output.size(), interpreter.outputs_size());
return true;
}
- LOG("arena_used_bytes : %zu\n", interpreter.arena_used_bytes());
+ for (unsigned i = 0; i < interpreter.outputs_size(); ++i) {
+ DataPtr &output = job.output[i];
+ TfLiteTensor *tensor = interpreter.output(i);
- LOG("Inference runtime: %u cycles\n", (unsigned int)profiler.GetTotalTicks());
+ if (tensor->bytes > output.size) {
+ LOG_ERR("Tensor size mismatch: tensor=%d, expected=%d", tensor->bytes, output.size);
+ return true;
+ }
-#ifdef LAYER_BY_LAYER_PROFILER
- if (job.pmuCycleCounterEnable) {
- job.pmuCycleCounterCount = profiler.GetPmuCycleCounterCount();
+ copy(tensor->data.uint8, tensor->data.uint8 + tensor->bytes, output.begin());
}
- job.pmuEventCount.assign(profiler.GetPmuEventCount().begin(), profiler.GetPmuEventCount().end());
-#endif
- // Copy output data
- if (job.output.size() > 0) {
- if (interpreter.outputs_size() != job.output.size()) {
- LOG_ERR("Output size mismatch: job=%zu, network=%u", job.output.size(), interpreter.outputs_size());
+ return false;
+}
+
+bool InferenceProcess::compareOfm(InferenceJob &job, tflite::MicroInterpreter &interpreter) {
+ // Skip verification if expected output is empty
+ if (job.expectedOutput.empty()) {
+ return false;
+ }
+
+ if (job.expectedOutput.size() != interpreter.outputs_size()) {
+ LOG_ERR("Expected number of output tensors mismatch: job=%s, expected=%zu, network=%zu",
+ job.name.c_str(),
+ job.expectedOutput.size(),
+ interpreter.outputs_size());
+ return true;
+ }
+
+ for (unsigned int i = 0; i < interpreter.outputs_size(); i++) {
+ const DataPtr &expected = job.expectedOutput[i];
+ const TfLiteTensor *output = interpreter.output(i);
+
+ if (expected.size != output->bytes) {
+ LOG_ERR("Expected output tensor size mismatch: job=%s, index=%u, expected=%zu, network=%zu",
+ job.name.c_str(),
+ i,
+ expected.size,
+ output->bytes);
return true;
}
- for (unsigned i = 0; i < interpreter.outputs_size(); ++i) {
- if (copyOutput(*interpreter.output(i), job.output[i])) {
+ const char *exp = expected.begin();
+ for (unsigned int j = 0; j < output->bytes; ++j) {
+ if (output->data.uint8[j] != exp[j]) {
+ LOG_ERR("Expected output tensor data mismatch: job=%s, index=%u, offset=%u, "
+ "expected=%02x, network=%02x\n",
+ job.name.c_str(),
+ i,
+ j,
+ exp[j],
+ output->data.uint8[j]);
return true;
}
}
}
+ return false;
+}
+
+void InferenceProcess::printJob(InferenceJob &job, tflite::MicroInterpreter &interpreter) {
+ for (size_t i = 0; i < job.pmuEventCount.size(); i++) {
+ LOG("ethosu_pmu_cntr%zu : %" PRIu32 "\n", i, job.pmuEventCount[i]);
+ }
+
+ LOG("arena_used_bytes : %zu\n", interpreter.arena_used_bytes());
+
// Print all of the output data, or the first NUM_BYTES_TO_PRINT bytes,
// whichever comes first as well as the output shape.
LOG("num_of_outputs: %d\n", interpreter.outputs_size());
LOG("output_begin\n");
LOG("[\n");
+
for (unsigned int i = 0; i < interpreter.outputs_size(); i++) {
- TfLiteTensor *output = interpreter.output(i);
- print_output_data(output, job.numBytesToPrint);
+ printOutputTensor(interpreter.output(i), job.numBytesToPrint);
+
if (i != interpreter.outputs_size() - 1) {
LOG(",\n");
}
}
+
LOG("]\n");
LOG("output_end\n");
+}
- if (job.expectedOutput.size() > 0) {
- if (job.expectedOutput.size() != interpreter.outputs_size()) {
- LOG_ERR("Expected number of output tensors mismatch: job=%s, expected=%zu, network=%zu",
- job.name.c_str(),
- job.expectedOutput.size(),
- interpreter.outputs_size());
- return true;
- }
+void InferenceProcess::printOutputTensor(TfLiteTensor *output, size_t bytesToPrint) {
+ constexpr auto crc = Crc();
+ const uint32_t crc32 = crc.crc32(output->data.data, output->bytes);
+ const int numBytesToPrint = min(output->bytes, bytesToPrint);
+ int dims_size = output->dims->size;
- for (unsigned int i = 0; i < interpreter.outputs_size(); i++) {
- const DataPtr &expected = job.expectedOutput[i];
- const TfLiteTensor *output = interpreter.output(i);
+ LOG("{\n");
+ LOG("\"dims\": [%d,", dims_size);
- if (expected.size != output->bytes) {
- LOG_ERR("Expected output tensor size mismatch: job=%s, index=%u, expected=%zu, network=%zu",
- job.name.c_str(),
- i,
- expected.size,
- output->bytes);
- return true;
- }
+ for (int i = 0; i < output->dims->size - 1; ++i) {
+ LOG("%d,", output->dims->data[i]);
+ }
- for (unsigned int j = 0; j < output->bytes; ++j) {
- if (output->data.uint8[j] != static_cast<uint8_t *>(expected.data)[j]) {
- LOG_ERR("Expected output tensor data mismatch: job=%s, index=%u, offset=%u, "
- "expected=%02x, network=%02x\n",
- job.name.c_str(),
- i,
- j,
- static_cast<uint8_t *>(expected.data)[j],
- output->data.uint8[j]);
- return true;
- }
+ LOG("%d],\n", output->dims->data[dims_size - 1]);
+ LOG("\"data_address\": \"%08" PRIx32 "\",\n", (uint32_t)output->data.data);
+
+ if (numBytesToPrint) {
+ LOG("\"crc32\": \"%08" PRIx32 "\",\n", crc32);
+ LOG("\"data\":\"");
+
+ for (int i = 0; i < numBytesToPrint - 1; ++i) {
+ /*
+ * Workaround an issue when compiling with GCC where by
+ * printing only a '\n' the produced global output is wrong.
+ */
+ if (i % 15 == 0 && i != 0) {
+ LOG("0x%02x,\n", output->data.uint8[i]);
+ } else {
+ LOG("0x%02x,", output->data.uint8[i]);
}
}
+
+ LOG("0x%02x\"\n", output->data.uint8[numBytesToPrint - 1]);
+ } else {
+ LOG("\"crc32\": \"%08" PRIx32 "\"\n", crc32);
}
- LOG_INFO("Finished running job: %s", job.name.c_str());
+ LOG("}");
+}
- return false;
+void InferenceProcess::tfluDebugLog(const char *s) {
+ LOG("%s", s);
}
} // namespace InferenceProcess
diff --git a/lib/layer_by_layer_profiler/include/layer_by_layer_profiler.hpp b/lib/layer_by_layer_profiler/include/layer_by_layer_profiler.hpp
index 37bd868..a547576 100644
--- a/lib/layer_by_layer_profiler/include/layer_by_layer_profiler.hpp
+++ b/lib/layer_by_layer_profiler/include/layer_by_layer_profiler.hpp
@@ -38,7 +38,7 @@ public:
int32_t event_id = EventID(EventLevelError, EvtStatistics_No, EventRecordNone));
uint32_t BeginEvent(const char *tag);
void EndEvent(uint32_t event_handle);
- uint64_t GetTotalTicks() const;
+ int32_t GetTotalTicks() const;
void Log() const;
uint64_t GetPmuCycleCounterCount() const;
diff --git a/lib/layer_by_layer_profiler/src/layer_by_layer_profiler.cpp b/lib/layer_by_layer_profiler/src/layer_by_layer_profiler.cpp
index a5b8e5a..a4f67d6 100644
--- a/lib/layer_by_layer_profiler/src/layer_by_layer_profiler.cpp
+++ b/lib/layer_by_layer_profiler/src/layer_by_layer_profiler.cpp
@@ -40,13 +40,13 @@ uint64_t GetCurrentEthosuTicks(struct ethosu_driver *drv) {
namespace tflite {
LayerByLayerProfiler::LayerByLayerProfiler(const std::vector<uint8_t> &event_config,
- bool pmu_cycle_counter_enable,
+ bool _pmu_cycle_counter_enable,
size_t max_events,
- Backend backend,
- int32_t event_id) :
+ Backend _backend,
+ int32_t _event_id) :
pmu_event_config(event_config),
- pmu_event_count(), pmu_cycle_counter_enable(pmu_cycle_counter_enable), pmu_cycle_counter_count(0),
- max_events_(max_events), backend(backend), event_id(event_id), num_events_(0) {
+ pmu_event_count(), pmu_cycle_counter_enable(_pmu_cycle_counter_enable), pmu_cycle_counter_count(0),
+ max_events_(max_events), backend(_backend), event_id(_event_id), num_events_(0) {
tags_ = std::make_unique<const char *[]>(max_events);
start_ticks_ = std::make_unique<uint64_t[]>(max_events);
@@ -67,7 +67,7 @@ uint32_t LayerByLayerProfiler::BeginEvent(const char *tag) {
size_t numEventCounters = ETHOSU_PMU_Get_NumEventCounters();
if (pmu_event_config.size() > numEventCounters) {
- LOG_WARN("PMU event config list is bigger (%lu) than available PMU event counters (%lu)",
+ LOG_WARN("PMU event config list is bigger (%zu) than available PMU event counters (%zu)",
pmu_event_config.size(),
numEventCounters);
LOG_WARN("PMU event config list will be truncated");
@@ -135,7 +135,7 @@ void LayerByLayerProfiler::EndEvent(uint32_t event_handle) {
if (backend == PRINTF) {
if (strcmp("ethos-u", tags_[event_handle]) == 0) {
for (size_t i = 0; i < pmu_event_count.size(); i++) {
- LOG("ethos-u : ethosu_pmu_cntr%lu : %u\n", i, pmu_event_count[i]);
+ LOG("ethos-u : ethosu_pmu_cntr%zu : %" PRIu32 "\n", i, pmu_event_count[i]);
}
LOG("ethos-u : cycle_cnt : %" PRIu64 " cycles\n", pmu_cycle_counter_count);
} else {
@@ -148,11 +148,11 @@ void LayerByLayerProfiler::EndEvent(uint32_t event_handle) {
}
}
-uint64_t LayerByLayerProfiler::GetTotalTicks() const {
- uint64_t ticks = 0;
+int32_t LayerByLayerProfiler::GetTotalTicks() const {
+ int32_t ticks = 0;
for (size_t i = 0; i < num_events_; ++i) {
- ticks += end_ticks_[i] - start_ticks_[i];
+ ticks += static_cast<int32_t>(end_ticks_[i] - start_ticks_[i]);
}
return ticks;