From 895fda86af1daf86304499f3b74a6b930192395a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Per=20=C3=85strand?= Date: Thu, 7 Jan 2021 14:14:14 +0100 Subject: Rename cplusplus files for consistency Change-Id: I908ea337ffb7229a33af3364b5374209c7e9df54 --- applications/inference_process/CMakeLists.txt | 2 +- .../inference_process/src/inference_process.cc | 393 --------------------- .../inference_process/src/inference_process.cpp | 393 +++++++++++++++++++++ applications/message_process/CMakeLists.txt | 2 +- .../message_process/src/message_process.cc | 333 ----------------- .../message_process/src/message_process.cpp | 333 +++++++++++++++++ 6 files changed, 728 insertions(+), 728 deletions(-) delete mode 100644 applications/inference_process/src/inference_process.cc create mode 100644 applications/inference_process/src/inference_process.cpp delete mode 100644 applications/message_process/src/message_process.cc create mode 100644 applications/message_process/src/message_process.cpp (limited to 'applications') diff --git a/applications/inference_process/CMakeLists.txt b/applications/inference_process/CMakeLists.txt index ba9e44b..3348d28 100644 --- a/applications/inference_process/CMakeLists.txt +++ b/applications/inference_process/CMakeLists.txt @@ -23,7 +23,7 @@ add_library(inference_process STATIC) target_include_directories(inference_process PUBLIC include PRIVATE ${TENSORFLOW_PATH} ${TENSORFLOW_PATH}/tensorflow/lite/micro/tools/make/downloads/flatbuffers/include) target_link_libraries(inference_process PUBLIC tflu cmsis_core cmsis_device) -target_sources(inference_process PRIVATE src/inference_process.cc) +target_sources(inference_process PRIVATE src/inference_process.cpp) # Set arena size target_compile_definitions(inference_process PRIVATE TENSOR_ARENA_SIZE=${TR_ARENA_SIZE}) diff --git a/applications/inference_process/src/inference_process.cc b/applications/inference_process/src/inference_process.cc deleted file mode 100644 index b5ed5c4..0000000 --- 
a/applications/inference_process/src/inference_process.cc +++ /dev/null @@ -1,393 +0,0 @@ -/* - * Copyright (c) 2019-2020 Arm Limited. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "tensorflow/lite/micro/all_ops_resolver.h" -#include "tensorflow/lite/micro/cortex_m_generic/debug_log_callback.h" -#include "tensorflow/lite/micro/micro_error_reporter.h" -#include "tensorflow/lite/micro/micro_interpreter.h" -#include "tensorflow/lite/micro/micro_profiler.h" -#include "tensorflow/lite/schema/schema_generated.h" -#include "tensorflow/lite/version.h" - -#include "inference_process.hpp" - -#include "cmsis_compiler.h" - -#include - -#ifndef TENSOR_ARENA_SIZE -#define TENSOR_ARENA_SIZE (1024) -#endif - -using namespace std; - -__attribute__((section(".bss.NoInit"), aligned(16))) uint8_t inferenceProcessTensorArena[TENSOR_ARENA_SIZE]; - -namespace { - -void tflu_debug_log(const char *s) { - fprintf(stderr, "%s", s); -} - -void print_output_data(TfLiteTensor *output, size_t bytesToPrint) { - const int numBytesToPrint = min(output->bytes, bytesToPrint); - - int dims_size = output->dims->size; - printf("{\n"); - printf("\"dims\": [%d,", dims_size); - for (int i = 0; i < output->dims->size - 1; ++i) { - printf("%d,", output->dims->data[i]); - } - printf("%d],\n", output->dims->data[dims_size - 1]); - - printf("\"data_address\": \"%08" PRIx32 "\",\n", (uint32_t)output->data.data); - 
printf("\"data\":\""); - for (int i = 0; i < numBytesToPrint - 1; ++i) { - if (i % 16 == 0 && i != 0) { - printf("\n"); - } - printf("0x%02x,", output->data.uint8[i]); - } - printf("0x%02x\"\n", output->data.uint8[numBytesToPrint - 1]); - printf("}"); -} - -bool copyOutput(const TfLiteTensor &src, InferenceProcess::DataPtr &dst) { - if (dst.data == nullptr) { - return false; - } - - if (src.bytes > dst.size) { - printf("Tensor size %d does not match output size %d.\n", src.bytes, dst.size); - return true; - } - - copy(src.data.uint8, src.data.uint8 + src.bytes, static_cast(dst.data)); - dst.size = src.bytes; - - return false; -} - -} // namespace - -namespace InferenceProcess { -DataPtr::DataPtr(void *_data, size_t _size) : data(_data), size(_size) {} - -void DataPtr::invalidate() { -#if defined(__DCACHE_PRESENT) && (__DCACHE_PRESENT == 1U) - SCB_InvalidateDCache_by_Addr(reinterpret_cast(data), size); -#endif -} - -void DataPtr::clean() { -#if defined(__DCACHE_PRESENT) && (__DCACHE_PRESENT == 1U) - SCB_CleanDCache_by_Addr(reinterpret_cast(data), size); -#endif -} - -InferenceJob::InferenceJob() : numBytesToPrint(0) {} - -InferenceJob::InferenceJob(const string &_name, - const DataPtr &_networkModel, - const vector &_input, - const vector &_output, - const vector &_expectedOutput, - size_t _numBytesToPrint, - const vector &_pmuEventConfig, - const uint32_t _pmuCycleCounterEnable) : - name(_name), - networkModel(_networkModel), input(_input), output(_output), expectedOutput(_expectedOutput), - numBytesToPrint(_numBytesToPrint), pmuEventConfig(_pmuEventConfig), pmuCycleCounterEnable(_pmuCycleCounterEnable), - pmuEventCount(), pmuCycleCounterCount(0) { -#if defined(INFERENCE_PROC_TFLU_PROFILER) && defined(ETHOSU) - pmuEventCount = vector(ETHOSU_PMU_NCOUNTERS, 0); -#endif -} - -void InferenceJob::invalidate() { - networkModel.invalidate(); - - for (auto &it : input) { - it.invalidate(); - } - - for (auto &it : output) { - it.invalidate(); - } - - for (auto &it : 
expectedOutput) { - it.invalidate(); - } -} - -void InferenceJob::clean() { - networkModel.clean(); - - for (auto &it : input) { - it.clean(); - } - - for (auto &it : output) { - it.clean(); - } - - for (auto &it : expectedOutput) { - it.clean(); - } -} - -InferenceProcess::InferenceProcess() : lock(0) {} - -// NOTE: Adding code for get_lock & free_lock with some corrections from -// http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dai0321a/BIHEJCHB.html -// TODO: check correctness? -void InferenceProcess::getLock() { - int status = 0; - - do { - // Wait until lock_var is free - while (__LDREXW(&lock) != 0) - ; - - // Try to set lock_var - status = __STREXW(1, &lock); - } while (status != 0); - - // Do not start any other memory access until memory barrier is completed - __DMB(); -} - -// TODO: check correctness? -void InferenceProcess::freeLock() { - // Ensure memory operations completed before releasing lock - __DMB(); - - lock = 0; -} - -bool InferenceProcess::push(const InferenceJob &job) { - getLock(); - inferenceJobQueue.push(job); - freeLock(); - - return true; -} - -bool InferenceProcess::runJob(InferenceJob &job) { - printf("Running inference job: %s\n", job.name.c_str()); - - // Register debug log callback for profiling - RegisterDebugLogCallback(tflu_debug_log); - - tflite::MicroErrorReporter microErrorReporter; - tflite::ErrorReporter *reporter = &microErrorReporter; - - // Get model handle and verify that the version is correct - const tflite::Model *model = ::tflite::GetModel(job.networkModel.data); - if (model->version() != TFLITE_SCHEMA_VERSION) { - printf("Model provided is schema version %" PRIu32 " not equal to supported version %d.\n", - model->version(), - TFLITE_SCHEMA_VERSION); - return true; - } - - // Create the TFL micro interpreter - tflite::AllOpsResolver resolver; - tflite::MicroProfiler profiler(reporter); - -#if defined(INFERENCE_PROC_TFLU_PROFILER) && defined(ETHOSU) - 
profiler.MonitorEthosuPMUEvents(ethosu_pmu_event_type(job.pmuEventConfig[0]), - ethosu_pmu_event_type(job.pmuEventConfig[1]), - ethosu_pmu_event_type(job.pmuEventConfig[2]), - ethosu_pmu_event_type(job.pmuEventConfig[3])); -#endif - - tflite::MicroInterpreter interpreter( - model, resolver, inferenceProcessTensorArena, TENSOR_ARENA_SIZE, reporter, &profiler); - - // Allocate tensors - TfLiteStatus allocate_status = interpreter.AllocateTensors(); - if (allocate_status != kTfLiteOk) { - printf("AllocateTensors failed for inference job: %s\n", job.name.c_str()); - return true; - } - - // Create a filtered list of non empty input tensors - vector inputTensors; - for (size_t i = 0; i < interpreter.inputs_size(); ++i) { - TfLiteTensor *tensor = interpreter.input(i); - - if (tensor->bytes > 0) { - inputTensors.push_back(tensor); - } - } - - if (job.input.size() != inputTensors.size()) { - printf("Number of input buffers does not match number of non empty network tensors. input=%zu, network=%zu\n", - job.input.size(), - inputTensors.size()); - return true; - } - - // Copy input data - for (size_t i = 0; i < inputTensors.size(); ++i) { - const DataPtr &input = job.input[i]; - const TfLiteTensor *tensor = inputTensors[i]; - - if (input.size != tensor->bytes) { - printf("Input size does not match network size. 
job=%s, index=%zu, input=%zu, network=%u\n", - job.name.c_str(), - i, - input.size, - tensor->bytes); - return true; - } - - copy(static_cast(input.data), static_cast(input.data) + input.size, tensor->data.uint8); - } - - // Run the inference - TfLiteStatus invoke_status = interpreter.Invoke(); - if (invoke_status != kTfLiteOk) { - printf("Invoke failed for inference job: %s\n", job.name.c_str()); - return true; - } - - printf("%s : %zu\r\n", "arena_used_bytes", interpreter.arena_used_bytes()); - -#ifdef INFERENCE_PROC_TFLU_PROFILER - printf("Inference runtime: %u cycles\r\n", (unsigned int)profiler.TotalInferenceTime()); - - if (job.pmuCycleCounterEnable != 0) { - job.pmuCycleCounterCount = profiler.TotalInferenceTime(); - } - -#ifdef ETHOSU - for (uint32_t i = 0; i < ETHOSU_PMU_NCOUNTERS; i++) { - job.pmuEventCount[i] = profiler.GetEthosuPMUCounter(i); - } -#endif -#endif - - // Copy output data - if (job.output.size() > 0) { - if (interpreter.outputs_size() != job.output.size()) { - printf("Number of outputs mismatch. job=%zu, network=%u\n", job.output.size(), interpreter.outputs_size()); - return true; - } - - for (unsigned i = 0; i < interpreter.outputs_size(); ++i) { - if (copyOutput(*interpreter.output(i), job.output[i])) { - return true; - } - } - } - - if (job.numBytesToPrint > 0) { - // Print all of the output data, or the first NUM_BYTES_TO_PRINT bytes, - // whichever comes first as well as the output shape. 
- printf("num_of_outputs: %d\n", interpreter.outputs_size()); - printf("output_begin\n"); - printf("[\n"); - - for (unsigned int i = 0; i < interpreter.outputs_size(); i++) { - TfLiteTensor *output = interpreter.output(i); - print_output_data(output, job.numBytesToPrint); - if (i != interpreter.outputs_size() - 1) { - printf(",\n"); - } - } - - printf("]\n"); - printf("output_end\n"); - } - - if (job.expectedOutput.size() > 0) { - if (job.expectedOutput.size() != interpreter.outputs_size()) { - printf("Expected number of output tensors does not match network. job=%s, expected=%zu, network=%zu\n", - job.name.c_str(), - job.expectedOutput.size(), - interpreter.outputs_size()); - return true; - } - - for (unsigned int i = 0; i < interpreter.outputs_size(); i++) { - const DataPtr &expected = job.expectedOutput[i]; - const TfLiteTensor *output = interpreter.output(i); - - if (expected.size != output->bytes) { - printf( - "Expected tensor size does not match network size. job=%s, index=%u, expected=%zu, network=%zu\n", - job.name.c_str(), - i, - expected.size, - output->bytes); - return true; - } - - for (unsigned int j = 0; j < output->bytes; ++j) { - if (output->data.uint8[j] != static_cast(expected.data)[j]) { - printf("Expected tensor size does not match network size. 
job=%s, index=%u, offset=%u, " - "expected=%02x, network=%02x\n", - job.name.c_str(), - i, - j, - static_cast(expected.data)[j], - output->data.uint8[j]); - } - } - } - } - - printf("Finished running job: %s\n", job.name.c_str()); - - return false; -} - -bool InferenceProcess::run(bool exitOnEmpty) { - bool anyJobFailed = false; - - while (true) { - getLock(); - bool empty = inferenceJobQueue.empty(); - freeLock(); - - if (empty) { - if (exitOnEmpty) { - printf("Exit from InferenceProcess::run() on empty job queue!\n"); - break; - } - - continue; - } - - getLock(); - InferenceJob job = inferenceJobQueue.front(); - inferenceJobQueue.pop(); - freeLock(); - - if (runJob(job)) { - anyJobFailed = true; - continue; - } - } - - return anyJobFailed; -} - -} // namespace InferenceProcess diff --git a/applications/inference_process/src/inference_process.cpp b/applications/inference_process/src/inference_process.cpp new file mode 100644 index 0000000..b5ed5c4 --- /dev/null +++ b/applications/inference_process/src/inference_process.cpp @@ -0,0 +1,393 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "tensorflow/lite/micro/all_ops_resolver.h" +#include "tensorflow/lite/micro/cortex_m_generic/debug_log_callback.h" +#include "tensorflow/lite/micro/micro_error_reporter.h" +#include "tensorflow/lite/micro/micro_interpreter.h" +#include "tensorflow/lite/micro/micro_profiler.h" +#include "tensorflow/lite/schema/schema_generated.h" +#include "tensorflow/lite/version.h" + +#include "inference_process.hpp" + +#include "cmsis_compiler.h" + +#include + +#ifndef TENSOR_ARENA_SIZE +#define TENSOR_ARENA_SIZE (1024) +#endif + +using namespace std; + +__attribute__((section(".bss.NoInit"), aligned(16))) uint8_t inferenceProcessTensorArena[TENSOR_ARENA_SIZE]; + +namespace { + +void tflu_debug_log(const char *s) { + fprintf(stderr, "%s", s); +} + +void print_output_data(TfLiteTensor *output, size_t bytesToPrint) { + const int numBytesToPrint = min(output->bytes, bytesToPrint); + + int dims_size = output->dims->size; + printf("{\n"); + printf("\"dims\": [%d,", dims_size); + for (int i = 0; i < output->dims->size - 1; ++i) { + printf("%d,", output->dims->data[i]); + } + printf("%d],\n", output->dims->data[dims_size - 1]); + + printf("\"data_address\": \"%08" PRIx32 "\",\n", (uint32_t)output->data.data); + printf("\"data\":\""); + for (int i = 0; i < numBytesToPrint - 1; ++i) { + if (i % 16 == 0 && i != 0) { + printf("\n"); + } + printf("0x%02x,", output->data.uint8[i]); + } + printf("0x%02x\"\n", output->data.uint8[numBytesToPrint - 1]); + printf("}"); +} + +bool copyOutput(const TfLiteTensor &src, InferenceProcess::DataPtr &dst) { + if (dst.data == nullptr) { + return false; + } + + if (src.bytes > dst.size) { + printf("Tensor size %d does not match output size %d.\n", src.bytes, dst.size); + return true; + } + + copy(src.data.uint8, src.data.uint8 + src.bytes, static_cast(dst.data)); + dst.size = src.bytes; + + return false; +} + +} // namespace + +namespace InferenceProcess { +DataPtr::DataPtr(void *_data, size_t _size) : data(_data), size(_size) {} + +void 
DataPtr::invalidate() { +#if defined(__DCACHE_PRESENT) && (__DCACHE_PRESENT == 1U) + SCB_InvalidateDCache_by_Addr(reinterpret_cast(data), size); +#endif +} + +void DataPtr::clean() { +#if defined(__DCACHE_PRESENT) && (__DCACHE_PRESENT == 1U) + SCB_CleanDCache_by_Addr(reinterpret_cast(data), size); +#endif +} + +InferenceJob::InferenceJob() : numBytesToPrint(0) {} + +InferenceJob::InferenceJob(const string &_name, + const DataPtr &_networkModel, + const vector &_input, + const vector &_output, + const vector &_expectedOutput, + size_t _numBytesToPrint, + const vector &_pmuEventConfig, + const uint32_t _pmuCycleCounterEnable) : + name(_name), + networkModel(_networkModel), input(_input), output(_output), expectedOutput(_expectedOutput), + numBytesToPrint(_numBytesToPrint), pmuEventConfig(_pmuEventConfig), pmuCycleCounterEnable(_pmuCycleCounterEnable), + pmuEventCount(), pmuCycleCounterCount(0) { +#if defined(INFERENCE_PROC_TFLU_PROFILER) && defined(ETHOSU) + pmuEventCount = vector(ETHOSU_PMU_NCOUNTERS, 0); +#endif +} + +void InferenceJob::invalidate() { + networkModel.invalidate(); + + for (auto &it : input) { + it.invalidate(); + } + + for (auto &it : output) { + it.invalidate(); + } + + for (auto &it : expectedOutput) { + it.invalidate(); + } +} + +void InferenceJob::clean() { + networkModel.clean(); + + for (auto &it : input) { + it.clean(); + } + + for (auto &it : output) { + it.clean(); + } + + for (auto &it : expectedOutput) { + it.clean(); + } +} + +InferenceProcess::InferenceProcess() : lock(0) {} + +// NOTE: Adding code for get_lock & free_lock with some corrections from +// http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dai0321a/BIHEJCHB.html +// TODO: check correctness? 
+void InferenceProcess::getLock() { + int status = 0; + + do { + // Wait until lock_var is free + while (__LDREXW(&lock) != 0) + ; + + // Try to set lock_var + status = __STREXW(1, &lock); + } while (status != 0); + + // Do not start any other memory access until memory barrier is completed + __DMB(); +} + +// TODO: check correctness? +void InferenceProcess::freeLock() { + // Ensure memory operations completed before releasing lock + __DMB(); + + lock = 0; +} + +bool InferenceProcess::push(const InferenceJob &job) { + getLock(); + inferenceJobQueue.push(job); + freeLock(); + + return true; +} + +bool InferenceProcess::runJob(InferenceJob &job) { + printf("Running inference job: %s\n", job.name.c_str()); + + // Register debug log callback for profiling + RegisterDebugLogCallback(tflu_debug_log); + + tflite::MicroErrorReporter microErrorReporter; + tflite::ErrorReporter *reporter = &microErrorReporter; + + // Get model handle and verify that the version is correct + const tflite::Model *model = ::tflite::GetModel(job.networkModel.data); + if (model->version() != TFLITE_SCHEMA_VERSION) { + printf("Model provided is schema version %" PRIu32 " not equal to supported version %d.\n", + model->version(), + TFLITE_SCHEMA_VERSION); + return true; + } + + // Create the TFL micro interpreter + tflite::AllOpsResolver resolver; + tflite::MicroProfiler profiler(reporter); + +#if defined(INFERENCE_PROC_TFLU_PROFILER) && defined(ETHOSU) + profiler.MonitorEthosuPMUEvents(ethosu_pmu_event_type(job.pmuEventConfig[0]), + ethosu_pmu_event_type(job.pmuEventConfig[1]), + ethosu_pmu_event_type(job.pmuEventConfig[2]), + ethosu_pmu_event_type(job.pmuEventConfig[3])); +#endif + + tflite::MicroInterpreter interpreter( + model, resolver, inferenceProcessTensorArena, TENSOR_ARENA_SIZE, reporter, &profiler); + + // Allocate tensors + TfLiteStatus allocate_status = interpreter.AllocateTensors(); + if (allocate_status != kTfLiteOk) { + printf("AllocateTensors failed for inference job: %s\n", 
job.name.c_str()); + return true; + } + + // Create a filtered list of non empty input tensors + vector inputTensors; + for (size_t i = 0; i < interpreter.inputs_size(); ++i) { + TfLiteTensor *tensor = interpreter.input(i); + + if (tensor->bytes > 0) { + inputTensors.push_back(tensor); + } + } + + if (job.input.size() != inputTensors.size()) { + printf("Number of input buffers does not match number of non empty network tensors. input=%zu, network=%zu\n", + job.input.size(), + inputTensors.size()); + return true; + } + + // Copy input data + for (size_t i = 0; i < inputTensors.size(); ++i) { + const DataPtr &input = job.input[i]; + const TfLiteTensor *tensor = inputTensors[i]; + + if (input.size != tensor->bytes) { + printf("Input size does not match network size. job=%s, index=%zu, input=%zu, network=%u\n", + job.name.c_str(), + i, + input.size, + tensor->bytes); + return true; + } + + copy(static_cast(input.data), static_cast(input.data) + input.size, tensor->data.uint8); + } + + // Run the inference + TfLiteStatus invoke_status = interpreter.Invoke(); + if (invoke_status != kTfLiteOk) { + printf("Invoke failed for inference job: %s\n", job.name.c_str()); + return true; + } + + printf("%s : %zu\r\n", "arena_used_bytes", interpreter.arena_used_bytes()); + +#ifdef INFERENCE_PROC_TFLU_PROFILER + printf("Inference runtime: %u cycles\r\n", (unsigned int)profiler.TotalInferenceTime()); + + if (job.pmuCycleCounterEnable != 0) { + job.pmuCycleCounterCount = profiler.TotalInferenceTime(); + } + +#ifdef ETHOSU + for (uint32_t i = 0; i < ETHOSU_PMU_NCOUNTERS; i++) { + job.pmuEventCount[i] = profiler.GetEthosuPMUCounter(i); + } +#endif +#endif + + // Copy output data + if (job.output.size() > 0) { + if (interpreter.outputs_size() != job.output.size()) { + printf("Number of outputs mismatch. 
job=%zu, network=%u\n", job.output.size(), interpreter.outputs_size()); + return true; + } + + for (unsigned i = 0; i < interpreter.outputs_size(); ++i) { + if (copyOutput(*interpreter.output(i), job.output[i])) { + return true; + } + } + } + + if (job.numBytesToPrint > 0) { + // Print all of the output data, or the first NUM_BYTES_TO_PRINT bytes, + // whichever comes first as well as the output shape. + printf("num_of_outputs: %d\n", interpreter.outputs_size()); + printf("output_begin\n"); + printf("[\n"); + + for (unsigned int i = 0; i < interpreter.outputs_size(); i++) { + TfLiteTensor *output = interpreter.output(i); + print_output_data(output, job.numBytesToPrint); + if (i != interpreter.outputs_size() - 1) { + printf(",\n"); + } + } + + printf("]\n"); + printf("output_end\n"); + } + + if (job.expectedOutput.size() > 0) { + if (job.expectedOutput.size() != interpreter.outputs_size()) { + printf("Expected number of output tensors does not match network. job=%s, expected=%zu, network=%zu\n", + job.name.c_str(), + job.expectedOutput.size(), + interpreter.outputs_size()); + return true; + } + + for (unsigned int i = 0; i < interpreter.outputs_size(); i++) { + const DataPtr &expected = job.expectedOutput[i]; + const TfLiteTensor *output = interpreter.output(i); + + if (expected.size != output->bytes) { + printf( + "Expected tensor size does not match network size. job=%s, index=%u, expected=%zu, network=%zu\n", + job.name.c_str(), + i, + expected.size, + output->bytes); + return true; + } + + for (unsigned int j = 0; j < output->bytes; ++j) { + if (output->data.uint8[j] != static_cast(expected.data)[j]) { + printf("Expected tensor size does not match network size. 
job=%s, index=%u, offset=%u, " + "expected=%02x, network=%02x\n", + job.name.c_str(), + i, + j, + static_cast(expected.data)[j], + output->data.uint8[j]); + } + } + } + } + + printf("Finished running job: %s\n", job.name.c_str()); + + return false; +} + +bool InferenceProcess::run(bool exitOnEmpty) { + bool anyJobFailed = false; + + while (true) { + getLock(); + bool empty = inferenceJobQueue.empty(); + freeLock(); + + if (empty) { + if (exitOnEmpty) { + printf("Exit from InferenceProcess::run() on empty job queue!\n"); + break; + } + + continue; + } + + getLock(); + InferenceJob job = inferenceJobQueue.front(); + inferenceJobQueue.pop(); + freeLock(); + + if (runJob(job)) { + anyJobFailed = true; + continue; + } + } + + return anyJobFailed; +} + +} // namespace InferenceProcess diff --git a/applications/message_process/CMakeLists.txt b/applications/message_process/CMakeLists.txt index 2f7e5cf..ed27dcd 100644 --- a/applications/message_process/CMakeLists.txt +++ b/applications/message_process/CMakeLists.txt @@ -16,6 +16,6 @@ # limitations under the License. # -add_library(message_process STATIC src/message_process.cc) +add_library(message_process STATIC src/message_process.cpp) target_include_directories(message_process PUBLIC include ${LINUX_DRIVER_STACK_PATH}/kernel) target_link_libraries(message_process PRIVATE cmsis_core inference_process ethosu_mailbox) diff --git a/applications/message_process/src/message_process.cc b/applications/message_process/src/message_process.cc deleted file mode 100644 index 13ed8c4..0000000 --- a/applications/message_process/src/message_process.cc +++ /dev/null @@ -1,333 +0,0 @@ -/* - * Copyright (c) 2020 Arm Limited. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include - -#include "cmsis_compiler.h" - -#include -#include -#include -#include - -using namespace std; -using namespace InferenceProcess; - -namespace MessageProcess { - -QueueImpl::QueueImpl(ethosu_core_queue &_queue) : queue(_queue) { - cleanHeaderData(); -} - -bool QueueImpl::empty() const { - return queue.header.read == queue.header.write; -} - -size_t QueueImpl::available() const { - size_t avail = queue.header.write - queue.header.read; - - if (queue.header.read > queue.header.write) { - avail += queue.header.size; - } - - return avail; -} - -size_t QueueImpl::capacity() const { - return queue.header.size - available(); -} - -bool QueueImpl::read(uint8_t *dst, uint32_t length) { - const uint8_t *end = dst + length; - uint32_t rpos = queue.header.read; - - invalidateHeaderData(); - - if (length > available()) { - return false; - } - - while (dst < end) { - *dst++ = queue.data[rpos]; - rpos = (rpos + 1) % queue.header.size; - } - - queue.header.read = rpos; - - cleanHeader(); - - return true; -} - -bool QueueImpl::write(const Vec *vec, size_t length) { - size_t total = 0; - - for (size_t i = 0; i < length; i++) { - total += vec[i].length; - } - - invalidateHeader(); - - if (total > capacity()) { - return false; - } - - uint32_t wpos = queue.header.write; - - for (size_t i = 0; i < length; i++) { - const uint8_t *src = reinterpret_cast(vec[i].base); - const uint8_t *end = src + vec[i].length; - - while (src < end) { - queue.data[wpos] = *src++; - wpos = (wpos + 1) % queue.header.size; - } - } - - // Update the write position last - 
queue.header.write = wpos; - - cleanHeaderData(); - - return true; -} - -bool QueueImpl::write(const uint32_t type, const void *src, uint32_t length) { - ethosu_core_msg msg = {type, length}; - Vec vec[2] = {{&msg, sizeof(msg)}, {src, length}}; - - return write(vec, 2); -} - -bool QueueImpl::skip(uint32_t length) { - uint32_t rpos = queue.header.read; - - invalidateHeader(); - - if (length > available()) { - return false; - } - - queue.header.read = (rpos + length) % queue.header.size; - - cleanHeader(); - - return true; -} - -void QueueImpl::cleanHeader() const { -#if defined(__DCACHE_PRESENT) && (__DCACHE_PRESENT == 1U) - SCB_CleanDCache_by_Addr(reinterpret_cast(&queue.header), sizeof(queue.header)); -#endif -} - -void QueueImpl::cleanHeaderData() const { -#if defined(__DCACHE_PRESENT) && (__DCACHE_PRESENT == 1U) - SCB_CleanDCache_by_Addr(reinterpret_cast(&queue.header), sizeof(queue.header)); - uintptr_t queueDataPtr = reinterpret_cast(&queue.data[0]); - SCB_CleanDCache_by_Addr(reinterpret_cast(queueDataPtr & ~3), queue.header.size + (queueDataPtr & 3)); -#endif -} - -void QueueImpl::invalidateHeader() const { -#if defined(__DCACHE_PRESENT) && (__DCACHE_PRESENT == 1U) - SCB_InvalidateDCache_by_Addr(reinterpret_cast(&queue.header), sizeof(queue.header)); -#endif -} - -void QueueImpl::invalidateHeaderData() const { -#if defined(__DCACHE_PRESENT) && (__DCACHE_PRESENT == 1U) - SCB_InvalidateDCache_by_Addr(reinterpret_cast(&queue.header), sizeof(queue.header)); - uintptr_t queueDataPtr = reinterpret_cast(&queue.data[0]); - SCB_InvalidateDCache_by_Addr(reinterpret_cast(queueDataPtr & ~3), - queue.header.size + (queueDataPtr & 3)); -#endif -} - -MessageProcess::MessageProcess(ethosu_core_queue &in, - ethosu_core_queue &out, - Mailbox::Mailbox &mbox, - ::InferenceProcess::InferenceProcess &_inferenceProcess) : - queueIn(in), - queueOut(out), mailbox(mbox), inferenceProcess(_inferenceProcess) { - mailbox.registerCallback(mailboxCallback, reinterpret_cast(this)); -} - 
-void MessageProcess::run() { - while (true) { - // Handle all messages in queue - while (handleMessage()) - ; - - // Wait for event - __WFE(); - } -} - -void MessageProcess::handleIrq() { - __SEV(); -} - -bool MessageProcess::handleMessage() { - ethosu_core_msg msg; - - // Read msg header - if (!queueIn.read(msg)) { - return false; - } - - printf("Message. type=%" PRIu32 ", length=%" PRIu32 "\n", msg.type, msg.length); - - switch (msg.type) { - case ETHOSU_CORE_MSG_PING: - printf("Ping\n"); - sendPong(); - break; - case ETHOSU_CORE_MSG_INFERENCE_REQ: { - ethosu_core_inference_req req; - - if (!queueIn.readOrSkip(req, msg.length)) { - printf("Failed to read payload.\n"); - return false; - } - - printf("InferenceReq. user_arg=0x%" PRIx64 ", network={0x%" PRIx32 ", %" PRIu32 "}", - req.user_arg, - req.network.ptr, - req.network.size); - - printf(", ifm_count=%" PRIu32 ", ifm=[", req.ifm_count); - for (uint32_t i = 0; i < req.ifm_count; ++i) { - if (i > 0) { - printf(", "); - } - - printf("{0x%" PRIx32 ", %" PRIu32 "}", req.ifm[i].ptr, req.ifm[i].size); - } - printf("]"); - - printf(", ofm_count=%" PRIu32 ", ofm=[", req.ofm_count); - for (uint32_t i = 0; i < req.ofm_count; ++i) { - if (i > 0) { - printf(", "); - } - - printf("{0x%" PRIx32 ", %" PRIu32 "}", req.ofm[i].ptr, req.ofm[i].size); - } - printf("]\n"); - - DataPtr networkModel(reinterpret_cast(req.network.ptr), req.network.size); - - vector ifm; - for (uint32_t i = 0; i < req.ifm_count; ++i) { - ifm.push_back(DataPtr(reinterpret_cast(req.ifm[i].ptr), req.ifm[i].size)); - } - - vector ofm; - for (uint32_t i = 0; i < req.ofm_count; ++i) { - ofm.push_back(DataPtr(reinterpret_cast(req.ofm[i].ptr), req.ofm[i].size)); - } - - vector expectedOutput; - - vector pmuEventConfig(ETHOSU_CORE_PMU_MAX); - for (uint32_t i = 0; i < ETHOSU_CORE_PMU_MAX; i++) { - pmuEventConfig[i] = req.pmu_event_config[i]; - } - - InferenceJob job( - "job", networkModel, ifm, ofm, expectedOutput, -1, pmuEventConfig, 
req.pmu_cycle_counter_enable); - job.invalidate(); - - bool failed = inferenceProcess.runJob(job); - job.clean(); - - sendInferenceRsp(req.user_arg, - job.output, - failed, - job.pmuEventConfig, - job.pmuCycleCounterEnable, - job.pmuEventCount, - job.pmuCycleCounterCount); - break; - } - default: { - printf("Unexpected message type: %" PRIu32 ", skipping %" PRIu32 " bytes\n", msg.type, msg.length); - - queueIn.skip(msg.length); - } break; - } - - return true; -} - -void MessageProcess::sendPong() { - if (!queueOut.write(ETHOSU_CORE_MSG_PONG)) { - printf("Failed to write pong.\n"); - } - mailbox.sendMessage(); -} - -void MessageProcess::sendInferenceRsp(uint64_t userArg, - vector &ofm, - bool failed, - vector &pmuEventConfig, - uint32_t pmuCycleCounterEnable, - vector &pmuEventCount, - uint64_t pmuCycleCounterCount) { - ethosu_core_inference_rsp rsp = { - .pmu_event_count = - { - 0, - }, - }; - - rsp.user_arg = userArg; - rsp.ofm_count = ofm.size(); - rsp.status = failed ? ETHOSU_CORE_STATUS_ERROR : ETHOSU_CORE_STATUS_OK; - - for (size_t i = 0; i < ofm.size(); ++i) { - rsp.ofm_size[i] = ofm[i].size; - } - - for (size_t i = 0; i < pmuEventConfig.size(); i++) { - rsp.pmu_event_config[i] = pmuEventConfig[i]; - } - rsp.pmu_cycle_counter_enable = pmuCycleCounterEnable; - for (size_t i = 0; i < pmuEventCount.size(); i++) { - rsp.pmu_event_count[i] = pmuEventCount[i]; - } - rsp.pmu_cycle_counter_count = pmuCycleCounterCount; - - printf("Sending inference response. 
userArg=0x%" PRIx64 ", ofm_count=%" PRIu32 ", status=%" PRIu32 "\n", - rsp.user_arg, - rsp.ofm_count, - rsp.status); - - if (!queueOut.write(ETHOSU_CORE_MSG_INFERENCE_RSP, rsp)) { - printf("Failed to write inference.\n"); - } - mailbox.sendMessage(); -} - -void MessageProcess::mailboxCallback(void *userArg) { - MessageProcess *_this = reinterpret_cast(userArg); - _this->handleIrq(); -} - -} // namespace MessageProcess diff --git a/applications/message_process/src/message_process.cpp b/applications/message_process/src/message_process.cpp new file mode 100644 index 0000000..13ed8c4 --- /dev/null +++ b/applications/message_process/src/message_process.cpp @@ -0,0 +1,333 @@ +/* + * Copyright (c) 2020 Arm Limited. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include "cmsis_compiler.h" + +#include +#include +#include +#include + +using namespace std; +using namespace InferenceProcess; + +namespace MessageProcess { + +QueueImpl::QueueImpl(ethosu_core_queue &_queue) : queue(_queue) { + cleanHeaderData(); +} + +bool QueueImpl::empty() const { + return queue.header.read == queue.header.write; +} + +size_t QueueImpl::available() const { + size_t avail = queue.header.write - queue.header.read; + + if (queue.header.read > queue.header.write) { + avail += queue.header.size; + } + + return avail; +} + +size_t QueueImpl::capacity() const { + return queue.header.size - available(); +} + +bool QueueImpl::read(uint8_t *dst, uint32_t length) { + const uint8_t *end = dst + length; + uint32_t rpos = queue.header.read; + + invalidateHeaderData(); + + if (length > available()) { + return false; + } + + while (dst < end) { + *dst++ = queue.data[rpos]; + rpos = (rpos + 1) % queue.header.size; + } + + queue.header.read = rpos; + + cleanHeader(); + + return true; +} + +bool QueueImpl::write(const Vec *vec, size_t length) { + size_t total = 0; + + for (size_t i = 0; i < length; i++) { + total += vec[i].length; + } + + invalidateHeader(); + + if (total > capacity()) { + return false; + } + + uint32_t wpos = queue.header.write; + + for (size_t i = 0; i < length; i++) { + const uint8_t *src = reinterpret_cast(vec[i].base); + const uint8_t *end = src + vec[i].length; + + while (src < end) { + queue.data[wpos] = *src++; + wpos = (wpos + 1) % queue.header.size; + } + } + + // Update the write position last + queue.header.write = wpos; + + cleanHeaderData(); + + return true; +} + +bool QueueImpl::write(const uint32_t type, const void *src, uint32_t length) { + ethosu_core_msg msg = {type, length}; + Vec vec[2] = {{&msg, sizeof(msg)}, {src, length}}; + + return write(vec, 2); +} + +bool QueueImpl::skip(uint32_t length) { + uint32_t rpos = queue.header.read; + + invalidateHeader(); + + if (length > available()) { + return false; + 
} + + queue.header.read = (rpos + length) % queue.header.size; + + cleanHeader(); + + return true; +} + +void QueueImpl::cleanHeader() const { +#if defined(__DCACHE_PRESENT) && (__DCACHE_PRESENT == 1U) + SCB_CleanDCache_by_Addr(reinterpret_cast(&queue.header), sizeof(queue.header)); +#endif +} + +void QueueImpl::cleanHeaderData() const { +#if defined(__DCACHE_PRESENT) && (__DCACHE_PRESENT == 1U) + SCB_CleanDCache_by_Addr(reinterpret_cast(&queue.header), sizeof(queue.header)); + uintptr_t queueDataPtr = reinterpret_cast(&queue.data[0]); + SCB_CleanDCache_by_Addr(reinterpret_cast(queueDataPtr & ~3), queue.header.size + (queueDataPtr & 3)); +#endif +} + +void QueueImpl::invalidateHeader() const { +#if defined(__DCACHE_PRESENT) && (__DCACHE_PRESENT == 1U) + SCB_InvalidateDCache_by_Addr(reinterpret_cast(&queue.header), sizeof(queue.header)); +#endif +} + +void QueueImpl::invalidateHeaderData() const { +#if defined(__DCACHE_PRESENT) && (__DCACHE_PRESENT == 1U) + SCB_InvalidateDCache_by_Addr(reinterpret_cast(&queue.header), sizeof(queue.header)); + uintptr_t queueDataPtr = reinterpret_cast(&queue.data[0]); + SCB_InvalidateDCache_by_Addr(reinterpret_cast(queueDataPtr & ~3), + queue.header.size + (queueDataPtr & 3)); +#endif +} + +MessageProcess::MessageProcess(ethosu_core_queue &in, + ethosu_core_queue &out, + Mailbox::Mailbox &mbox, + ::InferenceProcess::InferenceProcess &_inferenceProcess) : + queueIn(in), + queueOut(out), mailbox(mbox), inferenceProcess(_inferenceProcess) { + mailbox.registerCallback(mailboxCallback, reinterpret_cast(this)); +} + +void MessageProcess::run() { + while (true) { + // Handle all messages in queue + while (handleMessage()) + ; + + // Wait for event + __WFE(); + } +} + +void MessageProcess::handleIrq() { + __SEV(); +} + +bool MessageProcess::handleMessage() { + ethosu_core_msg msg; + + // Read msg header + if (!queueIn.read(msg)) { + return false; + } + + printf("Message. 
type=%" PRIu32 ", length=%" PRIu32 "\n", msg.type, msg.length); + + switch (msg.type) { + case ETHOSU_CORE_MSG_PING: + printf("Ping\n"); + sendPong(); + break; + case ETHOSU_CORE_MSG_INFERENCE_REQ: { + ethosu_core_inference_req req; + + if (!queueIn.readOrSkip(req, msg.length)) { + printf("Failed to read payload.\n"); + return false; + } + + printf("InferenceReq. user_arg=0x%" PRIx64 ", network={0x%" PRIx32 ", %" PRIu32 "}", + req.user_arg, + req.network.ptr, + req.network.size); + + printf(", ifm_count=%" PRIu32 ", ifm=[", req.ifm_count); + for (uint32_t i = 0; i < req.ifm_count; ++i) { + if (i > 0) { + printf(", "); + } + + printf("{0x%" PRIx32 ", %" PRIu32 "}", req.ifm[i].ptr, req.ifm[i].size); + } + printf("]"); + + printf(", ofm_count=%" PRIu32 ", ofm=[", req.ofm_count); + for (uint32_t i = 0; i < req.ofm_count; ++i) { + if (i > 0) { + printf(", "); + } + + printf("{0x%" PRIx32 ", %" PRIu32 "}", req.ofm[i].ptr, req.ofm[i].size); + } + printf("]\n"); + + DataPtr networkModel(reinterpret_cast(req.network.ptr), req.network.size); + + vector ifm; + for (uint32_t i = 0; i < req.ifm_count; ++i) { + ifm.push_back(DataPtr(reinterpret_cast(req.ifm[i].ptr), req.ifm[i].size)); + } + + vector ofm; + for (uint32_t i = 0; i < req.ofm_count; ++i) { + ofm.push_back(DataPtr(reinterpret_cast(req.ofm[i].ptr), req.ofm[i].size)); + } + + vector expectedOutput; + + vector pmuEventConfig(ETHOSU_CORE_PMU_MAX); + for (uint32_t i = 0; i < ETHOSU_CORE_PMU_MAX; i++) { + pmuEventConfig[i] = req.pmu_event_config[i]; + } + + InferenceJob job( + "job", networkModel, ifm, ofm, expectedOutput, -1, pmuEventConfig, req.pmu_cycle_counter_enable); + job.invalidate(); + + bool failed = inferenceProcess.runJob(job); + job.clean(); + + sendInferenceRsp(req.user_arg, + job.output, + failed, + job.pmuEventConfig, + job.pmuCycleCounterEnable, + job.pmuEventCount, + job.pmuCycleCounterCount); + break; + } + default: { + printf("Unexpected message type: %" PRIu32 ", skipping %" PRIu32 " bytes\n", 
msg.type, msg.length); + + queueIn.skip(msg.length); + } break; + } + + return true; +} + +void MessageProcess::sendPong() { + if (!queueOut.write(ETHOSU_CORE_MSG_PONG)) { + printf("Failed to write pong.\n"); + } + mailbox.sendMessage(); +} + +void MessageProcess::sendInferenceRsp(uint64_t userArg, + vector &ofm, + bool failed, + vector &pmuEventConfig, + uint32_t pmuCycleCounterEnable, + vector &pmuEventCount, + uint64_t pmuCycleCounterCount) { + ethosu_core_inference_rsp rsp = { + .pmu_event_count = + { + 0, + }, + }; + + rsp.user_arg = userArg; + rsp.ofm_count = ofm.size(); + rsp.status = failed ? ETHOSU_CORE_STATUS_ERROR : ETHOSU_CORE_STATUS_OK; + + for (size_t i = 0; i < ofm.size(); ++i) { + rsp.ofm_size[i] = ofm[i].size; + } + + for (size_t i = 0; i < pmuEventConfig.size(); i++) { + rsp.pmu_event_config[i] = pmuEventConfig[i]; + } + rsp.pmu_cycle_counter_enable = pmuCycleCounterEnable; + for (size_t i = 0; i < pmuEventCount.size(); i++) { + rsp.pmu_event_count[i] = pmuEventCount[i]; + } + rsp.pmu_cycle_counter_count = pmuCycleCounterCount; + + printf("Sending inference response. userArg=0x%" PRIx64 ", ofm_count=%" PRIu32 ", status=%" PRIu32 "\n", + rsp.user_arg, + rsp.ofm_count, + rsp.status); + + if (!queueOut.write(ETHOSU_CORE_MSG_INFERENCE_RSP, rsp)) { + printf("Failed to write inference.\n"); + } + mailbox.sendMessage(); +} + +void MessageProcess::mailboxCallback(void *userArg) { + MessageProcess *_this = reinterpret_cast(userArg); + _this->handleIrq(); +} + +} // namespace MessageProcess -- cgit v1.2.1