From 6a7703ea93b962869a78976811b8a5920f298f67 Mon Sep 17 00:00:00 2001
From: Anton Moberg
Date: Tue, 2 Mar 2021 15:14:29 +0100
Subject: Multi NPU freertos example

Modified freertos.cpp to support the multi NPU API.

Spawn NUM_INFERENCE_TASKS inference processing tasks. Each task
reserves, locks and runs inferences on an available NPU driver
(usually one task per NPU, but not required).

Spawn NUM_JOB_TASKS job creating/receiving tasks. Each task creates
NUM_JOBS_PER_TASK jobs, sends them to the inference job queue, and
receives the responses once the inferences have finished.

Added: Mutex & semaphore interfaces that override the weakly linked
symbols in the driver.

Change-Id: Id187c6c9b43b988b1fdd560fdf3d57bd4b30b79c
---
 applications/freertos/main.cpp | 277 ++++++++++++++++++++++++++---------------
 applications/freertos/model.h  |   2 +
 applications/freertos/output.h |   2 +-
 3 files changed, 183 insertions(+), 98 deletions(-)

diff --git a/applications/freertos/main.cpp b/applications/freertos/main.cpp
index 6f92faf..0010b70 100644
--- a/applications/freertos/main.cpp
+++ b/applications/freertos/main.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2021 Arm Limited. All rights reserved.
+ * Copyright (c) 2019-2021 Arm Limited. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -20,147 +20,230 @@
  * Includes
  ****************************************************************************/
 
-// FreeRTOS
 #include "FreeRTOS.h"
 #include "queue.h"
+#include "semphr.h"
 #include "task.h"
 
-// Ethos-U
-#include "ethosu_driver.h"
+#include <inttypes.h>
+#include <stdio.h>
+#include <vector>
+
 #include "inference_process.hpp"
 
-// System includes
-#include <stdio.h>
+// Model data (Defined & changeable by modifying the compile definition in CMakeLists.txt)
+#include "input.h"
+#include "model.h"
+#include "output.h"
 
 using namespace std;
 using namespace InferenceProcess;
 
+/****************************************************************************
+ * Defines
+ ****************************************************************************/
+
+// Nr. of tasks to process inferences with. Task reserves driver & runs inference (Normally 1 per NPU, but not required)
+#define NUM_INFERENCE_TASKS 1
+// Nr. of tasks to create jobs and receive responses
+#define NUM_JOB_TASKS 1
+// Nr. of jobs to create per job task
+#define NUM_JOBS_PER_TASK 1
+
+// Tensor arena size
+#ifdef TENSOR_ARENA_SIZE // If defined in model.h
+#define TENSOR_ARENA_SIZE_PER_INFERENCE TENSOR_ARENA_SIZE
+#else // If not defined, use maximum available
+#define TENSOR_ARENA_SIZE_PER_INFERENCE 2000000 / NUM_INFERENCE_TASKS
+#endif
+
 /****************************************************************************
  * InferenceJob
  ****************************************************************************/
 
-#define TENSOR_ARENA_SIZE 0xa0000
+struct ProcessTaskParams {
+    ProcessTaskParams() {}
+    ProcessTaskParams(QueueHandle_t _queue, uint8_t *_tensorArena, size_t _arenaSize) :
+        queueHandle(_queue), tensorArena(_tensorArena), arenaSize(_arenaSize) {}
 
-__attribute__((section(".bss.tensor_arena"), aligned(16))) uint8_t inferenceProcessTensorArena[TENSOR_ARENA_SIZE];
+    QueueHandle_t queueHandle;
+    uint8_t *tensorArena;
+    size_t arenaSize;
+};
+
+// Number of total completed jobs, needed to exit the application correctly if NUM_JOB_TASKS > 1
+static int totalCompletedJobs = 0;
 
-namespace {
+// TensorArena static initialisation
+static const size_t arenaSize = TENSOR_ARENA_SIZE_PER_INFERENCE;
+__attribute__((section(".bss.tensor_arena"), aligned(16)))
+uint8_t inferenceProcessTensorArena[NUM_INFERENCE_TASKS][arenaSize];
 
+// Wrapper around InferenceProcess::InferenceJob. Adds responseQueue and status for FreeRTOS multi-tasking purposes.
 struct xInferenceJob : public InferenceJob {
-    QueueHandle_t queue;
+    QueueHandle_t responseQueue;
     bool status;
 
-    xInferenceJob();
-    xInferenceJob(const string &name,
-                  const DataPtr &networkModel,
-                  const vector<DataPtr> &input,
-                  const vector<DataPtr> &output,
-                  const vector<DataPtr> &expectedOutput,
-                  size_t numBytesToPrint,
-                  const vector<uint8_t> &pmuEventConfig,
-                  const uint32_t pmuCycleCounterEnable,
-                  QueueHandle_t queue);
+    xInferenceJob() : InferenceJob(), responseQueue(nullptr), status(false) {}
+    xInferenceJob(const string &_name,
+                  const DataPtr &_networkModel,
+                  const vector<DataPtr> &_input,
+                  const vector<DataPtr> &_output,
+                  const vector<DataPtr> &_expectedOutput,
+                  const size_t _numBytesToPrint,
+                  const vector<uint8_t> &_pmuEventConfig,
+                  const uint32_t _pmuCycleCounterEnable,
+                  QueueHandle_t _queue) :
+        InferenceJob(_name,
+                     _networkModel,
+                     _input,
+                     _output,
+                     _expectedOutput,
+                     _numBytesToPrint,
+                     _pmuEventConfig,
+                     _pmuCycleCounterEnable),
+        responseQueue(_queue), status(false) {}
 };
 
-xInferenceJob::xInferenceJob() : InferenceJob(), queue(nullptr), status(false) {}
-
-xInferenceJob::xInferenceJob(const std::string &_name,
-                             const DataPtr &_networkModel,
-                             const std::vector<DataPtr> &_input,
-                             const std::vector<DataPtr> &_output,
-                             const std::vector<DataPtr> &_expectedOutput,
-                             size_t _numBytesToPrint,
-                             const vector<uint8_t> &_pmuEventConfig,
-                             const uint32_t _pmuCycleCounterEnable,
-                             QueueHandle_t _queue) :
-    InferenceJob(_name,
-                 _networkModel,
-                 _input,
-                 _output,
-                 _expectedOutput,
-                 _numBytesToPrint,
-                 _pmuEventConfig,
-                 _pmuCycleCounterEnable),
-    queue(_queue), status(false) {}
-
-} // namespace
-
 /****************************************************************************
- * Functions
+ * Mutex & Semaphore
+ * Overrides weak-linked symbols in ethosu_driver.c to implement thread handling
  ****************************************************************************/
 
-namespace {
+extern "C" {
 
-#include "model.h"
-#include "input.h"
-#include "output.h"
+void *ethosu_mutex_create(void) {
+    return xSemaphoreCreateMutex();
+}
 
-void inferenceProcessTask(void *pvParameters) {
-    QueueHandle_t queue = reinterpret_cast<QueueHandle_t>(pvParameters);
+void ethosu_mutex_lock(void *mutex) {
+    SemaphoreHandle_t handle = reinterpret_cast<SemaphoreHandle_t>(mutex);
+    xSemaphoreTake(handle, portMAX_DELAY);
+}
+
+void ethosu_mutex_unlock(void *mutex) {
+    SemaphoreHandle_t handle = reinterpret_cast<SemaphoreHandle_t>(mutex);
+    xSemaphoreGive(handle);
+}
 
-    class InferenceProcess inferenceProcess(inferenceProcessTensorArena, TENSOR_ARENA_SIZE);
+void *ethosu_semaphore_create(void) {
+    return xSemaphoreCreateBinary();
+}
 
-    while (true) {
-        xInferenceJob *job;
+void ethosu_semaphore_take(void *sem) {
+    SemaphoreHandle_t handle = reinterpret_cast<SemaphoreHandle_t>(sem);
+    xSemaphoreTake(handle, portMAX_DELAY);
+}
 
-        // Wait for inference job
-        xQueueReceive(queue, &job, portMAX_DELAY);
-        printf("Received inference job. job=%p, name=%s\n", job, job->name.c_str());
+void ethosu_semaphore_give(void *sem) {
+    SemaphoreHandle_t handle = reinterpret_cast<SemaphoreHandle_t>(sem);
+    xSemaphoreGive(handle);
+}
+}
 
-        bool status = inferenceProcess.runJob(*job);
-        job->status = status;
+/****************************************************************************
+ * Functions
+ ****************************************************************************/
 
-        // Return inference job response
-        xQueueSend(job->queue, &job, portMAX_DELAY);
-    }
+// inferenceProcessTask - Run jobs from queue with available driver
+void inferenceProcessTask(void *pvParameters) {
+    ProcessTaskParams params = *reinterpret_cast<ProcessTaskParams *>(pvParameters);
+
+    class InferenceProcess inferenceProcess(params.tensorArena, params.arenaSize);
 
-    vTaskDelete(NULL);
+    for (;;) {
+        xInferenceJob *xJob;
+
+        xQueueReceive(params.queueHandle, &xJob, portMAX_DELAY);
+        bool status = inferenceProcess.runJob(*xJob);
+        xJob->status = status;
+        xQueueSend(xJob->responseQueue, &xJob, portMAX_DELAY);
+    }
+    vTaskDelete(nullptr);
 }
 
-void inferenceJobTask(void *pvParameters) {
+// inferenceSenderTask - Creates NUM_JOBS_PER_TASK jobs, queues them, and then listens for completion status
+void inferenceSenderTask(void *pvParameters) {
+    int ret = 0;
+
+    QueueHandle_t inferenceProcessQueue = reinterpret_cast<QueueHandle_t>(pvParameters);
+
+    xInferenceJob jobs[NUM_JOBS_PER_TASK];
+
     // Create queue for response messages
-    QueueHandle_t senderQueue = xQueueCreate(10, sizeof(xInferenceJob *));
-
-    // Inference job
-    DataPtr networkModel(networkModelData, sizeof(networkModelData));
-    DataPtr input(inputData, sizeof(inputData));
-    DataPtr expected(expectedData, sizeof(expectedData));
-
-    xInferenceJob job;
-    xInferenceJob *j = &job;
-    job.name = "mobilenet_v2";
-    job.networkModel = networkModel;
-    job.input.push_back(input);
-    job.expectedOutput.push_back(expected);
-    job.queue = senderQueue;
-
-    // Send job
-    printf("Sending inference job\n");
-    xQueueSend(inferenceProcessQueue, &j, portMAX_DELAY);
-
-    // Wait for response
-    xQueueReceive(senderQueue, &j, portMAX_DELAY);
-    printf("Received inference job response. status=%u\n", j->status);
-
-    exit(j->status);
+    QueueHandle_t senderQueue = xQueueCreate(NUM_JOBS_PER_TASK, sizeof(xInferenceJob *));
+
+    // Create and queue the jobs
+    for (int n = 0; n < NUM_JOBS_PER_TASK; n++) {
+        // Create job
+        xInferenceJob *job = &jobs[n];
+        job->name          = string(modelName);
+        job->networkModel  = DataPtr(networkModelData, sizeof(networkModelData));
+        job->input.push_back(DataPtr(inputData, sizeof(inputData)));
+        job->expectedOutput.push_back(DataPtr(expectedOutputData, sizeof(expectedOutputData)));
+        job->responseQueue = senderQueue;
+
+        // Send job
+        printf("Sending inference job: job=%p, name=%s\n", job, job->name.c_str());
+        xQueueSend(inferenceProcessQueue, &job, portMAX_DELAY);
+    }
+
+    // Listen for completion status
+    do {
+        xInferenceJob *pSendJob;
+        xQueueReceive(senderQueue, &pSendJob, portMAX_DELAY);
+        printf("inferenceSenderTask: received response for job: %s, status = %u\n",
+               pSendJob->name.c_str(),
+               pSendJob->status);
+
+        totalCompletedJobs++;
+        ret = (pSendJob->status);
+        if (pSendJob->status != 0) {
+            break;
+        }
+    } while (totalCompletedJobs < NUM_JOBS_PER_TASK * NUM_JOB_TASKS);
+
+    vQueueDelete(senderQueue);
+
+    printf("FreeRTOS application returning %d.\n", ret);
+    exit(ret);
 }
 
-} // namespace
+/****************************************************************************
+ * Application
+ ****************************************************************************/
 
-/* Keep the queue ouf of the stack sinde freertos resets it when the scheduler starts.*/
-QueueHandle_t inferenceProcessQueue;
+// Declare variables in global scope to keep them off the stack, since FreeRTOS resets the stack when the scheduler is started
+static QueueHandle_t inferenceProcessQueue;
+static ProcessTaskParams taskParams[NUM_INFERENCE_TASKS];
 
+// FreeRTOS application. NOTE: Additional tasks may require increased heap size.
 int main() {
-    // Inference process
-    inferenceProcessQueue = xQueueCreate(10, sizeof(xInferenceJob *));
-    xTaskCreate(inferenceProcessTask, "inferenceProcess", 2 * 1024, inferenceProcessQueue, 1, nullptr);
+    BaseType_t ret;
+    inferenceProcessQueue = xQueueCreate(NUM_JOBS_PER_TASK, sizeof(xInferenceJob *));
+
+    // Create inferenceSender tasks to create and queue the jobs
+    for (int n = 0; n < NUM_JOB_TASKS; n++) {
+        ret = xTaskCreate(inferenceSenderTask, "inferenceSenderTask", 2 * 1024, inferenceProcessQueue, 2, nullptr);
+        if (ret != pdPASS) {
+            printf("FreeRTOS: Failed to create 'inferenceSenderTask%i'\n", n);
+            exit(1);
+        }
+    }
 
-    // Inference job task
-    xTaskCreate(inferenceJobTask, "inferenceJob", 2 * 1024, inferenceProcessQueue, 2, nullptr);
+    // Create inferenceProcess tasks to process the queued jobs
+    for (int n = 0; n < NUM_INFERENCE_TASKS; n++) {
+        taskParams[n] = ProcessTaskParams(inferenceProcessQueue, inferenceProcessTensorArena[n], arenaSize);
+        ret = xTaskCreate(inferenceProcessTask, "inferenceProcessTask", 3 * 1024, &taskParams[n], 3, nullptr);
+        if (ret != pdPASS) {
+            printf("FreeRTOS: Failed to create 'inferenceProcessTask%i'\n", n);
+            exit(1);
+        }
+    }
 
-    // Run the scheduler
+    // Start Scheduler
     vTaskStartScheduler();
 
+    printf("FreeRTOS application failed to initialise\n");
+    exit(1);
+
     return 0;
-}
+}
\ No newline at end of file
diff --git a/applications/freertos/model.h b/applications/freertos/model.h
index cd8f656..3b3078a 100644
--- a/applications/freertos/model.h
+++ b/applications/freertos/model.h
@@ -16,6 +16,8 @@
  * limitations under the License.
  */
 
+const char *modelName = "keyword_spotting_cnn_small_int8";
+
 uint8_t networkModelData[] __attribute__((section("network_model_sec"), aligned(16))) = {
     0x28, 0x00, 0x00, 0x00, 0x54, 0x46, 0x4c, 0x33, 0x00, 0x00, 0x00, 0x00,
     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x12, 0x00,
diff --git a/applications/freertos/output.h b/applications/freertos/output.h
index 48380b4..0aa7954 100644
--- a/applications/freertos/output.h
+++ b/applications/freertos/output.h
@@ -16,7 +16,7 @@
  * limitations under the License.
  */
 
-uint8_t expectedData[1001] __attribute__((aligned(4), section("expected_output_data_sec"))) = {
+uint8_t expectedOutputData[1001] __attribute__((aligned(4), section("expected_output_data_sec"))) = {
     0x32, 0x35, 0x45, 0x51, 0x55, 0x5b, 0x4c, 0x5a, 0x42, 0x39, 0x2f, 0x37,
     0x3b, 0x31, 0x3f, 0x3a, 0x36, 0x43, 0x36, 0x46, 0x32, 0x39, 0x32, 0x35,
     0x3b, 0x31, 0x48, 0x3d, 0x43, 0x3c, 0x3b, 0x43, 0x45, 0x30, 0x43, 0x52,
-- 
cgit v1.2.1