/*
 * Copyright (c) 2019-2021 Arm Limited. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/****************************************************************************
 * Includes
 ****************************************************************************/

#include "FreeRTOS.h"
#include "queue.h"
#include "semphr.h"
#include "task.h"

#include <stdio.h>
#include <stdlib.h>
#include <string>
#include <vector>

#include "inference_process.hpp"

// Model data (defined and changeable by modifying the compile definitions in CMakeLists.txt)
#include "input.h"
#include "model.h"
#include "output.h"

using namespace std;
using namespace InferenceProcess;

/****************************************************************************
 * Defines
 ****************************************************************************/

// Nr. of tasks to process inferences with. Each task reserves the driver and runs
// inferences (normally one per NPU, but not a requirement).
#define NUM_INFERENCE_TASKS 1
// Nr. of tasks to create jobs and receive responses
#define NUM_JOB_TASKS 1
// Nr. of jobs to create per job task
#define NUM_JOBS_PER_TASK 1

// Tensor arena size
#ifdef TENSOR_ARENA_SIZE // If defined in model.h
#define TENSOR_ARENA_SIZE_PER_INFERENCE TENSOR_ARENA_SIZE
#else // If not defined, divide the maximum available arena between the inference tasks
#define TENSOR_ARENA_SIZE_PER_INFERENCE (2000000 / NUM_INFERENCE_TASKS)
#endif

/****************************************************************************
 * InferenceJob
 ****************************************************************************/

struct ProcessTaskParams {
    ProcessTaskParams() {}
    ProcessTaskParams(QueueHandle_t _queue, uint8_t *_tensorArena, size_t _arenaSize) :
        queueHandle(_queue), tensorArena(_tensorArena), arenaSize(_arenaSize) {}

    QueueHandle_t queueHandle;
    uint8_t *tensorArena;
    size_t arenaSize;
};

// Number of total completed jobs, needed to exit the application correctly if NUM_JOB_TASKS > 1
static int totalCompletedJobs = 0;

// TensorArena static initialisation
static const size_t arenaSize = TENSOR_ARENA_SIZE_PER_INFERENCE;

__attribute__((section(".bss.tensor_arena"), aligned(16)))
uint8_t inferenceProcessTensorArena[NUM_INFERENCE_TASKS][arenaSize];
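// Illustrative compile-time sanity checks (an addition, not in the original
// sources): the per-inference arena size above is computed by integer division,
// so it can silently become too small (or zero) if NUM_INFERENCE_TASKS is raised
// without also providing TENSOR_ARENA_SIZE from model.h.
static_assert(TENSOR_ARENA_SIZE_PER_INFERENCE > 0, "Per-inference tensor arena must be non-empty");
static_assert(NUM_JOBS_PER_TASK > 0, "Each job task must create at least one job");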
// Wrapper around InferenceProcess::InferenceJob. Adds responseQueue and status for FreeRTOS multi-tasking purposes.
struct xInferenceJob : public InferenceJob {
    QueueHandle_t responseQueue;
    bool status;

    xInferenceJob() : InferenceJob(), responseQueue(nullptr), status(false) {}
    xInferenceJob(const string &_name,
                  const DataPtr &_networkModel,
                  const vector<DataPtr> &_input,
                  const vector<DataPtr> &_output,
                  const vector<DataPtr> &_expectedOutput,
                  const size_t _numBytesToPrint,
                  const vector<uint8_t> &_pmuEventConfig,
                  const uint32_t _pmuCycleCounterEnable,
                  QueueHandle_t _queue) :
        InferenceJob(_name,
                     _networkModel,
                     _input,
                     _output,
                     _expectedOutput,
                     _numBytesToPrint,
                     _pmuEventConfig,
                     _pmuCycleCounterEnable),
        responseQueue(_queue), status(false) {}
};

/****************************************************************************
 * Mutex & Semaphore
 * Overrides weak-linked symbols in ethosu_driver.c to implement thread handling
 ****************************************************************************/

extern "C" {

void *ethosu_mutex_create(void) {
    return xSemaphoreCreateMutex();
}

void ethosu_mutex_lock(void *mutex) {
    SemaphoreHandle_t handle = reinterpret_cast<SemaphoreHandle_t>(mutex);
    xSemaphoreTake(handle, portMAX_DELAY);
}

void ethosu_mutex_unlock(void *mutex) {
    SemaphoreHandle_t handle = reinterpret_cast<SemaphoreHandle_t>(mutex);
    xSemaphoreGive(handle);
}

void *ethosu_semaphore_create(void) {
    return xSemaphoreCreateBinary();
}

void ethosu_semaphore_take(void *sem) {
    SemaphoreHandle_t handle = reinterpret_cast<SemaphoreHandle_t>(sem);
    xSemaphoreTake(handle, portMAX_DELAY);
}

void ethosu_semaphore_give(void *sem) {
    SemaphoreHandle_t handle = reinterpret_cast<SemaphoreHandle_t>(sem);
    xSemaphoreGive(handle);
}
}

/****************************************************************************
 * Functions
 ****************************************************************************/

// inferenceProcessTask - Run jobs from the queue with an available driver
void inferenceProcessTask(void *pvParameters) {
    ProcessTaskParams params = *reinterpret_cast<ProcessTaskParams *>(pvParameters);

    // The 'class' keyword disambiguates the InferenceProcess class from the
    // InferenceProcess namespace pulled in with 'using' above.
    class InferenceProcess inferenceProcess(params.tensorArena, params.arenaSize);

    for (;;) {
        xInferenceJob *xJob;

        xQueueReceive(params.queueHandle, &xJob, portMAX_DELAY);
        bool status  = inferenceProcess.runJob(*xJob);
        xJob->status = status;
        xQueueSend(xJob->responseQueue, &xJob, portMAX_DELAY);
    }
    vTaskDelete(nullptr); // Never reached
}

// inferenceSenderTask - Creates NUM_JOBS_PER_TASK jobs, queues them, and then listens for completion status
void inferenceSenderTask(void *pvParameters) {
    int ret = 0;

    QueueHandle_t inferenceProcessQueue = reinterpret_cast<QueueHandle_t>(pvParameters);

    xInferenceJob jobs[NUM_JOBS_PER_TASK];

    // Create queue for response messages
    QueueHandle_t senderQueue = xQueueCreate(NUM_JOBS_PER_TASK, sizeof(xInferenceJob *));

    // Create and queue the jobs
    for (int n = 0; n < NUM_JOBS_PER_TASK; n++) {
        // Create job
        xInferenceJob *job = &jobs[n];
        job->name          = string(modelName);
        job->networkModel  = DataPtr(networkModelData, sizeof(networkModelData));
        job->input.push_back(DataPtr(inputData, sizeof(inputData)));
        job->expectedOutput.push_back(DataPtr(expectedOutputData, sizeof(expectedOutputData)));
        job->responseQueue = senderQueue;

        // Send job
        printf("Sending inference job: job=%p, name=%s\n", job, job->name.c_str());
        xQueueSend(inferenceProcessQueue, &job, portMAX_DELAY);
    }

    // Listen for completion status
    do {
        xInferenceJob *pSendJob;
        xQueueReceive(senderQueue, &pSendJob, portMAX_DELAY);
        printf("inferenceSenderTask: received response for job: %s, status = %u\n",
               pSendJob->name.c_str(),
               pSendJob->status);

        totalCompletedJobs++;

        ret = (pSendJob->status);
        if (pSendJob->status != 0) {
            break;
        }
    } while (totalCompletedJobs < NUM_JOBS_PER_TASK * NUM_JOB_TASKS);

    vQueueDelete(senderQueue);

    printf("FreeRTOS application returning %d.\n", ret);
    exit(ret);
}
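// Illustrative alternative (not used by this application): instead of
// default-constructing a job and assigning its fields one by one as
// inferenceSenderTask does above, the full xInferenceJob constructor can
// populate everything in one expression. All identifiers below come from this
// file and from model.h/input.h/output.h.
//
//   xInferenceJob job(string(modelName),
//                     DataPtr(networkModelData, sizeof(networkModelData)),
//                     {DataPtr(inputData, sizeof(inputData))},                    // input
//                     {},                                                        // output
//                     {DataPtr(expectedOutputData, sizeof(expectedOutputData))}, // expected output
//                     0,                                                         // numBytesToPrint
//                     {},                                                        // pmuEventConfig
//                     0,                                                         // pmuCycleCounterEnable
//                     senderQueue);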
printf("FreeRTOS application returning %d.\n", ret); exit(ret); } /**************************************************************************** * Application ****************************************************************************/ // Declare variables in global scope to avoid stack since FreeRTOS resets stack when the scheduler is started static QueueHandle_t inferenceProcessQueue; static ProcessTaskParams taskParams[NUM_INFERENCE_TASKS]; // FreeRTOS application. NOTE: Additional tasks may require increased heap size. int main() { BaseType_t ret; inferenceProcessQueue = xQueueCreate(NUM_JOBS_PER_TASK, sizeof(xInferenceJob *)); // inferenceSender tasks to create and queue the jobs for (int n = 0; n < NUM_JOB_TASKS; n++) { ret = xTaskCreate(inferenceSenderTask, "inferenceSenderTask", 2 * 1024, inferenceProcessQueue, 2, nullptr); if (ret != pdPASS) { printf("FreeRTOS: Failed to create 'inferenceSenderTask%i'\n", n); exit(1); } } // Create inferenceProcess tasks to process the queued jobs for (int n = 0; n < NUM_INFERENCE_TASKS; n++) { taskParams[n] = ProcessTaskParams(inferenceProcessQueue, inferenceProcessTensorArena[n], arenaSize); ret = xTaskCreate(inferenceProcessTask, "inferenceProcessTask", 3 * 1024, &taskParams[n], 3, nullptr); if (ret != pdPASS) { printf("FreeRTOS: Failed to create 'inferenceProcessTask%i'\n", n); exit(1); } } // Start Scheduler vTaskStartScheduler(); printf("FreeRTOS application failed to initialise \n"); exit(1); return 0; }