aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAnton Moberg <anton.moberg@arm.com>2021-03-02 15:14:29 +0100
committerKristofer Jonsson <kristofer.jonsson@arm.com>2021-04-09 11:48:08 +0000
commit6a7703ea93b962869a78976811b8a5920f298f67 (patch)
treee256e28cb8d65562c458274f6be0d569026fff4c
parentfa3e51bf84baa19bac43fbd69ad702932a8c9dbe (diff)
downloadethos-u-core-platform-6a7703ea93b962869a78976811b8a5920f298f67.tar.gz
Multi NPU freertos example
Modified freertos.cpp to support the multi-NPU API. Spawn NUM_INFERENCE_TASKS inference processing tasks (Reserves, locks, and runs inference on an available NPU driver. Usually 1 per NPU, but not required) Spawn NUM_JOB_TASKS job creating/receiving tasks (Creates NUM_JOBS_PER_TASK jobs, sends them to inference job queue, and receives response once finished) Added: Mutex & Semaphore interfaces. Overrides weakly linked symbols in driver. Change-Id: Id187c6c9b43b988b1fdd560fdf3d57bd4b30b79c
-rw-r--r--applications/freertos/main.cpp277
-rw-r--r--applications/freertos/model.h2
-rw-r--r--applications/freertos/output.h2
3 files changed, 183 insertions, 98 deletions
diff --git a/applications/freertos/main.cpp b/applications/freertos/main.cpp
index 6f92faf..0010b70 100644
--- a/applications/freertos/main.cpp
+++ b/applications/freertos/main.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020-2021 Arm Limited. All rights reserved.
+ * Copyright (c) 2019-2021 Arm Limited. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
@@ -20,147 +20,230 @@
* Includes
****************************************************************************/
-// FreeRTOS
#include "FreeRTOS.h"
#include "queue.h"
+#include "semphr.h"
#include "task.h"
-// Ethos-U
-#include "ethosu_driver.h"
+#include <inttypes.h>
+#include <stdio.h>
+#include <vector>
+
#include "inference_process.hpp"
-// System includes
-#include <stdio.h>
+// Model data (Defined & changable by modifiying compile definition in CMakeLists.txt)
+#include "input.h"
+#include "model.h"
+#include "output.h"
using namespace std;
using namespace InferenceProcess;
/****************************************************************************
+ * Defines
+ ****************************************************************************/
+
+// Nr. of tasks to process inferences with. Task reserves driver & runs inference (Normally 1 per NPU, but not a must)
+#define NUM_INFERENCE_TASKS 1
+// Nr. of tasks to create jobs and recieve responses
+#define NUM_JOB_TASKS 1
+// Nr. of jobs to create per job task
+#define NUM_JOBS_PER_TASK 1
+
+// Tensor arena size
+#ifdef TENSOR_ARENA_SIZE // If defined in model.h
+#define TENSOR_ARENA_SIZE_PER_INFERENCE TENSOR_ARENA_SIZE
+#else // If not defined, use maximum available
+#define TENSOR_ARENA_SIZE_PER_INFERENCE 2000000 / NUM_INFERENCE_TASKS
+#endif
+
+/****************************************************************************
* InferenceJob
****************************************************************************/
-#define TENSOR_ARENA_SIZE 0xa0000
+struct ProcessTaskParams {
+ ProcessTaskParams() {}
+ ProcessTaskParams(QueueHandle_t _queue, uint8_t *_tensorArena, size_t _arenaSize) :
+ queueHandle(_queue), tensorArena(_tensorArena), arenaSize(_arenaSize) {}
-__attribute__((section(".bss.tensor_arena"), aligned(16))) uint8_t inferenceProcessTensorArena[TENSOR_ARENA_SIZE];
+ QueueHandle_t queueHandle;
+ uint8_t *tensorArena;
+ size_t arenaSize;
+};
+
+// Number of total completed jobs, needed to exit application correctly if NUM_JOB_TASKS > 1
+static int totalCompletedJobs = 0;
-namespace {
+// TensorArena static initialisation
+static const size_t arenaSize = TENSOR_ARENA_SIZE_PER_INFERENCE;
+__attribute__((section(".bss.tensor_arena"), aligned(16)))
+uint8_t inferenceProcessTensorArena[NUM_INFERENCE_TASKS][arenaSize];
+// Wrapper around InferenceProcess::InferenceJob. Adds responseQueue and status for FreeRTOS multi-tasking purposes.
struct xInferenceJob : public InferenceJob {
- QueueHandle_t queue;
+ QueueHandle_t responseQueue;
bool status;
- xInferenceJob();
- xInferenceJob(const string &name,
- const DataPtr &networkModel,
- const vector<DataPtr> &input,
- const vector<DataPtr> &output,
- const vector<DataPtr> &expectedOutput,
- size_t numBytesToPrint,
- const vector<uint8_t> &pmuEventConfig,
- const uint32_t pmuCycleCounterEnable,
- QueueHandle_t queue);
+ xInferenceJob() : InferenceJob(), responseQueue(nullptr), status(false) {}
+ xInferenceJob(const string &_name,
+ const DataPtr &_networkModel,
+ const vector<DataPtr> &_input,
+ const vector<DataPtr> &_output,
+ const vector<DataPtr> &_expectedOutput,
+ const size_t _numBytesToPrint,
+ const vector<uint8_t> &_pmuEventConfig,
+ const uint32_t _pmuCycleCounterEnable,
+ QueueHandle_t _queue) :
+ InferenceJob(_name,
+ _networkModel,
+ _input,
+ _output,
+ _expectedOutput,
+ _numBytesToPrint,
+ _pmuEventConfig,
+ _pmuCycleCounterEnable),
+ responseQueue(_queue), status(false) {}
};
-xInferenceJob::xInferenceJob() : InferenceJob(), queue(nullptr), status(false) {}
-
-xInferenceJob::xInferenceJob(const std::string &_name,
- const DataPtr &_networkModel,
- const std::vector<DataPtr> &_input,
- const std::vector<DataPtr> &_output,
- const std::vector<DataPtr> &_expectedOutput,
- size_t _numBytesToPrint,
- const vector<uint8_t> &_pmuEventConfig,
- const uint32_t _pmuCycleCounterEnable,
- QueueHandle_t _queue) :
- InferenceJob(_name,
- _networkModel,
- _input,
- _output,
- _expectedOutput,
- _numBytesToPrint,
- _pmuEventConfig,
- _pmuCycleCounterEnable),
- queue(_queue), status(false) {}
-
-} // namespace
-
/****************************************************************************
- * Functions
+ * Mutex & Semaphore
+ * Overrides weak-linked symbols in ethosu_driver.c to implement thread handling
****************************************************************************/
-namespace {
+extern "C" {
-#include "model.h"
-#include "input.h"
-#include "output.h"
+void *ethosu_mutex_create(void) {
+ return xSemaphoreCreateMutex();
+}
-void inferenceProcessTask(void *pvParameters) {
- QueueHandle_t queue = reinterpret_cast<QueueHandle_t>(pvParameters);
+void ethosu_mutex_lock(void *mutex) {
+ SemaphoreHandle_t handle = reinterpret_cast<SemaphoreHandle_t>(mutex);
+ xSemaphoreTake(handle, portMAX_DELAY);
+}
+
+void ethosu_mutex_unlock(void *mutex) {
+ SemaphoreHandle_t handle = reinterpret_cast<SemaphoreHandle_t>(mutex);
+ xSemaphoreGive(handle);
+}
- class InferenceProcess inferenceProcess(inferenceProcessTensorArena, TENSOR_ARENA_SIZE);
+void *ethosu_semaphore_create(void) {
+ return xSemaphoreCreateBinary();
+}
- while (true) {
- xInferenceJob *job;
+void ethosu_semaphore_take(void *sem) {
+ SemaphoreHandle_t handle = reinterpret_cast<SemaphoreHandle_t>(sem);
+ xSemaphoreTake(handle, portMAX_DELAY);
+}
- // Wait for inference job
- xQueueReceive(queue, &job, portMAX_DELAY);
- printf("Received inference job. job=%p, name=%s\n", job, job->name.c_str());
+void ethosu_semaphore_give(void *sem) {
+ SemaphoreHandle_t handle = reinterpret_cast<SemaphoreHandle_t>(sem);
+ xSemaphoreGive(handle);
+}
+}
- bool status = inferenceProcess.runJob(*job);
- job->status = status;
+/****************************************************************************
+ * Functions
+ ****************************************************************************/
- // Return inference job response
- xQueueSend(job->queue, &job, portMAX_DELAY);
- }
+// inferenceProcessTask - Run jobs from queue with available driver
+void inferenceProcessTask(void *pvParameters) {
+ ProcessTaskParams params = *reinterpret_cast<ProcessTaskParams *>(pvParameters);
+
+ class InferenceProcess inferenceProcess(params.tensorArena, params.arenaSize);
- vTaskDelete(NULL);
+ for (;;) {
+ xInferenceJob *xJob;
+
+ xQueueReceive(params.queueHandle, &xJob, portMAX_DELAY);
+ bool status = inferenceProcess.runJob(*xJob);
+ xJob->status = status;
+ xQueueSend(xJob->responseQueue, &xJob, portMAX_DELAY);
+ }
+ vTaskDelete(nullptr);
}
-void inferenceJobTask(void *pvParameters) {
+// inferenceSenderTask - Creates NUM_INFERNECE_JOBS jobs, queues them, and then listens for completion status
+void inferenceSenderTask(void *pvParameters) {
+ int ret = 0;
+
QueueHandle_t inferenceProcessQueue = reinterpret_cast<QueueHandle_t>(pvParameters);
+ xInferenceJob jobs[NUM_JOBS_PER_TASK];
// Create queue for response messages
- QueueHandle_t senderQueue = xQueueCreate(10, sizeof(xInferenceJob *));
-
- // Inference job
- DataPtr networkModel(networkModelData, sizeof(networkModelData));
- DataPtr input(inputData, sizeof(inputData));
- DataPtr expected(expectedData, sizeof(expectedData));
-
- xInferenceJob job;
- xInferenceJob *j = &job;
- job.name = "mobilenet_v2";
- job.networkModel = networkModel;
- job.input.push_back(input);
- job.expectedOutput.push_back(expected);
- job.queue = senderQueue;
-
- // Send job
- printf("Sending inference job\n");
- xQueueSend(inferenceProcessQueue, &j, portMAX_DELAY);
-
- // Wait for response
- xQueueReceive(senderQueue, &j, portMAX_DELAY);
- printf("Received inference job response. status=%u\n", j->status);
-
- exit(j->status);
+ QueueHandle_t senderQueue = xQueueCreate(NUM_JOBS_PER_TASK, sizeof(xInferenceJob *));
+
+ // Create and queue the jobs
+ for (int n = 0; n < NUM_JOBS_PER_TASK; n++) {
+ // Create job
+ xInferenceJob *job = &jobs[n];
+ job->name = string(modelName);
+ job->networkModel = DataPtr(networkModelData, sizeof(networkModelData));
+ job->input.push_back(DataPtr(inputData, sizeof(inputData)));
+ job->expectedOutput.push_back(DataPtr(expectedOutputData, sizeof(expectedOutputData)));
+ job->responseQueue = senderQueue;
+ // Send job
+ printf("Sending inference job: job=%p, name=%s\n", job, job->name.c_str());
+ xQueueSend(inferenceProcessQueue, &job, portMAX_DELAY);
+ }
+
+ // Listen for completion status
+ do {
+ xInferenceJob *pSendJob;
+ xQueueReceive(senderQueue, &pSendJob, portMAX_DELAY);
+ printf("inferenceSenderTask: received response for job: %s, status = %u\n",
+ pSendJob->name.c_str(),
+ pSendJob->status);
+
+ totalCompletedJobs++;
+ ret = (pSendJob->status);
+ if (pSendJob->status != 0) {
+ break;
+ }
+ } while (totalCompletedJobs < NUM_JOBS_PER_TASK * NUM_JOB_TASKS);
+
+ vQueueDelete(senderQueue);
+
+ printf("FreeRTOS application returning %d.\n", ret);
+ exit(ret);
}
-} // namespace
+/****************************************************************************
+ * Application
+ ****************************************************************************/
-/* Keep the queue ouf of the stack sinde freertos resets it when the scheduler starts.*/
-QueueHandle_t inferenceProcessQueue;
+// Declare variables in global scope to avoid stack since FreeRTOS resets stack when the scheduler is started
+static QueueHandle_t inferenceProcessQueue;
+static ProcessTaskParams taskParams[NUM_INFERENCE_TASKS];
+// FreeRTOS application. NOTE: Additional tasks may require increased heap size.
int main() {
- // Inference process
- inferenceProcessQueue = xQueueCreate(10, sizeof(xInferenceJob *));
- xTaskCreate(inferenceProcessTask, "inferenceProcess", 2 * 1024, inferenceProcessQueue, 1, nullptr);
+ BaseType_t ret;
+ inferenceProcessQueue = xQueueCreate(NUM_JOBS_PER_TASK, sizeof(xInferenceJob *));
+
+ // inferenceSender tasks to create and queue the jobs
+ for (int n = 0; n < NUM_JOB_TASKS; n++) {
+ ret = xTaskCreate(inferenceSenderTask, "inferenceSenderTask", 2 * 1024, inferenceProcessQueue, 2, nullptr);
+ if (ret != pdPASS) {
+ printf("FreeRTOS: Failed to create 'inferenceSenderTask%i'\n", n);
+ exit(1);
+ }
+ }
- // Inference job task
- xTaskCreate(inferenceJobTask, "inferenceJob", 2 * 1024, inferenceProcessQueue, 2, nullptr);
+ // Create inferenceProcess tasks to process the queued jobs
+ for (int n = 0; n < NUM_INFERENCE_TASKS; n++) {
+ taskParams[n] = ProcessTaskParams(inferenceProcessQueue, inferenceProcessTensorArena[n], arenaSize);
+ ret = xTaskCreate(inferenceProcessTask, "inferenceProcessTask", 3 * 1024, &taskParams[n], 3, nullptr);
+ if (ret != pdPASS) {
+ printf("FreeRTOS: Failed to create 'inferenceProcessTask%i'\n", n);
+ exit(1);
+ }
+ }
- // Run the scheduler
+ // Start Scheduler
vTaskStartScheduler();
+ printf("FreeRTOS application failed to initialise \n");
+ exit(1);
+
return 0;
-}
+} \ No newline at end of file
diff --git a/applications/freertos/model.h b/applications/freertos/model.h
index cd8f656..3b3078a 100644
--- a/applications/freertos/model.h
+++ b/applications/freertos/model.h
@@ -16,6 +16,8 @@
* limitations under the License.
*/
+const char *modelName = "keyword_spotting_cnn_small_int8";
+
uint8_t networkModelData[] __attribute__((section("network_model_sec"), aligned(16))) = {
0x28, 0x00, 0x00, 0x00, 0x54, 0x46, 0x4c, 0x33, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x12, 0x00,
diff --git a/applications/freertos/output.h b/applications/freertos/output.h
index 48380b4..0aa7954 100644
--- a/applications/freertos/output.h
+++ b/applications/freertos/output.h
@@ -16,7 +16,7 @@
* limitations under the License.
*/
-uint8_t expectedData[1001] __attribute__((aligned(4), section("expected_output_data_sec"))) = {
+uint8_t expectedOutputData[1001] __attribute__((aligned(4), section("expected_output_data_sec"))) = {
0x32, 0x35, 0x45, 0x51, 0x55, 0x5b, 0x4c, 0x5a, 0x42, 0x39, 0x2f, 0x37,
0x3b, 0x31, 0x3f, 0x3a, 0x36, 0x43, 0x36, 0x46, 0x32, 0x39, 0x32, 0x35,
0x3b, 0x31, 0x48, 0x3d, 0x43, 0x3c, 0x3b, 0x43, 0x45, 0x30, 0x43, 0x52,