applications/freertos/main.cpp


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294

/*
 * Copyright (c) 2019-2021 Arm Limited. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/****************************************************************************
 * Includes
 ****************************************************************************/

#include "FreeRTOS.h"
#include "portmacro.h"
#include "queue.h"
#include "semphr.h"
#include "task.h"

#include <inttypes.h>
#include <stdio.h>
#include <vector>

#include "inference_process.hpp"

// Model data (defined and changeable by modifying the compile definitions in CMakeLists.txt)
#include "input.h"
#include "model.h"
#include "output.h"

using namespace std;
using namespace InferenceProcess;

/****************************************************************************
 * Defines
 ****************************************************************************/

// Nr. of tasks to process inferences with. Task reserves driver & runs inference (Normally 1 per NPU, but not a must)
#define NUM_INFERENCE_TASKS 1
// Nr. of tasks to create jobs and receive responses
#define NUM_JOB_TASKS 2
// Nr. of jobs to create per job task
#define NUM_JOBS_PER_TASK 1

// Tensor arena size (per inference task).
// The expansions are parenthesised so the macro behaves as a single value when it is used inside a larger
// expression (e.g. `x / TENSOR_ARENA_SIZE_PER_INFERENCE`).
#ifdef TENSOR_ARENA_SIZE // If defined in model.h
#define TENSOR_ARENA_SIZE_PER_INFERENCE (TENSOR_ARENA_SIZE)
#else // If not defined, use maximum available, split evenly between the inference tasks
#define TENSOR_ARENA_SIZE_PER_INFERENCE (2000000 / NUM_INFERENCE_TASKS)
#endif

/****************************************************************************
 * InferenceJob
 ****************************************************************************/

// Bundle of arguments handed to an inferenceProcessTask through its pvParameters pointer.
struct ProcessTaskParams {
    // Queue the task receives job pointers from
    QueueHandle_t queueHandle = nullptr;
    // Start of the tensor arena given to the task's InferenceProcess instance
    uint8_t *tensorArena = nullptr;
    // Size of that arena in bytes
    size_t arenaSize = 0;

    // Leaves every field at its null/zero default.
    ProcessTaskParams() = default;

    // Fills in all three fields at once.
    ProcessTaskParams(QueueHandle_t _queue, uint8_t *_tensorArena, size_t _arenaSize) :
        queueHandle(_queue), tensorArena(_tensorArena), arenaSize(_arenaSize) {}
};

namespace {
// Size in bytes of the tensor arena reserved for each inference task
constexpr size_t arenaSize = TENSOR_ARENA_SIZE_PER_INFERENCE;

// Count of completed jobs summed over all sender tasks; lets the application exit correctly when NUM_JOB_TASKS > 1
int totalCompletedJobs{0};

// The objects below are kept at namespace scope instead of on main()'s stack, because FreeRTOS resets the stack
// when the scheduler is started
ProcessTaskParams taskParams[NUM_INFERENCE_TASKS];
QueueHandle_t inferenceProcessQueue;
} // namespace

// Backing storage for the tensor arenas: one row of arenaSize bytes per inference task.
// The attribute places the array in the ".bss.tensor_arena" section and aligns it to 16 bytes.
__attribute__((section(".bss.tensor_arena"), aligned(16)))
uint8_t inferenceProcessTensorArena[NUM_INFERENCE_TASKS][arenaSize];

// FreeRTOS-aware extension of InferenceProcess::InferenceJob. On top of the base job it carries the queue the
// finished job is posted back to and the outcome of the run, so jobs can be passed between tasks.
struct xInferenceJob : public InferenceJob {
    // Queue on which the job pointer is returned to its sender once it has been processed
    QueueHandle_t responseQueue = nullptr;
    // Value returned by InferenceProcess::runJob for this job
    bool status = false;

    // Empty job with no response queue attached.
    xInferenceJob() : InferenceJob() {}

    // Forwards the job description to InferenceJob and records the queue to answer on.
    xInferenceJob(const string &_name,
                  const DataPtr &_networkModel,
                  const vector<DataPtr> &_input,
                  const vector<DataPtr> &_output,
                  const vector<DataPtr> &_expectedOutput,
                  const size_t _numBytesToPrint,
                  const vector<uint8_t> &_pmuEventConfig,
                  const uint32_t _pmuCycleCounterEnable,
                  QueueHandle_t _queue) :
        InferenceJob(_name,
                     _networkModel,
                     _input,
                     _output,
                     _expectedOutput,
                     _numBytesToPrint,
                     _pmuEventConfig,
                     _pmuCycleCounterEnable),
        responseQueue(_queue) {}
};

/****************************************************************************
 * Mutex & Semaphore
 * Overrides weak-linked symbols in ethosu_driver.c to implement thread handling
 ****************************************************************************/

extern "C" {

void *ethosu_mutex_create(void) {
    SemaphoreHandle_t sem = xSemaphoreCreateMutex();
    if (sem == NULL) {
        printf("Error: Failed to create mutex.\n");
    }
    return (void *)sem;
}

void ethosu_mutex_lock(void *mutex) {
    SemaphoreHandle_t handle = reinterpret_cast<SemaphoreHandle_t>(mutex);
    if (xSemaphoreTake(handle, portMAX_DELAY) != pdTRUE) {
        printf("Error: Failed to lock mutex.\n");
    }
}

void ethosu_mutex_unlock(void *mutex) {
    SemaphoreHandle_t handle = reinterpret_cast<SemaphoreHandle_t>(mutex);
    if (xSemaphoreGive(handle) != pdTRUE) {
        printf("Error: Failed to unlock mutex.\n");
    }
}

void *ethosu_semaphore_create(void) {
    SemaphoreHandle_t sem = xSemaphoreCreateBinary();
    if (sem == NULL) {
        printf("Error: Failed to create semaphore.\n");
    }
    return (void *)sem;
}

void ethosu_semaphore_take(void *sem) {
    SemaphoreHandle_t handle = reinterpret_cast<SemaphoreHandle_t>(sem);
    if (xSemaphoreTake(handle, portMAX_DELAY) != pdTRUE) {
        printf("Error: Failed to take semaphore.\n");
    }
}

void ethosu_semaphore_give(void *sem) {
    SemaphoreHandle_t handle = reinterpret_cast<SemaphoreHandle_t>(sem);
    BaseType_t ret;

    if (xPortIsInsideInterrupt()) {
        ret = xSemaphoreGiveFromISR(handle, NULL);
        if (ret != pdTRUE) {
            printf("Error: Failed to give semaphore from ISR. ret - 0x%08x\n", ret);
        }
    } else {
        ret = xSemaphoreGive(handle);
        if (ret != pdTRUE) {
            /* The next line is in comment because xSemaphoreGive returns pdFAIL when
               calling it twice in a row during this application run.
               This failure doesn't affect the final result of the FreeRTOS application. */
            /* printf("Error: Failed to give semaphore. ret - 0x%08x\n", ret); */
        }
    }
}
}

/****************************************************************************
 * Functions
 ****************************************************************************/

//  inferenceProcessTask - Run jobs from queue with available driver
void inferenceProcessTask(void *pvParameters) {
    ProcessTaskParams params = *reinterpret_cast<ProcessTaskParams *>(pvParameters);

    class InferenceProcess inferenceProcess(params.tensorArena, params.arenaSize);

    for (;;) {
        xInferenceJob *xJob;

        if (xQueueReceive(params.queueHandle, &xJob, portMAX_DELAY) != pdPASS) {
            printf("Error: inferenceProcessTask failed in receive from Q.\n");
            exit(1);
        }

        bool status  = inferenceProcess.runJob(*xJob);
        xJob->status = status;
        if (xQueueSend(xJob->responseQueue, &xJob, portMAX_DELAY) != pdPASS) {
            printf("Error: inferenceProcessTask failed in send to Q.\n");
            exit(1);
        }
    }
    vTaskDelete(nullptr);
}

//  inferenceSenderTask - Creates NUM_INFERNECE_JOBS jobs, queues them, and then listens for completion status
void inferenceSenderTask(void *pvParameters) {
    int ret = 0;

    QueueHandle_t inferenceProcessQueue = reinterpret_cast<QueueHandle_t>(pvParameters);
    xInferenceJob jobs[NUM_JOBS_PER_TASK];

    // Create queue for response messages
    QueueHandle_t senderQueue = xQueueCreate(NUM_JOBS_PER_TASK, sizeof(xInferenceJob *));

    // Create and queue the jobs
    for (int n = 0; n < NUM_JOBS_PER_TASK; n++) {
        // Create job
        xInferenceJob *job = &jobs[n];
        job->name          = string(modelName);
        job->networkModel  = DataPtr(networkModelData, sizeof(networkModelData));
        job->input.push_back(DataPtr(inputData, sizeof(inputData)));
        job->expectedOutput.push_back(DataPtr(expectedOutputData, sizeof(expectedOutputData)));
        job->responseQueue = senderQueue;
        // Send job
        printf("inferenceSenderTask: Sending inference job: job=%p, name=%s\n", job, job->name.c_str());
        if (xQueueSend(inferenceProcessQueue, &job, portMAX_DELAY) != pdPASS) {
            printf("Error: inferenceSenderTask failed in send to Q.\n");
            exit(1);
        }
    }

    // Listen for completion status
    do {
        xInferenceJob *pSendJob;
        if (xQueueReceive(senderQueue, &pSendJob, portMAX_DELAY) != pdPASS) {
            printf("Error: inferenceSenderTask failed in receive from Q.\n");
            exit(1);
        }
        printf("inferenceSenderTask: received response for job: %s, status = %u\n",
               pSendJob->name.c_str(),
               pSendJob->status);

        totalCompletedJobs++;
        ret = (pSendJob->status);
        if (pSendJob->status != 0) {
            break;
        }
    } while (totalCompletedJobs < NUM_JOBS_PER_TASK * NUM_JOB_TASKS);

    vQueueDelete(senderQueue);

    printf("FreeRTOS application returning %d.\n", ret);
    exit(ret);
}

/****************************************************************************
 * Application
 ****************************************************************************/
// FreeRTOS application. NOTE: Additional tasks may require increased heap size.
//
// Creates the shared job queue, NUM_JOB_TASKS sender tasks (priority 2) and NUM_INFERENCE_TASKS processing tasks
// (priority 3), then starts the scheduler. Any creation failure ends the application with exit(1).
int main() {
    BaseType_t ret;

    inferenceProcessQueue = xQueueCreate(NUM_JOBS_PER_TASK, sizeof(xInferenceJob *));
    if (inferenceProcessQueue == nullptr) {
        // Every task below receives this handle; stop here instead of handing out a null queue
        printf("Error: Failed to create inference process queue.\n");
        exit(1);
    }

    // inferenceSender tasks to create and queue the jobs
    for (int n = 0; n < NUM_JOB_TASKS; n++) {
        ret = xTaskCreate(inferenceSenderTask, "inferenceSenderTask", 2 * 1024, inferenceProcessQueue, 2, nullptr);
        if (ret != pdPASS) {
            printf("Error: Failed to create 'inferenceSenderTask%i'\n", n);
            exit(1);
        }
    }

    // Create inferenceProcess tasks to process the queued jobs
    for (int n = 0; n < NUM_INFERENCE_TASKS; n++) {
        // Each processing task gets its own row of the tensor arena; the parameters are stored in a global
        // so they remain valid after the scheduler has started
        taskParams[n] = ProcessTaskParams(inferenceProcessQueue, inferenceProcessTensorArena[n], arenaSize);
        ret           = xTaskCreate(inferenceProcessTask, "inferenceProcessTask", 8 * 1024, &taskParams[n], 3, nullptr);
        if (ret != pdPASS) {
            printf("Error: Failed to create 'inferenceProcessTask%i'\n", n);
            exit(1);
        }
    }

    // Start Scheduler
    vTaskStartScheduler();

    // Only reached if vTaskStartScheduler() returns
    printf("Error: FreeRTOS application failed to initialise.\n");
    exit(1);

    return 0;
}