From 641c09157ebe25590e0ba378f70f05fc7466b8b1 Mon Sep 17 00:00:00 2001 From: Kristofer Jonsson Date: Mon, 31 Aug 2020 11:34:14 +0200 Subject: Moving message- and inference processes to core software Change-Id: I76e94440402d58848116d06ff3fd1ed2000ac505 --- .clang-format | 61 +++++ CMakeLists.txt | 8 +- applications/CMakeLists.txt | 27 +++ applications/inference_process/CMakeLists.txt | 29 +++ .../include/inference_process.hpp | 65 +++++ .../inference_process/src/inference_process.cc | 270 +++++++++++++++++++++ applications/message_process/CMakeLists.txt | 21 ++ .../message_process/include/message_process.hpp | 90 +++++++ .../message_process/src/message_process.cc | 214 ++++++++++++++++ tensorflow.cmake | 1 + 10 files changed, 784 insertions(+), 2 deletions(-) create mode 100644 .clang-format create mode 100644 applications/CMakeLists.txt create mode 100644 applications/inference_process/CMakeLists.txt create mode 100644 applications/inference_process/include/inference_process.hpp create mode 100644 applications/inference_process/src/inference_process.cc create mode 100644 applications/message_process/CMakeLists.txt create mode 100644 applications/message_process/include/message_process.hpp create mode 100644 applications/message_process/src/message_process.cc diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000..6052b78 --- /dev/null +++ b/.clang-format @@ -0,0 +1,61 @@ +# +# Copyright (c) 2020 Arm Limited. All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the License); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an AS IS BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +--- +Language: Cpp +BasedOnStyle: LLVM +IndentWidth: 4 +ColumnLimit: 120 +AccessModifierOffset: -4 +PointerAlignment: Right + +AlignAfterOpenBracket: Align +AlignConsecutiveAssignments: true +AlignConsecutiveMacros: true +AlignEscapedNewlines: Left +AlignTrailingComments: true + +AllowAllParametersOfDeclarationOnNextLine: false +AllowShortFunctionsOnASingleLine: Empty +AllowShortBlocksOnASingleLine: true +AlwaysBreakTemplateDeclarations: true + +BinPackArguments: false +BinPackParameters: false + +BreakInheritanceList: AfterColon +BreakConstructorInitializers: AfterColon +BreakBeforeBraces: Custom + +BraceWrapping: + AfterClass: false + AfterControlStatement: false + AfterEnum: false + AfterFunction: false + AfterNamespace: false + AfterObjCDeclaration: true + AfterStruct: false + AfterUnion: false + AfterExternBlock: false + BeforeCatch: false + BeforeElse: false + IndentBraces: false + SplitEmptyFunction: false + SplitEmptyRecord: false + SplitEmptyNamespace: true +--- diff --git a/CMakeLists.txt b/CMakeLists.txt index a59231f..0487b44 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -27,10 +27,11 @@ project(core_software VERSION 0.0.1) # Setup paths set(CMSIS_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmsis" CACHE PATH "Path to CMSIS.") set(CORE_DRIVER_PATH "${CMAKE_CURRENT_SOURCE_DIR}/core_driver" CACHE PATH "Path to core driver.") +set(LINUX_DRIVER_STACK_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../linux_driver_stack" CACHE PATH "Path to Linux driver stack for Arm Ethos-U.") set(TENSORFLOW_PATH "${CMAKE_CURRENT_SOURCE_DIR}/tensorflow" CACHE PATH "Path to Tensorflow.") # Select accelerator for tensorflow -set(CORE_SOFTWARE_ACCELERATOR "NPU" CACHE STRING "Set NPU backend for TFLU") +set(CORE_SOFTWARE_ACCELERATOR "NPU" CACHE STRING "Set NPU backend for Tensorflow Lite for microcontrollers") set_property(CACHE CORE_SOFTWARE_ACCELERATOR PROPERTY STRINGS CPU CMSIS-NN NPU) # Define build options @@ -54,5 +55,8 @@ endif() # Build Tensorflow library include(tensorflow.cmake) +# Build applications +add_subdirectory(applications) + # Merge libraries into static library -target_link_libraries(ethosu_core INTERFACE tflu cmsis_core cmsis_device) +target_link_libraries(ethosu_core INTERFACE tflu cmsis_core cmsis_device ethosu_applications) diff --git a/applications/CMakeLists.txt b/applications/CMakeLists.txt new file mode 100644 index 0000000..22de770 --- /dev/null +++ b/applications/CMakeLists.txt @@ -0,0 +1,27 @@ +# +# Copyright (c) 2019-2020 Arm Limited. All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the License); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an AS IS BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +add_library(ethosu_applications INTERFACE) + +# Build message process +add_subdirectory(message_process) +target_link_libraries(ethosu_applications INTERFACE message_process) + +# Build inference process +add_subdirectory(inference_process) +target_link_libraries(ethosu_applications INTERFACE inference_process) diff --git a/applications/inference_process/CMakeLists.txt b/applications/inference_process/CMakeLists.txt new file mode 100644 index 0000000..ba9e44b --- /dev/null +++ b/applications/inference_process/CMakeLists.txt @@ -0,0 +1,29 @@ +# +# Copyright (c) 2019-2020 Arm Limited. All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the License); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an AS IS BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +set(TR_ARENA_SIZE "200000" CACHE STRING "Arena size.") +set(TR_PRINT_OUTPUT_BYTES "" CACHE STRING "Print output data.") + +add_library(inference_process STATIC) +target_include_directories(inference_process PUBLIC include + PRIVATE ${TENSORFLOW_PATH} ${TENSORFLOW_PATH}/tensorflow/lite/micro/tools/make/downloads/flatbuffers/include) +target_link_libraries(inference_process PUBLIC tflu cmsis_core cmsis_device) +target_sources(inference_process PRIVATE src/inference_process.cc) + +# Set arena size +target_compile_definitions(inference_process PRIVATE TENSOR_ARENA_SIZE=${TR_ARENA_SIZE}) diff --git a/applications/inference_process/include/inference_process.hpp b/applications/inference_process/include/inference_process.hpp new file mode 100644 index 0000000..a5fef2c --- /dev/null +++ b/applications/inference_process/include/inference_process.hpp @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2019-2020 Arm Limited. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#pragma once
+
+#include <queue>
+#include <stdlib.h>
+#include <string>
+
+namespace InferenceProcess {
+struct DataPtr {
+    void *data;
+    size_t size;
+
+    DataPtr(void *data = nullptr, size_t size = 0);
+};
+
+struct InferenceJob {
+    std::string name;
+    DataPtr networkModel;
+    DataPtr input;
+    DataPtr output;
+    DataPtr expectedOutput;
+    size_t numBytesToPrint;
+
+    InferenceJob();
+    InferenceJob(const std::string &name,
+                 const DataPtr &networkModel,
+                 const DataPtr &input,
+                 const DataPtr &output,
+                 const DataPtr &expectedOutput,
+                 size_t numBytesToPrint);
+};
+
+class InferenceProcess {
+public:
+    InferenceProcess();
+
+    bool push(const InferenceJob &job);
+    bool runJob(InferenceJob &job);
+    bool run(bool exitOnEmpty = true);
+
+private:
+    volatile uint32_t lock;
+    std::queue<InferenceJob> inferenceJobQueue;
+
+    void getLock();
+    void freeLock();
+};
+} // namespace InferenceProcess
diff --git a/applications/inference_process/src/inference_process.cc b/applications/inference_process/src/inference_process.cc
new file mode 100644
index 0000000..448e29b
--- /dev/null
+++ b/applications/inference_process/src/inference_process.cc
@@ -0,0 +1,270 @@
+/*
+ * Copyright (c) 2019-2020 Arm Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "tensorflow/lite/micro/all_ops_resolver.h"
+#include "tensorflow/lite/micro/micro_error_reporter.h"
+#include "tensorflow/lite/micro/micro_interpreter.h"
+#include "tensorflow/lite/schema/schema_generated.h"
+#include "tensorflow/lite/version.h"
+
+#include "inference_process.hpp"
+
+#ifndef TENSOR_ARENA_SIZE
+#define TENSOR_ARENA_SIZE (1024)
+#endif
+
+__attribute__((section(".bss.NoInit"), aligned(16))) uint8_t inferenceProcessTensorArena[TENSOR_ARENA_SIZE];
+
+namespace {
+void print_output_data(TfLiteTensor *output, size_t bytesToPrint) {
+    const int numBytesToPrint = std::min(output->bytes, bytesToPrint);
+
+    int dims_size = output->dims->size;
+    printf("{\n");
+    printf("\"dims\": [%d,", dims_size);
+    for (int i = 0; i < output->dims->size - 1; ++i) {
+        printf("%d,", output->dims->data[i]);
+    }
+    printf("%d],\n", output->dims->data[dims_size - 1]);
+
+    printf("\"data_address\": \"%08x\",\n", (uint32_t)output->data.data);
+    printf("\"data\":\"");
+    for (int i = 0; i < numBytesToPrint - 1; ++i) {
+        if (i % 16 == 0 && i != 0) {
+            printf("\n");
+        }
+        printf("0x%02x,", output->data.uint8[i]);
+    }
+    printf("0x%02x\"\n", output->data.uint8[numBytesToPrint - 1]);
+    printf("}");
+}
+
+bool copyOutput(const TfLiteTensor &src, InferenceProcess::DataPtr &dst) {
+    if (dst.data == nullptr) {
+        return false;
+    }
+
+    if (src.bytes > dst.size) {
+        printf("Tensor size %d does not match output size %d.\n", src.bytes, dst.size);
+        return true;
+    }
+
+    std::copy(src.data.uint8, src.data.uint8 + src.bytes, static_cast<uint8_t *>(dst.data));
+    dst.size = src.bytes;
+
+    return false;
+}
+
+} // namespace
+
+namespace InferenceProcess {
+DataPtr::DataPtr(void *data, size_t size) : data(data), size(size) {}
+
+InferenceJob::InferenceJob() : numBytesToPrint(0) {}
+
+InferenceJob::InferenceJob(const std::string &name,
+                           const DataPtr &networkModel,
+                           const DataPtr &input,
+                           const DataPtr &output,
+                           const DataPtr &expectedOutput,
+                           size_t numBytesToPrint) :
+    name(name),
+    networkModel(networkModel), input(input), output(output), expectedOutput(expectedOutput),
+    numBytesToPrint(numBytesToPrint) {}
+
+InferenceProcess::InferenceProcess() : lock(0) {}
+
+// NOTE: Adding code for get_lock & free_lock with some corrections from
+// http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dai0321a/BIHEJCHB.html
+// TODO: check correctness?
+void InferenceProcess::getLock() {
+    int status = 0;
+
+    do {
+        // Wait until lock_var is free
+        while (__LDREXW(&lock) != 0)
+            ;
+
+        // Try to set lock_var
+        status = __STREXW(1, &lock);
+    } while (status != 0);
+
+    // Do not start any other memory access until memory barrier is completed
+    __DMB();
+}
+
+// TODO: check correctness?
+void InferenceProcess::freeLock() {
+    // Ensure memory operations completed before releasing lock
+    __DMB();
+
+    lock = 0;
+}
+
+bool InferenceProcess::push(const InferenceJob &job) {
+    getLock();
+    inferenceJobQueue.push(job);
+    freeLock();
+
+    return true;
+}
+
+bool InferenceProcess::runJob(InferenceJob &job) {
+    printf("Running inference job: %s\n", job.name.c_str());
+
+    tflite::MicroErrorReporter microErrorReporter;
+    tflite::ErrorReporter *reporter = &microErrorReporter;
+
+    const tflite::Model *model = ::tflite::GetModel(job.networkModel.data);
+    if (model->version() != TFLITE_SCHEMA_VERSION) {
+        printf("Model provided is schema version %d not equal "
+               "to supported version %d.\n",
+               model->version(),
+               TFLITE_SCHEMA_VERSION);
+        return true;
+    }
+
+    tflite::AllOpsResolver resolver;
+
+    tflite::MicroInterpreter interpreter(model, resolver, inferenceProcessTensorArena, TENSOR_ARENA_SIZE, reporter);
+
+    TfLiteStatus allocate_status = interpreter.AllocateTensors();
+    if (allocate_status != kTfLiteOk) {
+        printf("AllocateTensors failed for inference job: %s\n", job.name.c_str());
+        return true;
+    }
+
+    bool inputSizeError = false;
+    // TODO: adapt for multiple inputs
+    // for (unsigned int i = 0; i < interpreter.inputs_size(); ++i)
+    for (unsigned int i = 0; i < 1; ++i) {
+        TfLiteTensor *input = interpreter.input(i);
+        if (input->bytes != job.input.size) {
+            // If input sizes don't match, then we could end up copying
+            // uninitialized or partial data.
+            inputSizeError = true;
+            printf("Allocated size: %d for input: %d doesn't match the "
+                   "received input size: %d for job: %s\n",
+                   input->bytes,
+                   i,
+                   job.input.size,
+                   job.name.c_str());
+            return true;
+        }
+        memcpy(input->data.uint8, job.input.data, input->bytes);
+    }
+    if (inputSizeError) {
+        return true;
+    }
+
+    TfLiteStatus invoke_status = interpreter.Invoke();
+    if (invoke_status != kTfLiteOk) {
+        printf("Invoke failed for inference job: %s\n", job.name.c_str());
+        return true;
+    }
+
+    copyOutput(*interpreter.output(0), job.output);
+
+    if (job.numBytesToPrint > 0) {
+        // Print all of the output data, or the first NUM_BYTES_TO_PRINT bytes,
+        // whichever comes first as well as the output shape.
+        printf("num_of_outputs: %d\n", interpreter.outputs_size());
+        printf("output_begin\n");
+        printf("[\n");
+        for (unsigned int i = 0; i < interpreter.outputs_size(); i++) {
+            TfLiteTensor *output = interpreter.output(i);
+            print_output_data(output, job.numBytesToPrint);
+            if (i != interpreter.outputs_size() - 1) {
+                printf(",\n");
+            }
+        }
+        printf("]\n");
+        printf("output_end\n");
+    }
+
+    if (job.expectedOutput.data != nullptr) {
+        bool outputSizeError = false;
+        // TODO: adapt for multiple outputs
+        // for (unsigned int i = 0; i < interpreter.outputs_size(); i++)
+        for (unsigned int i = 0; i < 1; i++) {
+            TfLiteTensor *output = interpreter.output(i);
+            if (job.expectedOutput.size != output->bytes) {
+                // If the expected output & the actual output size doesn't
+                // match, we could end up accessing out-of-bound data.
+                // Also there's no need to compare the data, as we know
+                // that sizes differ.
+                outputSizeError = true;
+                printf("Output size: %d for output: %d doesn't match with "
+                       "the expected output size: %d for job: %s\n",
+                       output->bytes,
+                       i,
+                       job.expectedOutput.size,
+                       job.name.c_str());
+                return true;
+            }
+            for (unsigned int j = 0; j < output->bytes; ++j) {
+                if (output->data.uint8[j] != (static_cast<uint8_t *>(job.expectedOutput.data))[j]) {
+                    printf("Output data doesn't match expected output data at index: "
+                           "%d, expected: %02X actual: %02X",
+                           j,
+                           (static_cast<uint8_t *>(job.expectedOutput.data))[j],
+                           output->data.uint8[j]);
+                }
+            }
+        }
+        if (outputSizeError) {
+            return true;
+        }
+    }
+    printf("Finished running job: %s\n", job.name.c_str());
+
+    return false;
+}
+
+bool InferenceProcess::run(bool exitOnEmpty) {
+    bool anyJobFailed = false;
+
+    while (true) {
+        getLock();
+        bool empty = inferenceJobQueue.empty();
+        freeLock();
+
+        if (empty) {
+            if (exitOnEmpty) {
+                printf("Exit from InferenceProcess::run() on empty job queue!\n");
+                break;
+            }
+
+            continue;
+        }
+
+        getLock();
+        InferenceJob job = inferenceJobQueue.front();
+        inferenceJobQueue.pop();
+        freeLock();
+
+        if (runJob(job)) {
+            anyJobFailed = true;
+            continue;
+        }
+    }
+
+    return anyJobFailed;
+}
+
+} // namespace InferenceProcess
diff --git a/applications/message_process/CMakeLists.txt b/applications/message_process/CMakeLists.txt
new file mode 100644
index 0000000..51d514b
--- /dev/null
+++ b/applications/message_process/CMakeLists.txt
@@ -0,0 +1,21 @@
+#
+# Copyright (c) 2019-2020 Arm Limited. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the License); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an AS IS BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+add_library(message_process STATIC src/message_process.cc)
+target_include_directories(message_process PUBLIC include ${LINUX_DRIVER_STACK_PATH}/kernel)
+target_link_libraries(message_process PRIVATE cmsis_core inference_process)
diff --git a/applications/message_process/include/message_process.hpp b/applications/message_process/include/message_process.hpp
new file mode 100644
index 0000000..8044f7c
--- /dev/null
+++ b/applications/message_process/include/message_process.hpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2020 Arm Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MESSAGE_PROCESS_H
+#define MESSAGE_PROCESS_H
+
+#include <ethosu_core_interface.h>
+#include <inference_process.hpp>
+
+#include <cstddef>
+#include <cstdint>
+
+namespace MessageProcess {
+
+template <size_t SIZE>
+struct Queue {
+    ethosu_core_queue_header header;
+    uint8_t data[SIZE];
+
+    constexpr Queue() : header({SIZE, 0, 0}) {}
+
+    constexpr ethosu_core_queue *toQueue() {
+        return reinterpret_cast<ethosu_core_queue *>(&header);
+    }
+};
+
+class QueueImpl {
+public:
+    struct Vec {
+        const void *base;
+        size_t length;
+    };
+
+    QueueImpl(ethosu_core_queue &queue);
+
+    bool empty() const;
+    size_t available() const;
+    size_t capacity() const;
+    bool read(uint8_t *dst, uint32_t length);
+    bool write(const Vec *vec, size_t length);
+    bool write(const uint32_t type, const void *src = nullptr, uint32_t length = 0);
+
+    template <typename T>
+    bool read(T &dst) {
+        return read(reinterpret_cast<uint8_t *>(&dst), sizeof(dst));
+    }
+
+    template <typename T>
+    bool write(const uint32_t type, const T &src) {
+        return write(type, reinterpret_cast<const void *>(&src), sizeof(src));
+    }
+
+private:
+    ethosu_core_queue &queue;
+};
+
+class MessageProcess {
+public:
+    MessageProcess(ethosu_core_queue &in, ethosu_core_queue &out, InferenceProcess::InferenceProcess &inferenceProcess);
+
+    void run();
+    void handleIrq();
+    bool handleMessage();
+    void sendPong();
+    void sendInferenceRsp(uint64_t userArg, size_t ofmSize, bool failed);
+
+private:
+    QueueImpl queueIn;
+    QueueImpl queueOut;
+    InferenceProcess::InferenceProcess &inferenceProcess;
+};
+
+} // namespace MessageProcess
+
+#endif
diff --git a/applications/message_process/src/message_process.cc b/applications/message_process/src/message_process.cc
new file mode 100644
index 0000000..e61c648
--- /dev/null
+++ b/applications/message_process/src/message_process.cc
@@ -0,0 +1,214 @@
+/*
+ * Copyright (c) 2020 Arm Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <message_process.hpp>
+
+#include <cstddef>
+#include <cstdio>
+
+namespace MessageProcess {
+
+QueueImpl::QueueImpl(ethosu_core_queue &queue) : queue(queue) {}
+
+bool QueueImpl::empty() const {
+    return queue.header.read == queue.header.write;
+}
+
+size_t QueueImpl::available() const {
+    size_t avail = queue.header.write - queue.header.read;
+
+    if (queue.header.read > queue.header.write) {
+        avail += queue.header.size;
+    }
+
+    return avail;
+}
+
+size_t QueueImpl::capacity() const {
+    return queue.header.size - available();
+}
+
+bool QueueImpl::read(uint8_t *dst, uint32_t length) {
+    const uint8_t *end = dst + length;
+    uint32_t rpos = queue.header.read;
+
+    if (length > available()) {
+        return false;
+    }
+
+    while (dst < end) {
+        *dst++ = queue.data[rpos];
+        rpos = (rpos + 1) % queue.header.size;
+    }
+
+    queue.header.read = rpos;
+
+#if defined(__DCACHE_PRESENT) && (__DCACHE_PRESENT == 1U)
+    SCB_CleanDCache();
+#endif
+
+    return true;
+}
+
+bool QueueImpl::write(const Vec *vec, size_t length) {
+    size_t total = 0;
+
+    for (size_t i = 0; i < length; i++) {
+        total += vec[i].length;
+    }
+
+    if (total > capacity()) {
+        return false;
+    }
+
+    uint32_t wpos = queue.header.write;
+
+    for (size_t i = 0; i < length; i++) {
+        const uint8_t *src = reinterpret_cast<const uint8_t *>(vec[i].base);
+        const uint8_t *end = src + vec[i].length;
+
+        while (src < end) {
+            queue.data[wpos] = *src++;
+            wpos = (wpos + 1) % queue.header.size;
+        }
+    }
+
+    // Update the write position last
+    queue.header.write = wpos;
+
+#if defined(__DCACHE_PRESENT) && (__DCACHE_PRESENT == 1U)
+    SCB_CleanDCache();
+#endif
+
+    // TODO replace with mailbox driver APIs
+    volatile uint32_t *set = reinterpret_cast<volatile uint32_t *>(0x41A00014);
+    *set = 0x1;
+
+    return true;
+}
+
+bool QueueImpl::write(const uint32_t type, const void *src, uint32_t length) {
+    ethosu_core_msg msg = {type, length};
+    Vec vec[2] = {{&msg, sizeof(msg)}, {src, length}};
+
+    return write(vec, 2);
+}
+
+MessageProcess::MessageProcess(ethosu_core_queue &in,
+                               ethosu_core_queue &out,
+                               InferenceProcess::InferenceProcess &inferenceProcess) :
+    queueIn(in),
+    queueOut(out), inferenceProcess(inferenceProcess) {}
+
+void MessageProcess::run() {
+    while (true) {
+        // Handle all messages in queue
+        while (handleMessage())
+            ;
+
+        // Wait for event
+        __WFE();
+    }
+}
+
+void MessageProcess::handleIrq() {
+    __SEV();
+}
+
+bool MessageProcess::handleMessage() {
+    ethosu_core_msg msg;
+    union {
+        ethosu_core_inference_req inferenceReq;
+        uint8_t data[1000];
+    } data;
+
+#if defined(__DCACHE_PRESENT) && (__DCACHE_PRESENT == 1U)
+    SCB_InvalidateDCache();
+#endif
+
+    // Read msg header
+    if (!queueIn.read(msg)) {
+        return false;
+    }
+
+    printf("Message. type=%u, length=%u\n", msg.type, msg.length);
+
+    // Read payload
+    if (!queueIn.read(data.data, msg.length)) {
+        printf("Failed to read payload.\n");
+        return false;
+    }
+
+    switch (msg.type) {
+    case ETHOSU_CORE_MSG_PING:
+        printf("Ping\n");
+        sendPong();
+        break;
+    case ETHOSU_CORE_MSG_INFERENCE_REQ: {
+        std::memcpy(&data.inferenceReq, data.data, sizeof(data.data));
+
+        ethosu_core_inference_req &req = data.inferenceReq;
+
+        printf("InferenceReq. network={0x%x, %u}, ifm={0x%x, %u}, ofm={0x%x, %u}\n",
+               req.network.ptr,
+               req.network.size,
+               req.ifm.ptr,
+               req.ifm.size,
+               req.ofm.ptr,
+               req.ofm.size,
+               req.user_arg);
+
+        InferenceProcess::DataPtr networkModel(reinterpret_cast<void *>(req.network.ptr), req.network.size);
+        InferenceProcess::DataPtr ifm(reinterpret_cast<void *>(req.ifm.ptr), req.ifm.size);
+        InferenceProcess::DataPtr ofm(reinterpret_cast<void *>(req.ofm.ptr), req.ofm.size);
+        InferenceProcess::DataPtr expectedOutput;
+        InferenceProcess::InferenceJob job("job", networkModel, ifm, ofm, expectedOutput, -1);
+
+        bool failed = inferenceProcess.runJob(job);
+
+        sendInferenceRsp(data.inferenceReq.user_arg, job.output.size, failed);
+        break;
+    }
+    default:
+        break;
+    }
+
+    return true;
+}
+
+void MessageProcess::sendPong() {
+    if (!queueOut.write(ETHOSU_CORE_MSG_PONG)) {
+        printf("Failed to write pong.\n");
+    }
+}
+
+void MessageProcess::sendInferenceRsp(uint64_t userArg, size_t ofmSize, bool failed) {
+    ethosu_core_inference_rsp rsp;
+
+    rsp.user_arg = userArg;
+    rsp.ofm_size = ofmSize;
+    rsp.status = failed ? ETHOSU_CORE_STATUS_ERROR : ETHOSU_CORE_STATUS_OK;
+
+    printf(
+        "Sending inference response. userArg=0x%llx, ofm_size=%u, status=%u\n", rsp.user_arg, rsp.ofm_size, rsp.status);
+
+    if (!queueOut.write(ETHOSU_CORE_MSG_INFERENCE_RSP, rsp)) {
+        printf("Failed to write inference.\n");
+    }
+}
+} // namespace MessageProcess
diff --git a/tensorflow.cmake b/tensorflow.cmake
index f806ee4..8d9ae1d 100644
--- a/tensorflow.cmake
+++ b/tensorflow.cmake
@@ -67,6 +67,7 @@ add_library(tflu STATIC IMPORTED)
 set_property(TARGET tflu PROPERTY IMPORTED_LOCATION ${TFLU_GENDIR}/lib/libtensorflow-microlite.a)
 add_dependencies(tflu tflu_gen)
 target_include_directories(tflu INTERFACE ${TENSORFLOW_PATH})
+target_compile_definitions(tflu INTERFACE TF_LITE_STATIC_MEMORY)
 
 # Install libraries and header files
 get_target_property(TFLU_IMPORTED_LOCATION tflu IMPORTED_LOCATION)
-- 
cgit v1.2.1
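
For readers wiring these two processes into a firmware image, below is a minimal sketch of how the pieces are intended to fit together, using only the APIs introduced by this patch (Queue, MessageProcess, InferenceProcess). The shared-memory section name, queue sizes and mailbox IRQ handler name are illustrative assumptions, not part of the patch; the real firmware must place the queues and hook the interrupt wherever the Linux driver stack expects them.

#include <inference_process.hpp>
#include <message_process.hpp>

namespace {
// Message queues shared with the host. The linker script is assumed to map
// ".shared_memory" to the region the Linux driver stack reads and writes.
__attribute__((section(".shared_memory"))) MessageProcess::Queue<1024> queueIn;
__attribute__((section(".shared_memory"))) MessageProcess::Queue<1024> queueOut;

InferenceProcess::InferenceProcess inferenceProcess;
MessageProcess::MessageProcess messageProcess(*queueIn.toQueue(), *queueOut.toQueue(), inferenceProcess);
} // namespace

// Hypothetical mailbox interrupt handler: wakes the message loop when the host
// signals that new data is available in the inbound queue.
extern "C" void MAILBOX_IRQHandler() {
    messageProcess.handleIrq();
}

int main() {
    // Drains the inbound queue, runs inferences and posts responses; never returns.
    messageProcess.run();
    return 0;
}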