author     Kristofer Jonsson <kristofer.jonsson@arm.com>  2020-09-10 13:26:41 +0200
committer  Kristofer Jonsson <kristofer.jonsson@arm.com>  2020-09-15 13:27:50 +0200
commit     72fa50bcf362643431c39642e5af30781714b2fc (patch)
tree       4c8234b1f5a76d898991379fcdd6441eff3d18b0
parent     98e379c83dd24619752e72e7aefdc15484813652 (diff)
Support inferences with multiple inputs and outputs
Update inference process APIs to support inferences with multiple inputs and multiple outputs.

Update message process to handle the new inference request message with an array of input and output buffers.

Change-Id: Ide0897385a1d829f58edace79140d01d8e3b85a3
-rw-r--r--  applications/inference_process/include/inference_process.hpp  |  13
-rw-r--r--  applications/inference_process/src/inference_process.cc       | 138
-rw-r--r--  applications/message_process/include/message_process.hpp      |   3
-rw-r--r--  applications/message_process/src/message_process.cc           |  76
4 files changed, 148 insertions(+), 82 deletions(-)
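
For context, a minimal sketch of how a caller might drive the updated API, assuming the buffers match the network's non-empty input and output tensors (the buffer names and sizes below are illustrative, not part of the patch):

    #include "inference_process.hpp"

    #include <cstdint>
    #include <vector>

    using InferenceProcess::DataPtr;
    using InferenceProcess::InferenceJob;

    // Illustrative buffers; real sizes must match the network's tensors
    static uint8_t networkData[1024];
    static uint8_t ifm0[256];
    static uint8_t ifm1[64];
    static uint8_t ofm0[128];

    bool runExample(InferenceProcess::InferenceProcess &process) {
        DataPtr networkModel(networkData, sizeof(networkData));

        // One DataPtr per non-empty input/output tensor, in network order
        std::vector<DataPtr> input  = {DataPtr(ifm0, sizeof(ifm0)), DataPtr(ifm1, sizeof(ifm1))};
        std::vector<DataPtr> output = {DataPtr(ofm0, sizeof(ofm0))};

        // An empty expectedOutput vector skips the byte-for-byte comparison
        std::vector<DataPtr> expectedOutput;

        InferenceJob job("example", networkModel, input, output, expectedOutput, 0);

        // runJob() returns true on failure
        return process.runJob(job);
    }
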
diff --git a/applications/inference_process/include/inference_process.hpp b/applications/inference_process/include/inference_process.hpp
index a5fef2c..53b9331 100644
--- a/applications/inference_process/include/inference_process.hpp
+++ b/applications/inference_process/include/inference_process.hpp
@@ -21,6 +21,7 @@
#include <queue>
#include <stdlib.h>
#include <string>
+#include <vector>
namespace InferenceProcess {
struct DataPtr {
@@ -33,17 +34,17 @@ struct DataPtr {
struct InferenceJob {
std::string name;
DataPtr networkModel;
- DataPtr input;
- DataPtr output;
- DataPtr expectedOutput;
+ std::vector<DataPtr> input;
+ std::vector<DataPtr> output;
+ std::vector<DataPtr> expectedOutput;
size_t numBytesToPrint;
InferenceJob();
InferenceJob(const std::string &name,
const DataPtr &networkModel,
- const DataPtr &input,
- const DataPtr &output,
- const DataPtr &expectedOutput,
+ const std::vector<DataPtr> &input,
+ const std::vector<DataPtr> &output,
+ const std::vector<DataPtr> &expectedOutput,
size_t numBytesToPrint);
};
diff --git a/applications/inference_process/src/inference_process.cc b/applications/inference_process/src/inference_process.cc
index 448e29b..5807530 100644
--- a/applications/inference_process/src/inference_process.cc
+++ b/applications/inference_process/src/inference_process.cc
@@ -28,11 +28,13 @@
#define TENSOR_ARENA_SIZE (1024)
#endif
+using namespace std;
+
__attribute__((section(".bss.NoInit"), aligned(16))) uint8_t inferenceProcessTensorArena[TENSOR_ARENA_SIZE];
namespace {
void print_output_data(TfLiteTensor *output, size_t bytesToPrint) {
- const int numBytesToPrint = std::min(output->bytes, bytesToPrint);
+ const int numBytesToPrint = min(output->bytes, bytesToPrint);
int dims_size = output->dims->size;
printf("{\n");
@@ -64,7 +66,7 @@ bool copyOutput(const TfLiteTensor &src, InferenceProcess::DataPtr &dst) {
return true;
}
- std::copy(src.data.uint8, src.data.uint8 + src.bytes, static_cast<uint8_t *>(dst.data));
+ copy(src.data.uint8, src.data.uint8 + src.bytes, static_cast<uint8_t *>(dst.data));
dst.size = src.bytes;
return false;
@@ -77,11 +79,11 @@ DataPtr::DataPtr(void *data, size_t size) : data(data), size(size) {}
InferenceJob::InferenceJob() : numBytesToPrint(0) {}
-InferenceJob::InferenceJob(const std::string &name,
+InferenceJob::InferenceJob(const string &name,
const DataPtr &networkModel,
- const DataPtr &input,
- const DataPtr &output,
- const DataPtr &expectedOutput,
+ const vector<DataPtr> &input,
+ const vector<DataPtr> &output,
+ const vector<DataPtr> &expectedOutput,
size_t numBytesToPrint) :
name(name),
networkModel(networkModel), input(input), output(output), expectedOutput(expectedOutput),
@@ -130,55 +132,80 @@ bool InferenceProcess::runJob(InferenceJob &job) {
tflite::MicroErrorReporter microErrorReporter;
tflite::ErrorReporter *reporter = &microErrorReporter;
+ // Get model handle and verify that the version is correct
const tflite::Model *model = ::tflite::GetModel(job.networkModel.data);
if (model->version() != TFLITE_SCHEMA_VERSION) {
- printf("Model provided is schema version %d not equal "
- "to supported version %d.\n",
+ printf("Model provided is schema version %d not equal to supported version %d.\n",
model->version(),
TFLITE_SCHEMA_VERSION);
return true;
}
+ // Create the TFL micro interpreter
tflite::AllOpsResolver resolver;
-
tflite::MicroInterpreter interpreter(model, resolver, inferenceProcessTensorArena, TENSOR_ARENA_SIZE, reporter);
+ // Allocate tensors
TfLiteStatus allocate_status = interpreter.AllocateTensors();
if (allocate_status != kTfLiteOk) {
printf("AllocateTensors failed for inference job: %s\n", job.name.c_str());
return true;
}
- bool inputSizeError = false;
- // TODO: adapt for multiple inputs
- // for (unsigned int i = 0; i < interpreter.inputs_size(); ++i)
- for (unsigned int i = 0; i < 1; ++i) {
- TfLiteTensor *input = interpreter.input(i);
- if (input->bytes != job.input.size) {
- // If input sizes don't match, then we could end up copying
- // uninitialized or partial data.
- inputSizeError = true;
- printf("Allocated size: %d for input: %d doesn't match the "
- "received input size: %d for job: %s\n",
- input->bytes,
- i,
- job.input.size,
- job.name.c_str());
- return true;
+ // Create a filtered list of non-empty input tensors
+ vector<TfLiteTensor *> inputTensors;
+ for (size_t i = 0; i < interpreter.inputs_size(); ++i) {
+ TfLiteTensor *tensor = interpreter.input(i);
+
+ if (tensor->bytes > 0) {
+ inputTensors.push_back(tensor);
}
- memcpy(input->data.uint8, job.input.data, input->bytes);
}
- if (inputSizeError) {
+
+ if (job.input.size() != inputTensors.size()) {
+ printf("Number of input buffers does not match number of non empty network tensors. input=%zu, network=%zu\n",
+ job.input.size(),
+ inputTensors.size());
return true;
}
+ // Copy input data
+ for (size_t i = 0; i < inputTensors.size(); ++i) {
+ const DataPtr &input = job.input[i];
+ const TfLiteTensor *tensor = inputTensors[i];
+
+ if (input.size != tensor->bytes) {
+ printf("Input size does not match network size. job=%s, index=%zu, input=%zu, network=%u\n",
+ job.name.c_str(),
+ i,
+ input.size,
+ tensor->bytes);
+ return true;
+ }
+
+ copy(static_cast<char *>(input.data), static_cast<char *>(input.data) + input.size, tensor->data.uint8);
+ }
+
+ // Run the inference
TfLiteStatus invoke_status = interpreter.Invoke();
if (invoke_status != kTfLiteOk) {
printf("Invoke failed for inference job: %s\n", job.name.c_str());
return true;
}
- copyOutput(*interpreter.output(0), job.output);
+ // Copy output data
+ if (job.output.size() > 0) {
+ if (interpreter.outputs_size() != job.output.size()) {
+ printf("Number of outputs mismatch. job=%zu, network=%u\n", job.output.size(), interpreter.outputs_size());
+ return true;
+ }
+
+ for (unsigned i = 0; i < interpreter.outputs_size(); ++i) {
+ if (copyOutput(*interpreter.output(i), job.output[i])) {
+ return true;
+ }
+ }
+ }
if (job.numBytesToPrint > 0) {
// Print all of the output data, or the first NUM_BYTES_TO_PRINT bytes,
@@ -186,6 +213,7 @@ bool InferenceProcess::runJob(InferenceJob &job) {
printf("num_of_outputs: %d\n", interpreter.outputs_size());
printf("output_begin\n");
printf("[\n");
+
for (unsigned int i = 0; i < interpreter.outputs_size(); i++) {
TfLiteTensor *output = interpreter.output(i);
print_output_data(output, job.numBytesToPrint);
@@ -193,44 +221,48 @@ bool InferenceProcess::runJob(InferenceJob &job) {
printf(",\n");
}
}
+
printf("]\n");
printf("output_end\n");
}
- if (job.expectedOutput.data != nullptr) {
- bool outputSizeError = false;
- // TODO: adapt for multiple outputs
- // for (unsigned int i = 0; i < interpreter.outputs_size(); i++)
- for (unsigned int i = 0; i < 1; i++) {
- TfLiteTensor *output = interpreter.output(i);
- if (job.expectedOutput.size != output->bytes) {
- // If the expected output & the actual output size doesn't
- // match, we could end up accessing out-of-bound data.
- // Also there's no need to compare the data, as we know
- // that sizes differ.
- outputSizeError = true;
- printf("Output size: %d for output: %d doesn't match with "
- "the expected output size: %d for job: %s\n",
- output->bytes,
- i,
- job.expectedOutput.size,
- job.name.c_str());
+ if (job.expectedOutput.size() > 0) {
+ if (job.expectedOutput.size() != interpreter.outputs_size()) {
+ printf("Expeded number of output tensors does not match network. job=%s, expected=%zu, network=%zu\n",
+ job.name.c_str(),
+ job.expectedOutput.size(),
+ interpreter.outputs_size());
+ return true;
+ }
+
+ for (unsigned int i = 0; i < interpreter.outputs_size(); i++) {
+ const DataPtr &expected = job.expectedOutput[i];
+ const TfLiteTensor *output = interpreter.output(i);
+
+ if (expected.size != output->bytes) {
+ printf(
+ "Expected tensor size does not match network size. job=%s, index=%u, expected=%zu, network=%zu\n",
+ job.name.c_str(),
+ i,
+ expected.size,
+ output->bytes);
return true;
}
+
for (unsigned int j = 0; j < output->bytes; ++j) {
- if (output->data.uint8[j] != (static_cast<uint8_t *>(job.expectedOutput.data))[j]) {
- printf("Output data doesn't match expected output data at index: "
- "%d, expected: %02X actual: %02X",
+ if (output->data.uint8[j] != static_cast<uint8_t *>(expected.data)[j]) {
+ printf("Expected tensor size does not match network size. job=%s, index=%u, offset=%u, "
+ "expected=%02x, network=%02x\n",
+ job.name.c_str(),
+ i,
j,
- (static_cast<uint8_t *>(job.expectedOutput.data))[j],
+ static_cast<uint8_t *>(expected.data)[j],
output->data.uint8[j]);
}
}
}
- if (outputSizeError) {
- return true;
- }
}
+
printf("Finished running job: %s\n", job.name.c_str());
return false;
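
The input handling introduced above reduces to a single pattern: filter out zero-sized tensors, require one buffer per remaining tensor, then size-check and copy. A standalone sketch of that pattern, assuming AllocateTensors() has already succeeded (the helper name copyInputs is illustrative):

    #include <cstdio>
    #include <cstring>
    #include <vector>

    #include "inference_process.hpp"
    #include "tensorflow/lite/micro/micro_interpreter.h"

    // Returns true on failure, mirroring the runJob() convention
    bool copyInputs(tflite::MicroInterpreter &interpreter,
                    const std::vector<InferenceProcess::DataPtr> &input) {
        // Skip zero-sized tensors; the caller provides one buffer per
        // non-empty input tensor, in network order
        std::vector<TfLiteTensor *> tensors;
        for (size_t i = 0; i < interpreter.inputs_size(); ++i) {
            TfLiteTensor *tensor = interpreter.input(i);
            if (tensor->bytes > 0) {
                tensors.push_back(tensor);
            }
        }

        if (input.size() != tensors.size()) {
            printf("Buffer count does not match tensor count. input=%zu, network=%zu\n",
                   input.size(),
                   tensors.size());
            return true;
        }

        for (size_t i = 0; i < tensors.size(); ++i) {
            if (input[i].size != tensors[i]->bytes) {
                printf("Input size does not match tensor size. index=%zu\n", i);
                return true;
            }
            std::memcpy(tensors[i]->data.uint8, input[i].data, input[i].size);
        }

        return false;
    }
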
diff --git a/applications/message_process/include/message_process.hpp b/applications/message_process/include/message_process.hpp
index 8044f7c..51f474d 100644
--- a/applications/message_process/include/message_process.hpp
+++ b/applications/message_process/include/message_process.hpp
@@ -24,6 +24,7 @@
#include <cstddef>
#include <cstdio>
+#include <vector>
namespace MessageProcess {
@@ -77,7 +78,7 @@ public:
void handleIrq();
bool handleMessage();
void sendPong();
- void sendInferenceRsp(uint64_t userArg, size_t ofmSize, bool failed);
+ void sendInferenceRsp(uint64_t userArg, std::vector<InferenceProcess::DataPtr> &ofm, bool failed);
private:
QueueImpl queueIn;
diff --git a/applications/message_process/src/message_process.cc b/applications/message_process/src/message_process.cc
index 2820275..b201f32 100644
--- a/applications/message_process/src/message_process.cc
+++ b/applications/message_process/src/message_process.cc
@@ -22,6 +22,9 @@
#include <cstdio>
#include <cstring>
+using namespace std;
+using namespace InferenceProcess;
+
namespace MessageProcess {
QueueImpl::QueueImpl(ethosu_core_queue &queue) : queue(queue) {}
@@ -112,7 +115,7 @@ bool QueueImpl::write(const uint32_t type, const void *src, uint32_t length) {
MessageProcess::MessageProcess(ethosu_core_queue &in,
ethosu_core_queue &out,
- InferenceProcess::InferenceProcess &inferenceProcess) :
+ ::InferenceProcess::InferenceProcess &inferenceProcess) :
queueIn(in),
queueOut(out), inferenceProcess(inferenceProcess) {}
@@ -165,24 +168,47 @@ bool MessageProcess::handleMessage() {
ethosu_core_inference_req &req = data.inferenceReq;
- printf("InferenceReq. network={0x%x, %u}, ifm={0x%x, %u}, ofm={0x%x, %u}\n",
- req.network.ptr,
- req.network.size,
- req.ifm.ptr,
- req.ifm.size,
- req.ofm.ptr,
- req.ofm.size,
- req.user_arg);
-
- InferenceProcess::DataPtr networkModel(reinterpret_cast<void *>(req.network.ptr), req.network.size);
- InferenceProcess::DataPtr ifm(reinterpret_cast<void *>(req.ifm.ptr), req.ifm.size);
- InferenceProcess::DataPtr ofm(reinterpret_cast<void *>(req.ofm.ptr), req.ofm.size);
- InferenceProcess::DataPtr expectedOutput;
- InferenceProcess::InferenceJob job("job", networkModel, ifm, ofm, expectedOutput, -1);
+ printf("InferenceReq. user_arg=0x%x, network={0x%x, %u}", req.user_arg, req.network.ptr, req.network.size);
+
+ printf(", ifm_count=%u, ifm=[", req.ifm_count);
+ for (uint32_t i = 0; i < req.ifm_count; ++i) {
+ if (i > 0) {
+ printf(", ");
+ }
+
+ printf("{0x%x, %u}", req.ifm[i].ptr, req.ifm[i].size);
+ }
+ printf("]");
+
+ printf(", ofm_count=%u, ofm=[", req.ofm_count);
+ for (uint32_t i = 0; i < req.ofm_count; ++i) {
+ if (i > 0) {
+ printf(", ");
+ }
+
+ printf("{0x%x, %u}", req.ofm[i].ptr, req.ofm[i].size);
+ }
+ printf("]\n");
+
+ DataPtr networkModel(reinterpret_cast<void *>(req.network.ptr), req.network.size);
+
+ vector<DataPtr> ifm;
+ for (uint32_t i = 0; i < req.ifm_count; ++i) {
+ ifm.push_back(DataPtr(reinterpret_cast<void *>(req.ifm[i].ptr), req.ifm[i].size));
+ }
+
+ vector<DataPtr> ofm;
+ for (uint32_t i = 0; i < req.ofm_count; ++i) {
+ ofm.push_back(DataPtr(reinterpret_cast<void *>(req.ofm[i].ptr), req.ofm[i].size));
+ }
+
+ vector<DataPtr> expectedOutput;
+
+ InferenceJob job("job", networkModel, ifm, ofm, expectedOutput, -1);
bool failed = inferenceProcess.runJob(job);
- sendInferenceRsp(data.inferenceReq.user_arg, job.output.size, failed);
+ sendInferenceRsp(data.inferenceReq.user_arg, job.output, failed);
break;
}
default:
@@ -198,15 +224,21 @@ void MessageProcess::sendPong() {
}
}
-void MessageProcess::sendInferenceRsp(uint64_t userArg, size_t ofmSize, bool failed) {
+void MessageProcess::sendInferenceRsp(uint64_t userArg, vector<DataPtr> &ofm, bool failed) {
ethosu_core_inference_rsp rsp;
- rsp.user_arg = userArg;
- rsp.ofm_size = ofmSize;
- rsp.status = failed ? ETHOSU_CORE_STATUS_ERROR : ETHOSU_CORE_STATUS_OK;
+ rsp.user_arg = userArg;
+ rsp.ofm_count = ofm.size();
+ rsp.status = failed ? ETHOSU_CORE_STATUS_ERROR : ETHOSU_CORE_STATUS_OK;
+
+ for (size_t i = 0; i < ofm.size(); ++i) {
+ rsp.ofm_size[i] = ofm[i].size;
+ }
- printf(
- "Sending inference response. userArg=0x%llx, ofm_size=%u, status=%u\n", rsp.user_arg, rsp.ofm_size, rsp.status);
+ printf("Sending inference response. userArg=0x%llx, ofm_count=%u, status=%u\n",
+ rsp.user_arg,
+ rsp.ofm_count,
+ rsp.status);
if (!queueOut.write(ETHOSU_CORE_MSG_INFERENCE_RSP, rsp)) {
printf("Failed to write inference.\n");