Diffstat (limited to 'applications')
4 files changed, 148 insertions, 82 deletions
diff --git a/applications/inference_process/include/inference_process.hpp b/applications/inference_process/include/inference_process.hpp
index a5fef2c..53b9331 100644
--- a/applications/inference_process/include/inference_process.hpp
+++ b/applications/inference_process/include/inference_process.hpp
@@ -21,6 +21,7 @@
 #include <queue>
 #include <stdlib.h>
 #include <string>
+#include <vector>
 
 namespace InferenceProcess {
 struct DataPtr {
@@ -33,17 +34,17 @@ struct DataPtr {
 
 struct InferenceJob {
     std::string name;
     DataPtr networkModel;
-    DataPtr input;
-    DataPtr output;
-    DataPtr expectedOutput;
+    std::vector<DataPtr> input;
+    std::vector<DataPtr> output;
+    std::vector<DataPtr> expectedOutput;
     size_t numBytesToPrint;
 
     InferenceJob();
     InferenceJob(const std::string &name,
                  const DataPtr &networkModel,
-                 const DataPtr &input,
-                 const DataPtr &output,
-                 const DataPtr &expectedOutput,
+                 const std::vector<DataPtr> &input,
+                 const std::vector<DataPtr> &output,
+                 const std::vector<DataPtr> &expectedOutput,
                  size_t numBytesToPrint);
 };
 
diff --git a/applications/inference_process/src/inference_process.cc b/applications/inference_process/src/inference_process.cc
index 448e29b..5807530 100644
--- a/applications/inference_process/src/inference_process.cc
+++ b/applications/inference_process/src/inference_process.cc
@@ -28,11 +28,13 @@
 #define TENSOR_ARENA_SIZE (1024)
 #endif
 
+using namespace std;
+
 __attribute__((section(".bss.NoInit"), aligned(16))) uint8_t inferenceProcessTensorArena[TENSOR_ARENA_SIZE];
 
 namespace {
 void print_output_data(TfLiteTensor *output, size_t bytesToPrint) {
-    const int numBytesToPrint = std::min(output->bytes, bytesToPrint);
+    const int numBytesToPrint = min(output->bytes, bytesToPrint);
 
     int dims_size = output->dims->size;
     printf("{\n");
@@ -64,7 +66,7 @@ bool copyOutput(const TfLiteTensor &src, InferenceProcess::DataPtr &dst) {
         return true;
     }
 
-    std::copy(src.data.uint8, src.data.uint8 + src.bytes, static_cast<uint8_t *>(dst.data));
+    copy(src.data.uint8, src.data.uint8 + src.bytes, static_cast<uint8_t *>(dst.data));
 
     dst.size = src.bytes;
     return false;
@@ -77,11 +79,11 @@ DataPtr::DataPtr(void *data, size_t size) : data(data), size(size) {}
 
 InferenceJob::InferenceJob() : numBytesToPrint(0) {}
 
-InferenceJob::InferenceJob(const std::string &name,
+InferenceJob::InferenceJob(const string &name,
                            const DataPtr &networkModel,
-                           const DataPtr &input,
-                           const DataPtr &output,
-                           const DataPtr &expectedOutput,
+                           const vector<DataPtr> &input,
+                           const vector<DataPtr> &output,
+                           const vector<DataPtr> &expectedOutput,
                            size_t numBytesToPrint) :
     name(name),
     networkModel(networkModel), input(input), output(output), expectedOutput(expectedOutput),
@@ -130,55 +132,80 @@ bool InferenceProcess::runJob(InferenceJob &job) {
     tflite::MicroErrorReporter microErrorReporter;
     tflite::ErrorReporter *reporter = &microErrorReporter;
 
+    // Get model handle and verify that the version is correct
     const tflite::Model *model = ::tflite::GetModel(job.networkModel.data);
     if (model->version() != TFLITE_SCHEMA_VERSION) {
-        printf("Model provided is schema version %d not equal "
-               "to supported version %d.\n",
+        printf("Model provided is schema version %d not equal to supported version %d.\n",
                model->version(),
                TFLITE_SCHEMA_VERSION);
         return true;
     }
 
+    // Create the TFL micro interpreter
     tflite::AllOpsResolver resolver;
-
     tflite::MicroInterpreter interpreter(model, resolver, inferenceProcessTensorArena, TENSOR_ARENA_SIZE, reporter);
 
+    // Allocate tensors
     TfLiteStatus allocate_status = interpreter.AllocateTensors();
     if (allocate_status != kTfLiteOk) {
         printf("AllocateTensors failed for inference job: %s\n", job.name.c_str());
         return true;
     }
 
-    bool inputSizeError = false;
-    // TODO: adapt for multiple inputs
-    // for (unsigned int i = 0; i < interpreter.inputs_size(); ++i)
-    for (unsigned int i = 0; i < 1; ++i) {
-        TfLiteTensor *input = interpreter.input(i);
-        if (input->bytes != job.input.size) {
-            // If input sizes don't match, then we could end up copying
-            // uninitialized or partial data.
-            inputSizeError = true;
-            printf("Allocated size: %d for input: %d doesn't match the "
-                   "received input size: %d for job: %s\n",
-                   input->bytes,
-                   i,
-                   job.input.size,
-                   job.name.c_str());
-            return true;
+    // Create a filtered list of non empty input tensors
+    vector<TfLiteTensor *> inputTensors;
+    for (size_t i = 0; i < interpreter.inputs_size(); ++i) {
+        TfLiteTensor *tensor = interpreter.input(i);
+
+        if (tensor->bytes > 0) {
+            inputTensors.push_back(tensor);
         }
-        memcpy(input->data.uint8, job.input.data, input->bytes);
     }
-    if (inputSizeError) {
+
+    if (job.input.size() != inputTensors.size()) {
+        printf("Number of input buffers does not match number of non empty network tensors. input=%zu, network=%zu\n",
+               job.input.size(),
+               inputTensors.size());
         return true;
     }
 
+    // Copy input data
+    for (size_t i = 0; i < inputTensors.size(); ++i) {
+        const DataPtr &input = job.input[i];
+        const TfLiteTensor *tensor = inputTensors[i];
+
+        if (input.size != tensor->bytes) {
+            printf("Input size does not match network size. job=%s, index=%zu, input=%zu, network=%u\n",
+                   job.name.c_str(),
+                   i,
+                   input.size,
+                   tensor->bytes);
+            return true;
+        }
+
+        copy(static_cast<char *>(input.data), static_cast<char *>(input.data) + input.size, tensor->data.uint8);
+    }
+
+    // Run the inference
     TfLiteStatus invoke_status = interpreter.Invoke();
     if (invoke_status != kTfLiteOk) {
         printf("Invoke failed for inference job: %s\n", job.name.c_str());
         return true;
     }
 
-    copyOutput(*interpreter.output(0), job.output);
+    // Copy output data
+    if (job.output.size() > 0) {
+        if (interpreter.outputs_size() != job.output.size()) {
+            printf("Number of outputs mismatch. job=%zu, network=%u\n", job.output.size(), interpreter.outputs_size());
+            return true;
+        }
+
+        for (unsigned i = 0; i < interpreter.outputs_size(); ++i) {
+            if (copyOutput(*interpreter.output(i), job.output[i])) {
+                return true;
+            }
+        }
+    }
 
     if (job.numBytesToPrint > 0) {
         // Print all of the output data, or the first NUM_BYTES_TO_PRINT bytes,
@@ -186,6 +213,7 @@ bool InferenceProcess::runJob(InferenceJob &job) {
         printf("num_of_outputs: %d\n", interpreter.outputs_size());
         printf("output_begin\n");
         printf("[\n");
+
         for (unsigned int i = 0; i < interpreter.outputs_size(); i++) {
             TfLiteTensor *output = interpreter.output(i);
             print_output_data(output, job.numBytesToPrint);
@@ -193,44 +221,48 @@ bool InferenceProcess::runJob(InferenceJob &job) {
                 printf(",\n");
             }
         }
+
         printf("]\n");
         printf("output_end\n");
     }
 
-    if (job.expectedOutput.data != nullptr) {
-        bool outputSizeError = false;
-        // TODO: adapt for multiple outputs
-        // for (unsigned int i = 0; i < interpreter.outputs_size(); i++)
-        for (unsigned int i = 0; i < 1; i++) {
-            TfLiteTensor *output = interpreter.output(i);
-            if (job.expectedOutput.size != output->bytes) {
-                // If the expected output & the actual output size doesn't
-                // match, we could end up accessing out-of-bound data.
-                // Also there's no need to compare the data, as we know
-                // that sizes differ.
-                outputSizeError = true;
-                printf("Output size: %d for output: %d doesn't match with "
-                       "the expected output size: %d for job: %s\n",
-                       output->bytes,
-                       i,
-                       job.expectedOutput.size,
-                       job.name.c_str());
+    if (job.expectedOutput.size() > 0) {
+        if (job.expectedOutput.size() != interpreter.outputs_size()) {
+            printf("Expected number of output tensors does not match network. job=%s, expected=%zu, network=%zu\n",
+                   job.name.c_str(),
+                   job.expectedOutput.size(),
+                   interpreter.outputs_size());
+            return true;
+        }
+
+        for (unsigned int i = 0; i < interpreter.outputs_size(); i++) {
+            const DataPtr &expected = job.expectedOutput[i];
+            const TfLiteTensor *output = interpreter.output(i);
+
+            if (expected.size != output->bytes) {
+                printf(
+                    "Expected tensor size does not match network size. job=%s, index=%u, expected=%zu, network=%zu\n",
+                    job.name.c_str(),
+                    i,
+                    expected.size,
+                    output->bytes);
                 return true;
             }
+
             for (unsigned int j = 0; j < output->bytes; ++j) {
-                if (output->data.uint8[j] != (static_cast<uint8_t *>(job.expectedOutput.data))[j]) {
-                    printf("Output data doesn't match expected output data at index: "
-                           "%d, expected: %02X actual: %02X",
+                if (output->data.uint8[j] != static_cast<uint8_t *>(expected.data)[j]) {
+                    printf("Expected data does not match network output. job=%s, index=%u, offset=%u, "
+                           "expected=%02x, network=%02x\n",
+                           job.name.c_str(),
+                           i,
                            j,
-                           (static_cast<uint8_t *>(job.expectedOutput.data))[j],
+                           static_cast<uint8_t *>(expected.data)[j],
                            output->data.uint8[j]);
                 }
             }
         }
-        if (outputSizeError) {
-            return true;
-        }
     }
+
     printf("Finished running job: %s\n", job.name.c_str());
 
     return false;
diff --git a/applications/message_process/include/message_process.hpp b/applications/message_process/include/message_process.hpp
index 8044f7c..51f474d 100644
--- a/applications/message_process/include/message_process.hpp
+++ b/applications/message_process/include/message_process.hpp
@@ -24,6 +24,7 @@
 
 #include <cstddef>
 #include <cstdio>
+#include <vector>
 
 namespace MessageProcess {
 
@@ -77,7 +78,7 @@ public:
     void handleIrq();
     bool handleMessage();
     void sendPong();
-    void sendInferenceRsp(uint64_t userArg, size_t ofmSize, bool failed);
+    void sendInferenceRsp(uint64_t userArg, std::vector<InferenceProcess::DataPtr> &ofm, bool failed);
 
 private:
     QueueImpl queueIn;
diff --git a/applications/message_process/src/message_process.cc b/applications/message_process/src/message_process.cc
index 2820275..b201f32 100644
--- a/applications/message_process/src/message_process.cc
+++ b/applications/message_process/src/message_process.cc
@@ -22,6 +22,9 @@
 #include <cstdio>
 #include <cstring>
 
+using namespace std;
+using namespace InferenceProcess;
+
 namespace MessageProcess {
 
 QueueImpl::QueueImpl(ethosu_core_queue &queue) : queue(queue) {}
@@ -112,7 +115,7 @@ bool QueueImpl::write(const uint32_t type, const void *src, uint32_t length) {
 
 MessageProcess::MessageProcess(ethosu_core_queue &in,
                                ethosu_core_queue &out,
-                               InferenceProcess::InferenceProcess &inferenceProcess) :
+                               ::InferenceProcess::InferenceProcess &inferenceProcess) :
     queueIn(in),
     queueOut(out), inferenceProcess(inferenceProcess) {}
 
@@ -165,24 +168,47 @@ bool MessageProcess::handleMessage() {
 
         ethosu_core_inference_req &req = data.inferenceReq;
 
-        printf("InferenceReq. network={0x%x, %u}, ifm={0x%x, %u}, ofm={0x%x, %u}\n",
-               req.network.ptr,
-               req.network.size,
-               req.ifm.ptr,
-               req.ifm.size,
-               req.ofm.ptr,
-               req.ofm.size,
-               req.user_arg);
-
-        InferenceProcess::DataPtr networkModel(reinterpret_cast<void *>(req.network.ptr), req.network.size);
-        InferenceProcess::DataPtr ifm(reinterpret_cast<void *>(req.ifm.ptr), req.ifm.size);
-        InferenceProcess::DataPtr ofm(reinterpret_cast<void *>(req.ofm.ptr), req.ofm.size);
-        InferenceProcess::DataPtr expectedOutput;
-        InferenceProcess::InferenceJob job("job", networkModel, ifm, ofm, expectedOutput, -1);
+        printf("InferenceReq. user_arg=0x%x, network={0x%x, %u}", req.user_arg, req.network.ptr, req.network.size);
+
+        printf(", ifm_count=%u, ifm=[", req.ifm_count);
+        for (uint32_t i = 0; i < req.ifm_count; ++i) {
+            if (i > 0) {
+                printf(", ");
+            }
+
+            printf("{0x%x, %u}", req.ifm[i].ptr, req.ifm[i].size);
+        }
+        printf("]");
+
+        printf(", ofm_count=%u, ofm=[", req.ofm_count);
+        for (uint32_t i = 0; i < req.ofm_count; ++i) {
+            if (i > 0) {
+                printf(", ");
+            }
+
+            printf("{0x%x, %u}", req.ofm[i].ptr, req.ofm[i].size);
+        }
+        printf("]\n");
+
+        DataPtr networkModel(reinterpret_cast<void *>(req.network.ptr), req.network.size);
+
+        vector<DataPtr> ifm;
+        for (uint32_t i = 0; i < req.ifm_count; ++i) {
+            ifm.push_back(DataPtr(reinterpret_cast<void *>(req.ifm[i].ptr), req.ifm[i].size));
+        }
+
+        vector<DataPtr> ofm;
+        for (uint32_t i = 0; i < req.ofm_count; ++i) {
+            ofm.push_back(DataPtr(reinterpret_cast<void *>(req.ofm[i].ptr), req.ofm[i].size));
+        }
+
+        vector<DataPtr> expectedOutput;
+
+        InferenceJob job("job", networkModel, ifm, ofm, expectedOutput, -1);
 
         bool failed = inferenceProcess.runJob(job);
 
-        sendInferenceRsp(data.inferenceReq.user_arg, job.output.size, failed);
+        sendInferenceRsp(data.inferenceReq.user_arg, job.output, failed);
 
         break;
     }
     default:
@@ -198,15 +224,21 @@ void MessageProcess::sendPong() {
     }
 }
 
-void MessageProcess::sendInferenceRsp(uint64_t userArg, size_t ofmSize, bool failed) {
+void MessageProcess::sendInferenceRsp(uint64_t userArg, vector<DataPtr> &ofm, bool failed) {
     ethosu_core_inference_rsp rsp;
 
-    rsp.user_arg = userArg;
-    rsp.ofm_size = ofmSize;
-    rsp.status   = failed ? ETHOSU_CORE_STATUS_ERROR : ETHOSU_CORE_STATUS_OK;
+    rsp.user_arg  = userArg;
+    rsp.ofm_count = ofm.size();
+    rsp.status    = failed ? ETHOSU_CORE_STATUS_ERROR : ETHOSU_CORE_STATUS_OK;
+
+    for (size_t i = 0; i < ofm.size(); ++i) {
+        rsp.ofm_size[i] = ofm[i].size;
+    }
 
-    printf(
-        "Sending inference response. userArg=0x%llx, ofm_size=%u, status=%u\n", rsp.user_arg, rsp.ofm_size, rsp.status);
+    printf("Sending inference response. userArg=0x%llx, ofm_count=%u, status=%u\n",
+           rsp.user_arg,
+           rsp.ofm_count,
+           rsp.status);
 
     if (!queueOut.write(ETHOSU_CORE_MSG_INFERENCE_RSP, rsp)) {
         printf("Failed to write inference.\n");
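
For reference, a minimal sketch of how a caller might drive the vector-based job API introduced by this change. The buffer names and sizes below are hypothetical placeholders; only DataPtr, InferenceJob, and InferenceProcess::runJob come from the headers in this diff, and runJob() returns true on failure as above.

#include "inference_process.hpp"

#include <cstddef>
#include <cstdint>
#include <vector>

// Hypothetical network and feature map buffers. In a real application these
// would point at a TFLite flatbuffer and pre-allocated IFM/OFM memory.
extern uint8_t networkModelData[];
extern uint8_t ifm0[1024];
extern uint8_t ifm1[256];
extern uint8_t ofm0[512];

bool runMultiTensorJob(InferenceProcess::InferenceProcess &process, size_t networkModelSize) {
    using InferenceProcess::DataPtr;
    using InferenceProcess::InferenceJob;

    DataPtr networkModel(networkModelData, networkModelSize);

    // One DataPtr per non-empty network tensor. runJob() rejects the job if
    // the buffer counts or sizes do not match the tensors allocated by the
    // interpreter.
    std::vector<DataPtr> input = {DataPtr(ifm0, sizeof(ifm0)), DataPtr(ifm1, sizeof(ifm1))};
    std::vector<DataPtr> output = {DataPtr(ofm0, sizeof(ofm0))};

    // An empty expectedOutput vector skips the byte-for-byte verification.
    std::vector<DataPtr> expectedOutput;

    // numBytesToPrint = 0 suppresses the output_begin/output_end dump.
    InferenceJob job("example", networkModel, input, output, expectedOutput, 0);

    return process.runJob(job);
}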