From aa5e1f6c960b8a88f389ba70dd200d6dacd95a03 Mon Sep 17 00:00:00 2001
From: Kshitij Sisodia <kshitij.sisodia@arm.com>
Date: Fri, 24 Sep 2021 14:42:08 +0100
Subject: MLECO-2345: Adding dynamic load support for FVPs

With this patch, the generic inference runner use-case can be
configured to accept the model tflite file at run-time via
the FVP's command line parameters. The same is true for the
IFM, and the inference results (OFM) can be dumped out too.

NOTE: this change only supports the FVP. The FPGA implementation
does not allow additional data to be loaded at run-time, so the
changes in this patch are not useful there.

Change-Id: I1318bd5b0cfb7bb635ced6fe58d22c3e401d2547
---
 .../bare-metal/bsp/mem_layout/mps3-sse-300.ld      |   7 +
 .../bare-metal/bsp/mem_layout/mps3-sse-300.sct     |  24 +++
 source/application/main/UseCaseCommonUtils.cc      |  13 +-
 source/use_case/inference_runner/src/TestModel.cc  |  40 +++--
 .../inference_runner/src/UseCaseHandler.cc         | 179 ++++++++++++++-------
 source/use_case/inference_runner/usecase.cmake     |  54 +++++--
 source/use_case/vww/usecase.cmake                  |  10 +-
 7 files changed, 247 insertions(+), 80 deletions(-)

(limited to 'source')

diff --git a/source/application/hal/platforms/bare-metal/bsp/mem_layout/mps3-sse-300.ld b/source/application/hal/platforms/bare-metal/bsp/mem_layout/mps3-sse-300.ld
index ceaff7d..e5c2a14 100644
--- a/source/application/hal/platforms/bare-metal/bsp/mem_layout/mps3-sse-300.ld
+++ b/source/application/hal/platforms/bare-metal/bsp/mem_layout/mps3-sse-300.ld
@@ -26,6 +26,13 @@ MEMORY
   BRAM  (rwx) : ORIGIN = 0x11000000, LENGTH = 0x00200000
   SRAM  (rwx) : ORIGIN = 0x31000000, LENGTH = 0x00400000
   DDR   (rwx) : ORIGIN = 0x70000000, LENGTH = 0x02000000
+
+  /* Dynamic load/dump regions declared for use by FVP only (model and IFM
+   * are read; OFM is an output dump target, hence writable). These regions are
+   * mentioned in the CMake subsystem profile. Do not change them in isolation. */
+  DDR_dynamic_model (rx) : ORIGIN = 0x90000000, LENGTH = 0x02000000
+  DDR_dynamic_ifm   (rx) : ORIGIN = 0x92000000, LENGTH = 0x01000000
+  DDR_dynamic_ofm   (rw) : ORIGIN = 0x93000000, LENGTH = 0x01000000
 }
 
 /* Linker script to place sections and symbol values. Should be used together
diff --git a/source/application/hal/platforms/bare-metal/bsp/mem_layout/mps3-sse-300.sct b/source/application/hal/platforms/bare-metal/bsp/mem_layout/mps3-sse-300.sct
index dd53a57..4760875 100644
--- a/source/application/hal/platforms/bare-metal/bsp/mem_layout/mps3-sse-300.sct
+++ b/source/application/hal/platforms/bare-metal/bsp/mem_layout/mps3-sse-300.sct
@@ -115,4 +115,28 @@ LOAD_REGION_1       0x70000000                  0x02000000
     ;-----------------------------------------------------
     ARM_LIB_HEAP    0x11040000 EMPTY ALIGN 8    0x000F0000
     {}
+
+    ;-----------------------------------------------------
+    ; The following regions are for use by the FVP to
+    ; allow loading or dumping of dynamic data into or
+    ; from the memory. These regions are mentioned in
+    ; the CMake subsystem profile. Do not change the
+    ; addresses and sizes below in isolation.
+    ;-----------------------------------------------------
+    ; 32 MiB of model space for run-time load of model
+    ;-----------------------------------------------------
+    runtime_model   0x90000000 EMPTY ALIGN 16   0x02000000
+    {}
+
+    ;-----------------------------------------------------
+    ; 16 MiB of IFM space for run-time loading (FVP only)
+    ;-----------------------------------------------------
+    runtime_ifm     0x92000000 EMPTY ALIGN 16   0x01000000
+    {}
+
+    ;-----------------------------------------------------
+    ; 16 MiB of OFM space for run-time dumping (FVP only)
+    ;-----------------------------------------------------
+    runtime_ofm     0x93000000 EMPTY ALIGN 16   0x01000000
+    {}
 }
diff --git a/source/application/main/UseCaseCommonUtils.cc b/source/application/main/UseCaseCommonUtils.cc
index 9834475..a99e05d 100644
--- a/source/application/main/UseCaseCommonUtils.cc
+++ b/source/application/main/UseCaseCommonUtils.cc
@@ -136,6 +136,7 @@ bool image::PresentInferenceResult(hal_platform &platform,
 
 void IncrementAppCtxIfmIdx(arm::app::ApplicationContext& ctx, std::string useCase)
 {
+#if NUMBER_OF_FILES > 0
     auto curImIdx = ctx.Get<uint32_t>(useCase);
 
     if (curImIdx + 1 >= NUMBER_OF_FILES) {
@@ -144,10 +145,15 @@ void IncrementAppCtxIfmIdx(arm::app::ApplicationContext& ctx, std::string useCas
     }
     ++curImIdx;
     ctx.Set<uint32_t>(useCase, curImIdx);
+#else /* NUMBER_OF_FILES > 0 */
+    UNUSED(ctx);
+    UNUSED(useCase);
+#endif /* NUMBER_OF_FILES > 0 */
 }
 
 bool SetAppCtxIfmIdx(arm::app::ApplicationContext& ctx, uint32_t idx, std::string ctxIfmName)
 {
+#if NUMBER_OF_FILES > 0
     if (idx >= NUMBER_OF_FILES) {
         printf_err("Invalid idx %" PRIu32 " (expected less than %u)\n",
                    idx, NUMBER_OF_FILES);
@@ -155,9 +161,14 @@ bool SetAppCtxIfmIdx(arm::app::ApplicationContext& ctx, uint32_t idx, std::strin
     }
     ctx.Set<uint32_t>(ctxIfmName, idx);
     return true;
+#else /* NUMBER_OF_FILES > 0 */
+    UNUSED(ctx);
+    UNUSED(idx);
+    UNUSED(ctxIfmName);
+    return false;
+#endif /* NUMBER_OF_FILES > 0 */
 }
 
-
 namespace arm {
 namespace app {
 
diff --git a/source/use_case/inference_runner/src/TestModel.cc b/source/use_case/inference_runner/src/TestModel.cc
index 4512a9b..274790f 100644
--- a/source/use_case/inference_runner/src/TestModel.cc
+++ b/source/use_case/inference_runner/src/TestModel.cc
@@ -23,14 +23,34 @@ const tflite::AllOpsResolver& arm::app::TestModel::GetOpResolver()
     return this->m_opResolver;
 }
 
-extern uint8_t* GetModelPointer();
-const uint8_t* arm::app::TestModel::ModelPointer()
-{
-    return GetModelPointer();
-}
+#if defined(DYNAMIC_MODEL_BASE) && defined(DYNAMIC_MODEL_SIZE)
 
-extern size_t GetModelLen();
-size_t arm::app::TestModel::ModelSize()
-{
-    return GetModelLen();
-}
\ No newline at end of file
+    const uint8_t* arm::app::TestModel::ModelPointer()
+    {
+        info("Model pointer: 0x%08x\n", DYNAMIC_MODEL_BASE);
+        return reinterpret_cast<uint8_t *>(DYNAMIC_MODEL_BASE);
+    }
+
+    size_t arm::app::TestModel::ModelSize()
+    {
+        /* TODO: Can we get the actual model size here somehow?
+         * Currently we return the reserved space. It is possible to do
+         * so by reading the memory pattern but it will not be reliable. */
+        return static_cast<size_t>(DYNAMIC_MODEL_SIZE);
+    }
+
+#else /* defined(DYNAMIC_MODEL_BASE) && defined(DYNAMIC_MODEL_SIZE) */
+
+    extern uint8_t* GetModelPointer();
+    const uint8_t* arm::app::TestModel::ModelPointer()
+    {
+        return GetModelPointer();
+    }
+
+    extern size_t GetModelLen();
+    size_t arm::app::TestModel::ModelSize()
+    {
+        return GetModelLen();
+    }
+
+#endif /* defined(DYNAMIC_MODEL_BASE) && defined(DYNAMIC_MODEL_SIZE) */
diff --git a/source/use_case/inference_runner/src/UseCaseHandler.cc b/source/use_case/inference_runner/src/UseCaseHandler.cc
index b98b1c5..66b7042 100644
--- a/source/use_case/inference_runner/src/UseCaseHandler.cc
+++ b/source/use_case/inference_runner/src/UseCaseHandler.cc
@@ -25,81 +25,150 @@
 namespace arm {
 namespace app {
 
-    bool RunInferenceHandler(ApplicationContext& ctx)
-    {
-        auto& platform = ctx.Get<hal_platform&>("platform");
-        auto& profiler = ctx.Get<Profiler&>("profiler");
-        auto& model = ctx.Get<Model&>("model");
-
-        constexpr uint32_t dataPsnTxtInfStartX = 150;
-        constexpr uint32_t dataPsnTxtInfStartY = 40;
-
-        if (!model.IsInited()) {
-            printf_err("Model is not initialised! Terminating processing.\n");
-            return false;
-        }
+static void PopulateInputTensor(const Model& model)
+{
+    const size_t numInputs = model.GetNumInputs();
 
-        const size_t numInputs = model.GetNumInputs();
+#if defined(DYNAMIC_IFM_BASE) && defined(DYNAMIC_IFM_SIZE)
+    size_t curInputIdx = 0; /* Running byte offset into the dynamic IFM region. */
+#endif /* defined(DYNAMIC_IFM_BASE) && defined(DYNAMIC_IFM_SIZE) */
 
-#if VERIFY_TEST_OUTPUT
-        info("Initial input tensors values:\n");
-        for (size_t inputIndex = 0; inputIndex < model.GetNumInputs(); inputIndex++) {
-            arm::app::DumpTensor(model.GetInputTensor(inputIndex));
-        }
-        info("Initial output tensors values:\n");
-        for (size_t outputIndex = 0; outputIndex < model.GetNumOutputs(); outputIndex++) {
-            arm::app::DumpTensor(model.GetOutputTensor(outputIndex));
-        }
-#endif /* VERIFY_TEST_OUTPUT */
+    /* Populate each input tensor from the dynamic IFM region, else with random data. */
+    for (size_t inputIndex = 0; inputIndex < numInputs; inputIndex++) {
 
-        /* Populate each input tensor with random data. */
-        for (size_t inputIndex = 0; inputIndex < numInputs; inputIndex++) {
+        TfLiteTensor* inputTensor = model.GetInputTensor(inputIndex);
 
-            TfLiteTensor* inputTensor = model.GetInputTensor(inputIndex);
+        debug("Populating input tensor %zu@%p\n", inputIndex, inputTensor);
+        debug("Total input size to be populated: %zu\n", inputTensor->bytes);
 
-            debug("Populating input tensor %zu@%p\n", inputIndex, inputTensor);
-            debug("Total input size to be populated: %zu\n", inputTensor->bytes);
+        if (inputTensor->bytes > 0) {
 
-            /* Create a random input. */
-            if (inputTensor->bytes > 0) {
+            uint8_t* tData = tflite::GetTensorData<uint8_t>(inputTensor);
 
-                uint8_t* tData = tflite::GetTensorData<uint8_t>(inputTensor);
+#if defined(DYNAMIC_IFM_BASE) && defined(DYNAMIC_IFM_SIZE)
+            if (curInputIdx + inputTensor->bytes > DYNAMIC_IFM_SIZE) {
+                printf_err("IFM reserved buffer size insufficient\n");
+                return;
+            }
+            memcpy(tData, reinterpret_cast<void *>(DYNAMIC_IFM_BASE + curInputIdx),
+                    inputTensor->bytes);
+            curInputIdx += inputTensor->bytes;
+#else /* defined(DYNAMIC_IFM_BASE) && defined(DYNAMIC_IFM_SIZE) */
+            /* Create a random input. */
+            for (size_t j = 0; j < inputTensor->bytes; ++j) {
+                tData[j] = static_cast<uint8_t>(std::rand() & 0xFF);
+            }
+#endif /* defined(DYNAMIC_IFM_BASE) && defined(DYNAMIC_IFM_SIZE) */
+        }
+    }
 
-                for (size_t j = 0; j < inputTensor->bytes; ++j) {
-                    tData[j] = static_cast<uint8_t>(std::rand() & 0xFF);
-                }
+#if defined(DYNAMIC_IFM_BASE) && defined(DYNAMIC_IFM_SIZE)
+    info("%zu input tensor/s populated with %zu bytes with data read from 0x%08x\n",
+        numInputs, curInputIdx, DYNAMIC_IFM_BASE);
+#endif /* defined(DYNAMIC_IFM_BASE) && defined(DYNAMIC_IFM_SIZE) */
+}
+
+#if defined (DYNAMIC_OFM_BASE) && defined(DYNAMIC_OFM_SIZE)
+static void PopulateDynamicOfm(const Model& model)
+{
+    /* Dump the output tensors back-to-back to a known memory location */
+    const size_t numOutputs = model.GetNumOutputs();
+    size_t curCopyIdx = 0; /* Running byte offset into the OFM region. */
+    uint8_t* const dstPtr = reinterpret_cast<uint8_t *>(DYNAMIC_OFM_BASE);
+
+    for (size_t outputIdx = 0; outputIdx < numOutputs; ++outputIdx) {
+        TfLiteTensor* outputTensor = model.GetOutputTensor(outputIdx);
+        uint8_t* const tData = tflite::GetTensorData<uint8_t>(outputTensor);
+
+        if (tData && outputTensor->bytes > 0) {
+            if (curCopyIdx + outputTensor->bytes > DYNAMIC_OFM_SIZE) {
+                printf_err("OFM reserved buffer size insufficient\n");
+                return;
             }
+            memcpy(dstPtr + curCopyIdx, tData, outputTensor->bytes);
+            curCopyIdx += outputTensor->bytes;
         }
+    }
 
-        /* Strings for presentation/logging. */
-        std::string str_inf{"Running inference... "};
+    info("%zu output tensor/s worth %zu bytes copied to 0x%08x\n",
+        numOutputs, curCopyIdx, DYNAMIC_OFM_BASE);
+}
+#endif /* defined (DYNAMIC_OFM_BASE) && defined(DYNAMIC_OFM_SIZE) */
 
-        /* Display message on the LCD - inference running. */
-        platform.data_psn->present_data_text(
-                                str_inf.c_str(), str_inf.size(),
-                                dataPsnTxtInfStartX, dataPsnTxtInfStartY, 0);
+#if VERIFY_TEST_OUTPUT
+static void DumpInputs(const Model& model, const char* message)
+{
+    info("%s\n", message);
+    for (size_t inputIndex = 0; inputIndex < model.GetNumInputs(); inputIndex++) {
+        arm::app::DumpTensor(model.GetInputTensor(inputIndex));
+    }
+}
 
-        if (!RunInference(model, profiler)) {
-            return false;
-        }
+static void DumpOutputs(const Model& model, const char* message)
+{
+    info("%s\n", message);
+    for (size_t outputIndex = 0; outputIndex < model.GetNumOutputs(); outputIndex++) {
+        arm::app::DumpTensor(model.GetOutputTensor(outputIndex));
+    }
+}
+#endif /* VERIFY_TEST_OUTPUT */
 
-        /* Erase. */
-        str_inf = std::string(str_inf.size(), ' ');
-        platform.data_psn->present_data_text(
-                                str_inf.c_str(), str_inf.size(),
-                                dataPsnTxtInfStartX, dataPsnTxtInfStartY, 0);
+bool RunInferenceHandler(ApplicationContext& ctx)
+{
+    auto& platform = ctx.Get<hal_platform&>("platform");
+    auto& profiler = ctx.Get<Profiler&>("profiler");
+    auto& model = ctx.Get<Model&>("model");
 
-        info("Final results:\n");
-        profiler.PrintProfilingResult();
+    constexpr uint32_t dataPsnTxtInfStartX = 150;
+    constexpr uint32_t dataPsnTxtInfStartY = 40;
+
+    if (!model.IsInited()) {
+        printf_err("Model is not initialised! Terminating processing.\n");
+        return false;
+    }
 
 #if VERIFY_TEST_OUTPUT
-        for (size_t outputIndex = 0; outputIndex < model.GetNumOutputs(); outputIndex++) {
-            arm::app::DumpTensor(model.GetOutputTensor(outputIndex));
-        }
+    DumpInputs(model, "Initial input tensors values");
+    DumpOutputs(model, "Initial output tensors values");
+#endif /* VERIFY_TEST_OUTPUT */
+
+    PopulateInputTensor(model);
+
+#if VERIFY_TEST_OUTPUT
+    DumpInputs(model, "input tensors populated");
 #endif /* VERIFY_TEST_OUTPUT */
 
-        return true;
+    /* Strings for presentation/logging. */
+    std::string str_inf{"Running inference... "};
+
+    /* Display message on the LCD - inference running. */
+    platform.data_psn->present_data_text(
+                            str_inf.c_str(), str_inf.size(),
+                            dataPsnTxtInfStartX, dataPsnTxtInfStartY, 0);
+
+    if (!RunInference(model, profiler)) {
+        return false;
     }
 
+    /* Erase. */
+    str_inf = std::string(str_inf.size(), ' ');
+    platform.data_psn->present_data_text(
+                            str_inf.c_str(), str_inf.size(),
+                            dataPsnTxtInfStartX, dataPsnTxtInfStartY, 0);
+
+    info("Final results:\n");
+    profiler.PrintProfilingResult();
+
+#if VERIFY_TEST_OUTPUT
+    DumpOutputs(model, "output tensors post inference");
+#endif /* VERIFY_TEST_OUTPUT */
+
+#if defined (DYNAMIC_OFM_BASE) && defined(DYNAMIC_OFM_SIZE)
+    PopulateDynamicOfm(model);
+#endif /* defined (DYNAMIC_OFM_BASE) && defined(DYNAMIC_OFM_SIZE) */
+
+    return true;
+}
+
 } /* namespace app */
 } /* namespace arm */
diff --git a/source/use_case/inference_runner/usecase.cmake b/source/use_case/inference_runner/usecase.cmake
index 79bec23..bab5c65 100644
--- a/source/use_case/inference_runner/usecase.cmake
+++ b/source/use_case/inference_runner/usecase.cmake
@@ -27,12 +27,48 @@ else()
     set(DEFAULT_MODEL_PATH      ${DEFAULT_MODEL_DIR}/dnn_s_quantized.tflite)
 endif()
 
-USER_OPTION(${use_case}_MODEL_TFLITE_PATH "NN models file to be used in the evaluation application. Model files must be in tflite format."
-    ${DEFAULT_MODEL_PATH}
-    FILEPATH)
-
-# Generate model file
-generate_tflite_code(
-    MODEL_PATH ${${use_case}_MODEL_TFLITE_PATH}
-    DESTINATION ${SRC_GEN_DIR}
-)
+if (NOT TARGET_PLATFORM STREQUAL native)
+    USER_OPTION(
+        ${use_case}_DYNAMIC_MEM_LOAD_ENABLED
+        "Allow dynamically loading model and ifm at runtime (valid for FVP only)"
+        OFF
+        BOOL)
+endif()
+
+# For non-native targets, for use with the FVPs only.
+if (${${use_case}_DYNAMIC_MEM_LOAD_ENABLED})
+
+    message(STATUS "NOTE: Dynamic memory load enabled. This ${use_case} application will run on FVP only.")
+
+    # Both the base address and the size must be provided by the platform profile.
+    if (NOT DEFINED DYNAMIC_MODEL_BASE OR NOT DEFINED DYNAMIC_MODEL_SIZE)
+        message(FATAL_ERROR "${TARGET_PLATFORM} does not support dynamic load for model files.")
+    endif()
+    set(${use_case}_COMPILE_DEFS
+        "DYNAMIC_MODEL_BASE=${DYNAMIC_MODEL_BASE};DYNAMIC_MODEL_SIZE=${DYNAMIC_MODEL_SIZE}")
+
+    if (DEFINED DYNAMIC_IFM_BASE AND DEFINED DYNAMIC_IFM_SIZE)
+        string(APPEND ${use_case}_COMPILE_DEFS
+            ";DYNAMIC_IFM_BASE=${DYNAMIC_IFM_BASE};DYNAMIC_IFM_SIZE=${DYNAMIC_IFM_SIZE}")
+    else()
+        message(WARNING "${TARGET_PLATFORM} does not support dynamic load for input tensors.")
+    endif()
+
+    if (DEFINED DYNAMIC_OFM_BASE AND DEFINED DYNAMIC_OFM_SIZE)
+        string(APPEND ${use_case}_COMPILE_DEFS
+            ";DYNAMIC_OFM_BASE=${DYNAMIC_OFM_BASE};DYNAMIC_OFM_SIZE=${DYNAMIC_OFM_SIZE}")
+    else()
+        message(WARNING "${TARGET_PLATFORM} does not support dumping of output tensors.")
+    endif()
+
+else()
+    USER_OPTION(${use_case}_MODEL_TFLITE_PATH "NN models file to be used in the evaluation application. Model files must be in tflite format."
+        ${DEFAULT_MODEL_PATH}
+        FILEPATH)
+
+    # Generate model file
+    generate_tflite_code(
+        MODEL_PATH ${${use_case}_MODEL_TFLITE_PATH}
+        DESTINATION ${SRC_GEN_DIR}
+    )
+endif()
diff --git a/source/use_case/vww/usecase.cmake b/source/use_case/vww/usecase.cmake
index 9a732b7..0201aed 100644
--- a/source/use_case/vww/usecase.cmake
+++ b/source/use_case/vww/usecase.cmake
@@ -1,18 +1,18 @@
 #  Copyright (c) 2021 Arm Limited. All rights reserved.
 #  SPDX-License-Identifier: Apache-2.0
-# 
+#
 #  Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
 #  You may obtain a copy of the License at
-# 
+#
 #      http://www.apache.org/licenses/LICENSE-2.0
-# 
+#
 #  Unless required by applicable law or agreed to in writing, software
 #  distributed under the License is distributed on an "AS IS" BASIS,
 #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
- 
+
 
 USER_OPTION(${use_case}_FILE_PATH "Directory with custom image files, or path to a single image file, to use in the evaluation application"
     ${CMAKE_CURRENT_SOURCE_DIR}/resources/${use_case}/samples/
@@ -30,7 +30,7 @@ USER_OPTION(${use_case}_ACTIVATION_BUF_SZ "Activation buffer size for the chosen
     0x00200000
     STRING)
 
-if (ETHOS_U55_ENABLED)
+if (ETHOS_U_NPU_ENABLED)
     set(DEFAULT_MODEL_PATH      ${DEFAULT_MODEL_DIR}/vww4_128_128_INT8_vela_H128.tflite)
 else()
     set(DEFAULT_MODEL_PATH      ${DEFAULT_MODEL_DIR}/vww4_128_128_INT8.tflite)
-- 
cgit v1.2.1