6 files changed, 304 insertions, 19 deletions
diff --git a/source/use_case/img_class/usecase.cmake b/source/use_case/img_class/usecase.cmake
index 4c25989..f18522c 100644
--- a/source/use_case/img_class/usecase.cmake
+++ b/source/use_case/img_class/usecase.cmake
@@ -1,5 +1,5 @@
 #----------------------------------------------------------------------------
-#  SPDX-FileCopyrightText: Copyright 2021 Arm Limited and/or its affiliates <open-source-office@arm.com>
+#  SPDX-FileCopyrightText: Copyright 2021, 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
 #  SPDX-License-Identifier: Apache-2.0
 #
 #  Licensed under the Apache License, Version 2.0 (the "License");
@@ -28,10 +28,12 @@ USER_OPTION(${use_case}_LABELS_TXT_FILE "Labels' txt file for the chosen model"
     FILEPATH)
 
 # Generate input files
-generate_images_code("${${use_case}_FILE_PATH}"
-                     ${SRC_GEN_DIR}
-                     ${INC_GEN_DIR}
-                     "${${use_case}_IMAGE_SIZE}")
+generate_images_code(
+    INPUT_DIR   "${${use_case}_FILE_PATH}"
+    SRC_OUT     ${SRC_GEN_DIR}
+    HDR_OUT     ${INC_GEN_DIR}
+    IMG_SIZE    "${${use_case}_IMAGE_SIZE}"
+)
 
 # Generate labels file
 set(${use_case}_LABELS_CPP_FILE Labels)
diff --git a/source/use_case/object_detection/include/UseCaseHandler.hpp b/source/use_case/object_detection/include/UseCaseHandler.hpp
index 9fe716f..43fb6bf 100644
--- a/source/use_case/object_detection/include/UseCaseHandler.hpp
+++ b/source/use_case/object_detection/include/UseCaseHandler.hpp
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright 2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
+ * SPDX-FileCopyrightText: Copyright 2022, 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
  * SPDX-License-Identifier: Apache-2.0
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
@@ -17,11 +17,39 @@
 #ifndef OBJ_DET_HANDLER_HPP
 #define OBJ_DET_HANDLER_HPP
 
+#if VSI_ENABLED
+#include "DetectionResult.hpp"
+#include <vector>
+#endif
+
+#include <cstdint>
 #include "AppContext.hpp"
 
 namespace arm {
 namespace app {
 
+#if VSI_ENABLED
+    using OdResults = object_detection::DetectionResult;
+
+    /**
+     * @brief           Draws the box(es) detected by the model onto the image.
+     * @param[in,out]   image        Pointer to the image to draw on.
+     * @param[in]       imageWidth   Image width.
+     * @param[in]       imageHeight  Image height.
+     * @param[in]       results      A vector of detection results.
+     **/
+    void DrawDetectionBoxesVsi(uint8_t* image,
+                               const uint32_t imageWidth,
+                               const uint32_t imageHeight,
+                               const std::vector<OdResults>& results);
+    /**
+     * @brief       Handles the inference event when using VSI.
+     * @param[in]   ctx        Reference to the application context.
+     * @return      true or false based on execution success.
+     **/
+    bool ObjectDetectionHandlerVsi(ApplicationContext& ctx);
+#endif
+
     /**
      * @brief       Handles the inference event.
      * @param[in]   ctx        Pointer to the application context.
diff --git a/source/use_case/object_detection/src/MainLoop.cc b/source/use_case/object_detection/src/MainLoop.cc
index 4735bcb..fe21de6 100644
--- a/source/use_case/object_detection/src/MainLoop.cc
+++ b/source/use_case/object_detection/src/MainLoop.cc
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright 2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
+ * SPDX-FileCopyrightText: Copyright 2022, 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
  * SPDX-License-Identifier: Apache-2.0
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
@@ -41,7 +41,12 @@ static void DisplayDetectionMenu()
     printf("  %u. Run detection ifm at chosen index\n", common::MENU_OPT_RUN_INF_CHOSEN);
     printf("  %u. Run detection on all ifm\n", common::MENU_OPT_RUN_INF_ALL);
     printf("  %u. Show NN model info\n", common::MENU_OPT_SHOW_MODEL_INFO);
-    printf("  %u. List ifm\n\n", common::MENU_OPT_LIST_IFM);
+    printf("  %u. List ifm\n", common::MENU_OPT_LIST_IFM);
+#if VSI_ENABLED
+    fflush(stdout);
+    printf("  %u. Run detection using VSI as input\n", common::MENU_OPT_RUN_INF_VSI);
+#endif
+    printf("\n");
     printf("  Choice: ");
     fflush(stdout);
 }
@@ -99,6 +104,11 @@ void main_loop()
             case common::MENU_OPT_LIST_IFM:
                 executionSuccessful = ListFilesHandler(caseContext);
                 break;
+#if VSI_ENABLED
+            case common::MENU_OPT_RUN_INF_VSI:
+                executionSuccessful = ObjectDetectionHandlerVsi(caseContext);
+                break;
+#endif
             default:
                 printf("Incorrect choice, try again.");
                 break;
diff --git a/source/use_case/object_detection/src/UseCaseHandler.cc b/source/use_case/object_detection/src/UseCaseHandler.cc
index 9330187..1a20db5 100644
--- a/source/use_case/object_detection/src/UseCaseHandler.cc
+++ b/source/use_case/object_detection/src/UseCaseHandler.cc
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright 2022 Arm Limited and/or its affiliates
+ * SPDX-FileCopyrightText: Copyright 2022, 2024 Arm Limited and/or its affiliates
  * <open-source-office@arm.com> SPDX-License-Identifier: Apache-2.0
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
@@ -28,6 +28,235 @@
 namespace arm {
 namespace app {
 
+#if VSI_ENABLED
+#include "video_drv.h"                /* Video Driver API. */
+
+    /**
+     * @brief Draws a box in the image using the object detection result object.
+     *        The box is clipped to the image so that no write lands outside it;
+     *        only the first byte of each pixel on the outline is set to 255.
+     *
+     * @param[out] imageData    Pointer to the start of the image (3 bytes per pixel).
+     * @param[in]  width        Image width.
+     * @param[in]  height       Image height.
+     * @param[in]  result       Object detection result.
+     */
+    static void DrawBox(uint8_t* imageData,
+                        const uint32_t width,
+                        const uint32_t height,
+                        const OdResults& result)
+    {
+        if (nullptr == imageData || 0 == width || 0 == height) {
+            return;
+        }
+
+        /* Clamp a coordinate to [0, hi]; the wide type avoids overflow in x0 + w. */
+        const auto clampTo = [](const int64_t v, const int64_t hi) -> int64_t {
+            return v < 0 ? 0 : (v > hi ? hi : v);
+        };
+        const int64_t maxX = static_cast<int64_t>(width) - 1;
+        const int64_t maxY = static_cast<int64_t>(height) - 1;
+        const int64_t x0   = clampTo(result.m_x0, maxX);
+        const int64_t y0   = clampTo(result.m_y0, maxY);
+        const int64_t x1   = clampTo(static_cast<int64_t>(result.m_x0) + result.m_w, maxX);
+        const int64_t y1   = clampTo(static_cast<int64_t>(result.m_y0) + result.m_h, maxY);
+
+        const int64_t step = static_cast<int64_t>(width) * 3;
+
+        /* Top and bottom edges: first byte of each pixel in columns [x0, x1). */
+        for (int64_t col = x0; col < x1; ++col) {
+            imageData[(y0 * step) + (col * 3)] = 255;
+            imageData[(y1 * step) + (col * 3)] = 255;
+        }
+
+        /* Left and right edges: first byte of each pixel in rows [y0, y1). */
+        for (int64_t row = y0; row < y1; ++row) {
+            imageData[(row * step) + (x0 * 3)] = 255;
+            imageData[(row * step) + (x1 * 3)] = 255;
+        }
+    }
+
+    /* Draws every detection box onto the image and prints each box as [x0, y0, w, h]. */
+    void DrawDetectionBoxesVsi(uint8_t* image,
+                               const uint32_t imageWidth,
+                               const uint32_t imageHeight,
+                               const std::vector<OdResults>& results)
+    {
+        for (auto it = results.cbegin(); it != results.cend(); ++it) {
+            const OdResults& box = *it;
+
+            /* Mark the box on the frame first, then report its geometry. */
+            DrawBox(image, imageWidth, imageHeight, box);
+            printf("Detection :: [%d, %d, %d, %d]\n",
+                   box.m_x0, box.m_y0, box.m_w, box.m_h);
+        }
+    }
+
+    /* Object detection VSI inference handler: reads frames, runs detection, writes annotated frames. */
+    bool ObjectDetectionHandlerVsi(ApplicationContext& ctx)
+    {
+        /* Image buffer. */
+        static uint8_t ImageBuf[IMAGE_DATA_SIZE];
+        static uint8_t ImageOut[IMAGE_DATA_SIZE];
+
+        /* Model object creation and initialisation. */
+        auto& model = ctx.Get<Model&>("model");
+
+        TfLiteTensor* inputTensor   = model.GetInputTensor(0);
+        TfLiteTensor* outputTensor0 = model.GetOutputTensor(0);
+        TfLiteTensor* outputTensor1 = model.GetOutputTensor(1);
+
+        if (!inputTensor->dims) {
+            printf_err("Invalid input tensor dims\n");
+            return false;
+        } else if (inputTensor->dims->size < 3) {
+            printf_err("Input tensor dimension should be >= 3\n");
+            return false;
+        }
+
+        TfLiteIntArray* inputShape = model.GetInputShape(0);
+        const int inputImgCols = inputShape->data[arm::app::YoloFastestModel::ms_inputColsIdx];
+        const int inputImgRows = inputShape->data[arm::app::YoloFastestModel::ms_inputRowsIdx];
+
+        /* Set up pre- and post-processing. */
+        arm::app::DetectorPreProcess preProcess =
+            arm::app::DetectorPreProcess(inputTensor, true, model.IsDataSigned());
+
+        std::vector<arm::app::OdResults> results;
+        const arm::app::object_detection::PostProcessParams postProcessParams{
+            inputImgRows,
+            inputImgCols,
+            arm::app::object_detection::originalImageSize,
+            arm::app::object_detection::anchor1,
+            arm::app::object_detection::anchor2};
+        arm::app::DetectorPostProcess postProcess =
+            arm::app::DetectorPostProcess(outputTensor0, outputTensor1, results, postProcessParams);
+
+        /* Pre-processing consumes at most IMAGE_DATA_SIZE bytes (the size of one frame buffer). */
+        const size_t imgSz = inputTensor->bytes < IMAGE_DATA_SIZE ?
+                                                                  inputTensor->bytes : IMAGE_DATA_SIZE;
+
+        /* Configure Input Video. */
+        if (VideoDrv_Configure(VIDEO_DRV_IN0,
+                               arm::app::object_detection::originalImageSize,
+                               arm::app::object_detection::originalImageSize,
+                               COLOR_RGB888, 24U) != VIDEO_DRV_OK) {
+            printf_err("Failed to configure video input\n");
+            return false;
+        }
+
+        /* Set Input Video buffer. */
+        if (VideoDrv_SetBuf(VIDEO_DRV_IN0, ImageBuf, IMAGE_DATA_SIZE) != VIDEO_DRV_OK) {
+            printf_err("Failed to set buffer for video input\n");
+            return false;
+        }
+
+        /* The output goes to the display by default; when using AVH it can be sent to a file
+         * instead by calling VideoDrv_SetFile(VIDEO_DRV_OUT0, <file name>) at this point. */
+        /* Configure Output Video. */
+        if (VideoDrv_Configure(VIDEO_DRV_OUT0,
+                               arm::app::object_detection::originalImageSize,
+                               arm::app::object_detection::originalImageSize,
+                               COLOR_RGB888, 24U) != VIDEO_DRV_OK) {
+            printf_err("Failed to configure video output\n");
+            return false;
+        }
+
+        /* Set Output Video buffer. */
+        if (VideoDrv_SetBuf(VIDEO_DRV_OUT0, ImageOut, IMAGE_DATA_SIZE) != VIDEO_DRV_OK) {
+            printf_err("Failed to set buffer for video output\n");
+            return false;
+        }
+
+        auto imgCount = ctx.Get<uint32_t>("imgIndex");
+
+        while (true) {
+#if VSI_IMAGE_INPUT
+            if (VideoDrv_SetFile(VIDEO_DRV_IN0, GetFilePath(imgCount)) != VIDEO_DRV_OK) {
+                printf_err("Failed to set filename for video input\n");
+                return false;
+            }
+#endif
+
+            VideoDrv_Status_t status;
+            results.clear();
+
+            /* Start video capture (single frame). */
+            if (VideoDrv_StreamStart(VIDEO_DRV_IN0, VIDEO_DRV_MODE_SINGLE) != VIDEO_DRV_OK) {
+                printf_err("Failed to start video capture\n");
+                return false;
+            }
+
+            /* Wait for video input frame. */
+            do {
+                status = VideoDrv_GetStatus(VIDEO_DRV_IN0);
+            } while (status.buf_empty != 0U);
+
+            /* Get input video frame buffer. */
+            void* const imgFrame = VideoDrv_GetFrameBuf(VIDEO_DRV_IN0);
+            if (!imgFrame) {
+                printf_err("Failed to get video input frame\n");
+                return false;
+            }
+
+            /* Run the pre-processing, inference and post-processing. */
+            if (!preProcess.DoPreProcess(imgFrame, imgSz)) {
+                printf_err("Pre-processing failed.\n");
+                return false;
+            }
+
+            /* Run inference over this image. */
+            printf("\rImage %" PRIu32 "; ", ++imgCount);
+
+            if (!model.RunInference()) {
+                printf_err("Inference failed.\n");
+                return false;
+            }
+
+            if (!postProcess.DoPostProcess()) {
+                printf_err("Post-processing failed.\n");
+                return false;
+            }
+
+            arm::app::DrawDetectionBoxesVsi(static_cast<uint8_t*>(imgFrame), inputImgCols, inputImgRows, results);
+
+            /* Get output video frame buffer. */
+            void* const outFrame = VideoDrv_GetFrameBuf(VIDEO_DRV_OUT0);
+            if (!outFrame) {
+                printf_err("Failed to get video output frame\n");
+                return false;
+            }
+
+            /* Copy image frame with detection boxes to output frame buffer. */
+            memcpy(outFrame, imgFrame, IMAGE_DATA_SIZE);
+
+            /* Release input frame only now: the draw and the copy above still accessed it. */
+            VideoDrv_ReleaseFrame(VIDEO_DRV_IN0);
+
+            /* Release output frame. */
+            VideoDrv_ReleaseFrame(VIDEO_DRV_OUT0);
+
+            /* Start video output (single frame). */
+            if (VideoDrv_StreamStart(VIDEO_DRV_OUT0, VIDEO_DRV_MODE_SINGLE) != VIDEO_DRV_OK) {
+                printf_err("Failed to start video output\n");
+                return false;
+            }
+
+            /* Check for end of stream (when using AVH with file as Video input). */
+            if (status.eos != 0U) {
+                while (VideoDrv_GetStatus(VIDEO_DRV_OUT0).buf_empty == 0U);
+                break;
+            }
+        }
+
+        IncrementAppCtxIfmIdx(ctx, "imgIndex");
+
+        /* The video interface is left initialised; VideoDrv_Uninitialize() is not called here. */
+        return true;
+    }
+
+#endif
+
     /**
      * @brief           Presents inference results along using the data presentation
      *                  object.
diff --git a/source/use_case/object_detection/usecase.cmake b/source/use_case/object_detection/usecase.cmake
index a36d3e0..cb0236b 100644
--- a/source/use_case/object_detection/usecase.cmake
+++ b/source/use_case/object_detection/usecase.cmake
@@ -1,5 +1,5 @@
 #----------------------------------------------------------------------------
-#  SPDX-FileCopyrightText: Copyright 2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
+#  SPDX-FileCopyrightText: Copyright 2022, 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
 #  SPDX-License-Identifier: Apache-2.0
 #
 #  Licensed under the Apache License, Version 2.0 (the "License");
@@ -36,10 +36,24 @@ USER_OPTION(${use_case}_CHANNELS_IMAGE_DISPLAYED "Channels' image displayed on t
     BOOL)
 
 # Generate input files
-generate_images_code("${${use_case}_FILE_PATH}"
-                     ${SRC_GEN_DIR}
-                     ${INC_GEN_DIR}
-                     "${${use_case}_IMAGE_SIZE}")
+# GENERATE_FILE_PATHS is passed only when VSI_IMAGE_INPUT is set; otherwise
+# the variable below is empty and expands to no argument at all.
+if (VSI_IMAGE_INPUT)
+    set(${use_case}_COMPILE_DEFS "VSI_IMAGE_INPUT")
+    set(${use_case}_IMG_GEN_EXTRA_ARGS GENERATE_FILE_PATHS)
+else ()
+    set(${use_case}_IMG_GEN_EXTRA_ARGS "")
+endif ()
+
+# Single call shared by both configurations, so the common arguments cannot
+# drift apart between the two builds.
+generate_images_code(
+    INPUT_DIR "${${use_case}_FILE_PATH}"
+    SRC_OUT ${SRC_GEN_DIR}
+    HDR_OUT ${INC_GEN_DIR}
+    IMG_SIZE "${${use_case}_IMAGE_SIZE}"
+    ${${use_case}_IMG_GEN_EXTRA_ARGS}
+)
 
 USER_OPTION(${use_case}_ACTIVATION_BUF_SZ "Activation buffer size for the chosen model"
     0x00082000
diff --git a/source/use_case/vww/usecase.cmake b/source/use_case/vww/usecase.cmake
index 7a4d876..99580cc 100644
--- a/source/use_case/vww/usecase.cmake
+++ b/source/use_case/vww/usecase.cmake
@@ -1,5 +1,5 @@
 #----------------------------------------------------------------------------
-#  SPDX-FileCopyrightText: Copyright 2021 Arm Limited and/or its affiliates <open-source-office@arm.com>
+#  SPDX-FileCopyrightText: Copyright 2021, 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
 #  SPDX-License-Identifier: Apache-2.0
 #
 #  Licensed under the Apache License, Version 2.0 (the "License");
@@ -58,7 +58,9 @@ generate_labels_code(
 )
 
 # Generate input files
-generate_images_code("${${use_case}_FILE_PATH}"
-                     ${SRC_GEN_DIR}
-                     ${INC_GEN_DIR}
-                     "${${use_case}_IMAGE_SIZE}")
+generate_images_code(
+    INPUT_DIR       "${${use_case}_FILE_PATH}"
+    SRC_OUT         ${SRC_GEN_DIR}
+    HDR_OUT         ${INC_GEN_DIR}
+    IMG_SIZE        "${${use_case}_IMAGE_SIZE}"
+)