From cefc7e1cacdd3028b46325b3a1f6c15416914b2f Mon Sep 17 00:00:00 2001
From: Richard Burton
Date: Wed, 6 Dec 2023 17:13:10 +0000
Subject: MLECO-4503: Adding video VSI for object detection

* Added board support - Arm Corstone-300 and Corstone-310
* Added Python scripts for video VSI
* Added source files for video VSI
* Added new use case handler for OD use case
* Bumped resampy version to resolve issue with slowdown

Signed-off-by: Idriss Chaouch
Signed-off-by: Richard Burton
Change-Id: Ie59ae955d4d85f672a49c63733052624542aec85
---
 .../object_detection/src/UseCaseHandler.cc | 231 ++++++++++++++++++++-
 1 file changed, 230 insertions(+), 1 deletion(-)

diff --git a/source/use_case/object_detection/src/UseCaseHandler.cc b/source/use_case/object_detection/src/UseCaseHandler.cc
index 9330187..1a20db5 100644
--- a/source/use_case/object_detection/src/UseCaseHandler.cc
+++ b/source/use_case/object_detection/src/UseCaseHandler.cc
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright 2022 Arm Limited and/or its affiliates
+ * SPDX-FileCopyrightText: Copyright 2022, 2024 Arm Limited and/or its affiliates
  * SPDX-License-Identifier: Apache-2.0
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
@@ -28,6 +28,235 @@
 namespace arm {
 namespace app {
 
+#if VSI_ENABLED
+#include "video_drv.h" /* Video Driver API. */
+
+    /**
+     * @brief       Draws a box in the image using the object detection result object.
+     *
+     * @param[out]  imageData   Pointer to the start of the image.
+     * @param[in]   width       Image width.
+     * @param[in]   height      Image height.
+     * @param[in]   result      Object detection result.
+     */
+    static void DrawBox(uint8_t* imageData,
+                        const uint32_t width,
+                        const uint32_t height,
+                        const OdResults& result)
+    {
+        UNUSED(height);
+        const auto x = result.m_x0;
+        const auto y = result.m_y0;
+        const auto w = result.m_w;
+        const auto h = result.m_h;
+
+        const uint32_t step = width * 3;
+        uint8_t* const imStart = imageData + (y * step) + (x * 3);
+
+        uint8_t* dst_0 = imStart;
+        uint8_t* dst_1 = imStart + (h * step);
+
+        /* Draw the top and bottom edges of the box. */
+        for (uint32_t i = 0; i < static_cast<uint32_t>(w); ++i) {
+            *dst_0 = 255;
+            *dst_1 = 255;
+
+            dst_0 += 3;
+            dst_1 += 3;
+        }
+
+        dst_0 = imStart;
+        dst_1 = imStart + (w * 3);
+
+        /* Draw the left and right edges of the box. */
+        for (uint32_t j = 0; j < static_cast<uint32_t>(h); ++j) {
+            *dst_0 = 255;
+            *dst_1 = 255;
+
+            dst_0 += step;
+            dst_1 += step;
+        }
+    }
+
+    void DrawDetectionBoxesVsi(uint8_t* image,
+                               const uint32_t imageWidth,
+                               const uint32_t imageHeight,
+                               const std::vector<OdResults>& results)
+    {
+        for (const auto& result : results) {
+            DrawBox(image, imageWidth, imageHeight, result);
+            printf("Detection :: [%d, %d, %d, %d]\n",
+                   result.m_x0,
+                   result.m_y0,
+                   result.m_w,
+                   result.m_h);
+        }
+    }
+
+    /* Object detection VSI inference handler. */
+    bool ObjectDetectionHandlerVsi(ApplicationContext& ctx)
+    {
+        /* Image buffers. */
+        static uint8_t ImageBuf[IMAGE_DATA_SIZE];
+        static uint8_t ImageOut[IMAGE_DATA_SIZE];
+
+        /* Get the model from the application context. */
+        auto& model = ctx.Get<Model&>("model");
+
+        TfLiteTensor* inputTensor   = model.GetInputTensor(0);
+        TfLiteTensor* outputTensor0 = model.GetOutputTensor(0);
+        TfLiteTensor* outputTensor1 = model.GetOutputTensor(1);
+
+        if (!inputTensor->dims) {
+            printf_err("Invalid input tensor dims\n");
+            return false;
+        } else if (inputTensor->dims->size < 3) {
+            printf_err("Input tensor dimension should be >= 3\n");
+            return false;
+        }
+
+        TfLiteIntArray* inputShape = model.GetInputShape(0);
+        const int inputImgCols = inputShape->data[arm::app::YoloFastestModel::ms_inputColsIdx];
+        const int inputImgRows = inputShape->data[arm::app::YoloFastestModel::ms_inputRowsIdx];
+
+        /* Set up pre- and post-processing. */
+        arm::app::DetectorPreProcess preProcess =
+            arm::app::DetectorPreProcess(inputTensor, true, model.IsDataSigned());
+
+        std::vector<OdResults> results;
+        const arm::app::object_detection::PostProcessParams postProcessParams{
+            inputImgRows,
+            inputImgCols,
+            arm::app::object_detection::originalImageSize,
+            arm::app::object_detection::anchor1,
+            arm::app::object_detection::anchor2};
+        arm::app::DetectorPostProcess postProcess =
+            arm::app::DetectorPostProcess(outputTensor0, outputTensor1, results, postProcessParams);
+
+        const size_t imgSz = inputTensor->bytes < IMAGE_DATA_SIZE ?
+                             inputTensor->bytes : IMAGE_DATA_SIZE;
+
+        if (sizeof(ImageBuf) < imgSz) {
+            printf_err("Image buffer is insufficient\n");
+            return false;
+        }
+
+        /* Configure Input Video. */
+        if (VideoDrv_Configure(VIDEO_DRV_IN0,
+                               arm::app::object_detection::originalImageSize,
+                               arm::app::object_detection::originalImageSize,
+                               COLOR_RGB888, 24U) != VIDEO_DRV_OK) {
+            printf_err("Failed to configure video input\n");
+            return false;
+        }
+
+        /* Set Input Video buffer. */
+        if (VideoDrv_SetBuf(VIDEO_DRV_IN0, ImageBuf, IMAGE_DATA_SIZE) != VIDEO_DRV_OK) {
+            printf_err("Failed to set buffer for video input\n");
+            return false;
+        }
+
+        /* Set Output Video file (only when using AVH - default: Display) */
+        // if (VideoDrv_SetFile(VIDEO_DRV_OUT0, "output_image.png") != VIDEO_DRV_OK) {
+        //     printf_err("Failed to set filename for video output\n");
+        //     return 1;
+        // }
+
+        /* Configure Output Video. */
+        if (VideoDrv_Configure(VIDEO_DRV_OUT0,
+                               arm::app::object_detection::originalImageSize,
+                               arm::app::object_detection::originalImageSize,
+                               COLOR_RGB888, 24U) != VIDEO_DRV_OK) {
+            printf_err("Failed to configure video output\n");
+            return false;
+        }
+
+        /* Set Output Video buffer. */
+        if (VideoDrv_SetBuf(VIDEO_DRV_OUT0, ImageOut, IMAGE_DATA_SIZE) != VIDEO_DRV_OK) {
+            printf_err("Failed to set buffer for video output\n");
+            return false;
+        }
+
+        auto imgCount = ctx.Get<uint32_t>("imgIndex");
+        void* imgFrame = nullptr;
+        void* outFrame = nullptr;
+
+        while (true) {
+#if VSI_IMAGE_INPUT
+            if (VideoDrv_SetFile(VIDEO_DRV_IN0, GetFilePath(imgCount)) != VIDEO_DRV_OK) {
+                printf_err("Failed to set filename for video input\n");
+                return false;
+            }
+#endif
+
+            VideoDrv_Status_t status;
+
+            results.clear();
+
+            /* Start video capture (single frame). */
+            if (VideoDrv_StreamStart(VIDEO_DRV_IN0, VIDEO_DRV_MODE_SINGLE) != VIDEO_DRV_OK) {
+                printf_err("Failed to start video capture\n");
+                return false;
+            }
+
+            /* Wait for video input frame. */
+            do {
+                status = VideoDrv_GetStatus(VIDEO_DRV_IN0);
+            } while (status.buf_empty != 0U);
+
+            /* Get input video frame buffer. */
+            imgFrame = VideoDrv_GetFrameBuf(VIDEO_DRV_IN0);
+
+            /* Run the pre-processing, inference and post-processing. */
+            if (!preProcess.DoPreProcess(imgFrame, imgSz)) {
+                printf_err("Pre-processing failed.\n");
+                return false;
+            }
+
+            /* Run inference over this image. */
+            printf("\rImage %" PRIu32 "; ", ++imgCount);
+
+            if (!model.RunInference()) {
+                printf_err("Inference failed.\n");
+                return false;
+            }
+
+            if (!postProcess.DoPostProcess()) {
+                printf_err("Post-processing failed.\n");
+                return false;
+            }
+
+            /* Release input frame. */
+            VideoDrv_ReleaseFrame(VIDEO_DRV_IN0);
+
+            arm::app::DrawDetectionBoxesVsi(static_cast<uint8_t*>(imgFrame), inputImgCols, inputImgRows, results);
+
+            /* Get output video frame buffer. */
+            outFrame = VideoDrv_GetFrameBuf(VIDEO_DRV_OUT0);
+
+            /* Copy image frame with detection boxes to output frame buffer. */
+            memcpy(outFrame, imgFrame, IMAGE_DATA_SIZE);
+
+            /* Release output frame. */
+            VideoDrv_ReleaseFrame(VIDEO_DRV_OUT0);
+
+            /* Start video output (single frame). */
+            VideoDrv_StreamStart(VIDEO_DRV_OUT0, VIDEO_DRV_MODE_SINGLE);
+
+            /* Check for end of stream (when using AVH with file as Video input). */
+            if (status.eos != 0U) {
+                /* Wait for the output buffer to drain before exiting. */
+                while (VideoDrv_GetStatus(VIDEO_DRV_OUT0).buf_empty == 0U);
+                break;
+            }
+        }
+
+        IncrementAppCtxIfmIdx(ctx, "imgIndex");
+
+        /* De-initialize Video Interface. */
+        //VideoDrv_Uninitialize();
+        return true;
+    }
+
+#endif
+
     /**
      * @brief Presents inference results along using the data presentation
      *        object.
--
cgit v1.2.1
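
The handler added above is self-contained: it configures the VSI driver once, then loops capture -> pre-process -> inference -> post-process -> draw -> output until end of stream. A minimal sketch of how a use-case main loop might select it at build time follows. This is illustrative only: the main_loop wrapper, its wiring, and the exact signature assumed for the pre-existing ObjectDetectionHandler entry point are assumptions, not part of this patch.

    void main_loop()
    {
        arm::app::ApplicationContext caseContext;
        /* ... platform, model and context initialisation elided (sketch only) ... */

    #if VSI_ENABLED
        /* Stream frames through the Video Source Interface. */
        if (!arm::app::ObjectDetectionHandlerVsi(caseContext)) {
            printf_err("VSI object detection handler failed\n");
        }
    #else
        /* Fall back to the baked-in test images (assumed existing handler). */
        arm::app::ObjectDetectionHandler(caseContext, 0 /* imgIndex */, true /* run all */);
    #endif
    }

Note that ObjectDetectionHandlerVsi drives its own frame loop internally, so unlike the static-image path it is invoked once rather than once per image.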