From cefc7e1cacdd3028b46325b3a1f6c15416914b2f Mon Sep 17 00:00:00 2001
From: Richard Burton
Date: Wed, 6 Dec 2023 17:13:10 +0000
Subject: MLECO-4503: Adding video VSI for object detection

* Added board support - Arm Corstone-300 and Corstone-310
* Added Python scripts for video VSI
* Added source files for video VSI
* Added new use case handler for OD use case
* Bumped resampy version to resolve issue with slowdown

Signed-off-by: Idriss Chaouch
Signed-off-by: Richard Burton
Change-Id: Ie59ae955d4d85f672a49c63733052624542aec85
---
 .../object_detection/src/UseCaseHandler.cc | 231 ++++++++++++++++++++-
 1 file changed, 230 insertions(+), 1 deletion(-)

diff --git a/source/use_case/object_detection/src/UseCaseHandler.cc b/source/use_case/object_detection/src/UseCaseHandler.cc
index 9330187..1a20db5 100644
--- a/source/use_case/object_detection/src/UseCaseHandler.cc
+++ b/source/use_case/object_detection/src/UseCaseHandler.cc
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright 2022 Arm Limited and/or its affiliates
+ * SPDX-FileCopyrightText: Copyright 2022, 2024 Arm Limited and/or its affiliates
  * SPDX-License-Identifier: Apache-2.0
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
@@ -28,6 +28,235 @@
 namespace arm {
 namespace app {
 
+#if VSI_ENABLED
+#include "video_drv.h" /* Video Driver API. */
+
+    /**
+     * @brief       Draws a box in the image using the object detection result object.
+     *
+     * @param[out]  imageData   Pointer to the start of the image.
+     * @param[in]   width       Image width.
+     * @param[in]   height      Image height.
+     * @param[in]   result      Object detection result.
+     */
+    static void DrawBox(uint8_t* imageData,
+                        const uint32_t width,
+                        const uint32_t height,
+                        const OdResults& result)
+    {
+        UNUSED(height);
+        const auto x = result.m_x0;
+        const auto y = result.m_y0;
+        const auto w = result.m_w;
+        const auto h = result.m_h;
+
+        const uint32_t step = width * 3;
+        uint8_t* const imStart = imageData + (y * step) + (x * 3);
+
+        uint8_t* dst_0 = imStart;
+        uint8_t* dst_1 = imStart + (h * step);
+
+        /* Draw the top and bottom edges of the box. */
+        for (uint32_t i = 0; i < static_cast<uint32_t>(w); ++i) {
+            *dst_0 = 255;
+            *dst_1 = 255;
+
+            dst_0 += 3;
+            dst_1 += 3;
+        }
+
+        dst_0 = imStart;
+        dst_1 = imStart + (w * 3);
+
+        /* Draw the left and right edges of the box. */
+        for (uint32_t j = 0; j < static_cast<uint32_t>(h); ++j) {
+            *dst_0 = 255;
+            *dst_1 = 255;
+
+            dst_0 += step;
+            dst_1 += step;
+        }
+    }
+
+    void DrawDetectionBoxesVsi(uint8_t* image,
+                               const uint32_t imageWidth,
+                               const uint32_t imageHeight,
+                               const std::vector<OdResults>& results)
+    {
+        for (const auto& result : results) {
+            DrawBox(image, imageWidth, imageHeight, result);
+            printf("Detection :: [%d, %d, %d, %d]\n",
+                   result.m_x0,
+                   result.m_y0,
+                   result.m_w,
+                   result.m_h);
+        }
+    }
+
+    /* Object detection VSI inference handler. */
+    bool ObjectDetectionHandlerVsi(ApplicationContext& ctx)
+    {
+        /* Image buffers. */
+        static uint8_t ImageBuf[IMAGE_DATA_SIZE];
+        static uint8_t ImageOut[IMAGE_DATA_SIZE];
+
+        /* Get the model from the application context. */
+        auto& model = ctx.Get<Model&>("model");
+
+        TfLiteTensor* inputTensor   = model.GetInputTensor(0);
+        TfLiteTensor* outputTensor0 = model.GetOutputTensor(0);
+        TfLiteTensor* outputTensor1 = model.GetOutputTensor(1);
+
+        if (!inputTensor->dims) {
+            printf_err("Invalid input tensor dims\n");
+            return false;
+        } else if (inputTensor->dims->size < 3) {
+            printf_err("Input tensor dimension should be >= 3\n");
+            return false;
+        }
+
+        TfLiteIntArray* inputShape = model.GetInputShape(0);
+        const int inputImgCols = inputShape->data[arm::app::YoloFastestModel::ms_inputColsIdx];
+        const int inputImgRows = inputShape->data[arm::app::YoloFastestModel::ms_inputRowsIdx];
+
+        /* Set up pre- and post-processing. */
+        arm::app::DetectorPreProcess preProcess =
+            arm::app::DetectorPreProcess(inputTensor, true, model.IsDataSigned());
+
+        std::vector<OdResults> results;
+        const arm::app::object_detection::PostProcessParams postProcessParams{
+            inputImgRows,
+            inputImgCols,
+            arm::app::object_detection::originalImageSize,
+            arm::app::object_detection::anchor1,
+            arm::app::object_detection::anchor2};
+        arm::app::DetectorPostProcess postProcess =
+            arm::app::DetectorPostProcess(outputTensor0, outputTensor1, results, postProcessParams);
+
+        const size_t imgSz = inputTensor->bytes < IMAGE_DATA_SIZE ?
+                             inputTensor->bytes : IMAGE_DATA_SIZE;
+
+        if (sizeof(ImageBuf) < imgSz) {
+            printf_err("Image buffer is insufficient\n");
+            return false;
+        }
+
+        /* Configure Input Video. */
+        if (VideoDrv_Configure(VIDEO_DRV_IN0,
+                               arm::app::object_detection::originalImageSize,
+                               arm::app::object_detection::originalImageSize,
+                               COLOR_RGB888, 24U) != VIDEO_DRV_OK) {
+            printf_err("Failed to configure video input\n");
+            return false;
+        }
+
+        /* Set Input Video buffer. */
+        if (VideoDrv_SetBuf(VIDEO_DRV_IN0, ImageBuf, IMAGE_DATA_SIZE) != VIDEO_DRV_OK) {
+            printf_err("Failed to set buffer for video input\n");
+            return false;
+        }
+
+        /* Set Output Video file (only when using AVH - default: Display) */
+        // if (VideoDrv_SetFile(VIDEO_DRV_OUT0, "output_image.png") != VIDEO_DRV_OK) {
+        //     printf_err("Failed to set filename for video output\n");
+        //     return 1;
+        // }
+
+        /* Configure Output Video. */
+        if (VideoDrv_Configure(VIDEO_DRV_OUT0,
+                               arm::app::object_detection::originalImageSize,
+                               arm::app::object_detection::originalImageSize,
+                               COLOR_RGB888, 24U) != VIDEO_DRV_OK) {
+            printf_err("Failed to configure video output\n");
+            return false;
+        }
+
+        /* Set Output Video buffer. */
+        if (VideoDrv_SetBuf(VIDEO_DRV_OUT0, ImageOut, IMAGE_DATA_SIZE) != VIDEO_DRV_OK) {
+            printf_err("Failed to set buffer for video output\n");
+            return false;
+        }
+
+        auto imgCount = ctx.Get<uint32_t>("imgIndex");
+        void* imgFrame = nullptr;
+        void* outFrame = nullptr;
+
+        while (true) {
+#if VSI_IMAGE_INPUT
+            if (VideoDrv_SetFile(VIDEO_DRV_IN0, GetFilePath(imgCount)) != VIDEO_DRV_OK) {
+                printf_err("Failed to set filename for video input\n");
+                return false;
+            }
+#endif
+
+            VideoDrv_Status_t status;
+
+            results.clear();
+
+            /* Start video capture (single frame). */
+            if (VideoDrv_StreamStart(VIDEO_DRV_IN0, VIDEO_DRV_MODE_SINGLE) != VIDEO_DRV_OK) {
+                printf_err("Failed to start video capture\n");
+                return false;
+            }
+
+            /* Wait for video input frame. */
+            do {
+                status = VideoDrv_GetStatus(VIDEO_DRV_IN0);
+            } while (status.buf_empty != 0U);
+
+            /* Get input video frame buffer. */
+            imgFrame = VideoDrv_GetFrameBuf(VIDEO_DRV_IN0);
+
+            /* Run the pre-processing, inference and post-processing. */
+            if (!preProcess.DoPreProcess(imgFrame, imgSz)) {
+                printf_err("Pre-processing failed.\n");
+                return false;
+            }
+
+            /* Run inference over this image. */
+            printf("\rImage %" PRIu32 "; ", ++imgCount);
+
+            if (!model.RunInference()) {
+                printf_err("Inference failed.\n");
+                return false;
+            }
+
+            if (!postProcess.DoPostProcess()) {
+                printf_err("Post-processing failed.\n");
+                return false;
+            }
+
+            /* Release input frame. */
+            VideoDrv_ReleaseFrame(VIDEO_DRV_IN0);
+
+            arm::app::DrawDetectionBoxesVsi(static_cast<uint8_t*>(imgFrame), inputImgCols, inputImgRows, results);
+
+            /* Get output video frame buffer. */
+            outFrame = VideoDrv_GetFrameBuf(VIDEO_DRV_OUT0);
+
+            /* Copy image frame with detection boxes to output frame buffer. */
+            memcpy(outFrame, imgFrame, IMAGE_DATA_SIZE);
+
+            /* Release output frame. */
+            VideoDrv_ReleaseFrame(VIDEO_DRV_OUT0);
+
+            /* Start video output (single frame). */
+            VideoDrv_StreamStart(VIDEO_DRV_OUT0, VIDEO_DRV_MODE_SINGLE);
+
+            /* Check for end of stream (when using AVH with file as Video input). */
+            if (status.eos != 0U) {
+                /* Wait for the output buffer to drain before exiting. */
+                while (VideoDrv_GetStatus(VIDEO_DRV_OUT0).buf_empty == 0U);
+                break;
+            }
+        }
+
+        IncrementAppCtxIfmIdx(ctx, "imgIndex");
+
+        /* De-initialize Video Interface. */
+        //VideoDrv_Uninitialize();
+        return true;
+    }
+
+#endif
+
     /**
      * @brief Presents inference results along using the data presentation
      *        object.
--
cgit v1.2.1
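
The handler added above is self-contained: it configures the VSI driver once, then loops capture -> pre-process -> inference -> post-process -> draw -> output until end of stream. A minimal sketch of how a use-case main loop might select it at build time follows. This is illustrative only: the main_loop wrapper, its wiring, and the exact signature assumed for the pre-existing ObjectDetectionHandler entry point are assumptions, not part of this patch.

    void main_loop()
    {
        arm::app::ApplicationContext caseContext;
        /* ... platform, model and context initialisation elided (sketch only) ... */

    #if VSI_ENABLED
        /* Stream frames through the Video Source Interface. */
        if (!arm::app::ObjectDetectionHandlerVsi(caseContext)) {
            printf_err("VSI object detection handler failed\n");
        }
    #else
        /* Fall back to the baked-in test images (assumed existing handler). */
        arm::app::ObjectDetectionHandler(caseContext, 0 /* imgIndex */, true /* run all */);
    #endif
    }

Note that ObjectDetectionHandlerVsi drives its own frame loop internally, so unlike the static-image path it is invoked once rather than once per image.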