/*
 * SPDX-FileCopyrightText: Copyright 2022, 2024 Arm Limited and/or its affiliates
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "UseCaseHandler.hpp"

#include "DetectorPostProcessing.hpp"
#include "DetectorPreProcessing.hpp"
#include "InputFiles.hpp"
#include "UseCaseCommonUtils.hpp"
#include "YoloFastestModel.hpp"
#include "hal.h"
#include "log_macros.h"

#include <cinttypes>
#include <cstring>
#include <string>
#include <vector>

namespace arm {
namespace app {

#if VSI_ENABLED
#include "video_drv.h" /* Video Driver API. */

/**
 * @brief       Draws a box in the image using the object detection result object.
 *
 * @param[out]  imageData   Pointer to the start of the image.
 * @param[in]   width       Image width.
 * @param[in]   height      Image height.
 * @param[in]   result      Object detection result.
 */
static void DrawBox(uint8_t* imageData,
                    const uint32_t width,
                    const uint32_t height,
                    const OdResults& result)
{
    UNUSED(height);
    const auto x = result.m_x0;
    const auto y = result.m_y0;
    const auto w = result.m_w;
    const auto h = result.m_h;

    /* Each RGB888 pixel is 3 bytes wide, so one image row spans width * 3 bytes. */
    const uint32_t step    = width * 3;
    uint8_t* const imStart = imageData + (y * step) + (x * 3);

    uint8_t* dst_0 = imStart;
    uint8_t* dst_1 = imStart + (h * step);

    /* Top and bottom edges: saturate the red channel along the box width. */
    for (uint32_t i = 0; i < static_cast<uint32_t>(w); ++i) {
        *dst_0 = 255;
        *dst_1 = 255;
        dst_0 += 3;
        dst_1 += 3;
    }

    dst_0 = imStart;
    dst_1 = imStart + (w * 3);

    /* Left and right edges: step one row at a time down the box height. */
    for (uint32_t j = 0; j < static_cast<uint32_t>(h); ++j) {
        *dst_0 = 255;
        *dst_1 = 255;
        dst_0 += step;
        dst_1 += step;
    }
}

void DrawDetectionBoxesVsi(uint8_t* image,
                           const uint32_t imageWidth,
                           const uint32_t imageHeight,
                           const std::vector<OdResults>& results)
{
    for (const auto& result : results) {
        DrawBox(image, imageWidth, imageHeight, result);
        printf("Detection :: [%d, %d, %d, %d]\n",
               result.m_x0,
               result.m_y0,
               result.m_w,
               result.m_h);
    }
}
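
/* The addressing above assumes a tightly packed RGB888 frame. As a quick
 * illustration of the arithmetic DrawBox relies on, the sketch below (not
 * part of the build; PixelAt is a hypothetical helper) computes the byte
 * address of a pixel's red channel:
 */
#if 0
static inline uint8_t* PixelAt(uint8_t* imageData, uint32_t width, uint32_t x, uint32_t y)
{
    /* One pixel = 3 bytes (R, G, B); one row = width * 3 bytes. */
    return imageData + (y * width * 3) + (x * 3);
}
/* E.g. on a 192 x 192 frame, pixel (10, 20) starts at 20 * 576 + 30 = 11550.
 * DrawBox writes 255 to such red bytes along the four edges of the box. */
#endif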
/* Object detection VSI inference handler. */
bool ObjectDetectionHandlerVsi(ApplicationContext& ctx)
{
    /* Image buffers. */
    static uint8_t ImageBuf[IMAGE_DATA_SIZE];
    static uint8_t ImageOut[IMAGE_DATA_SIZE];

    /* Model object creation and initialisation. */
    auto& model = ctx.Get<Model&>("model");

    TfLiteTensor* inputTensor   = model.GetInputTensor(0);
    TfLiteTensor* outputTensor0 = model.GetOutputTensor(0);
    TfLiteTensor* outputTensor1 = model.GetOutputTensor(1);

    if (!inputTensor->dims) {
        printf_err("Invalid input tensor dims\n");
        return false;
    } else if (inputTensor->dims->size < 3) {
        printf_err("Input tensor dimension should be >= 3\n");
        return false;
    }

    TfLiteIntArray* inputShape = model.GetInputShape(0);

    const int inputImgCols = inputShape->data[arm::app::YoloFastestModel::ms_inputColsIdx];
    const int inputImgRows = inputShape->data[arm::app::YoloFastestModel::ms_inputRowsIdx];

    /* Set up pre- and post-processing. */
    arm::app::DetectorPreProcess preProcess =
        arm::app::DetectorPreProcess(inputTensor, true, model.IsDataSigned());

    std::vector<OdResults> results;
    const arm::app::object_detection::PostProcessParams postProcessParams{
        inputImgRows,
        inputImgCols,
        arm::app::object_detection::originalImageSize,
        arm::app::object_detection::anchor1,
        arm::app::object_detection::anchor2};
    arm::app::DetectorPostProcess postProcess =
        arm::app::DetectorPostProcess(outputTensor0, outputTensor1, results, postProcessParams);

    const size_t imgSz =
        inputTensor->bytes < IMAGE_DATA_SIZE ? inputTensor->bytes : IMAGE_DATA_SIZE;

    if (sizeof(ImageBuf) < imgSz) {
        printf_err("Image buffer is insufficient\n");
        return false;
    }

    /* Configure Input Video. */
    if (VideoDrv_Configure(VIDEO_DRV_IN0,
                           arm::app::object_detection::originalImageSize,
                           arm::app::object_detection::originalImageSize,
                           COLOR_RGB888,
                           24U) != VIDEO_DRV_OK) {
        printf_err("Failed to configure video input\n");
        return false;
    }

    /* Set Input Video buffer. */
    if (VideoDrv_SetBuf(VIDEO_DRV_IN0, ImageBuf, IMAGE_DATA_SIZE) != VIDEO_DRV_OK) {
        printf_err("Failed to set buffer for video input\n");
        return false;
    }

    /* Set Output Video file (only when using AVH - default: Display). */
    // if (VideoDrv_SetFile(VIDEO_DRV_OUT0, "output_image.png") != VIDEO_DRV_OK) {
    //     printf_err("Failed to set filename for video output\n");
    //     return 1;
    // }

    /* Configure Output Video. */
    if (VideoDrv_Configure(VIDEO_DRV_OUT0,
                           arm::app::object_detection::originalImageSize,
                           arm::app::object_detection::originalImageSize,
                           COLOR_RGB888,
                           24U) != VIDEO_DRV_OK) {
        printf_err("Failed to configure video output\n");
        return false;
    }

    /* Set Output Video buffer. */
    if (VideoDrv_SetBuf(VIDEO_DRV_OUT0, ImageOut, IMAGE_DATA_SIZE) != VIDEO_DRV_OK) {
        printf_err("Failed to set buffer for video output\n");
        return false;
    }

    auto imgCount  = ctx.Get<uint32_t>("imgIndex");
    void* imgFrame = nullptr;
    void* outFrame = nullptr;

    while (true) {
#if VSI_IMAGE_INPUT
        if (VideoDrv_SetFile(VIDEO_DRV_IN0, GetFilePath(imgCount)) != VIDEO_DRV_OK) {
            printf_err("Failed to set filename for video input\n");
            return false;
        }
#endif
        VideoDrv_Status_t status;

        results.clear();

        /* Start video capture (single frame). */
        if (VideoDrv_StreamStart(VIDEO_DRV_IN0, VIDEO_DRV_MODE_SINGLE) != VIDEO_DRV_OK) {
            printf_err("Failed to start video capture\n");
            return false;
        }

        /* Wait for video input frame. */
        do {
            status = VideoDrv_GetStatus(VIDEO_DRV_IN0);
        } while (status.buf_empty != 0U);

        /* Get input video frame buffer. */
        imgFrame = VideoDrv_GetFrameBuf(VIDEO_DRV_IN0);

        /* Run the pre-processing, inference and post-processing. */
        if (!preProcess.DoPreProcess(imgFrame, imgSz)) {
            printf_err("Pre-processing failed.\n");
            return false;
        }

        /* Run inference over this image. */
        printf("\rImage %" PRIu32 "; ", ++imgCount);

        if (!model.RunInference()) {
            printf_err("Inference failed.\n");
            return false;
        }

        if (!postProcess.DoPostProcess()) {
            printf_err("Post-processing failed.\n");
            return false;
        }

        /* Release input frame. */
        VideoDrv_ReleaseFrame(VIDEO_DRV_IN0);

        arm::app::DrawDetectionBoxesVsi(
            static_cast<uint8_t*>(imgFrame), inputImgCols, inputImgRows, results);

        /* Get output video frame buffer. */
        outFrame = VideoDrv_GetFrameBuf(VIDEO_DRV_OUT0);

        /* Copy image frame with detection boxes to output frame buffer. */
        memcpy(outFrame, imgFrame, IMAGE_DATA_SIZE);

        /* Release output frame. */
        VideoDrv_ReleaseFrame(VIDEO_DRV_OUT0);

        /* Start video output (single frame). */
        VideoDrv_StreamStart(VIDEO_DRV_OUT0, VIDEO_DRV_MODE_SINGLE);

        /* Check for end of stream (when using AVH with file as Video input). */
        if (status.eos != 0U) {
            /* Drain the output buffer before stopping. */
            while (VideoDrv_GetStatus(VIDEO_DRV_OUT0).buf_empty == 0U) {
            }
            break;
        }
    }

    IncrementAppCtxIfmIdx(ctx, "imgIndex");

    /* De-initialize Video Interface. */
    // VideoDrv_Uninitialize();

    return true;
}
#endif /* VSI_ENABLED */
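
/* For reference, the single-frame capture handshake used by the VSI handler
 * above can be condensed as follows (a sketch under the same video_drv.h API,
 * not compiled into the build; CaptureSingleFrame is a hypothetical helper):
 */
#if 0
static void* CaptureSingleFrame(void)
{
    /* Kick off a one-shot capture and poll until the driver fills the buffer. */
    if (VideoDrv_StreamStart(VIDEO_DRV_IN0, VIDEO_DRV_MODE_SINGLE) != VIDEO_DRV_OK) {
        return nullptr;
    }
    while (VideoDrv_GetStatus(VIDEO_DRV_IN0).buf_empty != 0U) {
        /* Busy-wait; the VSI peripheral signals completion via the status flags. */
    }
    /* The caller must call VideoDrv_ReleaseFrame(VIDEO_DRV_IN0) once done. */
    return VideoDrv_GetFrameBuf(VIDEO_DRV_IN0);
}
#endif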
/**
 * @brief           Presents inference results using the data presentation
 *                  object.
 * @param[in]       results     Vector of detection results to be displayed.
 * @return          true if successful, false otherwise.
 **/
static bool PresentInferenceResult(const std::vector<OdResults>& results);

/**
 * @brief           Draw boxes directly on the LCD for all detected objects.
 * @param[in]       results             Vector of detection results to be displayed.
 * @param[in]       imgStartX           X coordinate where the image starts on the LCD.
 * @param[in]       imgStartY           Y coordinate where the image starts on the LCD.
 * @param[in]       imgDownscaleFactor  How much the image has been downscaled on the LCD.
 **/
static void DrawDetectionBoxes(const std::vector<OdResults>& results,
                               uint32_t imgStartX,
                               uint32_t imgStartY,
                               uint32_t imgDownscaleFactor);

/* Object detection inference handler. */
bool ObjectDetectionHandler(ApplicationContext& ctx, uint32_t imgIndex, bool runAll)
{
    auto& profiler = ctx.Get<Profiler&>("profiler");

    constexpr uint32_t dataPsnImgDownscaleFactor = 1;
    constexpr uint32_t dataPsnImgStartX          = 10;
    constexpr uint32_t dataPsnImgStartY          = 35;

    constexpr uint32_t dataPsnTxtInfStartX = 20;
    constexpr uint32_t dataPsnTxtInfStartY = 28;

    hal_lcd_clear(COLOR_BLACK);

    auto& model = ctx.Get<Model&>("model");

    /* If the request has a valid size, set the image index. */
    if (imgIndex < NUMBER_OF_FILES) {
        if (!SetAppCtxIfmIdx(ctx, imgIndex, "imgIndex")) {
            return false;
        }
    }
    if (!model.IsInited()) {
        printf_err("Model is not initialised! Terminating processing.\n");
        return false;
    }

    auto initialImgIdx = ctx.Get<uint32_t>("imgIndex");

    TfLiteTensor* inputTensor   = model.GetInputTensor(0);
    TfLiteTensor* outputTensor0 = model.GetOutputTensor(0);
    TfLiteTensor* outputTensor1 = model.GetOutputTensor(1);

    if (!inputTensor->dims) {
        printf_err("Invalid input tensor dims\n");
        return false;
    } else if (inputTensor->dims->size < 3) {
        printf_err("Input tensor dimension should be >= 3\n");
        return false;
    }

    TfLiteIntArray* inputShape = model.GetInputShape(0);

    const int inputImgCols = inputShape->data[YoloFastestModel::ms_inputColsIdx];
    const int inputImgRows = inputShape->data[YoloFastestModel::ms_inputRowsIdx];

    /* Set up pre- and post-processing. */
    DetectorPreProcess preProcess = DetectorPreProcess(inputTensor, true, model.IsDataSigned());

    std::vector<OdResults> results;
    const object_detection::PostProcessParams postProcessParams{
        inputImgRows,
        inputImgCols,
        object_detection::originalImageSize,
        object_detection::anchor1,
        object_detection::anchor2};
    DetectorPostProcess postProcess =
        DetectorPostProcess(outputTensor0, outputTensor1, results, postProcessParams);

    do {
        /* Ensure there are no results leftover from previous inference when running all. */
        results.clear();

        /* Strings for presentation/logging. */
        std::string str_inf{"Running inference... "};

        const uint8_t* currImage = GetImgArray(ctx.Get<uint32_t>("imgIndex"));

        auto dstPtr = static_cast<uint8_t*>(inputTensor->data.uint8);
        const size_t copySz =
            inputTensor->bytes < IMAGE_DATA_SIZE ? inputTensor->bytes : IMAGE_DATA_SIZE;

        /* Run the pre-processing, inference and post-processing. */
        if (!preProcess.DoPreProcess(currImage, copySz)) {
            printf_err("Pre-processing failed.\n");
            return false;
        }

        /* Display image on the LCD. */
        hal_lcd_display_image(
            (arm::app::object_detection::channelsImageDisplayed == 3) ? currImage : dstPtr,
            inputImgCols,
            inputImgRows,
            arm::app::object_detection::channelsImageDisplayed,
            dataPsnImgStartX,
            dataPsnImgStartY,
            dataPsnImgDownscaleFactor);

        /* Display message on the LCD - inference running. */
        hal_lcd_display_text(
            str_inf.c_str(), str_inf.size(), dataPsnTxtInfStartX, dataPsnTxtInfStartY, false);

        /* Run inference over this image. */
        info("Running inference on image %" PRIu32 " => %s\n",
             ctx.Get<uint32_t>("imgIndex"),
             GetFilename(ctx.Get<uint32_t>("imgIndex")));

        if (!RunInference(model, profiler)) {
            printf_err("Inference failed.\n");
            return false;
        }

        if (!postProcess.DoPostProcess()) {
            printf_err("Post-processing failed.\n");
            return false;
        }

        /* Erase. */
        str_inf = std::string(str_inf.size(), ' ');
        hal_lcd_display_text(
            str_inf.c_str(), str_inf.size(), dataPsnTxtInfStartX, dataPsnTxtInfStartY, false);

        /* Draw boxes. */
        DrawDetectionBoxes(results, dataPsnImgStartX, dataPsnImgStartY, dataPsnImgDownscaleFactor);

#if VERIFY_TEST_OUTPUT
        DumpTensor(outputTensor0);
        DumpTensor(outputTensor1);
#endif /* VERIFY_TEST_OUTPUT */

        if (!PresentInferenceResult(results)) {
            return false;
        }

        profiler.PrintProfilingResult();

        IncrementAppCtxIfmIdx(ctx, "imgIndex");

    } while (runAll && ctx.Get<uint32_t>("imgIndex") != initialImgIdx);

    return true;
}
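
/* The runAll loop above terminates because IncrementAppCtxIfmIdx (from
 * UseCaseCommonUtils) advances "imgIndex" with wrap-around, eventually
 * returning to initialImgIdx. A minimal sketch of that behaviour
 * (illustrative only; the real implementation lives in UseCaseCommonUtils):
 */
#if 0
static void IncrementWithWrap(uint32_t& idx)
{
    idx = (idx + 1) % NUMBER_OF_FILES; /* 0 -> 1 -> ... -> N-1 -> 0 */
}
#endif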
static bool PresentInferenceResult(const std::vector<OdResults>& results)
{
    hal_lcd_set_text_color(COLOR_GREEN);

    info("Final results:\n");
    info("Total number of inferences: 1\n");

    for (uint32_t i = 0; i < results.size(); ++i) {
        info("%" PRIu32 ") (%f) -> %s {x=%d,y=%d,w=%d,h=%d}\n",
             i,
             results[i].m_normalisedVal,
             "Detection box:",
             results[i].m_x0,
             results[i].m_y0,
             results[i].m_w,
             results[i].m_h);
    }

    return true;
}

static void DrawDetectionBoxes(const std::vector<OdResults>& results,
                               uint32_t imgStartX,
                               uint32_t imgStartY,
                               uint32_t imgDownscaleFactor)
{
    uint32_t lineThickness = 1;

    for (const auto& result : results) {
        /* Top line. */
        hal_lcd_display_box(imgStartX + result.m_x0 / imgDownscaleFactor,
                            imgStartY + result.m_y0 / imgDownscaleFactor,
                            result.m_w / imgDownscaleFactor,
                            lineThickness,
                            COLOR_GREEN);
        /* Bottom line. */
        hal_lcd_display_box(imgStartX + result.m_x0 / imgDownscaleFactor,
                            imgStartY + (result.m_y0 + result.m_h) / imgDownscaleFactor -
                                lineThickness,
                            result.m_w / imgDownscaleFactor,
                            lineThickness,
                            COLOR_GREEN);
        /* Left line. */
        hal_lcd_display_box(imgStartX + result.m_x0 / imgDownscaleFactor,
                            imgStartY + result.m_y0 / imgDownscaleFactor,
                            lineThickness,
                            result.m_h / imgDownscaleFactor,
                            COLOR_GREEN);
        /* Right line. */
        hal_lcd_display_box(imgStartX + (result.m_x0 + result.m_w) / imgDownscaleFactor -
                                lineThickness,
                            imgStartY + result.m_y0 / imgDownscaleFactor,
                            lineThickness,
                            result.m_h / imgDownscaleFactor,
                            COLOR_GREEN);
    }
}

} /* namespace app */
} /* namespace arm */
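
/* Typical wiring for ObjectDetectionHandler, shown as a hedged sketch: the
 * exact model initialisation arguments and the Profiler constructor depend on
 * the application's MainLoop and platform glue, so treat every call below as
 * an assumption rather than the canonical sequence.
 */
#if 0
void MainLoopSketch()
{
    arm::app::YoloFastestModel model; /* Model wrapper from YoloFastestModel.hpp. */
    /* model.Init(...) must be called here with the tensor arena and model data. */

    arm::app::Profiler profiler{"object_detection"}; /* Assumed constructor. */

    arm::app::ApplicationContext ctx;
    ctx.Set<arm::app::Profiler&>("profiler", profiler);
    ctx.Set<arm::app::Model&>("model", model);
    ctx.Set<uint32_t>("imgIndex", 0);

    /* Run inference over every bundled image, starting at index 0. */
    arm::app::ObjectDetectionHandler(ctx, 0, true);
}
#endif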